From 02bbb30efea4980c9d133947cbbf69fb599071ad Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Mon, 25 Feb 2019 11:09:54 +0100 Subject: Support python2/python3 --- example/asm/shellcode.py | 59 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 20 deletions(-) (limited to 'example/asm/shellcode.py') diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 9be5b517..b14b7441 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -1,7 +1,9 @@ #! /usr/bin/env python2 +from __future__ import print_function from argparse import ArgumentParser from pdb import pm +from future.utils import viewitems from elfesteem import pe_init from elfesteem.strpatchwork import StrPatchwork @@ -9,6 +11,7 @@ from miasm2.core import parse_asm, asmblock from miasm2.analysis.machine import Machine from miasm2.core.interval import interval from miasm2.core.locationdb import LocationDB +from miasm2.core.utils import iterbytes, int_to_byte parser = ArgumentParser("Multi-arch (32 bits) assembler") parser.add_argument('architecture', help="architecture: " + @@ -41,8 +44,17 @@ if args.PE: pe = pe_init.PE(wsize=size) s_text = pe.SHList.add_section(name="text", addr=0x1000, rawsize=0x1000) s_iat = pe.SHList.add_section(name="iat", rawsize=0x100) - new_dll = [({"name": "USER32.dll", - "firstthunk": s_iat.addr}, ["MessageBoxA"])] + new_dll = [ + ( + { + "name": "USER32.dll", + "firstthunk": s_iat.addr + }, + [ + "MessageBoxA" + ] + ) + ] pe.DirImport.add_dlldesc(new_dll) s_myimp = pe.SHList.add_section(name="myimp", rawsize=len(pe.DirImport)) pe.DirImport.set_rva(s_myimp.addr) @@ -51,8 +63,11 @@ if args.PE: addr_main = pe.rva2virt(s_text.addr) virt = pe.virt output = pe - dst_interval = interval([(pe.rva2virt(s_text.addr), - pe.rva2virt(s_text.addr + s_text.size))]) + dst_interval = interval( + [ + (pe.rva2virt(s_text.addr), pe.rva2virt(s_text.addr + s_text.size)) + ] + ) else: st = StrPatchwork() @@ -74,20 +89,26 @@ asmcfg, loc_db = 
parse_asm.parse_txt(machine.mn, attrib, source, loc_db) loc_db.set_location_offset(loc_db.get_name_location("main"), addr_main) if args.PE: - loc_db.set_location_offset(loc_db.get_or_create_name_location("MessageBoxA"), - pe.DirImport.get_funcvirt('USER32.dll', - 'MessageBoxA')) + loc_db.set_location_offset( + loc_db.get_or_create_name_location("MessageBoxA"), + pe.DirImport.get_funcvirt( + 'USER32.dll', + 'MessageBoxA' + ) + ) # Print and graph firsts blocks before patching it for block in asmcfg.blocks: - print block + print(block) open("graph.dot", "w").write(asmcfg.dot()) # Apply patches -patches = asmblock.asm_resolve_final(machine.mn, - asmcfg, - loc_db, - dst_interval) +patches = asmblock.asm_resolve_final( + machine.mn, + asmcfg, + loc_db, + dst_interval +) if args.encrypt: # Encrypt code loc_start = loc_db.get_or_create_name_location(args.encrypt[0]) @@ -95,20 +116,18 @@ if args.encrypt: ad_start = loc_db.get_location_offset(loc_start) ad_stop = loc_db.get_location_offset(loc_stop) - new_patches = dict(patches) - for ad, val in patches.items(): + for ad, val in list(viewitems(patches)): if ad_start <= ad < ad_stop: - new_patches[ad] = "".join([chr(ord(x) ^ 0x42) for x in val]) - patches = new_patches + patches[ad] = b"".join(int_to_byte(ord(x) ^ 0x42) for x in iterbytes(val)) -print patches +print(patches) if isinstance(virt, StrPatchwork): - for offset, raw in patches.items(): + for offset, raw in viewitems(patches): virt[offset] = raw else: - for offset, raw in patches.items(): + for offset, raw in viewitems(patches): virt.set(offset, raw) # Produce output -open(args.output, 'wb').write(str(output)) +open(args.output, 'wb').write(bytes(output)) -- cgit 1.4.1 From 944806c506446c918eb74c17a605f5f56d4b75e0 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Wed, 27 Feb 2019 20:12:54 +0100 Subject: Rename miasm2 to miasm --- README.md | 22 +- example/asm/shellcode.py | 10 +- example/asm/simple.py | 4 +- example/disasm/callback.py | 6 +- 
example/disasm/dis_binary.py | 4 +- example/disasm/dis_binary_ir.py | 4 +- example/disasm/dis_binary_ira.py | 5 +- example/disasm/dis_x86_string.py | 4 +- example/disasm/full.py | 18 +- example/disasm/single_instr.py | 6 +- example/expression/access_c.py | 16 +- example/expression/asm_to_ir.py | 12 +- example/expression/basic_op.py | 2 +- example/expression/basic_simplification.py | 4 +- example/expression/constant_propagation.py | 10 +- example/expression/export_llvm.py | 8 +- example/expression/expr_c.py | 8 +- example/expression/expr_grapher.py | 2 +- example/expression/expr_random.py | 2 +- example/expression/expr_reduce.py | 4 +- example/expression/expr_translate.py | 6 +- example/expression/get_read_write.py | 8 +- example/expression/graph_dataflow.py | 14 +- example/expression/simplification_add.py | 6 +- example/expression/simplification_tools.py | 2 +- example/expression/solve_condition_stp.py | 16 +- example/ida/ctype_propagation.py | 24 +- example/ida/depgraph.py | 12 +- example/ida/graph_ir.py | 12 +- example/ida/symbol_exec.py | 8 +- example/ida/utils.py | 10 +- example/jitter/arm.py | 2 +- example/jitter/arm_sc.py | 6 +- example/jitter/example_types.py | 14 +- example/jitter/mips32.py | 6 +- example/jitter/msp430.py | 6 +- example/jitter/run_with_linuxenv.py | 6 +- example/jitter/sandbox_call.py | 8 +- example/jitter/sandbox_elf_aarch64l.py | 4 +- example/jitter/sandbox_elf_ppc32.py | 6 +- example/jitter/sandbox_pe_x86_32.py | 2 +- example/jitter/sandbox_pe_x86_64.py | 2 +- example/jitter/test_x86_32_seh.py | 6 +- example/jitter/trace.py | 6 +- example/jitter/unpack_upx.py | 4 +- example/jitter/x86_32.py | 4 +- example/symbol_exec/depgraph.py | 8 +- example/symbol_exec/dse_crackme.py | 12 +- example/symbol_exec/dse_strategies.py | 8 +- example/symbol_exec/single_instr.py | 8 +- miasm/__init__.py | 1 + miasm/analysis/__init__.py | 1 + miasm/analysis/binary.py | 236 + miasm/analysis/cst_propag.py | 185 + miasm/analysis/data_analysis.py | 204 + 
miasm/analysis/data_flow.py | 1580 +++++++ miasm/analysis/debugging.py | 499 +++ miasm/analysis/depgraph.py | 651 +++ miasm/analysis/disasm_cb.py | 128 + miasm/analysis/dse.py | 708 +++ miasm/analysis/expression_range.py | 70 + miasm/analysis/gdbserver.py | 453 ++ miasm/analysis/machine.py | 265 ++ miasm/analysis/modularintervals.py | 530 +++ miasm/analysis/outofssa.py | 413 ++ miasm/analysis/sandbox.py | 1026 +++++ miasm/analysis/simplifier.py | 303 ++ miasm/analysis/ssa.py | 1118 +++++ miasm/arch/__init__.py | 1 + miasm/arch/aarch64/__init__.py | 1 + miasm/arch/aarch64/arch.py | 2175 +++++++++ miasm/arch/aarch64/disasm.py | 27 + miasm/arch/aarch64/ira.py | 50 + miasm/arch/aarch64/jit.py | 80 + miasm/arch/aarch64/regs.py | 120 + miasm/arch/aarch64/sem.py | 1502 +++++++ miasm/arch/arm/__init__.py | 1 + miasm/arch/arm/arch.py | 3299 ++++++++++++++ miasm/arch/arm/disasm.py | 61 + miasm/arch/arm/ira.py | 106 + miasm/arch/arm/jit.py | 148 + miasm/arch/arm/regs.py | 114 + miasm/arch/arm/sem.py | 1902 ++++++++ miasm/arch/mep/__init__.py | 0 miasm/arch/mep/arch.py | 2052 +++++++++ miasm/arch/mep/disasm.py | 23 + miasm/arch/mep/ira.py | 45 + miasm/arch/mep/jit.py | 115 + miasm/arch/mep/regs.py | 91 + miasm/arch/mep/sem.py | 1179 +++++ miasm/arch/mips32/__init__.py | 0 miasm/arch/mips32/arch.py | 755 ++++ miasm/arch/mips32/disasm.py | 16 + miasm/arch/mips32/ira.py | 104 + miasm/arch/mips32/jit.py | 151 + miasm/arch/mips32/regs.py | 73 + miasm/arch/mips32/sem.py | 520 +++ miasm/arch/msp430/__init__.py | 1 + miasm/arch/msp430/arch.py | 587 +++ miasm/arch/msp430/ctype.py | 68 + miasm/arch/msp430/disasm.py | 8 + miasm/arch/msp430/ira.py | 31 + miasm/arch/msp430/jit.py | 42 + miasm/arch/msp430/regs.py | 116 + miasm/arch/msp430/sem.py | 509 +++ miasm/arch/ppc/__init__.py | 1 + miasm/arch/ppc/arch.py | 764 ++++ miasm/arch/ppc/disasm.py | 7 + miasm/arch/ppc/ira.py | 87 + miasm/arch/ppc/jit.py | 71 + miasm/arch/ppc/regs.py | 60 + miasm/arch/ppc/sem.py | 924 ++++ 
miasm/arch/sh4/__init__.py | 0 miasm/arch/sh4/arch.py | 999 +++++ miasm/arch/sh4/regs.py | 84 + miasm/arch/x86/__init__.py | 1 + miasm/arch/x86/arch.py | 4637 ++++++++++++++++++++ miasm/arch/x86/ctype.py | 137 + miasm/arch/x86/disasm.py | 30 + miasm/arch/x86/ira.py | 80 + miasm/arch/x86/jit.py | 286 ++ miasm/arch/x86/regs.py | 454 ++ miasm/arch/x86/sem.py | 5822 +++++++++++++++++++++++++ miasm/core/__init__.py | 1 + miasm/core/asm_ast.py | 93 + miasm/core/asmblock.py | 1629 +++++++ miasm/core/bin_stream.py | 316 ++ miasm/core/bin_stream_ida.py | 45 + miasm/core/cpu.py | 1713 ++++++++ miasm/core/ctypesmngr.py | 771 ++++ miasm/core/graph.py | 1017 +++++ miasm/core/interval.py | 259 ++ miasm/core/locationdb.py | 500 +++ miasm/core/objc.py | 1761 ++++++++ miasm/core/parse_asm.py | 305 ++ miasm/core/sembuilder.py | 355 ++ miasm/core/types.py | 1693 +++++++ miasm/core/utils.py | 234 + miasm/expression/__init__.py | 18 + miasm/expression/expression.py | 2035 +++++++++ miasm/expression/expression_helper.py | 628 +++ miasm/expression/expression_reduce.py | 280 ++ miasm/expression/modint.py | 259 ++ miasm/expression/parser.py | 84 + miasm/expression/simplifications.py | 207 + miasm/expression/simplifications_common.py | 1556 +++++++ miasm/expression/simplifications_cond.py | 178 + miasm/expression/simplifications_explicit.py | 159 + miasm/expression/smt2_helper.py | 296 ++ miasm/ir/__init__.py | 1 + miasm/ir/analysis.py | 113 + miasm/ir/ir.py | 929 ++++ miasm/ir/symbexec.py | 1124 +++++ miasm/ir/symbexec_top.py | 221 + miasm/ir/symbexec_types.py | 131 + miasm/ir/translators/C.py | 528 +++ miasm/ir/translators/__init__.py | 13 + miasm/ir/translators/miasm_ir.py | 45 + miasm/ir/translators/python.py | 98 + miasm/ir/translators/smt2.py | 326 ++ miasm/ir/translators/translator.py | 127 + miasm/ir/translators/z3_ir.py | 281 ++ miasm/jitter/JitCore.c | 257 ++ miasm/jitter/JitCore.h | 306 ++ miasm/jitter/Jitgcc.c | 106 + miasm/jitter/Jitllvm.c | 99 + miasm/jitter/__init__.py | 1 + 
miasm/jitter/arch/JitCore_aarch64.c | 562 +++ miasm/jitter/arch/JitCore_aarch64.h | 57 + miasm/jitter/arch/JitCore_arm.c | 507 +++ miasm/jitter/arch/JitCore_arm.h | 47 + miasm/jitter/arch/JitCore_mep.c | 617 +++ miasm/jitter/arch/JitCore_mep.h | 82 + miasm/jitter/arch/JitCore_mips32.c | 531 +++ miasm/jitter/arch/JitCore_mips32.h | 343 ++ miasm/jitter/arch/JitCore_msp430.c | 477 ++ miasm/jitter/arch/JitCore_msp430.h | 44 + miasm/jitter/arch/JitCore_ppc32.c | 344 ++ miasm/jitter/arch/JitCore_ppc32.h | 24 + miasm/jitter/arch/JitCore_ppc32_regs.h | 89 + miasm/jitter/arch/JitCore_x86.c | 946 ++++ miasm/jitter/arch/JitCore_x86.h | 136 + miasm/jitter/arch/__init__.py | 0 miasm/jitter/bn.c | 933 ++++ miasm/jitter/bn.h | 163 + miasm/jitter/codegen.py | 650 +++ miasm/jitter/compat_py23.h | 87 + miasm/jitter/csts.py | 30 + miasm/jitter/emulatedsymbexec.py | 140 + miasm/jitter/jitcore.py | 309 ++ miasm/jitter/jitcore_cc_base.py | 121 + miasm/jitter/jitcore_gcc.py | 141 + miasm/jitter/jitcore_llvm.py | 134 + miasm/jitter/jitcore_python.py | 219 + miasm/jitter/jitload.py | 547 +++ miasm/jitter/llvmconvert.py | 1926 ++++++++ miasm/jitter/loader/__init__.py | 0 miasm/jitter/loader/elf.py | 337 ++ miasm/jitter/loader/pe.py | 565 +++ miasm/jitter/loader/utils.py | 100 + miasm/jitter/op_semantics.c | 749 ++++ miasm/jitter/op_semantics.h | 167 + miasm/jitter/queue.h | 553 +++ miasm/jitter/vm_mngr.c | 926 ++++ miasm/jitter/vm_mngr.h | 302 ++ miasm/jitter/vm_mngr_py.c | 1013 +++++ miasm/jitter/vm_mngr_py.h | 15 + miasm/os_dep/__init__.py | 1 + miasm/os_dep/common.py | 168 + miasm/os_dep/linux/__init__.py | 1 + miasm/os_dep/linux/environment.py | 916 ++++ miasm/os_dep/linux/syscall.py | 1040 +++++ miasm/os_dep/linux_stdlib.py | 213 + miasm/os_dep/win_32_structs.py | 231 + miasm/os_dep/win_api_x86_32.py | 2992 +++++++++++++ miasm/os_dep/win_api_x86_32_seh.py | 695 +++ miasm2/__init__.py | 1 - miasm2/analysis/__init__.py | 1 - miasm2/analysis/binary.py | 236 - miasm2/analysis/cst_propag.py 
| 185 - miasm2/analysis/data_analysis.py | 204 - miasm2/analysis/data_flow.py | 1579 ------- miasm2/analysis/debugging.py | 499 --- miasm2/analysis/depgraph.py | 651 --- miasm2/analysis/disasm_cb.py | 128 - miasm2/analysis/dse.py | 708 --- miasm2/analysis/expression_range.py | 70 - miasm2/analysis/gdbserver.py | 453 -- miasm2/analysis/machine.py | 265 -- miasm2/analysis/modularintervals.py | 530 --- miasm2/analysis/outofssa.py | 413 -- miasm2/analysis/sandbox.py | 1026 ----- miasm2/analysis/simplifier.py | 303 -- miasm2/analysis/ssa.py | 1118 ----- miasm2/arch/__init__.py | 1 - miasm2/arch/aarch64/__init__.py | 1 - miasm2/arch/aarch64/arch.py | 2175 --------- miasm2/arch/aarch64/disasm.py | 27 - miasm2/arch/aarch64/ira.py | 50 - miasm2/arch/aarch64/jit.py | 80 - miasm2/arch/aarch64/regs.py | 120 - miasm2/arch/aarch64/sem.py | 1502 ------- miasm2/arch/arm/__init__.py | 1 - miasm2/arch/arm/arch.py | 3299 -------------- miasm2/arch/arm/disasm.py | 61 - miasm2/arch/arm/ira.py | 106 - miasm2/arch/arm/jit.py | 148 - miasm2/arch/arm/regs.py | 114 - miasm2/arch/arm/sem.py | 1902 -------- miasm2/arch/mep/__init__.py | 0 miasm2/arch/mep/arch.py | 2052 --------- miasm2/arch/mep/disasm.py | 23 - miasm2/arch/mep/ira.py | 45 - miasm2/arch/mep/jit.py | 115 - miasm2/arch/mep/regs.py | 91 - miasm2/arch/mep/sem.py | 1179 ----- miasm2/arch/mips32/__init__.py | 0 miasm2/arch/mips32/arch.py | 755 ---- miasm2/arch/mips32/disasm.py | 16 - miasm2/arch/mips32/ira.py | 104 - miasm2/arch/mips32/jit.py | 151 - miasm2/arch/mips32/regs.py | 73 - miasm2/arch/mips32/sem.py | 520 --- miasm2/arch/msp430/__init__.py | 1 - miasm2/arch/msp430/arch.py | 587 --- miasm2/arch/msp430/ctype.py | 68 - miasm2/arch/msp430/disasm.py | 8 - miasm2/arch/msp430/ira.py | 31 - miasm2/arch/msp430/jit.py | 42 - miasm2/arch/msp430/regs.py | 116 - miasm2/arch/msp430/sem.py | 509 --- miasm2/arch/ppc/__init__.py | 1 - miasm2/arch/ppc/arch.py | 764 ---- miasm2/arch/ppc/disasm.py | 7 - miasm2/arch/ppc/ira.py | 87 - 
miasm2/arch/ppc/jit.py | 71 - miasm2/arch/ppc/regs.py | 60 - miasm2/arch/ppc/sem.py | 924 ---- miasm2/arch/sh4/__init__.py | 0 miasm2/arch/sh4/arch.py | 999 ----- miasm2/arch/sh4/regs.py | 84 - miasm2/arch/x86/__init__.py | 1 - miasm2/arch/x86/arch.py | 4637 -------------------- miasm2/arch/x86/ctype.py | 137 - miasm2/arch/x86/disasm.py | 30 - miasm2/arch/x86/ira.py | 80 - miasm2/arch/x86/jit.py | 286 -- miasm2/arch/x86/regs.py | 454 -- miasm2/arch/x86/sem.py | 5822 ------------------------- miasm2/core/__init__.py | 1 - miasm2/core/asm_ast.py | 93 - miasm2/core/asmblock.py | 1629 ------- miasm2/core/bin_stream.py | 316 -- miasm2/core/bin_stream_ida.py | 45 - miasm2/core/cpu.py | 1713 -------- miasm2/core/ctypesmngr.py | 771 ---- miasm2/core/graph.py | 1017 ----- miasm2/core/interval.py | 259 -- miasm2/core/locationdb.py | 500 --- miasm2/core/objc.py | 1761 -------- miasm2/core/parse_asm.py | 305 -- miasm2/core/sembuilder.py | 355 -- miasm2/core/types.py | 1693 ------- miasm2/core/utils.py | 234 - miasm2/expression/__init__.py | 18 - miasm2/expression/expression.py | 2035 --------- miasm2/expression/expression_helper.py | 628 --- miasm2/expression/expression_reduce.py | 280 -- miasm2/expression/modint.py | 259 -- miasm2/expression/parser.py | 84 - miasm2/expression/simplifications.py | 207 - miasm2/expression/simplifications_common.py | 1556 ------- miasm2/expression/simplifications_cond.py | 178 - miasm2/expression/simplifications_explicit.py | 159 - miasm2/expression/smt2_helper.py | 296 -- miasm2/ir/__init__.py | 1 - miasm2/ir/analysis.py | 113 - miasm2/ir/ir.py | 929 ---- miasm2/ir/symbexec.py | 1124 ----- miasm2/ir/symbexec_top.py | 221 - miasm2/ir/symbexec_types.py | 131 - miasm2/ir/translators/C.py | 528 --- miasm2/ir/translators/__init__.py | 13 - miasm2/ir/translators/miasm.py | 45 - miasm2/ir/translators/python.py | 98 - miasm2/ir/translators/smt2.py | 326 -- miasm2/ir/translators/translator.py | 127 - miasm2/ir/translators/z3_ir.py | 281 -- 
miasm2/jitter/JitCore.c | 257 -- miasm2/jitter/JitCore.h | 306 -- miasm2/jitter/Jitgcc.c | 106 - miasm2/jitter/Jitllvm.c | 99 - miasm2/jitter/__init__.py | 1 - miasm2/jitter/arch/JitCore_aarch64.c | 562 --- miasm2/jitter/arch/JitCore_aarch64.h | 57 - miasm2/jitter/arch/JitCore_arm.c | 507 --- miasm2/jitter/arch/JitCore_arm.h | 47 - miasm2/jitter/arch/JitCore_mep.c | 617 --- miasm2/jitter/arch/JitCore_mep.h | 82 - miasm2/jitter/arch/JitCore_mips32.c | 531 --- miasm2/jitter/arch/JitCore_mips32.h | 343 -- miasm2/jitter/arch/JitCore_msp430.c | 477 -- miasm2/jitter/arch/JitCore_msp430.h | 44 - miasm2/jitter/arch/JitCore_ppc32.c | 344 -- miasm2/jitter/arch/JitCore_ppc32.h | 24 - miasm2/jitter/arch/JitCore_ppc32_regs.h | 89 - miasm2/jitter/arch/JitCore_x86.c | 946 ---- miasm2/jitter/arch/JitCore_x86.h | 136 - miasm2/jitter/arch/__init__.py | 0 miasm2/jitter/bn.c | 933 ---- miasm2/jitter/bn.h | 163 - miasm2/jitter/codegen.py | 650 --- miasm2/jitter/compat_py23.h | 87 - miasm2/jitter/csts.py | 30 - miasm2/jitter/emulatedsymbexec.py | 140 - miasm2/jitter/jitcore.py | 309 -- miasm2/jitter/jitcore_cc_base.py | 121 - miasm2/jitter/jitcore_gcc.py | 141 - miasm2/jitter/jitcore_llvm.py | 134 - miasm2/jitter/jitcore_python.py | 219 - miasm2/jitter/jitload.py | 547 --- miasm2/jitter/llvmconvert.py | 1926 -------- miasm2/jitter/loader/__init__.py | 0 miasm2/jitter/loader/elf.py | 337 -- miasm2/jitter/loader/pe.py | 565 --- miasm2/jitter/loader/utils.py | 100 - miasm2/jitter/op_semantics.c | 749 ---- miasm2/jitter/op_semantics.h | 167 - miasm2/jitter/queue.h | 553 --- miasm2/jitter/vm_mngr.c | 926 ---- miasm2/jitter/vm_mngr.h | 302 -- miasm2/jitter/vm_mngr_py.c | 1013 ----- miasm2/jitter/vm_mngr_py.h | 15 - miasm2/os_dep/__init__.py | 1 - miasm2/os_dep/common.py | 168 - miasm2/os_dep/linux/__init__.py | 1 - miasm2/os_dep/linux/environment.py | 916 ---- miasm2/os_dep/linux/syscall.py | 1040 ----- miasm2/os_dep/linux_stdlib.py | 213 - miasm2/os_dep/win_32_structs.py | 231 - 
miasm2/os_dep/win_api_x86_32.py | 2992 ------------- miasm2/os_dep/win_api_x86_32_seh.py | 695 --- setup.py | 152 +- test/analysis/data_flow.py | 10 +- test/analysis/depgraph.py | 12 +- test/analysis/dse.py | 12 +- test/analysis/modularintervals.py | 8 +- test/analysis/range.py | 6 +- test/analysis/unssa.py | 10 +- test/arch/aarch64/arch.py | 6 +- test/arch/aarch64/unit/asm_test.py | 12 +- test/arch/arm/arch.py | 6 +- test/arch/arm/sem.py | 12 +- test/arch/mep/asm/test_asm.py | 4 +- test/arch/mep/asm/ut_helpers_asm.py | 10 +- test/arch/mep/ir/test_arithmetic.py | 2 +- test/arch/mep/ir/test_bitmanipulation.py | 2 +- test/arch/mep/ir/test_branchjump.py | 2 +- test/arch/mep/ir/test_control.py | 2 +- test/arch/mep/ir/test_coprocessor.py | 2 +- test/arch/mep/ir/test_debug.py | 2 +- test/arch/mep/ir/test_divide.py | 4 +- test/arch/mep/ir/test_extension.py | 2 +- test/arch/mep/ir/test_ir.py | 14 +- test/arch/mep/ir/test_ldz.py | 2 +- test/arch/mep/ir/test_loadstore.py | 2 +- test/arch/mep/ir/test_logical.py | 2 +- test/arch/mep/ir/test_move.py | 2 +- test/arch/mep/ir/test_multiply.py | 2 +- test/arch/mep/ir/test_repeat.py | 2 +- test/arch/mep/ir/test_shift.py | 4 +- test/arch/mep/ir/ut_helpers_ir.py | 20 +- test/arch/mep/jit/ut_helpers_jit.py | 6 +- test/arch/mips32/arch.py | 6 +- test/arch/mips32/unit/asm_test.py | 12 +- test/arch/msp430/arch.py | 6 +- test/arch/msp430/sem.py | 12 +- test/arch/sh4/arch.py | 6 +- test/arch/x86/arch.py | 12 +- test/arch/x86/qemu/testqemu.py | 6 +- test/arch/x86/qemu/testqemu64.py | 6 +- test/arch/x86/sem.py | 16 +- test/arch/x86/unit/access_xmm.py | 2 +- test/arch/x86/unit/asm_test.py | 12 +- test/arch/x86/unit/mn_cdq.py | 2 +- test/arch/x86/unit/mn_int.py | 2 +- test/arch/x86/unit/mn_pushpop.py | 2 +- test/arch/x86/unit/mn_seh.py | 4 +- test/arch/x86/unit/test_asm_x86_64.py | 8 +- test/core/asmblock.py | 12 +- test/core/graph.py | 2 +- test/core/interval.py | 2 +- test/core/locationdb.py | 2 +- test/core/parse_asm.py | 14 +- 
test/core/sembuilder.py | 6 +- test/core/test_types.py | 12 +- test/core/utils.py | 2 +- test/expr_type/test_chandler.py | 16 +- test/expression/expr_cmp.py | 4 +- test/expression/expr_pickle.py | 2 +- test/expression/expression.py | 4 +- test/expression/expression_helper.py | 4 +- test/expression/modint.py | 3 +- test/expression/parser.py | 4 +- test/expression/simplifications.py | 8 +- test/expression/stp.py | 8 +- test/ir/ir.py | 6 +- test/ir/ir2C.py | 8 +- test/ir/reduce_graph.py | 10 +- test/ir/symbexec.py | 10 +- test/ir/translators/smt2.py | 6 +- test/ir/translators/z3_ir.py | 6 +- test/jitter/bad_block.py | 6 +- test/jitter/jit_options.py | 6 +- test/jitter/jitload.py | 8 +- test/jitter/jmp_out_mem.py | 6 +- test/jitter/test_post_instr.py | 6 +- test/jitter/vm_mngr.py | 4 +- test/os_dep/common.py | 8 +- test/os_dep/linux/stdlib.py | 8 +- test/os_dep/linux/test_env.py | 4 +- test/os_dep/win_api_x86_32.py | 8 +- 462 files changed, 81702 insertions(+), 81699 deletions(-) create mode 100644 miasm/__init__.py create mode 100644 miasm/analysis/__init__.py create mode 100644 miasm/analysis/binary.py create mode 100644 miasm/analysis/cst_propag.py create mode 100644 miasm/analysis/data_analysis.py create mode 100644 miasm/analysis/data_flow.py create mode 100644 miasm/analysis/debugging.py create mode 100644 miasm/analysis/depgraph.py create mode 100644 miasm/analysis/disasm_cb.py create mode 100644 miasm/analysis/dse.py create mode 100644 miasm/analysis/expression_range.py create mode 100644 miasm/analysis/gdbserver.py create mode 100644 miasm/analysis/machine.py create mode 100644 miasm/analysis/modularintervals.py create mode 100644 miasm/analysis/outofssa.py create mode 100644 miasm/analysis/sandbox.py create mode 100644 miasm/analysis/simplifier.py create mode 100644 miasm/analysis/ssa.py create mode 100644 miasm/arch/__init__.py create mode 100644 miasm/arch/aarch64/__init__.py create mode 100644 miasm/arch/aarch64/arch.py create mode 100644 
miasm/arch/aarch64/disasm.py create mode 100644 miasm/arch/aarch64/ira.py create mode 100644 miasm/arch/aarch64/jit.py create mode 100644 miasm/arch/aarch64/regs.py create mode 100644 miasm/arch/aarch64/sem.py create mode 100644 miasm/arch/arm/__init__.py create mode 100644 miasm/arch/arm/arch.py create mode 100644 miasm/arch/arm/disasm.py create mode 100644 miasm/arch/arm/ira.py create mode 100644 miasm/arch/arm/jit.py create mode 100644 miasm/arch/arm/regs.py create mode 100644 miasm/arch/arm/sem.py create mode 100644 miasm/arch/mep/__init__.py create mode 100644 miasm/arch/mep/arch.py create mode 100644 miasm/arch/mep/disasm.py create mode 100644 miasm/arch/mep/ira.py create mode 100644 miasm/arch/mep/jit.py create mode 100644 miasm/arch/mep/regs.py create mode 100644 miasm/arch/mep/sem.py create mode 100644 miasm/arch/mips32/__init__.py create mode 100644 miasm/arch/mips32/arch.py create mode 100644 miasm/arch/mips32/disasm.py create mode 100644 miasm/arch/mips32/ira.py create mode 100644 miasm/arch/mips32/jit.py create mode 100644 miasm/arch/mips32/regs.py create mode 100644 miasm/arch/mips32/sem.py create mode 100644 miasm/arch/msp430/__init__.py create mode 100644 miasm/arch/msp430/arch.py create mode 100644 miasm/arch/msp430/ctype.py create mode 100644 miasm/arch/msp430/disasm.py create mode 100644 miasm/arch/msp430/ira.py create mode 100644 miasm/arch/msp430/jit.py create mode 100644 miasm/arch/msp430/regs.py create mode 100644 miasm/arch/msp430/sem.py create mode 100644 miasm/arch/ppc/__init__.py create mode 100644 miasm/arch/ppc/arch.py create mode 100644 miasm/arch/ppc/disasm.py create mode 100644 miasm/arch/ppc/ira.py create mode 100644 miasm/arch/ppc/jit.py create mode 100644 miasm/arch/ppc/regs.py create mode 100644 miasm/arch/ppc/sem.py create mode 100644 miasm/arch/sh4/__init__.py create mode 100644 miasm/arch/sh4/arch.py create mode 100644 miasm/arch/sh4/regs.py create mode 100644 miasm/arch/x86/__init__.py create mode 100644 
miasm/arch/x86/arch.py create mode 100644 miasm/arch/x86/ctype.py create mode 100644 miasm/arch/x86/disasm.py create mode 100644 miasm/arch/x86/ira.py create mode 100644 miasm/arch/x86/jit.py create mode 100644 miasm/arch/x86/regs.py create mode 100644 miasm/arch/x86/sem.py create mode 100644 miasm/core/__init__.py create mode 100644 miasm/core/asm_ast.py create mode 100644 miasm/core/asmblock.py create mode 100644 miasm/core/bin_stream.py create mode 100644 miasm/core/bin_stream_ida.py create mode 100644 miasm/core/cpu.py create mode 100644 miasm/core/ctypesmngr.py create mode 100644 miasm/core/graph.py create mode 100644 miasm/core/interval.py create mode 100644 miasm/core/locationdb.py create mode 100644 miasm/core/objc.py create mode 100644 miasm/core/parse_asm.py create mode 100644 miasm/core/sembuilder.py create mode 100644 miasm/core/types.py create mode 100644 miasm/core/utils.py create mode 100644 miasm/expression/__init__.py create mode 100644 miasm/expression/expression.py create mode 100644 miasm/expression/expression_helper.py create mode 100644 miasm/expression/expression_reduce.py create mode 100644 miasm/expression/modint.py create mode 100644 miasm/expression/parser.py create mode 100644 miasm/expression/simplifications.py create mode 100644 miasm/expression/simplifications_common.py create mode 100644 miasm/expression/simplifications_cond.py create mode 100644 miasm/expression/simplifications_explicit.py create mode 100644 miasm/expression/smt2_helper.py create mode 100644 miasm/ir/__init__.py create mode 100644 miasm/ir/analysis.py create mode 100644 miasm/ir/ir.py create mode 100644 miasm/ir/symbexec.py create mode 100644 miasm/ir/symbexec_top.py create mode 100644 miasm/ir/symbexec_types.py create mode 100644 miasm/ir/translators/C.py create mode 100644 miasm/ir/translators/__init__.py create mode 100644 miasm/ir/translators/miasm_ir.py create mode 100644 miasm/ir/translators/python.py create mode 100644 miasm/ir/translators/smt2.py create mode 
100644 miasm/ir/translators/translator.py create mode 100644 miasm/ir/translators/z3_ir.py create mode 100644 miasm/jitter/JitCore.c create mode 100644 miasm/jitter/JitCore.h create mode 100644 miasm/jitter/Jitgcc.c create mode 100644 miasm/jitter/Jitllvm.c create mode 100644 miasm/jitter/__init__.py create mode 100644 miasm/jitter/arch/JitCore_aarch64.c create mode 100644 miasm/jitter/arch/JitCore_aarch64.h create mode 100644 miasm/jitter/arch/JitCore_arm.c create mode 100644 miasm/jitter/arch/JitCore_arm.h create mode 100644 miasm/jitter/arch/JitCore_mep.c create mode 100644 miasm/jitter/arch/JitCore_mep.h create mode 100644 miasm/jitter/arch/JitCore_mips32.c create mode 100644 miasm/jitter/arch/JitCore_mips32.h create mode 100644 miasm/jitter/arch/JitCore_msp430.c create mode 100644 miasm/jitter/arch/JitCore_msp430.h create mode 100644 miasm/jitter/arch/JitCore_ppc32.c create mode 100644 miasm/jitter/arch/JitCore_ppc32.h create mode 100644 miasm/jitter/arch/JitCore_ppc32_regs.h create mode 100644 miasm/jitter/arch/JitCore_x86.c create mode 100644 miasm/jitter/arch/JitCore_x86.h create mode 100644 miasm/jitter/arch/__init__.py create mode 100644 miasm/jitter/bn.c create mode 100644 miasm/jitter/bn.h create mode 100644 miasm/jitter/codegen.py create mode 100644 miasm/jitter/compat_py23.h create mode 100644 miasm/jitter/csts.py create mode 100644 miasm/jitter/emulatedsymbexec.py create mode 100644 miasm/jitter/jitcore.py create mode 100644 miasm/jitter/jitcore_cc_base.py create mode 100644 miasm/jitter/jitcore_gcc.py create mode 100644 miasm/jitter/jitcore_llvm.py create mode 100644 miasm/jitter/jitcore_python.py create mode 100644 miasm/jitter/jitload.py create mode 100644 miasm/jitter/llvmconvert.py create mode 100644 miasm/jitter/loader/__init__.py create mode 100644 miasm/jitter/loader/elf.py create mode 100644 miasm/jitter/loader/pe.py create mode 100644 miasm/jitter/loader/utils.py create mode 100644 miasm/jitter/op_semantics.c create mode 100644 
miasm/jitter/op_semantics.h create mode 100644 miasm/jitter/queue.h create mode 100644 miasm/jitter/vm_mngr.c create mode 100644 miasm/jitter/vm_mngr.h create mode 100644 miasm/jitter/vm_mngr_py.c create mode 100644 miasm/jitter/vm_mngr_py.h create mode 100644 miasm/os_dep/__init__.py create mode 100644 miasm/os_dep/common.py create mode 100644 miasm/os_dep/linux/__init__.py create mode 100644 miasm/os_dep/linux/environment.py create mode 100644 miasm/os_dep/linux/syscall.py create mode 100644 miasm/os_dep/linux_stdlib.py create mode 100644 miasm/os_dep/win_32_structs.py create mode 100644 miasm/os_dep/win_api_x86_32.py create mode 100644 miasm/os_dep/win_api_x86_32_seh.py delete mode 100644 miasm2/__init__.py delete mode 100644 miasm2/analysis/__init__.py delete mode 100644 miasm2/analysis/binary.py delete mode 100644 miasm2/analysis/cst_propag.py delete mode 100644 miasm2/analysis/data_analysis.py delete mode 100644 miasm2/analysis/data_flow.py delete mode 100644 miasm2/analysis/debugging.py delete mode 100644 miasm2/analysis/depgraph.py delete mode 100644 miasm2/analysis/disasm_cb.py delete mode 100644 miasm2/analysis/dse.py delete mode 100644 miasm2/analysis/expression_range.py delete mode 100644 miasm2/analysis/gdbserver.py delete mode 100644 miasm2/analysis/machine.py delete mode 100644 miasm2/analysis/modularintervals.py delete mode 100644 miasm2/analysis/outofssa.py delete mode 100644 miasm2/analysis/sandbox.py delete mode 100644 miasm2/analysis/simplifier.py delete mode 100644 miasm2/analysis/ssa.py delete mode 100644 miasm2/arch/__init__.py delete mode 100644 miasm2/arch/aarch64/__init__.py delete mode 100644 miasm2/arch/aarch64/arch.py delete mode 100644 miasm2/arch/aarch64/disasm.py delete mode 100644 miasm2/arch/aarch64/ira.py delete mode 100644 miasm2/arch/aarch64/jit.py delete mode 100644 miasm2/arch/aarch64/regs.py delete mode 100644 miasm2/arch/aarch64/sem.py delete mode 100644 miasm2/arch/arm/__init__.py delete mode 100644 miasm2/arch/arm/arch.py 
delete mode 100644 miasm2/arch/arm/disasm.py delete mode 100644 miasm2/arch/arm/ira.py delete mode 100644 miasm2/arch/arm/jit.py delete mode 100644 miasm2/arch/arm/regs.py delete mode 100644 miasm2/arch/arm/sem.py delete mode 100644 miasm2/arch/mep/__init__.py delete mode 100644 miasm2/arch/mep/arch.py delete mode 100644 miasm2/arch/mep/disasm.py delete mode 100644 miasm2/arch/mep/ira.py delete mode 100644 miasm2/arch/mep/jit.py delete mode 100644 miasm2/arch/mep/regs.py delete mode 100644 miasm2/arch/mep/sem.py delete mode 100644 miasm2/arch/mips32/__init__.py delete mode 100644 miasm2/arch/mips32/arch.py delete mode 100644 miasm2/arch/mips32/disasm.py delete mode 100644 miasm2/arch/mips32/ira.py delete mode 100644 miasm2/arch/mips32/jit.py delete mode 100644 miasm2/arch/mips32/regs.py delete mode 100644 miasm2/arch/mips32/sem.py delete mode 100644 miasm2/arch/msp430/__init__.py delete mode 100644 miasm2/arch/msp430/arch.py delete mode 100644 miasm2/arch/msp430/ctype.py delete mode 100644 miasm2/arch/msp430/disasm.py delete mode 100644 miasm2/arch/msp430/ira.py delete mode 100644 miasm2/arch/msp430/jit.py delete mode 100644 miasm2/arch/msp430/regs.py delete mode 100644 miasm2/arch/msp430/sem.py delete mode 100644 miasm2/arch/ppc/__init__.py delete mode 100644 miasm2/arch/ppc/arch.py delete mode 100644 miasm2/arch/ppc/disasm.py delete mode 100644 miasm2/arch/ppc/ira.py delete mode 100644 miasm2/arch/ppc/jit.py delete mode 100644 miasm2/arch/ppc/regs.py delete mode 100644 miasm2/arch/ppc/sem.py delete mode 100644 miasm2/arch/sh4/__init__.py delete mode 100644 miasm2/arch/sh4/arch.py delete mode 100644 miasm2/arch/sh4/regs.py delete mode 100644 miasm2/arch/x86/__init__.py delete mode 100644 miasm2/arch/x86/arch.py delete mode 100644 miasm2/arch/x86/ctype.py delete mode 100644 miasm2/arch/x86/disasm.py delete mode 100644 miasm2/arch/x86/ira.py delete mode 100644 miasm2/arch/x86/jit.py delete mode 100644 miasm2/arch/x86/regs.py delete mode 100644 miasm2/arch/x86/sem.py 
delete mode 100644 miasm2/core/__init__.py delete mode 100644 miasm2/core/asm_ast.py delete mode 100644 miasm2/core/asmblock.py delete mode 100644 miasm2/core/bin_stream.py delete mode 100644 miasm2/core/bin_stream_ida.py delete mode 100644 miasm2/core/cpu.py delete mode 100644 miasm2/core/ctypesmngr.py delete mode 100644 miasm2/core/graph.py delete mode 100644 miasm2/core/interval.py delete mode 100644 miasm2/core/locationdb.py delete mode 100644 miasm2/core/objc.py delete mode 100644 miasm2/core/parse_asm.py delete mode 100644 miasm2/core/sembuilder.py delete mode 100644 miasm2/core/types.py delete mode 100644 miasm2/core/utils.py delete mode 100644 miasm2/expression/__init__.py delete mode 100644 miasm2/expression/expression.py delete mode 100644 miasm2/expression/expression_helper.py delete mode 100644 miasm2/expression/expression_reduce.py delete mode 100644 miasm2/expression/modint.py delete mode 100644 miasm2/expression/parser.py delete mode 100644 miasm2/expression/simplifications.py delete mode 100644 miasm2/expression/simplifications_common.py delete mode 100644 miasm2/expression/simplifications_cond.py delete mode 100644 miasm2/expression/simplifications_explicit.py delete mode 100644 miasm2/expression/smt2_helper.py delete mode 100644 miasm2/ir/__init__.py delete mode 100644 miasm2/ir/analysis.py delete mode 100644 miasm2/ir/ir.py delete mode 100644 miasm2/ir/symbexec.py delete mode 100644 miasm2/ir/symbexec_top.py delete mode 100644 miasm2/ir/symbexec_types.py delete mode 100644 miasm2/ir/translators/C.py delete mode 100644 miasm2/ir/translators/__init__.py delete mode 100644 miasm2/ir/translators/miasm.py delete mode 100644 miasm2/ir/translators/python.py delete mode 100644 miasm2/ir/translators/smt2.py delete mode 100644 miasm2/ir/translators/translator.py delete mode 100644 miasm2/ir/translators/z3_ir.py delete mode 100644 miasm2/jitter/JitCore.c delete mode 100644 miasm2/jitter/JitCore.h delete mode 100644 miasm2/jitter/Jitgcc.c delete mode 100644 
miasm2/jitter/Jitllvm.c delete mode 100644 miasm2/jitter/__init__.py delete mode 100644 miasm2/jitter/arch/JitCore_aarch64.c delete mode 100644 miasm2/jitter/arch/JitCore_aarch64.h delete mode 100644 miasm2/jitter/arch/JitCore_arm.c delete mode 100644 miasm2/jitter/arch/JitCore_arm.h delete mode 100644 miasm2/jitter/arch/JitCore_mep.c delete mode 100644 miasm2/jitter/arch/JitCore_mep.h delete mode 100644 miasm2/jitter/arch/JitCore_mips32.c delete mode 100644 miasm2/jitter/arch/JitCore_mips32.h delete mode 100644 miasm2/jitter/arch/JitCore_msp430.c delete mode 100644 miasm2/jitter/arch/JitCore_msp430.h delete mode 100644 miasm2/jitter/arch/JitCore_ppc32.c delete mode 100644 miasm2/jitter/arch/JitCore_ppc32.h delete mode 100644 miasm2/jitter/arch/JitCore_ppc32_regs.h delete mode 100644 miasm2/jitter/arch/JitCore_x86.c delete mode 100644 miasm2/jitter/arch/JitCore_x86.h delete mode 100644 miasm2/jitter/arch/__init__.py delete mode 100644 miasm2/jitter/bn.c delete mode 100644 miasm2/jitter/bn.h delete mode 100644 miasm2/jitter/codegen.py delete mode 100644 miasm2/jitter/compat_py23.h delete mode 100644 miasm2/jitter/csts.py delete mode 100644 miasm2/jitter/emulatedsymbexec.py delete mode 100644 miasm2/jitter/jitcore.py delete mode 100644 miasm2/jitter/jitcore_cc_base.py delete mode 100644 miasm2/jitter/jitcore_gcc.py delete mode 100644 miasm2/jitter/jitcore_llvm.py delete mode 100644 miasm2/jitter/jitcore_python.py delete mode 100644 miasm2/jitter/jitload.py delete mode 100644 miasm2/jitter/llvmconvert.py delete mode 100644 miasm2/jitter/loader/__init__.py delete mode 100644 miasm2/jitter/loader/elf.py delete mode 100644 miasm2/jitter/loader/pe.py delete mode 100644 miasm2/jitter/loader/utils.py delete mode 100644 miasm2/jitter/op_semantics.c delete mode 100644 miasm2/jitter/op_semantics.h delete mode 100644 miasm2/jitter/queue.h delete mode 100644 miasm2/jitter/vm_mngr.c delete mode 100644 miasm2/jitter/vm_mngr.h delete mode 100644 miasm2/jitter/vm_mngr_py.c delete 
mode 100644 miasm2/jitter/vm_mngr_py.h delete mode 100644 miasm2/os_dep/__init__.py delete mode 100644 miasm2/os_dep/common.py delete mode 100644 miasm2/os_dep/linux/__init__.py delete mode 100644 miasm2/os_dep/linux/environment.py delete mode 100644 miasm2/os_dep/linux/syscall.py delete mode 100644 miasm2/os_dep/linux_stdlib.py delete mode 100644 miasm2/os_dep/win_32_structs.py delete mode 100644 miasm2/os_dep/win_api_x86_32.py delete mode 100644 miasm2/os_dep/win_api_x86_32_seh.py (limited to 'example/asm/shellcode.py') diff --git a/README.md b/README.md index bee55db3..010f75d6 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,8 @@ Assembling / Disassembling Import Miasm x86 architecture: ```pycon ->>> from miasm2.arch.x86.arch import mn_x86 ->>> from miasm2.core.locationdb import LocationDB +>>> from miasm.arch.x86.arch import mn_x86 +>>> from miasm.core.locationdb import LocationDB ``` Get a location db: @@ -80,7 +80,7 @@ XOR EAX, ECX Using `Machine` abstraction: ```pycon ->>> from miasm2.analysis.machine import Machine +>>> from miasm.analysis.machine import Machine >>> mn = Machine('x86_32').mn >>> print(mn.dis('\x33\x30', 32)) XOR ESI, DWORD PTR [EAX] @@ -164,16 +164,16 @@ Giving a shellcode: Import the shellcode thanks to the `Container` abstraction: ```pycon ->>> from miasm2.analysis.binary import Container +>>> from miasm.analysis.binary import Container >>> c = Container.from_string(s) >>> c - + ``` Disassembling the shellcode at address `0`: ```pycon ->>> from miasm2.analysis.machine import Machine +>>> from miasm.analysis.machine import Machine >>> machine = Machine('x86_32') >>> mdis = machine.dis_engine(c.bin_stream) >>> asmcfg = mdis.dis_multiblock(0) @@ -208,7 +208,7 @@ Initializing the Jit engine with a stack: Add the shellcode in an arbitrary memory location: ```pycon >>> run_addr = 0x40000000 ->>> from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +>>> from miasm.jitter.csts import PAGE_READ, PAGE_WRITE >>> 
jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, s) ``` @@ -284,7 +284,7 @@ Initializing the IR pool: Initializing the engine with default symbolic values: ```pycon ->>> from miasm2.ir.symbexec import SymbolicExecutionEngine +>>> from miasm.ir.symbexec import SymbolicExecutionEngine >>> sb = SymbolicExecutionEngine(ira) ``` @@ -355,7 +355,7 @@ ________________________________________________________________________________ Retry execution with a concrete ECX. Here, the symbolic / concolic execution reach the shellcode's end: ```pycon ->>> from miasm2.expression.expression import ExprInt +>>> from miasm.expression.expression import ExprInt >>> sb.symbols[machine.mn.regs.ECX] = ExprInt(-3, 32) >>> symbolic_pc = sb.run_at(ircfg, 0, step=True) Instr LEA ECX, DWORD PTR [ECX + 0x4] @@ -571,7 +571,7 @@ Windows & IDA Most of Miasm's IDA plugins use a subset of Miasm functionality. A quick way to have them working is to add: * `elfesteem` directory and `pyparsing.py` to `C:\...\IDA\python\` or `pip install pyparsing elfesteem` -* `miasm2/miasm2` directory to `C:\...\IDA\python\` +* `miasm/miasm` directory to `C:\...\IDA\python\` All features excepting JITter related ones will be available. For a more complete installation, please refer to above paragraphs. 
@@ -598,7 +598,7 @@ Tools ----- * [Sibyl](https://github.com/cea-sec/Sibyl): A function divination too -* [R2M2](https://github.com/guedou/r2m2): Use miasm2 as a radare2 plugin +* [R2M2](https://github.com/guedou/r2m2): Use miasm as a radare2 plugin * [CGrex](https://github.com/mechaphish/cgrex) : Targeted patcher for CGC binaries * [ethRE](https://github.com/jbcayrou/ethRE) Reversing tool for Ethereum EVM (with corresponding Miasm2 architecture) diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index b14b7441..ed489bbd 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -7,11 +7,11 @@ from future.utils import viewitems from elfesteem import pe_init from elfesteem.strpatchwork import StrPatchwork -from miasm2.core import parse_asm, asmblock -from miasm2.analysis.machine import Machine -from miasm2.core.interval import interval -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import iterbytes, int_to_byte +from miasm.core import parse_asm, asmblock +from miasm.analysis.machine import Machine +from miasm.core.interval import interval +from miasm.core.locationdb import LocationDB +from miasm.core.utils import iterbytes, int_to_byte parser = ArgumentParser("Multi-arch (32 bits) assembler") parser.add_argument('architecture', help="architecture: " + diff --git a/example/asm/simple.py b/example/asm/simple.py index e46faa48..8f6aac92 100644 --- a/example/asm/simple.py +++ b/example/asm/simple.py @@ -2,8 +2,8 @@ from __future__ import print_function from pdb import pm from pprint import pprint -from miasm2.arch.x86.arch import mn_x86 -from miasm2.core import parse_asm, asmblock +from miasm.arch.x86.arch import mn_x86 +from miasm.core import parse_asm, asmblock # Assemble code diff --git a/example/disasm/callback.py b/example/disasm/callback.py index 95c165d4..7219462f 100644 --- a/example/disasm/callback.py +++ b/example/disasm/callback.py @@ -1,7 +1,7 @@ from __future__ import print_function -from 
miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine -from miasm2.core.asmblock import AsmConstraint +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine +from miasm.core.asmblock import AsmConstraint def cb_x86_callpop(cur_bloc, loc_db, *args, **kwargs): diff --git a/example/disasm/dis_binary.py b/example/disasm/dis_binary.py index 4ac5ef26..37eabb14 100644 --- a/example/disasm/dis_binary.py +++ b/example/disasm/dis_binary.py @@ -1,7 +1,7 @@ from __future__ import print_function import sys -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine fdesc = open(sys.argv[1], 'rb') diff --git a/example/disasm/dis_binary_ir.py b/example/disasm/dis_binary_ir.py index ac642a36..ff7a0d36 100644 --- a/example/disasm/dis_binary_ir.py +++ b/example/disasm/dis_binary_ir.py @@ -1,8 +1,8 @@ from __future__ import print_function import sys from future.utils import viewvalues -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine ##################################### # Common section from dis_binary.py # diff --git a/example/disasm/dis_binary_ira.py b/example/disasm/dis_binary_ira.py index 04bddbbb..3ecd5349 100644 --- a/example/disasm/dis_binary_ira.py +++ b/example/disasm/dis_binary_ira.py @@ -1,8 +1,9 @@ from __future__ import print_function import sys + from future.utils import viewvalues -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine ##################################### # Common section from dis_binary.py # diff --git a/example/disasm/dis_x86_string.py b/example/disasm/dis_x86_string.py index 175e9264..6d4e2c84 100644 --- 
a/example/disasm/dis_x86_string.py +++ b/example/disasm/dis_x86_string.py @@ -1,6 +1,6 @@ from __future__ import print_function -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine # The Container will provide a *bin_stream*, bytes source for the disasm engine cont = Container.from_string(b"\x83\xf8\x10\x74\x07\x89\xc6\x0f\x47\xc3\xeb\x08\x89\xc8\xe8\x31\x33\x22\x11\x40\xc3") diff --git a/example/disasm/full.py b/example/disasm/full.py index de3f82ac..f7268ad0 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -5,17 +5,17 @@ from pdb import pm from future.utils import viewitems, viewvalues -from miasm2.analysis.binary import Container -from miasm2.core.asmblock import log_asmblock, AsmCFG -from miasm2.core.interval import interval -from miasm2.analysis.machine import Machine -from miasm2.analysis.data_flow import dead_simp, \ +from miasm.analysis.binary import Container +from miasm.core.asmblock import log_asmblock, AsmCFG +from miasm.core.interval import interval +from miasm.analysis.machine import Machine +from miasm.analysis.data_flow import dead_simp, \ DiGraphDefUse, ReachingDefinitions, \ replace_stack_vars, load_from_int, del_unused_edges -from miasm2.expression.simplifications import expr_simp -from miasm2.analysis.ssa import SSADiGraph -from miasm2.ir.ir import AssignBlock, IRBlock -from miasm2.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA +from miasm.expression.simplifications import expr_simp +from miasm.analysis.ssa import SSADiGraph +from miasm.ir.ir import AssignBlock, IRBlock +from miasm.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA log = logging.getLogger("dis") console_handler = logging.StreamHandler() diff --git a/example/disasm/single_instr.py b/example/disasm/single_instr.py index 70b37220..eee527b1 100644 --- a/example/disasm/single_instr.py +++ 
b/example/disasm/single_instr.py @@ -1,7 +1,7 @@ from __future__ import print_function -from miasm2.arch.x86.arch import mn_x86 -from miasm2.arch.x86.regs import EDX -from miasm2.core.locationdb import LocationDB +from miasm.arch.x86.arch import mn_x86 +from miasm.arch.x86.regs import EDX +from miasm.core.locationdb import LocationDB loc_db = LocationDB() l = mn_x86.fromstring('MOV EAX, EBX', loc_db, 32) diff --git a/example/expression/access_c.py b/example/expression/access_c.py index c6f26a10..c604a0bd 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -45,16 +45,16 @@ import sys from future.utils import viewitems, viewvalues -from miasm2.analysis.machine import Machine -from miasm2.analysis.binary import Container -from miasm2.expression.expression import ExprOp, ExprCompose, ExprId, ExprInt -from miasm2.analysis.depgraph import DependencyGraph +from miasm.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.expression.expression import ExprOp, ExprCompose, ExprId, ExprInt +from miasm.analysis.depgraph import DependencyGraph -from miasm2.arch.x86.ctype import CTypeAMD64_unk +from miasm.arch.x86.ctype import CTypeAMD64_unk -from miasm2.core.objc import ExprToAccessC, CHandler -from miasm2.core.objc import CTypesManagerNotPacked -from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct +from miasm.core.objc import ExprToAccessC, CHandler +from miasm.core.objc import CTypesManagerNotPacked +from miasm.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct def find_call(ircfg): """Returns (irb, index) which call""" diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index 16f766e1..4bcbb05d 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -3,12 +3,12 @@ from pdb import pm from future.utils import viewitems -from miasm2.arch.x86.arch import mn_x86 -from miasm2.core import parse_asm -from miasm2.expression.expression import * 
-from miasm2.core import asmblock -from miasm2.arch.x86.ira import ir_a_x86_32 -from miasm2.analysis.data_flow import dead_simp +from miasm.arch.x86.arch import mn_x86 +from miasm.core import parse_asm +from miasm.expression.expression import * +from miasm.core import asmblock +from miasm.arch.x86.ira import ir_a_x86_32 +from miasm.analysis.data_flow import dead_simp # First, asm code diff --git a/example/expression/basic_op.py b/example/expression/basic_op.py index 8b5d7e2b..afeb4081 100644 --- a/example/expression/basic_op.py +++ b/example/expression/basic_op.py @@ -1,5 +1,5 @@ from __future__ import print_function -from miasm2.expression.expression import * +from miasm.expression.expression import * print(""" Simple expression manipulation demo diff --git a/example/expression/basic_simplification.py b/example/expression/basic_simplification.py index 5ecf21db..05ebefc6 100644 --- a/example/expression/basic_simplification.py +++ b/example/expression/basic_simplification.py @@ -1,6 +1,6 @@ from __future__ import print_function -from miasm2.expression.expression import * -from miasm2.expression.simplifications import expr_simp +from miasm.expression.expression import * +from miasm.expression.simplifications import expr_simp print(""" Simple expression simplification demo diff --git a/example/expression/constant_propagation.py b/example/expression/constant_propagation.py index 1259758b..a6efbb46 100644 --- a/example/expression/constant_propagation.py +++ b/example/expression/constant_propagation.py @@ -6,12 +6,12 @@ A "constant expression" is an expression based on constants or init regs. 
from argparse import ArgumentParser -from miasm2.analysis.machine import Machine -from miasm2.analysis.binary import Container -from miasm2.analysis.cst_propag import propagate_cst_expr -from miasm2.analysis.data_flow import dead_simp, \ +from miasm.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.analysis.cst_propag import propagate_cst_expr +from miasm.analysis.data_flow import dead_simp, \ merge_blocks, remove_empty_assignblks -from miasm2.expression.simplifications import expr_simp +from miasm.expression.simplifications import expr_simp parser = ArgumentParser("Constant expression propagation") diff --git a/example/expression/export_llvm.py b/example/expression/export_llvm.py index c8ee14a5..241a907d 100644 --- a/example/expression/export_llvm.py +++ b/example/expression/export_llvm.py @@ -1,11 +1,11 @@ from future.utils import viewitems, viewvalues from argparse import ArgumentParser -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine -from miasm2.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine +from miasm.jitter.llvmconvert import LLVMType, LLVMContext_IRCompilation, LLVMFunction_IRCompilation from llvmlite import ir as llvm_ir -from miasm2.expression.simplifications import expr_simp_high_to_explicit +from miasm.expression.simplifications import expr_simp_high_to_explicit parser = ArgumentParser("LLVM export example") parser.add_argument("target", help="Target binary") diff --git a/example/expression/expr_c.py b/example/expression/expr_c.py index 83cc727b..cdfdf4ca 100644 --- a/example/expression/expr_c.py +++ b/example/expression/expr_c.py @@ -5,10 +5,10 @@ Parse C expression to access variables and retrieve information: """ from __future__ import print_function -from miasm2.core.ctypesmngr import CTypeStruct, CAstTypes, CTypePtr -from 
miasm2.arch.x86.ctype import CTypeAMD64_unk -from miasm2.core.objc import CTypesManagerNotPacked, CHandler -from miasm2.expression.expression import ExprId +from miasm.core.ctypesmngr import CTypeStruct, CAstTypes, CTypePtr +from miasm.arch.x86.ctype import CTypeAMD64_unk +from miasm.core.objc import CTypesManagerNotPacked, CHandler +from miasm.expression.expression import ExprId """ diff --git a/example/expression/expr_grapher.py b/example/expression/expr_grapher.py index e1643b03..2550c5d3 100644 --- a/example/expression/expr_grapher.py +++ b/example/expression/expr_grapher.py @@ -1,6 +1,6 @@ from __future__ import print_function -from miasm2.expression.expression import * +from miasm.expression.expression import * print("Simple Expression grapher demo") diff --git a/example/expression/expr_random.py b/example/expression/expr_random.py index 5ac3be06..e1164f6f 100644 --- a/example/expression/expr_random.py +++ b/example/expression/expr_random.py @@ -3,7 +3,7 @@ from builtins import range import string import random -from miasm2.expression.expression_helper import ExprRandom +from miasm.expression.expression_helper import ExprRandom print("Simple expression generator\n") diff --git a/example/expression/expr_reduce.py b/example/expression/expr_reduce.py index 0f575e57..4c8b6c83 100644 --- a/example/expression/expr_reduce.py +++ b/example/expression/expr_reduce.py @@ -1,5 +1,5 @@ -from miasm2.expression.expression import ExprId, ExprInt, ExprMem -from miasm2.expression.expression_reduce import ExprReducer +from miasm.expression.expression import ExprId, ExprInt, ExprMem +from miasm.expression.expression_reduce import ExprReducer class StructLookup(ExprReducer): diff --git a/example/expression/expr_translate.py b/example/expression/expr_translate.py index 1a36a64c..8562975f 100644 --- a/example/expression/expr_translate.py +++ b/example/expression/expr_translate.py @@ -3,9 +3,9 @@ import random from future.utils import viewitems -from miasm2.expression.expression 
import * -from miasm2.expression.expression_helper import ExprRandom -from miasm2.ir.translators import Translator +from miasm.expression.expression import * +from miasm.expression.expression_helper import ExprRandom +from miasm.ir.translators import Translator random.seed(0) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index 0c8bb3dd..cf333d0c 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -2,10 +2,10 @@ from __future__ import print_function from future.utils import viewitems -from miasm2.arch.x86.arch import mn_x86 -from miasm2.expression.expression import get_rw -from miasm2.arch.x86.ira import ir_a_x86_32 -from miasm2.core.locationdb import LocationDB +from miasm.arch.x86.arch import mn_x86 +from miasm.expression.expression import get_rw +from miasm.arch.x86.ira import ir_a_x86_32 +from miasm.core.locationdb import LocationDB loc_db = LocationDB() diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index 55159598..c320fba0 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -3,13 +3,13 @@ from argparse import ArgumentParser from future.utils import viewitems, viewvalues -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine -from miasm2.expression.expression import get_expr_mem -from miasm2.analysis.data_analysis import intra_block_flow_raw, inter_block_flow -from miasm2.core.graph import DiGraph -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.analysis.data_flow import dead_simp +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine +from miasm.expression.expression import get_expr_mem +from miasm.analysis.data_analysis import intra_block_flow_raw, inter_block_flow +from miasm.core.graph import DiGraph +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.analysis.data_flow import 
dead_simp parser = ArgumentParser("Simple expression use for generating dataflow graph") diff --git a/example/expression/simplification_add.py b/example/expression/simplification_add.py index 6ac36a17..ff28d56e 100644 --- a/example/expression/simplification_add.py +++ b/example/expression/simplification_add.py @@ -1,13 +1,13 @@ from __future__ import print_function -import miasm2.expression.expression as m2_expr -from miasm2.expression.simplifications import expr_simp +import miasm.expression.expression as m2_expr +from miasm.expression.simplifications import expr_simp from pdb import pm print(""" Expression simplification demo: Adding a simplification: a + a + a == a * 3 -More detailed examples can be found in miasm2/expression/simplification*. +More detailed examples can be found in miasm/expression/simplification*. """) # Define the simplification method diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index a9bcc429..b5fafba2 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -1,5 +1,5 @@ from __future__ import print_function -from miasm2.expression.expression import * +from miasm.expression.expression import * from pdb import pm print(""" diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index e0ab09da..2c654b77 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -6,15 +6,15 @@ from pdb import pm from future.utils import viewitems -from miasm2.analysis.machine import Machine -from miasm2.analysis.binary import Container -from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ +from miasm.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.expression.expression import ExprInt, ExprCond, ExprId, \ get_expr_ids, ExprAssign, ExprLoc -from miasm2.core.bin_stream import bin_stream_str -from 
miasm2.ir.symbexec import SymbolicExecutionEngine, get_block -from miasm2.expression.simplifications import expr_simp -from miasm2.core import parse_asm -from miasm2.ir.translators.translator import Translator +from miasm.core.bin_stream import bin_stream_str +from miasm.ir.symbexec import SymbolicExecutionEngine, get_block +from miasm.expression.simplifications import expr_simp +from miasm.core import parse_asm +from miasm.ir.translators.translator import Translator machine = Machine("x86_32") diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index a043b9c9..f333d69a 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -5,18 +5,18 @@ import ida_funcs from future.utils import viewitems -from miasm2.core.bin_stream_ida import bin_stream_ida -from miasm2.expression import expression as m2_expr -from miasm2.expression.simplifications import expr_simp -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk -from miasm2.arch.msp430.ctype import CTypeMSP430_unk -from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler -from miasm2.core.ctypesmngr import CAstTypes -from miasm2.expression.expression import ExprLoc, ExprInt, ExprOp, ExprAssign -from miasm2.ir.symbexec_types import SymbExecCType -from miasm2.expression.parser import str_to_expr -from miasm2.analysis.cst_propag import add_state, propagate_cst_expr +from miasm.core.bin_stream_ida import bin_stream_ida +from miasm.expression import expression as m2_expr +from miasm.expression.simplifications import expr_simp +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk +from miasm.arch.msp430.ctype import CTypeMSP430_unk +from miasm.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler +from miasm.core.ctypesmngr import CAstTypes +from miasm.expression.expression import ExprLoc, ExprInt, ExprOp, ExprAssign +from 
miasm.ir.symbexec_types import SymbExecCType +from miasm.expression.parser import str_to_expr +from miasm.analysis.cst_propag import add_state, propagate_cst_expr from utils import guess_machine diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index 3de19cbc..73fc0f87 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -12,13 +12,13 @@ import ida_funcs import ida_kernwin -from miasm2.core.bin_stream_ida import bin_stream_ida -from miasm2.core.asmblock import * -from miasm2.expression import expression as m2_expr +from miasm.core.bin_stream_ida import bin_stream_ida +from miasm.core.asmblock import * +from miasm.expression import expression as m2_expr -from miasm2.expression.simplifications import expr_simp -from miasm2.analysis.depgraph import DependencyGraph -from miasm2.ir.ir import AssignBlock, IRBlock +from miasm.expression.simplifications import expr_simp +from miasm.analysis.depgraph import DependencyGraph +from miasm.ir.ir import AssignBlock, IRBlock from utils import guess_machine diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index de46c22d..c011b4c3 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -11,13 +11,13 @@ import idc import ida_funcs import idautils -from miasm2.core.asmblock import is_int -from miasm2.core.bin_stream_ida import bin_stream_ida -from miasm2.expression.simplifications import expr_simp -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.analysis.data_flow import load_from_int +from miasm.core.asmblock import is_int +from miasm.core.bin_stream_ida import bin_stream_ida +from miasm.expression.simplifications import expr_simp +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.analysis.data_flow import load_from_int from utils import guess_machine, expr2colorstr -from miasm2.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA +from miasm.analysis.simplifier import IRCFGSimplifierCommon, IRCFGSimplifierSSA diff --git 
a/example/ida/symbol_exec.py b/example/ida/symbol_exec.py index aa1d57fe..c0ed89f3 100644 --- a/example/ida/symbol_exec.py +++ b/example/ida/symbol_exec.py @@ -7,8 +7,8 @@ import idaapi import idc -from miasm2.expression.expression_helper import Variables_Identifier -from miasm2.expression.expression import ExprAssign +from miasm.expression.expression_helper import Variables_Identifier +from miasm.expression.expression import ExprAssign from utils import expr2colorstr, translatorForm @@ -130,8 +130,8 @@ class Hooks(idaapi.UI_Hooks): def symbolic_exec(): - from miasm2.ir.symbexec import SymbolicExecutionEngine - from miasm2.core.bin_stream_ida import bin_stream_ida + from miasm.ir.symbexec import SymbolicExecutionEngine + from miasm.core.bin_stream_ida import bin_stream_ida from utils import guess_machine diff --git a/example/ida/utils.py b/example/ida/utils.py index b6d5dac4..cb4ef4d8 100644 --- a/example/ida/utils.py +++ b/example/ida/utils.py @@ -3,9 +3,9 @@ from builtins import map import idaapi from idc import * -from miasm2.analysis.machine import Machine -from miasm2.ir.translators import Translator -import miasm2.expression.expression as m2_expr +from miasm.analysis.machine import Machine +from miasm.ir.translators import Translator +import miasm.expression.expression as m2_expr def guess_machine(addr=None): "Return an instance of Machine corresponding to the IDA guessed processor" @@ -55,8 +55,8 @@ def guess_machine(addr=None): raise NotImplementedError('not fully functional') machine = Machine(info2machine[infos]) - from miasm2.analysis.disasm_cb import guess_funcs, guess_multi_cb - from miasm2.analysis.disasm_cb import arm_guess_subcall, arm_guess_jump_table + from miasm.analysis.disasm_cb import guess_funcs, guess_multi_cb + from miasm.analysis.disasm_cb import arm_guess_subcall, arm_guess_jump_table guess_funcs.append(arm_guess_subcall) guess_funcs.append(arm_guess_jump_table) diff --git a/example/jitter/arm.py b/example/jitter/arm.py index 
86772874..daea2428 100755 --- a/example/jitter/arm.py +++ b/example/jitter/arm.py @@ -4,7 +4,7 @@ from __future__ import print_function import logging from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Linux_arml +from miasm.analysis.sandbox import Sandbox_Linux_arml # Get arguments parser = Sandbox_Linux_arml.parser(description="""Sandbox an elf binary with arm diff --git a/example/jitter/arm_sc.py b/example/jitter/arm_sc.py index b81d3784..8d5b5677 100755 --- a/example/jitter/arm_sc.py +++ b/example/jitter/arm_sc.py @@ -1,8 +1,8 @@ #! /usr/bin/env python2 #-*- coding:utf-8 -*- -from miasm2.core.utils import int_to_byte -from miasm2.analysis.sandbox import Sandbox_Linux_armb_str -from miasm2.analysis.sandbox import Sandbox_Linux_arml_str +from miasm.core.utils import int_to_byte +from miasm.analysis.sandbox import Sandbox_Linux_armb_str +from miasm.analysis.sandbox import Sandbox_Linux_arml_str from elfesteem.strpatchwork import StrPatchwork from pdb import pm diff --git a/example/jitter/example_types.py b/example/jitter/example_types.py index d0751bbd..653adaf9 100755 --- a/example/jitter/example_types.py +++ b/example/jitter/example_types.py @@ -1,16 +1,16 @@ #! /usr/bin/env python2 -"""This script is just a short example of common usages for miasm2.core.types. +"""This script is just a short example of common usages for miasm.core.types. For a more complete view of what is possible, tests/core/types.py covers most of the module possibilities, and the module doc gives useful information as well. 
""" from __future__ import print_function -from miasm2.core.utils import iterbytes -from miasm2.analysis.machine import Machine -from miasm2.core.types import MemStruct, Self, Void, Str, Array, Ptr, \ +from miasm.core.utils import iterbytes +from miasm.analysis.machine import Machine +from miasm.core.types import MemStruct, Self, Void, Str, Array, Ptr, \ Num, Array, set_allocator -from miasm2.os_dep.common import heap +from miasm.os_dep.common import heap # Instantiate a heap my_heap = heap() @@ -152,7 +152,7 @@ print("module in the first part, and how to play with some casts in the second." print() # A random jitter -# You can also use miasm2.jitter.VmMngr.Vm(), but it does not happen in real +# You can also use miasm.jitter.VmMngr.Vm(), but it does not happen in real # life scripts, so here is the usual way: jitter = Machine("x86_32").jitter("python") vm = jitter.vm @@ -255,6 +255,6 @@ print("An argv instance:", repr(argv)) print("argv values:", repr([val.deref.val for val in argv[:-1]])) print() -print("See test/core/types.py and the miasm2.core.types module doc for ") +print("See test/core/types.py and the miasm.core.types module doc for ") print("more information.") diff --git a/example/jitter/mips32.py b/example/jitter/mips32.py index 2eb06c87..4aeb576f 100755 --- a/example/jitter/mips32.py +++ b/example/jitter/mips32.py @@ -2,9 +2,9 @@ #-*- coding:utf-8 -*- from __future__ import print_function from argparse import ArgumentParser -from miasm2.analysis import debugging -from miasm2.jitter.csts import * -from miasm2.analysis.machine import Machine +from miasm.analysis import debugging +from miasm.jitter.csts import * +from miasm.analysis.machine import Machine parser = ArgumentParser( description="""Sandbox raw binary with mips32 engine diff --git a/example/jitter/msp430.py b/example/jitter/msp430.py index 1ecb4cef..972a1fdc 100755 --- a/example/jitter/msp430.py +++ b/example/jitter/msp430.py @@ -2,9 +2,9 @@ #-*- coding:utf-8 -*- from __future__ import 
print_function from argparse import ArgumentParser -from miasm2.analysis import debugging -from miasm2.jitter.csts import * -from miasm2.analysis.machine import Machine +from miasm.analysis import debugging +from miasm.jitter.csts import * +from miasm.analysis.machine import Machine parser = ArgumentParser( description="""Sandbox raw binary with msp430 engine diff --git a/example/jitter/run_with_linuxenv.py b/example/jitter/run_with_linuxenv.py index fda76f9a..e2869699 100644 --- a/example/jitter/run_with_linuxenv.py +++ b/example/jitter/run_with_linuxenv.py @@ -4,9 +4,9 @@ import re from elfesteem import elf as elf_csts -from miasm2.os_dep.linux import environment, syscall -from miasm2.analysis.machine import Machine -from miasm2.analysis.binary import Container +from miasm.os_dep.linux import environment, syscall +from miasm.analysis.machine import Machine +from miasm.analysis.binary import Container parser = ArgumentParser("Run an ELF in a Linux-like environment") parser.add_argument("target", help="Target ELF") diff --git a/example/jitter/sandbox_call.py b/example/jitter/sandbox_call.py index 3eb0b86e..7d400b7d 100644 --- a/example/jitter/sandbox_call.py +++ b/example/jitter/sandbox_call.py @@ -1,10 +1,10 @@ """This example illustrate the Sandbox.call API, for direct call of a given function""" -from miasm2.analysis.sandbox import Sandbox_Linux_arml -from miasm2.analysis.binary import Container -from miasm2.os_dep.linux_stdlib import linobjs -from miasm2.core.utils import hexdump +from miasm.analysis.sandbox import Sandbox_Linux_arml +from miasm.analysis.binary import Container +from miasm.os_dep.linux_stdlib import linobjs +from miasm.core.utils import hexdump # Parse arguments parser = Sandbox_Linux_arml.parser(description="ELF sandboxer") diff --git a/example/jitter/sandbox_elf_aarch64l.py b/example/jitter/sandbox_elf_aarch64l.py index 0f028876..472b2354 100644 --- a/example/jitter/sandbox_elf_aarch64l.py +++ b/example/jitter/sandbox_elf_aarch64l.py @@ -1,7 
+1,7 @@ import logging from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Linux_aarch64l -from miasm2.jitter.jitload import log_func +from miasm.analysis.sandbox import Sandbox_Linux_aarch64l +from miasm.jitter.jitload import log_func # Insert here user defined methods diff --git a/example/jitter/sandbox_elf_ppc32.py b/example/jitter/sandbox_elf_ppc32.py index 04ecfd9e..829381fc 100644 --- a/example/jitter/sandbox_elf_ppc32.py +++ b/example/jitter/sandbox_elf_ppc32.py @@ -1,8 +1,8 @@ import os from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Linux_ppc32b -from miasm2.jitter.csts import * -from miasm2.jitter.jitload import log_func +from miasm.analysis.sandbox import Sandbox_Linux_ppc32b +from miasm.jitter.csts import * +from miasm.jitter.jitload import log_func import logging # Insert here user defined methods diff --git a/example/jitter/sandbox_pe_x86_32.py b/example/jitter/sandbox_pe_x86_32.py index 3a627b19..263fad94 100644 --- a/example/jitter/sandbox_pe_x86_32.py +++ b/example/jitter/sandbox_pe_x86_32.py @@ -1,5 +1,5 @@ from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Win_x86_32 +from miasm.analysis.sandbox import Sandbox_Win_x86_32 # Insert here user defined methods diff --git a/example/jitter/sandbox_pe_x86_64.py b/example/jitter/sandbox_pe_x86_64.py index 773c54b9..4d8f00ce 100644 --- a/example/jitter/sandbox_pe_x86_64.py +++ b/example/jitter/sandbox_pe_x86_64.py @@ -1,5 +1,5 @@ from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Win_x86_64 +from miasm.analysis.sandbox import Sandbox_Win_x86_64 # Insert here user defined methods diff --git a/example/jitter/test_x86_32_seh.py b/example/jitter/test_x86_32_seh.py index e7f8cff4..595b9586 100644 --- a/example/jitter/test_x86_32_seh.py +++ b/example/jitter/test_x86_32_seh.py @@ -1,8 +1,8 @@ import os from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Win_x86_32 -from miasm2.os_dep import win_api_x86_32_seh -from miasm2.jitter.csts import * +from 
miasm.analysis.sandbox import Sandbox_Win_x86_32 +from miasm.os_dep import win_api_x86_32_seh +from miasm.jitter.csts import * def deal_exception_access_violation(jitter): jitter.pc = win_api_x86_32_seh.fake_seh_handler(jitter, win_api_x86_32_seh.EXCEPTION_ACCESS_VIOLATION) diff --git a/example/jitter/trace.py b/example/jitter/trace.py index 9f025bfd..46b313c1 100644 --- a/example/jitter/trace.py +++ b/example/jitter/trace.py @@ -11,9 +11,9 @@ from __future__ import print_function import os import time from pdb import pm -from miasm2.analysis.sandbox import Sandbox_Linux_arml -from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec -from miasm2.jitter.jitcore_python import JitCore_Python +from miasm.analysis.sandbox import Sandbox_Linux_arml +from miasm.jitter.emulatedsymbexec import EmulatedSymbExec +from miasm.jitter.jitcore_python import JitCore_Python # Function called at each instruction instr_count = 0 diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index 5d862dd1..0a41d038 100644 --- a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -3,7 +3,7 @@ import os import logging from pdb import pm from elfesteem import pe -from miasm2.analysis.sandbox import Sandbox_Win_x86_32 +from miasm.analysis.sandbox import Sandbox_Win_x86_32 # User defined methods @@ -93,7 +93,7 @@ sb.jitter.add_breakpoint(end_offset, update_binary) sb.run() # Rebuild PE -# Alternative solution: miasm2.jitter.loader.pe.vm2pe(sb.jitter, out_fname, +# Alternative solution: miasm.jitter.loader.pe.vm2pe(sb.jitter, out_fname, # libs=sb.libs, e_orig=sb.pe) new_dll = [] diff --git a/example/jitter/x86_32.py b/example/jitter/x86_32.py index 2a73a2ad..c2273b69 100644 --- a/example/jitter/x86_32.py +++ b/example/jitter/x86_32.py @@ -1,6 +1,6 @@ from argparse import ArgumentParser -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine 
import Machine from pdb import pm diff --git a/example/symbol_exec/depgraph.py b/example/symbol_exec/depgraph.py index c1dbd422..c7b9017f 100644 --- a/example/symbol_exec/depgraph.py +++ b/example/symbol_exec/depgraph.py @@ -6,10 +6,10 @@ import json from future.utils import viewitems -from miasm2.analysis.machine import Machine -from miasm2.analysis.binary import Container -from miasm2.analysis.depgraph import DependencyGraph -from miasm2.expression.expression import ExprMem, ExprId, ExprInt +from miasm.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.analysis.depgraph import DependencyGraph +from miasm.expression.expression import ExprMem, ExprId, ExprInt parser = ArgumentParser("Dependency grapher") parser.add_argument("filename", help="Binary to analyse") diff --git a/example/symbol_exec/dse_crackme.py b/example/symbol_exec/dse_crackme.py index 33ec3b72..90774dc3 100644 --- a/example/symbol_exec/dse_crackme.py +++ b/example/symbol_exec/dse_crackme.py @@ -16,10 +16,10 @@ from pdb import pm from tempfile import NamedTemporaryFile from future.utils import viewitems -from miasm2.core.utils import int_to_byte -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.sandbox import Sandbox_Linux_x86_64 -from miasm2.expression.expression import * +from miasm.core.utils import int_to_byte +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.sandbox import Sandbox_Linux_x86_64 +from miasm.expression.expression import * is_win = platform.system() == "Windows" @@ -96,8 +96,8 @@ sb.jitter.init_run(sb.entry_point) #### This part handle the DSE #### -from miasm2.analysis.dse import DSEPathConstraint -from miasm2.analysis.machine import Machine +from miasm.analysis.dse import DSEPathConstraint +from miasm.analysis.machine import Machine # File "management" diff --git a/example/symbol_exec/dse_strategies.py b/example/symbol_exec/dse_strategies.py index 8e479d61..3f968215 100644 --- 
a/example/symbol_exec/dse_strategies.py +++ b/example/symbol_exec/dse_strategies.py @@ -22,10 +22,10 @@ from argparse import ArgumentParser from future.utils import viewitems -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.dse import DSEPathConstraint -from miasm2.expression.expression import ExprMem, ExprId, ExprInt, ExprAssign +from miasm.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.dse import DSEPathConstraint +from miasm.expression.expression import ExprMem, ExprId, ExprInt, ExprAssign # Argument handling parser = ArgumentParser("DSE Example") diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index bdc65360..789252df 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -1,9 +1,9 @@ from __future__ import print_function # Minimalist Symbol Exec example -from miasm2.analysis.binary import Container -from miasm2.analysis.machine import Machine -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.core.locationdb import LocationDB +from miasm.analysis.binary import Container +from miasm.analysis.machine import Machine +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.core.locationdb import LocationDB START_ADDR = 0 machine = Machine("x86_32") diff --git a/miasm/__init__.py b/miasm/__init__.py new file mode 100644 index 00000000..b7dbe3b4 --- /dev/null +++ b/miasm/__init__.py @@ -0,0 +1 @@ +"Reverse engineering framework in Python" diff --git a/miasm/analysis/__init__.py b/miasm/analysis/__init__.py new file mode 100644 index 00000000..5abdd3a3 --- /dev/null +++ b/miasm/analysis/__init__.py @@ -0,0 +1 @@ +"High-level tools for binary analysis" diff --git a/miasm/analysis/binary.py b/miasm/analysis/binary.py new file mode 100644 index 00000000..82f83112 --- /dev/null +++ b/miasm/analysis/binary.py @@ -0,0 +1,236 @@ +import 
logging +import warnings + +from miasm.core.bin_stream import bin_stream_str, bin_stream_elf, bin_stream_pe +from miasm.jitter.csts import PAGE_READ +from miasm.core.locationdb import LocationDB + + +log = logging.getLogger("binary") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.ERROR) + + +# Container +## Exceptions +class ContainerSignatureException(Exception): + "The container does not match the current container signature" + + +class ContainerParsingException(Exception): + "Error during container parsing" + + +## Parent class +class Container(object): + """Container abstraction layer + + This class aims to offer a common interface for abstracting container + such as PE or ELF. + """ + + available_container = [] # Available container formats + fallback_container = None # Fallback container format + + @classmethod + def from_string(cls, data, *args, **kwargs): + """Instantiate a container and parse the binary + @data: str containing the binary + """ + log.info('Load binary') + # Try each available format + for container_type in cls.available_container: + try: + return container_type(data, *args, **kwargs) + except ContainerSignatureException: + continue + except ContainerParsingException as error: + log.error(error) + + # Fallback mode + log.warning('Fallback to string input') + return cls.fallback_container(data, *args, **kwargs) + + @classmethod + def register_container(cls, container): + "Add a Container format" + cls.available_container.append(container) + + @classmethod + def register_fallback(cls, container): + "Set the Container fallback format" + cls.fallback_container = container + + @classmethod + def from_stream(cls, stream, *args, **kwargs): + """Instantiate a container and parse the binary + @stream: stream to use as binary + @vm: (optional) VmMngr instance to link with the executable + @addr: (optional) Base address of 
the parsed binary. If set, + force the unknown format + """ + return Container.from_string(stream.read(), *args, **kwargs) + + def parse(self, data, *args, **kwargs): + """Launch parsing of @data + @data: str containing the binary + """ + raise NotImplementedError("Abstract method") + + def __init__(self, data, loc_db=None, **kwargs): + "Alias for 'parse'" + # Init attributes + self._executable = None + self._bin_stream = None + self._entry_point = None + self._arch = None + if loc_db is None: + self._loc_db = LocationDB() + else: + self._loc_db = loc_db + + # Launch parsing + self.parse(data, **kwargs) + + @property + def bin_stream(self): + "Return the BinStream instance corresponding to container content" + return self._bin_stream + + @property + def executable(self): + "Return the abstract instance standing for parsed executable" + return self._executable + + @property + def entry_point(self): + "Return the detected entry_point" + return self._entry_point + + @property + def arch(self): + "Return the guessed architecture" + return self._arch + + @property + def loc_db(self): + "LocationDB instance preloaded with container symbols (if any)" + return self._loc_db + + @property + def symbol_pool(self): + "[DEPRECATED API]" + warnings.warn("Deprecated API: use 'loc_db'") + return self.loc_db + +## Format dependent classes +class ContainerPE(Container): + "Container abstraction for PE" + + def parse(self, data, vm=None, **kwargs): + from miasm.jitter.loader.pe import vm_load_pe, guess_arch + from elfesteem import pe_init + + # Parse signature + if not data.startswith(b'MZ'): + raise ContainerSignatureException() + + # Build executable instance + try: + if vm is not None: + self._executable = vm_load_pe(vm, data) + else: + self._executable = pe_init.PE(data) + except Exception as error: + raise ContainerParsingException('Cannot read PE: %s' % error) + + # Check instance validity + if not self._executable.isPE() or \ + self._executable.NTsig.signature_value != 0x4550: 
+ raise ContainerSignatureException() + + # Guess the architecture + self._arch = guess_arch(self._executable) + + # Build the bin_stream instance and set the entry point + try: + self._bin_stream = bin_stream_pe(self._executable) + ep_detected = self._executable.Opthdr.AddressOfEntryPoint + self._entry_point = self._executable.rva2virt(ep_detected) + except Exception as error: + raise ContainerParsingException('Cannot read PE: %s' % error) + + +class ContainerELF(Container): + "Container abstraction for ELF" + + def parse(self, data, vm=None, addr=0, apply_reloc=False, **kwargs): + """Load an ELF from @data + @data: bytes containing the ELF bytes + @vm (optional): VmMngr instance. If set, load the ELF in virtual memory + @addr (optional): base address the ELF in virtual memory + @apply_reloc (optional): if set, apply relocation during ELF loading + + @addr and @apply_reloc are only meaningful in the context of a + non-empty @vm + """ + from miasm.jitter.loader.elf import vm_load_elf, guess_arch, \ + fill_loc_db_with_symbols + from elfesteem import elf_init + + # Parse signature + if not data.startswith(b'\x7fELF'): + raise ContainerSignatureException() + + # Build executable instance + try: + if vm is not None: + self._executable = vm_load_elf( + vm, + data, + loc_db=self.loc_db, + base_addr=addr, + apply_reloc=apply_reloc + ) + else: + self._executable = elf_init.ELF(data) + except Exception as error: + raise ContainerParsingException('Cannot read ELF: %s' % error) + + # Guess the architecture + self._arch = guess_arch(self._executable) + + # Build the bin_stream instance and set the entry point + try: + self._bin_stream = bin_stream_elf(self._executable) + self._entry_point = self._executable.Ehdr.entry + addr + except Exception as error: + raise ContainerParsingException('Cannot read ELF: %s' % error) + + if vm is None: + # Add known symbols (vm_load_elf already does it) + fill_loc_db_with_symbols(self._executable, self.loc_db, addr) + + + +class 
ContainerUnknown(Container): + "Container abstraction for unknown format" + + def parse(self, data, vm=None, addr=0, **kwargs): + self._bin_stream = bin_stream_str(data, base_address=addr) + if vm is not None: + vm.add_memory_page( + addr, + PAGE_READ, + data + ) + self._executable = None + self._entry_point = 0 + + +## Register containers +Container.register_container(ContainerPE) +Container.register_container(ContainerELF) +Container.register_fallback(ContainerUnknown) diff --git a/miasm/analysis/cst_propag.py b/miasm/analysis/cst_propag.py new file mode 100644 index 00000000..d83c0458 --- /dev/null +++ b/miasm/analysis/cst_propag.py @@ -0,0 +1,185 @@ +import logging + +from future.utils import viewitems + +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.expression.expression import ExprMem +from miasm.expression.expression_helper import possible_values +from miasm.expression.simplifications import expr_simp +from miasm.ir.ir import IRBlock, AssignBlock + +LOG_CST_PROPAG = logging.getLogger("cst_propag") +CONSOLE_HANDLER = logging.StreamHandler() +CONSOLE_HANDLER.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +LOG_CST_PROPAG.addHandler(CONSOLE_HANDLER) +LOG_CST_PROPAG.setLevel(logging.WARNING) + + +class SymbExecState(SymbolicExecutionEngine): + """ + State manager for SymbolicExecution + """ + def __init__(self, ir_arch, ircfg, state): + super(SymbExecState, self).__init__(ir_arch, {}) + self.set_state(state) + + +def add_state(ircfg, todo, states, addr, state): + """ + Add or merge the computed @state for the block at @addr. Update @todo + @todo: modified block set + @states: dictionary linking a label to its entering state. 
+ @addr: address of the considered block + @state: computed state + """ + addr = ircfg.get_loc_key(addr) + todo.add(addr) + if addr not in states: + states[addr] = state + else: + states[addr] = states[addr].merge(state) + + +def is_expr_cst(ir_arch, expr): + """Return true if @expr is only composed of ExprInt and init_regs + @ir_arch: IR instance + @expr: Expression to test""" + + elements = expr.get_r(mem_read=True) + for element in elements: + if element.is_mem(): + continue + if element.is_id() and element in ir_arch.arch.regs.all_regs_ids_init: + continue + if element.is_int(): + continue + return False + # Expr is a constant + return True + + +class SymbExecStateFix(SymbolicExecutionEngine): + """ + Emul blocks and replace expressions with their corresponding constant if + any. + + """ + # Function used to test if an Expression is considered as a constant + is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) + + def __init__(self, ir_arch, ircfg, state, cst_propag_link): + self.ircfg = ircfg + super(SymbExecStateFix, self).__init__(ir_arch, {}) + self.set_state(state) + self.cst_propag_link = cst_propag_link + + def propag_expr_cst(self, expr): + """Propagate constant expressions in @expr + @expr: Expression to update""" + elements = expr.get_r(mem_read=True) + to_propag = {} + for element in elements: + # Only ExprId can be safely propagated + if not element.is_id(): + continue + value = self.eval_expr(element) + if self.is_expr_cst(self.ir_arch, value): + to_propag[element] = value + return expr_simp(expr.replace_expr(to_propag)) + + def eval_updt_irblock(self, irb, step=False): + """ + Symbolic execution of the @irb on the current state + @irb: IRBlock instance + @step: display intermediate steps + """ + assignblks = [] + for index, assignblk in enumerate(irb): + new_assignblk = {} + links = {} + for dst, src in viewitems(assignblk): + src = self.propag_expr_cst(src) + if dst.is_mem(): + ptr = dst.ptr + ptr = self.propag_expr_cst(ptr) + dst = 
ExprMem(ptr, dst.size) + new_assignblk[dst] = src + + if assignblk.instr is not None: + for arg in assignblk.instr.args: + new_arg = self.propag_expr_cst(arg) + links[new_arg] = arg + self.cst_propag_link[(irb.loc_key, index)] = links + + self.eval_updt_assignblk(assignblk) + assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) + self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) + + +def compute_cst_propagation_states(ir_arch, ircfg, init_addr, init_infos): + """ + Propagate "constant expressions" in a function. + The attribute "constant expression" is true if the expression is based on + constants or "init" regs values. + + @ir_arch: IntermediateRepresentation instance + @init_addr: analysis start address + @init_infos: dictionary linking expressions to their values at @init_addr + """ + + done = set() + state = SymbExecState.StateEngine(init_infos) + lbl = ircfg.get_loc_key(init_addr) + todo = set([lbl]) + states = {lbl: state} + + while todo: + if not todo: + break + lbl = todo.pop() + state = states[lbl] + if (lbl, state) in done: + continue + done.add((lbl, state)) + if lbl not in ircfg.blocks: + continue + + symbexec_engine = SymbExecState(ir_arch, ircfg, state) + addr = symbexec_engine.run_block_at(ircfg, lbl) + symbexec_engine.del_mem_above_stack(ir_arch.sp) + + for dst in possible_values(addr): + value = dst.value + if value.is_mem(): + LOG_CST_PROPAG.warning('Bad destination: %s', value) + continue + elif value.is_int(): + value = ircfg.get_loc_key(value) + add_state( + ircfg, todo, states, value, + symbexec_engine.get_state() + ) + + return states + + +def propagate_cst_expr(ir_arch, ircfg, addr, init_infos): + """ + Propagate "constant expressions" in a @ir_arch. + The attribute "constant expression" is true if the expression is based on + constants or "init" regs values. 
+ + @ir_arch: IntermediateRepresentation instance + @addr: analysis start address + @init_infos: dictionary linking expressions to their values at @init_addr + + Returns a mapping between replaced Expression and their new values. + """ + states = compute_cst_propagation_states(ir_arch, ircfg, addr, init_infos) + cst_propag_link = {} + for lbl, state in viewitems(states): + if lbl not in ircfg.blocks: + continue + symbexec = SymbExecStateFix(ir_arch, ircfg, state, cst_propag_link) + symbexec.eval_updt_irblock(ircfg.blocks[lbl]) + return cst_propag_link diff --git a/miasm/analysis/data_analysis.py b/miasm/analysis/data_analysis.py new file mode 100644 index 00000000..54876487 --- /dev/null +++ b/miasm/analysis/data_analysis.py @@ -0,0 +1,204 @@ +from __future__ import print_function + +from future.utils import viewitems + +from builtins import object +from functools import cmp_to_key +from miasm.expression.expression \ + import get_expr_mem, get_list_rw, ExprId, ExprInt, \ + compare_exprs +from miasm.ir.symbexec import SymbolicExecutionEngine + + +def get_node_name(label, i, n): + n_name = (label, i, n) + return n_name + + +def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes): + """ + Create data flow for an irbloc using raw IR expressions + """ + current_nodes = {} + for i, assignblk in enumerate(irb): + dict_rw = assignblk.get_rw(cst_read=True) + current_nodes.update(out_nodes) + + # gen mem arg to mem node links + all_mems = set() + for node_w, nodes_r in viewitems(dict_rw): + for n in nodes_r.union([node_w]): + all_mems.update(get_expr_mem(n)) + if not all_mems: + continue + + for n in all_mems: + node_n_w = get_node_name(irb.loc_key, i, n) + if not n in nodes_r: + continue + o_r = n.ptr.get_r(mem_read=False, cst_read=True) + for n_r in o_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irb.loc_key, i, n_r) + current_nodes[n_r] = node_n_r + in_nodes[n_r] = node_n_r + 
flow_graph.add_uniq_edge(node_n_r, node_n_w) + + # gen data flow links + for node_w, nodes_r in viewitems(dict_rw): + for n_r in nodes_r: + if n_r in current_nodes: + node_n_r = current_nodes[n_r] + else: + node_n_r = get_node_name(irb.loc_key, i, n_r) + current_nodes[n_r] = node_n_r + in_nodes[n_r] = node_n_r + + flow_graph.add_node(node_n_r) + + node_n_w = get_node_name(irb.loc_key, i + 1, node_w) + out_nodes[node_w] = node_n_w + + flow_graph.add_node(node_n_w) + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + + +def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): + lbl, current_nodes, exec_nodes = todo + current_nodes = dict(current_nodes) + + # link current nodes to bloc in_nodes + if not lbl in ircfg.blocks: + print("cannot find bloc!!", lbl) + return set() + irb = ircfg.blocks[lbl] + to_del = set() + for n_r, node_n_r in viewitems(irb_in_nodes[irb.loc_key]): + if not n_r in current_nodes: + continue + flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r) + to_del.add(n_r) + + # if link exec to data, all nodes depends on exec nodes + if link_exec_to_data: + for n_x_r in exec_nodes: + for n_r, node_n_r in viewitems(irb_in_nodes[irb.loc_key]): + if not n_x_r in current_nodes: + continue + if isinstance(n_r, ExprInt): + continue + flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r) + + # update current nodes using bloc out_nodes + for n_w, node_n_w in viewitems(irb_out_nodes[irb.loc_key]): + current_nodes[n_w] = node_n_w + + # get nodes involved in exec flow + x_nodes = tuple(sorted(irb.dst.get_r(), key=cmp_to_key(compare_exprs))) + + todo = set() + for lbl_dst in ircfg.successors(irb.loc_key): + todo.add((lbl_dst, tuple(viewitems(current_nodes)), x_nodes)) + + return todo + + +def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): + + # first fix IN/OUT + # If a son read a node which in not in OUT, add it + todo = set(ir_arch.blocks.keys()) + while todo: + lbl = todo.pop() + irb 
= ir_arch.blocks[lbl] + for lbl_son in ir_arch.graph.successors(irb.loc_key): + if not lbl_son in ir_arch.blocks: + print("cannot find bloc!!", lbl) + continue + irb_son = ir_arch.blocks[lbl_son] + for n_r in irb_in_nodes[irb_son.loc_key]: + if n_r in irb_out_nodes[irb.loc_key]: + continue + if not isinstance(n_r, ExprId): + continue + + node_n_w = irb.loc_key, len(irb), n_r + irb_out_nodes[irb.loc_key][n_r] = node_n_w + if not n_r in irb_in_nodes[irb.loc_key]: + irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r + node_n_r = irb_in_nodes[irb.loc_key][n_r] + for lbl_p in ir_arch.graph.predecessors(irb.loc_key): + todo.add(lbl_p) + + flow_graph.add_uniq_edge(node_n_r, node_n_w) + + +def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): + + todo = set() + done = set() + todo.add((irb_0, (), ())) + + while todo: + state = todo.pop() + if state in done: + continue + done.add(state) + out = inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) + todo.update(out) + + +class symb_exec_func(object): + + """ + This algorithm will do symbolic execution on a function, trying to propagate + states between basic blocks in order to extract inter-blocs dataflow. The + algorithm tries to merge states from blocks with multiple parents. + + There is no real magic here, loops and complex merging will certainly fail. + """ + + def __init__(self, ir_arch): + self.todo = set() + self.stateby_ad = {} + self.cpt = {} + self.states_var_done = set() + self.states_done = set() + self.total_done = 0 + self.ir_arch = ir_arch + + def add_state(self, parent, ad, state): + variables = dict(state.symbols) + + # get bloc dead, and remove from state + b = self.ir_arch.get_block(ad) + if b is None: + raise ValueError("unknown bloc! 
%s" % ad) + s = parent, ad, tuple(sorted(viewitems(variables))) + self.todo.add(s) + + def get_next_state(self): + state = self.todo.pop() + return state + + def do_step(self): + if len(self.todo) == 0: + return None + if self.total_done > 600: + print("symbexec watchdog!") + return None + self.total_done += 1 + print('CPT', self.total_done) + while self.todo: + state = self.get_next_state() + parent, ad, s = state + self.states_done.add(state) + self.states_var_done.add(state) + + sb = SymbolicExecutionEngine(self.ir_arch, dict(s)) + + return parent, ad, sb + return None diff --git a/miasm/analysis/data_flow.py b/miasm/analysis/data_flow.py new file mode 100644 index 00000000..541e6d24 --- /dev/null +++ b/miasm/analysis/data_flow.py @@ -0,0 +1,1580 @@ +"""Data flow analysis based on miasm intermediate representation""" +from builtins import range +from collections import namedtuple + +from future.utils import viewitems, viewvalues +from miasm.core.utils import encode_hex +from miasm.core.graph import DiGraph +from miasm.ir.ir import AssignBlock, IRBlock +from miasm.expression.expression import ExprLoc, ExprMem, ExprId, ExprInt,\ + ExprAssign, ExprOp +from miasm.expression.simplifications import expr_simp +from miasm.core.interval import interval +from miasm.expression.expression_helper import possible_values +from miasm.analysis.ssa import get_phi_sources_parent_block, \ + irblock_has_phi + + +class ReachingDefinitions(dict): + """ + Computes for each assignblock the set of reaching definitions. + Example: + IR block: + lbl0: + 0 A = 1 + B = 3 + 1 B = 2 + 2 A = A + B + 4 + + Reach definition of lbl0: + (lbl0, 0) => {} + (lbl0, 1) => {A: {(lbl0, 0)}, B: {(lbl0, 0)}} + (lbl0, 2) => {A: {(lbl0, 0)}, B: {(lbl0, 1)}} + (lbl0, 3) => {A: {(lbl0, 2)}, B: {(lbl0, 1)}} + + Source set 'REACHES' in: Kennedy, K. (1979). + A survey of data flow analysis techniques. + IBM Thomas J. 
Watson Research Division, Algorithm MK + + This class is usable as a dictionary whose structure is + { (block, index): { lvalue: set((block, index)) } } + """ + + ircfg = None + + def __init__(self, ircfg): + super(ReachingDefinitions, self).__init__() + self.ircfg = ircfg + self.compute() + + def get_definitions(self, block_lbl, assignblk_index): + """Returns the dict { lvalue: set((def_block_lbl, def_index)) } + associated with self.ircfg.@block.assignblks[@assignblk_index] + or {} if it is not yet computed + """ + return self.get((block_lbl, assignblk_index), {}) + + def compute(self): + """This is the main fixpoint""" + modified = True + while modified: + modified = False + for block in viewvalues(self.ircfg.blocks): + modified |= self.process_block(block) + + def process_block(self, block): + """ + Fetch reach definitions from predecessors and propagate it to + the assignblk in block @block. + """ + predecessor_state = {} + for pred_lbl in self.ircfg.predecessors(block.loc_key): + pred = self.ircfg.blocks[pred_lbl] + for lval, definitions in viewitems(self.get_definitions(pred_lbl, len(pred))): + predecessor_state.setdefault(lval, set()).update(definitions) + + modified = self.get((block.loc_key, 0)) != predecessor_state + if not modified: + return False + self[(block.loc_key, 0)] = predecessor_state + + for index in range(len(block)): + modified |= self.process_assignblock(block, index) + return modified + + def process_assignblock(self, block, assignblk_index): + """ + Updates the reach definitions with values defined at + assignblock @assignblk_index in block @block. + NB: the effect of assignblock @assignblk_index in stored at index + (@block, @assignblk_index + 1). 
+ """ + + assignblk = block[assignblk_index] + defs = self.get_definitions(block.loc_key, assignblk_index).copy() + for lval in assignblk: + defs.update({lval: set([(block.loc_key, assignblk_index)])}) + + modified = self.get((block.loc_key, assignblk_index + 1)) != defs + if modified: + self[(block.loc_key, assignblk_index + 1)] = defs + + return modified + +ATTR_DEP = {"color" : "black", + "_type" : "data"} + +AssignblkNode = namedtuple('AssignblkNode', ['label', 'index', 'var']) + + +class DiGraphDefUse(DiGraph): + """Representation of a Use-Definition graph as defined by + Kennedy, K. (1979). A survey of data flow analysis techniques. + IBM Thomas J. Watson Research Division. + Example: + IR block: + lbl0: + 0 A = 1 + B = 3 + 1 B = 2 + 2 A = A + B + 4 + + Def use analysis: + (lbl0, 0, A) => {(lbl0, 2, A)} + (lbl0, 0, B) => {} + (lbl0, 1, B) => {(lbl0, 2, A)} + (lbl0, 2, A) => {} + + """ + + + def __init__(self, reaching_defs, + deref_mem=False, *args, **kwargs): + """Instantiate a DiGraph + @blocks: IR blocks + """ + self._edge_attr = {} + + # For dot display + self._filter_node = None + self._dot_offset = None + self._blocks = reaching_defs.ircfg.blocks + + super(DiGraphDefUse, self).__init__(*args, **kwargs) + self._compute_def_use(reaching_defs, + deref_mem=deref_mem) + + def edge_attr(self, src, dst): + """ + Return a dictionary of attributes for the edge between @src and @dst + @src: the source node of the edge + @dst: the destination node of the edge + """ + return self._edge_attr[(src, dst)] + + def _compute_def_use(self, reaching_defs, + deref_mem=False): + for block in viewvalues(self._blocks): + self._compute_def_use_block(block, + reaching_defs, + deref_mem=deref_mem) + + def _compute_def_use_block(self, block, reaching_defs, deref_mem=False): + for index, assignblk in enumerate(block): + assignblk_reaching_defs = reaching_defs.get_definitions(block.loc_key, index) + for lval, expr in viewitems(assignblk): + self.add_node(AssignblkNode(block.loc_key, 
def dead_simp_useful_assignblks(irarch, defuse, reaching_defs):
    """Yield useful statements using previous reach analysis and defuse

    Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
    IBM Thomas J. Watson Research Division,  Algorithm MK

    This function is a generator: it yields AssignblkNode triplets
    (block loc_key, assignblk index, lvalue). Every node returned by
    @defuse.reachable_parents for a seed definition is yielded, so a node may
    be produced more than once; wrap the result in set() to get unique nodes.

    @irarch: ira instance (provides get_out_regs and IRDst)
    @defuse: DiGraphDefUse instance
    @reaching_defs: ReachingDefinitions instance (provides ircfg and
                    get_definitions)
    """
    ircfg = reaching_defs.ircfg
    useful = set()

    for block_lbl, block in viewitems(ircfg.blocks):
        successors = ircfg.successors(block_lbl)
        # A successor without IR block means we cannot know what is read
        # next: every reaching definition must be kept
        keep_all_definitions = any(
            successor not in ircfg.blocks for successor in successors
        )

        # Block has a nonexistent successor or is a leaf
        if keep_all_definitions or not successors:
            valid_definitions = reaching_defs.get_definitions(
                block_lbl,
                len(block)
            )
            # The out registers only depend on the block: compute them once,
            # and only when they are actually used as a filter
            out_regs = None
            if not keep_all_definitions:
                out_regs = irarch.get_out_regs(block)
            for lval, definitions in viewitems(valid_definitions):
                if keep_all_definitions or lval in out_regs:
                    for definition in definitions:
                        useful.add(
                            AssignblkNode(definition[0], definition[1], lval)
                        )

        # Force keeping of specific cases
        for index, assignblk in enumerate(block):
            for lval, rval in viewitems(assignblk):
                if (lval.is_mem() or
                    irarch.IRDst == lval or
                    lval.is_id("exception_flags") or
                    rval.is_function_call()):
                    useful.add(AssignblkNode(block_lbl, index, lval))

    # Useful nodes dependencies
    for node in useful:
        for parent in defuse.reachable_parents(node):
            yield parent
Watson Research Division, page 43 + + @ircfg: IntermediateRepresentation instance + """ + + modified = False + reaching_defs = ReachingDefinitions(ircfg) + defuse = DiGraphDefUse(reaching_defs, deref_mem=True) + useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) + for block in list(viewvalues(ircfg.blocks)): + irs = [] + for idx, assignblk in enumerate(block): + new_assignblk = dict(assignblk) + for lval in assignblk: + if AssignblkNode(block.loc_key, idx, lval) not in useful: + del new_assignblk[lval] + modified = True + irs.append(AssignBlock(new_assignblk, assignblk.instr)) + ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) + return modified + + +def _test_merge_next_block(ircfg, loc_key): + """ + Test if the irblock at @loc_key can be merge with its son + @ircfg: IRCFG instance + @loc_key: LocKey instance of the candidate parent irblock + """ + + if loc_key not in ircfg.blocks: + return None + sons = ircfg.successors(loc_key) + if len(sons) != 1: + return None + son = list(sons)[0] + if ircfg.predecessors(son) != [loc_key]: + return None + if son not in ircfg.blocks: + return None + + return son + + +def _do_merge_blocks(ircfg, loc_key, son_loc_key): + """ + Merge two irblocks at @loc_key and @son_loc_key + + @ircfg: DiGrpahIR + @loc_key: LocKey instance of the parent IRBlock + @loc_key: LocKey instance of the son IRBlock + """ + + assignblks = [] + for assignblk in ircfg.blocks[loc_key]: + if ircfg.IRDst not in assignblk: + assignblks.append(assignblk) + continue + affs = {} + for dst, src in viewitems(assignblk): + if dst != ircfg.IRDst: + affs[dst] = src + if affs: + assignblks.append(AssignBlock(affs, assignblk.instr)) + + assignblks += ircfg.blocks[son_loc_key].assignblks + new_block = IRBlock(loc_key, assignblks) + + ircfg.discard_edge(loc_key, son_loc_key) + + for son_successor in ircfg.successors(son_loc_key): + ircfg.add_uniq_edge(loc_key, son_successor) + ircfg.discard_edge(son_loc_key, son_successor) + del 
def _test_jmp_only(ircfg, loc_key, heads):
    """
    Detect a "jump only" irblock.

    If the irblock at @loc_key is made of a single assignment, which sets
    IRDst to an ExprLoc, return the loc_key targeted by this ExprLoc.
    When @loc_key is one of the @heads, the target is returned only if
    @loc_key is its sole predecessor (avoid creating predecessors on heads).
    Return None in every other case.

    @ircfg: IRCFG instance
    @loc_key: LocKey instance of the candidate irblock
    @heads: LocKey heads of the graph
    """
    if loc_key not in ircfg.blocks:
        return None

    assignblks = ircfg.blocks[loc_key].assignblks
    if len(assignblks) != 1:
        return None

    assignments = list(viewitems(dict(assignblks[0])))
    if len(assignments) != 1:
        return None

    if len(ircfg.successors(loc_key)) != 1:
        return None

    lval, rval = assignments[0]
    assert lval.is_id("IRDst")
    if not rval.is_loc():
        return None
    target = rval.loc_key

    if loc_key not in heads:
        return target

    # Don't create predecessors on heads
    other_parents = set(ircfg.predecessors(target)) - set([loc_key])
    if other_parents:
        return None
    return target
def _remove_to_parent(ircfg, loc_key, son_loc_key):
    """
    Merge irblocks; The final block has the @loc_key loc_key
    Update references

    Condition:
    - irblock at @loc_key is a pure jump block
    - @son_loc_key is not an entry point (can be removed)

    Return False (and leave @ircfg untouched) if @loc_key and @son_loc_key
    are the same location, True once the merge is done.

    @ircfg: IRCFG instance
    @loc_key: LocKey instance of the parent irblock
    @son_loc_key: LocKey instance of the son irblock
    """

    # Ircfg loop => don't mess
    if loc_key == son_loc_key:
        return False

    # Unlink block destinations
    ircfg.del_edge(loc_key, son_loc_key)

    # Rebuild the son's assignblks under the parent's loc_key
    old_irblock = ircfg.blocks[son_loc_key]
    new_irblock = IRBlock(loc_key, old_irblock.assignblks)

    # The relabelled block is also stored under @son_loc_key until the final
    # cleanup below: _relink_block_node fetches each predecessor's block from
    # ircfg.blocks, so if the son is one of its own predecessors it is this
    # relabelled block which gets rewritten
    ircfg.blocks[son_loc_key] = new_irblock

    # NOTE(review): presumably registers the block under @loc_key and
    # recreates its outgoing edges -- confirm against IRCFG.add_irblock
    ircfg.add_irblock(new_irblock)

    # Every reference to the son location now has to target the parent one
    replace_dct = {
        ExprLoc(son_loc_key, ircfg.IRDst.size):ExprLoc(loc_key, ircfg.IRDst.size)
    }

    _relink_block_node(ircfg, son_loc_key, loc_key, replace_dct)


    # Drop the son location: graph node first, then its irblock
    ircfg.del_node(son_loc_key)
    del ircfg.blocks[son_loc_key]

    return True
def remove_empty_assignblks(ircfg):
    """
    Drop the empty assignblks from the irblocks of @ircfg.

    Only the irblocks holding at least one empty assignblk are rebuilt.
    Return True if at least an irblock has been modified

    @ircfg: IRCFG instance
    """
    changed = False
    # Work on a snapshot: ircfg.blocks is updated while looping
    for loc_key, block in list(viewitems(ircfg.blocks)):
        assignblks = list(block)
        kept = [assignblk for assignblk in assignblks if len(assignblk)]
        if len(kept) == len(assignblks):
            # Nothing to remove in this block
            continue
        ircfg.blocks[loc_key] = IRBlock(loc_key, kept)
        changed = True
    return changed
def expr_test_visit(expr, test):
    """
    Walk @expr with the callback @test and tell if it collected something.

    @test is used as the "test_visit" callback of expr.visit: it is called as
    test(sub_expr, result), where result is a set shared by every call, and
    its return value is handed back to visit.
    Return True if @test added at least one element to the set, else False.

    @expr: Expr instance
    @test: callback(sub_expr, result)
    """
    found = set()

    def _keep_expr(sub_expr):
        # Rewriting callback: leave the expression untouched
        return sub_expr

    def _run_test(sub_expr):
        return test(sub_expr, found)

    expr.visit(_keep_expr, _run_test)
    return bool(found)
instance + """ + return expr_test_visit(expr, expr_has_mem_test) + + +class PropagateThroughExprId(object): + """ + Propagate expressions though ExprId + """ + + def has_propagation_barrier(self, assignblks): + """ + Return True if propagation cannot cross the @assignblks + @assignblks: list of AssignBlock to check + """ + for assignblk in assignblks: + for dst, src in viewitems(assignblk): + if src.is_function_call(): + return True + if dst.is_mem(): + return True + return False + + def is_mem_written(self, ssa, node_a, node_b): + """ + Return True if memory is written at least once between @node_a and + @node_b + + @node: AssignblkNode representing the start position + @successor: AssignblkNode representing the end position + """ + + block_b = ssa.graph.blocks[node_b.label] + nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, node_a.label, node_b.label) + + if node_a.label == node_b.label: + # src is dst + assert nodes_to_do == set([node_a.label]) + if self.has_propagation_barrier(block_b.assignblks[node_a.index:node_b.index]): + return True + else: + # Check everyone but node_a.label and node_b.label + for loc in nodes_to_do - set([node_a.label, node_b.label]): + block = ssa.graph.blocks[loc] + if self.has_propagation_barrier(block.assignblks): + return True + # Check node_a.label partially + block_a = ssa.graph.blocks[node_a.label] + if self.has_propagation_barrier(block_a.assignblks[node_a.index:]): + return True + if nodes_to_do.intersection(ssa.graph.successors(node_b.label)): + # There is a path from node_b.label to node_b.label => Check node_b.label fully + if self.has_propagation_barrier(block_b.assignblks): + return True + else: + # Check node_b.label partially + if self.has_propagation_barrier(block_b.assignblks[:node_b.index]): + return True + return False + + def compute_reachable_nodes_from_a_to_b(self, ssa, loc_a, loc_b): + reachables_a = set(ssa.reachable_sons(loc_a)) + reachables_b = set(ssa.reachable_parents_stop_node(loc_b, 
loc_a)) + return reachables_a.intersection(reachables_b) + + def propagation_allowed(self, ssa, to_replace, node_a, node_b): + """ + Return True if we can replace @node_a source present in @to_replace into + @node_b + + @node_a: AssignblkNode position + @node_b: AssignblkNode position + """ + if not expr_has_mem(to_replace[node_a.var]): + return True + if self.is_mem_written(ssa, node_a, node_b): + return False + return True + + + def get_var_definitions(self, ssa): + """ + Return a dictionary linking variable to its assignment location + @ssa: SSADiGraph instance + """ + ircfg = ssa.graph + def_dct = {} + for node in ircfg.nodes(): + for index, assignblk in enumerate(ircfg.blocks[node]): + for dst, src in viewitems(assignblk): + if not dst.is_id(): + continue + if dst in ssa.immutable_ids: + continue + assert dst not in def_dct + def_dct[dst] = node, index + return def_dct + + def phi_has_identical_sources(self, ssa, def_dct, var): + """ + If phi operation has identical source values, return it; else None + @ssa: SSADiGraph instance + @def_dct: dictionary linking variable to its assignment location + @var: Phi destination variable + """ + loc_key, index = def_dct[var] + sources = ssa.graph.blocks[loc_key][index][var] + assert sources.is_op('Phi') + sources_values = set() + for src in sources.args: + assert src in def_dct + loc_key, index = def_dct[src] + value = ssa.graph.blocks[loc_key][index][src] + sources_values.add(value) + if len(sources_values) != 1: + return None + return list(sources_values)[0] + + def get_candidates(self, ssa, head, max_expr_depth): + def_dct = self.get_var_definitions(ssa) + defuse = SSADefUse.from_ssa(ssa) + to_replace = {} + node_to_reg = {} + for node in defuse.nodes(): + if node.var in ssa.immutable_ids: + continue + src = defuse.get_node_target(node) + if max_expr_depth is not None and len(str(src)) > max_expr_depth: + continue + if src.is_function_call(): + continue + if node.var.is_mem(): + continue + if src.is_op('Phi'): + ret = 
class PropagateExprIntThroughExprId(PropagateThroughExprId):
    """
    Constant propagation: propagate ExprInt through ExprId.
    This is a sub family of PropagateThroughExprId, restricted to the
    definitions whose source is an integer.
    It reduces leaves in expressions of a program.
    """

    def get_candidates(self, ssa, head, max_expr_depth):
        """
        Collect the definitions whose source is an integer.

        Return a tuple (node_to_reg, to_replace, defuse):
        - node_to_reg: links each candidate node to its variable
        - to_replace: links each candidate variable to its integer source
        - defuse: the SSADefUse graph built from @ssa

        @ssa: SSADiGraph instance
        @head: unused here
        @max_expr_depth: unused here
        """
        defuse = SSADefUse.from_ssa(ssa)

        node_to_reg = {}
        to_replace = {}
        for node in defuse.nodes():
            value = defuse.get_node_target(node)
            rejected = (
                not value.is_int() or
                value.is_function_call() or
                node.var.is_mem()
            )
            if rejected:
                continue
            node_to_reg[node] = node.var
            to_replace[node.var] = value
        return node_to_reg, to_replace, defuse

    def propagation_allowed(self, ssa, to_replace, node_a, node_b):
        """
        Propagating ExprInt is always ok
        """
        return True
def stack_to_reg(expr, ir_arch_a=None):
    """
    Translate a stack pointer based memory access into a "STACK.<n>" ExprId.

    Return:
    - ExprId("STACK.0") if @expr is a memory access whose pointer is the
      stack pointer;
    - ExprId("STACK.<n>") if the pointer is `sp + <integer>`, <n> being the
      negated offset (masked on 32 bits) divided by 4;
    - False in every other case (including non memory expressions).

    @expr: Expr instance
    @ir_arch_a: (optional) ira instance, giving the stack pointer through its
                `sp` attribute. For backward compatibility, a module level
                `ir_arch_a` is used when the argument is omitted; NameError
                is raised if neither is available.
    """
    if not expr.is_mem():
        return False

    if ir_arch_a is None:
        # Historical behaviour: the ira instance was read from module scope
        try:
            ir_arch_a = globals()["ir_arch_a"]
        except KeyError:
            raise NameError(
                "stack_to_reg needs an ira instance: pass it as 'ir_arch_a'"
            )

    # Use .ptr, like every other ExprMem access of this module
    ptr = expr.ptr
    SP = ir_arch_a.sp
    if ptr == SP:
        return ExprId("STACK.0", expr.size)
    if (ptr.is_op('+') and
        len(ptr.args) == 2 and
        ptr.args[0] == SP and
        ptr.args[1].is_int()):
        diff = int(ptr.args[1])
        assert diff % 4 == 0
        # Negate the offset on 32 bits to get the slot number
        diff = (0 - diff) & 0xFFFFFFFF
        return ExprId("STACK.%d" % (diff // 4), expr.size)
    return False
def fix_stack_vars(expr, base_to_info):
    """
    Replace a local stack access by its variable, using @base_to_info.

    @expr is returned unchanged if it is not a memory access, or if its
    pointer is not a key of @base_to_info. Otherwise, the ExprId built from
    the (size, name) registered for the pointer is returned, sliced to the
    size of @expr when the access is smaller than the variable.

    @expr: Expression instance
    @base_to_info: dictionary linking stack base address to its size/name
    """
    if not (expr.is_mem() and expr.ptr in base_to_info):
        return expr

    var_size, var_name = base_to_info[expr.ptr]
    stack_var = ExprId(var_name, var_size)
    if expr.size != var_size:
        # Partial access: only the low bits of the variable are read
        assert expr.size < var_size
        stack_var = stack_var[:expr.size]
    return stack_var
def replace_stack_vars(ir_arch_a, ircfg):
    """
    Try to replace stack based memory accesses by variables.

    Hypothesis: the input ircfg must have all its accesses to stack
    explicitly done through the stack register, ie every alias on those
    variables is resolved.

    Every irblock of @ircfg is rebuilt and stored back in @ircfg.blocks.
    Return True if at least one expression has been modified.

    WARNING: may fail

    @ir_arch_a: ira instance
    @ircfg: IRCFG instance
    """

    base_to_info = retrieve_stack_accesses(ir_arch_a, ircfg)
    modified = False
    # Work on a snapshot: ircfg.blocks is updated while looping
    for block in list(viewvalues(ircfg.blocks)):
        assignblks = []
        for assignblk in block:
            out = {}
            for dst, src in viewitems(assignblk):
                # replace_mem_stack_vars already walks the whole expression:
                # wrapping it in another visit would re-walk each sub
                # expression once per ancestor
                new_dst = replace_mem_stack_vars(dst, base_to_info)
                new_src = replace_mem_stack_vars(src, base_to_info)
                if new_dst != dst or new_src != src:
                    modified = True

                out[new_dst] = new_src

            assignblks.append(AssignBlock(out, assignblk.instr))
        new_block = IRBlock(block.loc_key, assignblks)
        ircfg.blocks[block.loc_key] = new_block
    return modified
class AssignBlockLivenessInfos(object):
    """
    Liveness information attached to one AssignBlock: the variables it
    generates / kills, and its live in / live out sets (initially empty).
    """

    __slots__ = ["gen", "kill", "var_in", "var_out", "live", "assignblk"]

    def __init__(self, assignblk, gen, kill):
        """
        @assignblk: the described AssignBlock
        @gen: variables generated by @assignblk
        @kill: variables killed by @assignblk
        """
        self.assignblk = assignblk
        self.gen, self.kill = gen, kill
        self.var_in, self.var_out, self.live = set(), set(), set()

    def __str__(self):
        def _join(variables):
            return ", ".join(str(var) for var in variables)

        assignments = '\n'.join(
            "\t%s = %s" % (dst, src)
            for (dst, src) in viewitems(self.assignblk)
        )
        lines = [
            "\tVarIn:" + _join(self.var_in),
            "\tGen:" + _join(self.gen),
            "\tKill:" + _join(self.kill),
            assignments,
            "\tVarOut:" + _join(self.var_out),
        ]
        return '\n'.join(lines)
assignblk in irblock: + gens, kills = set(), set() + for dst, src in viewitems(assignblk): + expr = ExprAssign(dst, src) + read = expr.get_r(mem_read=True) + write = expr.get_w() + gens.update(read) + kills.update(write) + self.infos.append(AssignBlockLivenessInfos(assignblk, gens, kills)) + self.assignblks.append(assignblk) + + def __getitem__(self, index): + """Getitem on assignblks""" + return self.assignblks.__getitem__(index) + + def __str__(self): + out = [] + out.append("%s:" % self.loc_key) + for info in self.infos: + out.append(str(info)) + out.append('') + return "\n".join(out) + + +class DiGraphLiveness(DiGraph): + """ + DiGraph representing variable liveness + """ + + def __init__(self, ircfg, loc_db=None): + super(DiGraphLiveness, self).__init__() + self.ircfg = ircfg + self.loc_db = loc_db + self._blocks = {} + # Add irblocks gen/kill + for node in ircfg.nodes(): + irblock = ircfg.blocks[node] + irblockinfos = IRBlockLivenessInfos(irblock) + self.add_node(irblockinfos.loc_key) + self.blocks[irblockinfos.loc_key] = irblockinfos + for succ in ircfg.successors(node): + self.add_uniq_edge(node, succ) + for pred in ircfg.predecessors(node): + self.add_uniq_edge(pred, node) + + @property + def blocks(self): + return self._blocks + + def init_var_info(self): + """Add ircfg out regs""" + raise NotImplementedError("Abstract method") + + def node2lines(self, node): + """ + Output liveness information in dot format + """ + if self.loc_db is None: + node_name = str(node) + else: + names = self.loc_db.get_location_names(node) + if not names: + node_name = self.loc_db.pretty_str(node) + else: + node_name = "".join("%s:\n" % name for name in names) + yield self.DotCellDescription( + text="%s" % node_name, + attr={ + 'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey', + } + ) + if node not in self._blocks: + yield [self.DotCellDescription(text="NOT PRESENT", attr={})] + return + + for i, info in enumerate(self._blocks[node].infos): + var_in = "VarIn:" + ", 
".join(str(x) for x in info.var_in) + var_out = "VarOut:" + ", ".join(str(x) for x in info.var_out) + + assignmnts = ["%s = %s" % (dst, src) for (dst, src) in viewitems(info.assignblk)] + + if i == 0: + yield self.DotCellDescription( + text=var_in, + attr={ + 'bgcolor': 'green', + } + ) + + for assign in assignmnts: + yield self.DotCellDescription(text=assign, attr={}) + yield self.DotCellDescription( + text=var_out, + attr={ + 'bgcolor': 'green', + } + ) + yield self.DotCellDescription(text="", attr={}) + + def back_propagate_compute(self, block): + """ + Compute the liveness information in the @block. + @block: AssignBlockLivenessInfos instance + """ + infos = block.infos + modified = False + for i in reversed(range(len(infos))): + new_vars = set(infos[i].gen.union(infos[i].var_out.difference(infos[i].kill))) + if infos[i].var_in != new_vars: + modified = True + infos[i].var_in = new_vars + if i > 0 and infos[i - 1].var_out != set(infos[i].var_in): + modified = True + infos[i - 1].var_out = set(infos[i].var_in) + return modified + + def back_propagate_to_parent(self, todo, node, parent): + """ + Back propagate the liveness information from @node to @parent. + @node: loc_key of the source node + @parent: loc_key of the node to update + """ + parent_block = self.blocks[parent] + cur_block = self.blocks[node] + if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: + return + var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) + parent_block.infos[-1].var_out = var_info + todo.add(parent) + + def compute_liveness(self): + """ + Compute the liveness information for the digraph. 
+ """ + todo = set(self.leaves()) + while todo: + node = todo.pop() + cur_block = self.blocks[node] + modified = self.back_propagate_compute(cur_block) + if not modified: + continue + # We modified parent in, propagate to parents + for pred in self.predecessors(node): + self.back_propagate_to_parent(todo, node, pred) + return True + + +class DiGraphLivenessIRA(DiGraphLiveness): + """ + DiGraph representing variable liveness for IRA + """ + + def init_var_info(self, ir_arch_a): + """Add ircfg out regs""" + + for node in self.leaves(): + irblock = self.ircfg.blocks[node] + var_out = ir_arch_a.get_out_regs(irblock) + irblock_liveness = self.blocks[node] + irblock_liveness.infos[-1].var_out = var_out + + +def discard_phi_sources(ircfg, deleted_vars): + """ + Remove phi sources in @ircfg belonging to @deleted_vars set + @ircfg: IRCFG instance in ssa form + @deleted_vars: unused phi sources + """ + for block in list(viewvalues(ircfg.blocks)): + if not block.assignblks: + continue + assignblk = block[0] + todo = {} + modified = False + for dst, src in viewitems(assignblk): + if not src.is_op('Phi'): + todo[dst] = src + continue + srcs = set(expr for expr in src.args if expr not in deleted_vars) + assert(srcs) + if len(srcs) > 1: + todo[dst] = srcs + continue + todo[dst] = srcs.pop() + modified = True + if not modified: + continue + assignblks = list(block) + assignblk = dict(assignblk) + assignblk.update(todo) + assignblk = AssignBlock(assignblk, assignblks[0].instr) + assignblks[0] = assignblk + new_irblock = IRBlock(block.loc_key, assignblks) + ircfg.blocks[block.loc_key] = new_irblock + return True + + +def get_unreachable_nodes(ircfg, edges_to_del, heads): + """ + Return the unreachable nodes starting from heads and the associated edges to + be deleted. 
+ + @ircfg: IRCFG instance + @edges_to_del: edges already marked as deleted + heads: locations of graph heads + """ + todo = set(heads) + visited_nodes = set() + new_edges_to_del = set() + while todo: + node = todo.pop() + if node in visited_nodes: + continue + visited_nodes.add(node) + for successor in ircfg.successors(node): + if (node, successor) not in edges_to_del: + todo.add(successor) + all_nodes = set(ircfg.nodes()) + nodes_to_del = all_nodes.difference(visited_nodes) + for node in nodes_to_del: + for successor in ircfg.successors(node): + if successor not in nodes_to_del: + # Frontier: link from a deleted node to a living node + new_edges_to_del.add((node, successor)) + return nodes_to_del, new_edges_to_del + + +def update_phi_with_deleted_edges(ircfg, edges_to_del): + """ + Update phi which have a source present in @edges_to_del + @ssa: IRCFG instance in ssa form + @edges_to_del: edges to delete + """ + + modified = False + blocks = dict(ircfg.blocks) + for loc_src, loc_dst in edges_to_del: + block = ircfg.blocks[loc_dst] + assert block.assignblks + assignblks = list(block) + assignblk = assignblks[0] + out = {} + for dst, phi_sources in viewitems(assignblk): + if not phi_sources.is_op('Phi'): + out = assignblk + break + var_to_parents = get_phi_sources_parent_block( + ircfg, + loc_dst, + phi_sources.args + ) + to_keep = set(phi_sources.args) + for src in phi_sources.args: + parents = var_to_parents[src] + if loc_src in parents: + to_keep.discard(src) + modified = True + assert to_keep + if len(to_keep) == 1: + out[dst] = to_keep.pop() + else: + out[dst] = ExprOp('Phi', *to_keep) + assignblk = AssignBlock(out, assignblks[0].instr) + assignblks[0] = assignblk + new_irblock = IRBlock(loc_dst, assignblks) + blocks[block.loc_key] = new_irblock + + for loc_key, block in viewitems(blocks): + ircfg.blocks[loc_key] = block + return modified + + +def del_unused_edges(ircfg, heads): + """ + Delete non accessible edges in the @ircfg graph. 
+ @ircfg: IRCFG instance in ssa form + @heads: location of the heads of the graph + """ + + deleted_vars = set() + modified = False + edges_to_del_1 = set() + for node in ircfg.nodes(): + successors = set(ircfg.successors(node)) + block = ircfg.blocks[node] + dst = block.dst + possible_dsts = set(solution.value for solution in possible_values(dst)) + if not all(dst.is_loc() for dst in possible_dsts): + continue + possible_dsts = set(dst.loc_key for dst in possible_dsts) + if len(possible_dsts) == len(successors): + continue + dsts_to_del = successors.difference(possible_dsts) + for dst in dsts_to_del: + edges_to_del_1.add((node, dst)) + + # Remove edges and update phi accordingly + # Two cases here: + # - edge is directly linked to a phi node + # - edge is indirect linked to a phi node + nodes_to_del, edges_to_del_2 = get_unreachable_nodes(ircfg, edges_to_del_1, heads) + modified |= update_phi_with_deleted_edges(ircfg, edges_to_del_1.union(edges_to_del_2)) + + for src, dst in edges_to_del_1.union(edges_to_del_2): + ircfg.del_edge(src, dst) + for node in nodes_to_del: + block = ircfg.blocks[node] + ircfg.del_node(node) + for assignblock in block: + for dst in assignblock: + deleted_vars.add(dst) + + if deleted_vars: + modified |= discard_phi_sources(ircfg, deleted_vars) + + return modified + + +class DiGraphLivenessSSA(DiGraphLivenessIRA): + """ + DiGraph representing variable liveness is a SSA graph + """ + def __init__(self, ircfg): + super(DiGraphLivenessSSA, self).__init__(ircfg) + + self.loc_key_to_phi_parents = {} + for irblock in viewvalues(self.blocks): + if not irblock_has_phi(irblock): + continue + out = {} + for sources in viewvalues(irblock[0]): + var_to_parents = get_phi_sources_parent_block(self, irblock.loc_key, sources.args) + for var, var_parents in viewitems(var_to_parents): + out.setdefault(var, set()).update(var_parents) + self.loc_key_to_phi_parents[irblock.loc_key] = out + + def back_propagate_to_parent(self, todo, node, parent): + parent_block 
= self.blocks[parent] + cur_block = self.blocks[node] + irblock = self.ircfg.blocks[node] + if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: + return + var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) + + if irblock_has_phi(irblock): + # Remove phi special case + out = set() + phi_sources = self.loc_key_to_phi_parents[irblock.loc_key] + for var in var_info: + if var not in phi_sources: + out.add(var) + continue + if parent in phi_sources[var]: + out.add(var) + var_info = out + + parent_block.infos[-1].var_out = var_info + todo.add(parent) diff --git a/miasm/analysis/debugging.py b/miasm/analysis/debugging.py new file mode 100644 index 00000000..3cbbf482 --- /dev/null +++ b/miasm/analysis/debugging.py @@ -0,0 +1,499 @@ +from __future__ import print_function +from builtins import map +from builtins import range +import cmd +from future.utils import viewitems + +from miasm.core.utils import hexdump +from miasm.core.interval import interval +import miasm.jitter.csts as csts +from miasm.jitter.jitload import ExceptionHandle + + +class DebugBreakpoint(object): + + "Debug Breakpoint parent class" + pass + + +class DebugBreakpointSoft(DebugBreakpoint): + + "Stand for software breakpoint" + + def __init__(self, addr): + self.addr = addr + + def __str__(self): + return "Soft BP @0x%08x" % self.addr + + +class DebugBreakpointTerminate(DebugBreakpoint): + "Stand for an execution termination" + + def __init__(self, status): + self.status = status + + def __str__(self): + return "Terminate with %s" % self.status + + +class DebugBreakpointMemory(DebugBreakpoint): + + "Stand for memory breakpoint" + + type2str = {csts.BREAKPOINT_READ: "R", + csts.BREAKPOINT_WRITE: "W"} + + def __init__(self, addr, size, access_type): + self.addr = addr + self.access_type = access_type + self.size = size + + def __str__(self): + bp_type = "" + for k, v in viewitems(self.type2str): + if k & self.access_type != 0: + bp_type += v + return "Memory BP @0x%08x, 
Size 0x%08x, Type %s" % ( + self.addr, + self.size, + bp_type + ) + + @classmethod + def get_access_type(cls, read=False, write=False): + value = 0 + for k, v in viewitems(cls.type2str): + if v == "R" and read is True: + value += k + if v == "W" and write is True: + value += k + return value + + +class Debugguer(object): + + "Debugguer linked with a Jitter instance" + + def __init__(self, myjit): + "myjit : jitter instance" + self.myjit = myjit + self.bp_list = [] # DebugBreakpointSoft list + self.hw_bp_list = [] # DebugBreakpointHard list + self.mem_watched = [] # Memory areas watched + + def init_run(self, addr): + self.myjit.init_run(addr) + + def add_breakpoint(self, addr): + "Add bp @addr" + bp = DebugBreakpointSoft(addr) + func = lambda x: bp + bp.func = func + self.bp_list.append(bp) + self.myjit.add_breakpoint(addr, func) + + def init_memory_breakpoint(self): + "Set exception handler on EXCEPT_BREAKPOINT_MEMORY" + raise NotImplementedError("Not implemented") + + def add_memory_breakpoint(self, addr, size, read=False, write=False): + "add mem bp @[addr, addr + size], on read/write/both" + access_type = DebugBreakpointMemory.get_access_type(read=read, + write=write) + dbm = DebugBreakpointMemory(addr, size, access_type) + self.hw_bp_list.append(dbm) + self.myjit.vm.add_memory_breakpoint(addr, size, access_type) + + def remove_breakpoint(self, dbs): + "remove the DebugBreakpointSoft instance" + self.bp_list.remove(dbs) + self.myjit.remove_breakpoints_by_callback(dbs.func) + + def remove_breakpoint_by_addr(self, addr): + "remove breakpoints @ addr" + for bp in self.get_breakpoint_by_addr(addr): + self.remove_breakpoint(bp) + + def remove_memory_breakpoint(self, dbm): + "remove the DebugBreakpointMemory instance" + self.hw_bp_list.remove(dbm) + self.myjit.vm.remove_memory_breakpoint(dbm.addr, dbm.access_type) + + def remove_memory_breakpoint_by_addr_access(self, addr, read=False, + write=False): + "remove breakpoints @ addr" + access_type = 
DebugBreakpointMemory.get_access_type(read=read, + write=write) + for bp in self.hw_bp_list: + if bp.addr == addr and bp.access_type == access_type: + self.remove_memory_breakpoint(bp) + + def get_breakpoint_by_addr(self, addr): + ret = [] + for dbgsoft in self.bp_list: + if dbgsoft.addr == addr: + ret.append(dbgsoft) + return ret + + def get_breakpoints(self): + return self.bp_list + + def active_trace(self, mn=None, regs=None, newbloc=None): + if mn is not None: + self.myjit.jit.log_mn = mn + if regs is not None: + self.myjit.jit.log_regs = regs + if newbloc is not None: + self.myjit.jit.log_newbloc = newbloc + + def handle_exception(self, res): + if not res: + # A breakpoint has stopped the execution + return DebugBreakpointTerminate(res) + + if isinstance(res, DebugBreakpointSoft): + print("Breakpoint reached @0x%08x" % res.addr) + elif isinstance(res, ExceptionHandle): + if res == ExceptionHandle.memoryBreakpoint(): + print("Memory breakpoint reached!") + + # Remove flag + except_flag = self.myjit.vm.get_exception() + self.myjit.vm.set_exception(except_flag ^ res.except_flag) + + else: + raise NotImplementedError("Unknown Except") + else: + raise NotImplementedError("type res") + + # Repropagate res + return res + + def step(self): + "Step in jit" + + self.myjit.jit.set_options(jit_maxline=1) + # Reset all jitted blocks + self.myjit.jit.clear_jitted_blocks() + + res = self.myjit.continue_run(step=True) + self.handle_exception(res) + + self.myjit.jit.set_options(jit_maxline=50) + self.on_step() + + return res + + def run(self): + status = self.myjit.continue_run() + return self.handle_exception(status) + + def get_mem(self, addr, size=0xF): + "hexdump @addr, size" + + hexdump(self.myjit.vm.get_mem(addr, size)) + + def get_mem_raw(self, addr, size=0xF): + "hexdump @addr, size" + return self.myjit.vm.get_mem(addr, size) + + def watch_mem(self, addr, size=0xF): + self.mem_watched.append((addr, size)) + + def on_step(self): + for addr, size in self.mem_watched: + 
print("@0x%08x:" % addr) + self.get_mem(addr, size) + + def get_reg_value(self, reg_name): + return getattr(self.myjit.cpu, reg_name) + + def set_reg_value(self, reg_name, value): + + # Handle PC case + if reg_name == self.myjit.ir_arch.pc.name: + self.init_run(value) + + setattr(self.myjit.cpu, reg_name, value) + + def get_gpreg_all(self): + "Return general purposes registers" + return self.myjit.cpu.get_gpreg() + + +class DebugCmd(cmd.Cmd, object): + + "CommandLineInterpreter for Debugguer instance" + + color_g = '\033[92m' + color_e = '\033[0m' + color_b = '\033[94m' + color_r = '\033[91m' + + intro = color_g + "=== Miasm2 Debugging shell ===\nIf you need help, " + intro += "type 'help' or '?'" + color_e + prompt = color_b + "$> " + color_e + + def __init__(self, dbg): + "dbg : Debugguer" + self.dbg = dbg + super(DebugCmd, self).__init__() + + # Debug methods + + def print_breakpoints(self): + bp_list = self.dbg.bp_list + if len(bp_list) == 0: + print("No breakpoints.") + else: + for i, b in enumerate(bp_list): + print("%d\t0x%08x" % (i, b.addr)) + + def print_watchmems(self): + watch_list = self.dbg.mem_watched + if len(watch_list) == 0: + print("No memory watchpoints.") + else: + print("Num\tAddress \tSize") + for i, w in enumerate(watch_list): + addr, size = w + print("%d\t0x%08x\t0x%08x" % (i, addr, size)) + + def print_registers(self): + regs = self.dbg.get_gpreg_all() + + # Display settings + title1 = "Registers" + title2 = "Values" + max_name_len = max(map(len, list(regs) + [title1])) + + # Print value table + s = "%s%s | %s" % ( + title1, " " * (max_name_len - len(title1)), title2) + print(s) + print("-" * len(s)) + for name, value in sorted(viewitems(regs), key=lambda x: x[0]): + print( + "%s%s | %s" % ( + name, + " " * (max_name_len - len(name)), + hex(value).replace("L", "") + ) + ) + + def add_breakpoints(self, bp_addr): + for addr in bp_addr: + addr = int(addr, 0) + + good = True + for i, dbg_obj in enumerate(self.dbg.bp_list): + if dbg_obj.addr == 
addr: + good = False + break + if good is False: + print("Breakpoint 0x%08x already set (%d)" % (addr, i)) + else: + l = len(self.dbg.bp_list) + self.dbg.add_breakpoint(addr) + print("Breakpoint 0x%08x successfully added ! (%d)" % (addr, l)) + + display_mode = { + "mn": None, + "regs": None, + "newbloc": None + } + + def update_display_mode(self): + self.display_mode = { + "mn": self.dbg.myjit.jit.log_mn, + "regs": self.dbg.myjit.jit.log_regs, + "newbloc": self.dbg.myjit.jit.log_newbloc + } + + # Command line methods + def print_warning(self, s): + print(self.color_r + s + self.color_e) + + def onecmd(self, line): + cmd_translate = { + "h": "help", + "q": "exit", + "e": "exit", + "!": "exec", + "r": "run", + "i": "info", + "b": "breakpoint", + "s": "step", + "d": "dump" + } + + if len(line) >= 2 and \ + line[1] == " " and \ + line[:1] in cmd_translate: + line = cmd_translate[line[:1]] + line[1:] + + if len(line) == 1 and line in cmd_translate: + line = cmd_translate[line] + + r = super(DebugCmd, self).onecmd(line) + return r + + def can_exit(self): + return True + + def do_display(self, arg): + if arg == "": + self.help_display() + return + + args = arg.split(" ") + if args[-1].lower() not in ["on", "off"]: + self.print_warning("/!\ %s not in 'on' / 'off'" % args[-1]) + return + mode = args[-1].lower() == "on" + d = {} + for a in args[:-1]: + d[a] = mode + self.dbg.active_trace(**d) + self.update_display_mode() + + def help_display(self): + print("Enable/Disable tracing.") + print("Usage: display ... 
on|off") + print("Available modes are:") + for k in self.display_mode: + print("\t%s" % k) + print("Use 'info display' to get current values") + + def do_watchmem(self, arg): + if arg == "": + self.help_watchmem() + return + + args = arg.split(" ") + if len(args) >= 2: + size = int(args[1], 0) + else: + size = 0xF + + addr = int(args[0], 0) + + self.dbg.watch_mem(addr, size) + + def help_watchmem(self): + print("Add a memory watcher.") + print("Usage: watchmem [size]") + print("Use 'info watchmem' to get current memory watchers") + + def do_info(self, arg): + av_info = [ + "registers", + "display", + "breakpoints", + "watchmem" + ] + + if arg == "": + print("'info' must be followed by the name of an info command.") + print("List of info subcommands:") + for k in av_info: + print("\t%s" % k) + + if arg.startswith("b"): + # Breakpoint + self.print_breakpoints() + + if arg.startswith("d"): + # Display + self.update_display_mode() + for k, v in viewitems(self.display_mode): + print("%s\t\t%s" % (k, v)) + + if arg.startswith("w"): + # Watchmem + self.print_watchmems() + + if arg.startswith("r"): + # Registers + self.print_registers() + + def help_info(self): + print("Generic command for showing things about the program being") + print("debugged. 
Use 'info' without arguments to get the list of") + print("available subcommands.") + + def do_breakpoint(self, arg): + if arg == "": + self.help_breakpoint() + else: + addrs = arg.split(" ") + self.add_breakpoints(addrs) + + def help_breakpoint(self): + print("Add breakpoints to argument addresses.") + print("Example:") + print("\tbreakpoint 0x11223344") + print("\tbreakpoint 1122 0xabcd") + + def do_step(self, arg): + if arg == "": + nb = 1 + else: + nb = int(arg) + for _ in range(nb): + self.dbg.step() + + def help_step(self): + print("Step program until it reaches a different source line.") + print("Argument N means do this N times (or till program stops") + print("for another reason).") + + def do_dump(self, arg): + if arg == "": + self.help_dump() + else: + args = arg.split(" ") + if len(args) >= 2: + size = int(args[1], 0) + else: + size = 0xF + addr = int(args[0], 0) + + self.dbg.get_mem(addr, size) + + def help_dump(self): + print("Dump [size]. Dump size bytes at addr.") + + def do_run(self, _): + self.dbg.run() + + def help_run(self): + print("Launch or continue the current program") + + def do_exit(self, _): + return True + + def do_exec(self, line): + try: + print(eval(line)) + except Exception as error: + print("*** Error: %s" % error) + + def help_exec(self): + print("Exec a python command.") + print("You can also use '!' 
shortcut.") + + def help_exit(self): + print("Exit the interpreter.") + print("You can also use the Ctrl-D shortcut.") + + def help_help(self): + print("Print help") + + def postloop(self): + print('\nGoodbye !') + super(DebugCmd, self).postloop() + + do_EOF = do_exit + help_EOF = help_exit diff --git a/miasm/analysis/depgraph.py b/miasm/analysis/depgraph.py new file mode 100644 index 00000000..219a32ee --- /dev/null +++ b/miasm/analysis/depgraph.py @@ -0,0 +1,651 @@ +"""Provide dependency graph""" + +from functools import total_ordering + +from future.utils import viewitems + +from miasm.expression.expression import ExprInt, ExprLoc, ExprAssign +from miasm.core.graph import DiGraph +from miasm.core.locationdb import LocationDB +from miasm.expression.simplifications import expr_simp_explicit +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.ir.translators import Translator +from miasm.expression.expression_helper import possible_values + +try: + import z3 +except ImportError: + pass + +@total_ordering +class DependencyNode(object): + + """Node elements of a DependencyGraph + + A dependency node stands for the dependency on the @element at line number + @line_nb in the IRblock named @loc_key, *before* the evaluation of this + line. 
+ """ + + __slots__ = ["_loc_key", "_element", "_line_nb", "_hash"] + + def __init__(self, loc_key, element, line_nb): + """Create a dependency node with: + @loc_key: LocKey instance + @element: Expr instance + @line_nb: int + """ + self._loc_key = loc_key + self._element = element + self._line_nb = line_nb + self._hash = hash( + (self._loc_key, self._element, self._line_nb)) + + def __hash__(self): + """Returns a hash of @self to uniquely identify @self""" + return self._hash + + def __eq__(self, depnode): + """Returns True if @self and @depnode are equals.""" + if not isinstance(depnode, self.__class__): + return False + return (self.loc_key == depnode.loc_key and + self.element == depnode.element and + self.line_nb == depnode.line_nb) + + def __ne__(self, depnode): + # required Python 2.7.14 + return not self == depnode + + def __lt__(self, node): + """Compares @self with @node.""" + if not isinstance(node, self.__class__): + return NotImplemented + + return ((self.loc_key, self.element, self.line_nb) < + (node.loc_key, node.element, node.line_nb)) + + def __str__(self): + """Returns a string representation of DependencyNode""" + return "<%s %s %s %s>" % (self.__class__.__name__, + self.loc_key, self.element, + self.line_nb) + + def __repr__(self): + """Returns a string representation of DependencyNode""" + return self.__str__() + + @property + def loc_key(self): + "Name of the current IRBlock" + return self._loc_key + + @property + def element(self): + "Current tracked Expr" + return self._element + + @property + def line_nb(self): + "Line in the current IRBlock" + return self._line_nb + + +class DependencyState(object): + + """ + Store intermediate depnodes states during dependencygraph analysis + """ + + def __init__(self, loc_key, pending, line_nb=None): + self.loc_key = loc_key + self.history = [loc_key] + self.pending = {k: set(v) for k, v in viewitems(pending)} + self.line_nb = line_nb + self.links = set() + + # Init lazy elements + self._graph = None + + 
def __repr__(self): + return "" % ( + self.loc_key, + self.pending, + self.links + ) + + def extend(self, loc_key): + """Return a copy of itself, with itself in history + @loc_key: LocKey instance for the new DependencyState's loc_key + """ + new_state = self.__class__(loc_key, self.pending) + new_state.links = set(self.links) + new_state.history = self.history + [loc_key] + return new_state + + def get_done_state(self): + """Returns immutable object representing current state""" + return (self.loc_key, frozenset(self.links)) + + def as_graph(self): + """Generates a Digraph of dependencies""" + graph = DiGraph() + for node_a, node_b in self.links: + if not node_b: + graph.add_node(node_a) + else: + graph.add_edge(node_a, node_b) + for parent, sons in viewitems(self.pending): + for son in sons: + graph.add_edge(parent, son) + return graph + + @property + def graph(self): + """Returns a DiGraph instance representing the DependencyGraph""" + if self._graph is None: + self._graph = self.as_graph() + return self._graph + + def remove_pendings(self, nodes): + """Remove resolved @nodes""" + for node in nodes: + del self.pending[node] + + def add_pendings(self, future_pending): + """Add @future_pending to the state""" + for node, depnodes in viewitems(future_pending): + if node not in self.pending: + self.pending[node] = depnodes + else: + self.pending[node].update(depnodes) + + def link_element(self, element, line_nb): + """Link element to its dependencies + @element: the element to link + @line_nb: the element's line + """ + + depnode = DependencyNode(self.loc_key, element, line_nb) + if not self.pending[element]: + # Create start node + self.links.add((depnode, None)) + else: + # Link element to its known dependencies + for node_son in self.pending[element]: + self.links.add((depnode, node_son)) + + def link_dependencies(self, element, line_nb, dependencies, + future_pending): + """Link unfollowed dependencies and create remaining pending elements. 
+ @element: the element to link + @line_nb: the element's line + @dependencies: the element's dependencies + @future_pending: the future dependencies + """ + + depnode = DependencyNode(self.loc_key, element, line_nb) + + # Update pending, add link to unfollowed nodes + for dependency in dependencies: + if not dependency.follow: + # Add non followed dependencies to the dependency graph + parent = DependencyNode( + self.loc_key, dependency.element, line_nb) + self.links.add((parent, depnode)) + continue + # Create future pending between new dependency and the current + # element + future_pending.setdefault(dependency.element, set()).add(depnode) + + +class DependencyResult(DependencyState): + + """Container and methods for DependencyGraph results""" + + def __init__(self, ircfg, initial_state, state, inputs): + + super(DependencyResult, self).__init__(state.loc_key, state.pending) + self.initial_state = initial_state + self.history = state.history + self.pending = state.pending + self.line_nb = state.line_nb + self.inputs = inputs + self.links = state.links + self._ircfg = ircfg + + # Init lazy elements + self._has_loop = None + + @property + def unresolved(self): + """Set of nodes whose dependencies weren't found""" + return set(element for element in self.pending + if element != self._ircfg.IRDst) + + @property + def relevant_nodes(self): + """Set of nodes directly and indirectly influencing inputs""" + output = set() + for node_a, node_b in self.links: + output.add(node_a) + if node_b is not None: + output.add(node_b) + return output + + @property + def relevant_loc_keys(self): + """List of loc_keys containing nodes influencing inputs. 
+ The history order is preserved.""" + # Get used loc_keys + used_loc_keys = set(depnode.loc_key for depnode in self.relevant_nodes) + + # Keep history order + output = [] + for loc_key in self.history: + if loc_key in used_loc_keys: + output.append(loc_key) + + return output + + @property + def has_loop(self): + """True iff there is at least one data dependencies cycle (regarding + the associated depgraph)""" + if self._has_loop is None: + self._has_loop = self.graph.has_loop() + return self._has_loop + + def irblock_slice(self, irb, max_line=None): + """Slice of the dependency nodes on the irblock @irb + @irb: irbloc instance + """ + + assignblks = [] + line2elements = {} + for depnode in self.relevant_nodes: + if depnode.loc_key != irb.loc_key: + continue + line2elements.setdefault(depnode.line_nb, + set()).add(depnode.element) + + for line_nb, elements in sorted(viewitems(line2elements)): + if max_line is not None and line_nb >= max_line: + break + assignmnts = {} + for element in elements: + if element in irb[line_nb]: + # constants, loc_key, ... are not in destination + assignmnts[element] = irb[line_nb][element] + assignblks.append(AssignBlock(assignmnts)) + + return IRBlock(irb.loc_key, assignblks) + + def emul(self, ir_arch, ctx=None, step=False): + """Symbolic execution of relevant nodes according to the history + Return the values of inputs nodes' elements + @ir_arch: IntermediateRepresentation instance + @ctx: (optional) Initial context as dictionary + @step: (optional) Verbose execution + Warning: The emulation is not sound if the inputs nodes depend on loop + variant. 
+ """ + # Init + ctx_init = {} + if ctx is not None: + ctx_init.update(ctx) + assignblks = [] + + # Build a single assignment block according to history + last_index = len(self.relevant_loc_keys) + for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1): + if index == last_index and loc_key == self.initial_state.loc_key: + line_nb = self.initial_state.line_nb + else: + line_nb = None + assignblks += self.irblock_slice(self._ircfg.blocks[loc_key], + line_nb).assignblks + + # Eval the block + loc_db = LocationDB() + temp_loc = loc_db.get_or_create_name_location("Temp") + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) + symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) + + # Return only inputs values (others could be wrongs) + return {element: symb_exec.symbols[element] + for element in self.inputs} + + +class DependencyResultImplicit(DependencyResult): + + """Stand for a result of a DependencyGraph with implicit option + + Provide path constraints using the z3 solver""" + # Z3 Solver instance + _solver = None + + unsat_expr = ExprAssign(ExprInt(0, 1), ExprInt(1, 1)) + + def _gen_path_constraints(self, translator, expr, expected): + """Generate path constraint from @expr. 
Handle special case with + generated loc_keys + """ + out = [] + expected = self._ircfg.loc_db.canonize_to_exprloc(expected) + expected_is_loc_key = expected.is_loc() + for consval in possible_values(expr): + value = self._ircfg.loc_db.canonize_to_exprloc(consval.value) + if expected_is_loc_key and value != expected: + continue + if not expected_is_loc_key and value.is_loc_key(): + continue + + conds = z3.And(*[translator.from_expr(cond.to_constraint()) + for cond in consval.constraints]) + if expected != value: + conds = z3.And( + conds, + translator.from_expr( + ExprAssign(value, + expected)) + ) + out.append(conds) + + if out: + conds = z3.Or(*out) + else: + # Ex: expr: lblgen1, expected: 0x1234 + # -> Avoid unconsistent solution lblgen1 = 0x1234 + conds = translator.from_expr(self.unsat_expr) + return conds + + def emul(self, ir_arch, ctx=None, step=False): + # Init + ctx_init = {} + if ctx is not None: + ctx_init.update(ctx) + solver = z3.Solver() + symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) + history = self.history[::-1] + history_size = len(history) + translator = Translator.to_language("z3") + size = self._ircfg.IRDst.size + + for hist_nb, loc_key in enumerate(history, 1): + if hist_nb == history_size and loc_key == self.initial_state.loc_key: + line_nb = self.initial_state.line_nb + else: + line_nb = None + irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb) + + # Emul the block and get back destination + dst = symb_exec.eval_updt_irblock(irb, step=step) + + # Add constraint + if hist_nb < history_size: + next_loc_key = history[hist_nb] + expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size)) + solver.add(self._gen_path_constraints(translator, dst, expected)) + # Save the solver + self._solver = solver + + # Return only inputs values (others could be wrongs) + return { + element: symb_exec.eval_expr(element) + for element in self.inputs + } + + @property + def is_satisfiable(self): + """Return True iff the solution path admits 
at least one solution + PRE: 'emul' + """ + return self._solver.check() == z3.sat + + @property + def constraints(self): + """If satisfiable, return a valid solution as a Z3 Model instance""" + if not self.is_satisfiable: + raise ValueError("Unsatisfiable") + return self._solver.model() + + +class FollowExpr(object): + + "Stand for an element (expression, depnode, ...) to follow or not" + __slots__ = ["follow", "element"] + + def __init__(self, follow, element): + self.follow = follow + self.element = element + + def __repr__(self): + return '%s(%r, %r)' % (self.__class__.__name__, self.follow, self.element) + + @staticmethod + def to_depnodes(follow_exprs, loc_key, line): + """Build a set of FollowExpr(DependencyNode) from the @follow_exprs set + of FollowExpr + @follow_exprs: set of FollowExpr + @loc_key: LocKey instance + @line: integer + """ + dependencies = set() + for follow_expr in follow_exprs: + dependencies.add(FollowExpr(follow_expr.follow, + DependencyNode(loc_key, + follow_expr.element, + line))) + return dependencies + + @staticmethod + def extract_depnodes(follow_exprs, only_follow=False): + """Extract depnodes from a set of FollowExpr(Depnodes) + @only_follow: (optional) extract only elements to follow""" + return set(follow_expr.element + for follow_expr in follow_exprs + if not(only_follow) or follow_expr.follow) + + +class DependencyGraph(object): + + """Implementation of a dependency graph + + A dependency graph contains DependencyNode as nodes. The oriented edges + stand for a dependency. + The dependency graph is made of the lines of a group of IRblock + *explicitly* or *implicitly* involved in the equation of given element. 
+ """ + + def __init__(self, ircfg, + implicit=False, apply_simp=True, follow_mem=True, + follow_call=True): + """Create a DependencyGraph linked to @ircfg + + @ircfg: IRCFG instance + @implicit: (optional) Track IRDst for each block in the resulting path + + Following arguments define filters used to generate dependencies + @apply_simp: (optional) Apply expr_simp_explicit + @follow_mem: (optional) Track memory syntactically + @follow_call: (optional) Track through "call" + """ + # Init + self._ircfg = ircfg + self._implicit = implicit + + # Create callback filters. The order is relevant. + self._cb_follow = [] + if apply_simp: + self._cb_follow.append(self._follow_simp_expr) + self._cb_follow.append(lambda exprs: self._follow_exprs(exprs, + follow_mem, + follow_call)) + self._cb_follow.append(self._follow_no_loc_key) + + @staticmethod + def _follow_simp_expr(exprs): + """Simplify expression so avoid tracking useless elements, + as: XOR EAX, EAX + """ + follow = set() + for expr in exprs: + follow.add(expr_simp_explicit(expr)) + return follow, set() + + @staticmethod + def get_expr(expr, follow, nofollow): + """Update @follow/@nofollow according to insteresting nodes + Returns same expression (non modifier visitor). + + @expr: expression to handle + @follow: set of nodes to follow + @nofollow: set of nodes not to follow + """ + if expr.is_id(): + follow.add(expr) + elif expr.is_int(): + nofollow.add(expr) + elif expr.is_mem(): + follow.add(expr) + return expr + + @staticmethod + def follow_expr(expr, _, nofollow, follow_mem=False, follow_call=False): + """Returns True if we must visit sub expressions. 
+ @expr: expression to browse + @follow: set of nodes to follow + @nofollow: set of nodes not to follow + @follow_mem: force the visit of memory sub expressions + @follow_call: force the visit of call sub expressions + """ + if not follow_mem and expr.is_mem(): + nofollow.add(expr) + return False + if not follow_call and expr.is_function_call(): + nofollow.add(expr) + return False + return True + + @classmethod + def _follow_exprs(cls, exprs, follow_mem=False, follow_call=False): + """Extracts subnodes from exprs and returns followed/non followed + expressions according to @follow_mem/@follow_call + + """ + follow, nofollow = set(), set() + for expr in exprs: + expr.visit(lambda x: cls.get_expr(x, follow, nofollow), + lambda x: cls.follow_expr(x, follow, nofollow, + follow_mem, follow_call)) + return follow, nofollow + + @staticmethod + def _follow_no_loc_key(exprs): + """Do not follow loc_keys""" + follow = set() + for expr in exprs: + if expr.is_int() or expr.is_loc(): + continue + follow.add(expr) + + return follow, set() + + def _follow_apply_cb(self, expr): + """Apply callback functions to @expr + @expr : FollowExpr instance""" + follow = set([expr]) + nofollow = set() + + for callback in self._cb_follow: + follow, nofollow_tmp = callback(follow) + nofollow.update(nofollow_tmp) + + out = set(FollowExpr(True, expr) for expr in follow) + out.update(set(FollowExpr(False, expr) for expr in nofollow)) + return out + + def _track_exprs(self, state, assignblk, line_nb): + """Track pending expression in an assignblock""" + future_pending = {} + node_resolved = set() + for dst, src in viewitems(assignblk): + # Only track pending + if dst not in state.pending: + continue + # Track IRDst in implicit mode only + if dst == self._ircfg.IRDst and not self._implicit: + continue + assert dst not in node_resolved + node_resolved.add(dst) + dependencies = self._follow_apply_cb(src) + + state.link_element(dst, line_nb) + state.link_dependencies(dst, line_nb, + dependencies, 
future_pending) + + # Update pending nodes + state.remove_pendings(node_resolved) + state.add_pendings(future_pending) + + def _compute_intrablock(self, state): + """Follow dependencies tracked in @state in the current irbloc + @state: instance of DependencyState""" + + irb = self._ircfg.blocks[state.loc_key] + line_nb = len(irb) if state.line_nb is None else state.line_nb + + for cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): + self._track_exprs(state, assignblk, cur_line_nb) + + def get(self, loc_key, elements, line_nb, heads): + """Compute the dependencies of @elements at line number @line_nb in + the block named @loc_key in the current IRCFG, before the execution of + this line. Dependency check stop if one of @heads is reached + @loc_key: LocKey instance + @element: set of Expr instances + @line_nb: int + @heads: set of LocKey instances + Return an iterator on DiGraph(DependencyNode) + """ + # Init the algorithm + inputs = {element: set() for element in elements} + initial_state = DependencyState(loc_key, inputs, line_nb) + todo = set([initial_state]) + done = set() + dpResultcls = DependencyResultImplicit if self._implicit else DependencyResult + + while todo: + state = todo.pop() + self._compute_intrablock(state) + done_state = state.get_done_state() + if done_state in done: + continue + done.add(done_state) + if (not state.pending or + state.loc_key in heads or + not self._ircfg.predecessors(state.loc_key)): + yield dpResultcls(self._ircfg, initial_state, state, elements) + if not state.pending: + continue + + if self._implicit: + # Force IRDst to be tracked, except in the input block + state.pending[self._ircfg.IRDst] = set() + + # Propagate state to parents + for pred in self._ircfg.predecessors_iter(state.loc_key): + todo.add(state.extend(pred)) + + def get_from_depnodes(self, depnodes, heads): + """Alias for the get() method. Use the attributes of @depnodes as + argument. 
def get_ira(mnemo, attrib):
    """Return the IR analysis class matching an architecture

    @mnemo: architecture description; only its `name` attribute is read
    @attrib: architecture attribute ("arm" for arm, 32 or 64 for x86)
    Raise ValueError for any other (name, attrib) pair
    """
    arch = mnemo.name, attrib
    # Imports stay local: only the module of the selected architecture is
    # imported
    if arch == ("arm", "arm"):
        from miasm.arch.arm.ira import ir_a_arm_base as ira
    elif arch == ("x86", 32):
        from miasm.arch.x86.ira import ir_a_x86_32 as ira
    elif arch == ("x86", 64):
        from miasm.arch.x86.ira import ir_a_x86_64 as ira
    else:
        raise ValueError('unknown architecture: %s' % mnemo.name)
    return ira


def arm_guess_subcall(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db):
    """Disassembly callback: link a block ending with a sub-call to the
    address stored in LR

    @cur_bloc is translated to IR. For each IR block assigning both PC and
    LR, if LR is an integer equal to `offset + l` of the last line of
    @cur_bloc (presumably the address following it), an AsmConstraintNext to
    that address is added to @cur_bloc and the address is added to
    @offsets_to_dis.

    @mnemo, @attrib: architecture, as expected by get_ira
    @pool_bin: unused by this callback
    @cur_bloc: assembly block being disassembled (updated in place)
    @offsets_to_dis: set of offsets still to disassemble (updated in place)
    @loc_db: LocationDB receiving the new locations
    """
    ira = get_ira(mnemo, attrib)

    sp = LocationDB()
    ir_arch = ira(sp)
    # new_ircfg() is an instance method: call it on the instance, not on the
    # class returned by get_ira
    ircfg = ir_arch.new_ircfg()
    print('###')
    print(cur_bloc)
    ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg)

    to_add = set()
    for irblock in viewvalues(ircfg.blocks):
        pc_val = None
        lr_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
                if e.dst == mnemo.regs.LR:
                    lr_val = e.src
        if pc_val is None or lr_val is None:
            continue
        if not isinstance(lr_val, ExprInt):
            continue

        last_line = cur_bloc.lines[-1]
        if lr_val.arg != last_line.offset + last_line.l:
            continue
        loc_key = loc_db.get_or_create_offset_location(int(lr_val))
        to_add.add(AsmConstraintNext(loc_key))
        offsets_to_dis.add(int(lr_val))

    for constraint in to_add:
        cur_bloc.addto(constraint)


def arm_guess_jump_table(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db):
    """Disassembly callback: follow the entries of a jump table

    @cur_bloc is translated to IR. For each IR block loading PC from a 32 bit
    memory access whose simplified address matches `jra + jrb` with `jrb` an
    integer, 32 bit entries are read from @pool_bin starting at that integer.
    Reading stops on the first read error, or on the first entry further than
    `max_diff_addr` from the table base. Each entry gets an AsmConstraintTo on
    @cur_bloc and is added to @offsets_to_dis.

    Raise NotImplementedError if the address does not match the expected
    pattern.

    @mnemo, @attrib: architecture, as expected by get_ira
    @pool_bin: binary stream the table is read from
    @cur_bloc: assembly block being disassembled (updated in place)
    @offsets_to_dis: set of offsets still to disassemble (updated in place)
    @loc_db: LocationDB receiving the new locations
    """
    ira = get_ira(mnemo, attrib)

    jra = ExprId('jra')
    jrb = ExprId('jrb')

    sp = LocationDB()
    ir_arch = ira(sp)
    # new_ircfg() is an instance method: call it on the instance, not on the
    # class returned by get_ira
    ircfg = ir_arch.new_ircfg()
    ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg)

    for irblock in viewvalues(ircfg.blocks):
        pc_val = None
        for exprs in irblock:
            for e in exprs:
                if e.dst == ir_arch.pc:
                    pc_val = e.src
        if pc_val is None:
            continue
        if not isinstance(pc_val, ExprMem):
            continue
        assert(pc_val.size == 32)
        print(pc_val)
        ad = pc_val.arg
        ad = expr_simp(ad)
        print(ad)
        res = match_expr(ad, jra + jrb, set([jra, jrb]))
        if res is False:
            raise NotImplementedError('not fully functional')
        print(res)
        if not isinstance(res[jrb], ExprInt):
            raise NotImplementedError('not fully functional')
        base_ad = int(res[jrb])
        print(base_ad)
        addrs = set()
        max_table_entry = 10000
        max_diff_addr = 0x100000  # heuristic
        # Entries 0 to max_table_entry included are considered
        for index in range(max_table_entry + 1):
            try:
                ad = upck32(pool_bin.getbytes(base_ad + 4 * index, 4))
            except Exception:
                # Best effort: an unreadable entry ends the table. Interpreter
                # exits (KeyboardInterrupt, SystemExit) are not swallowed.
                break
            if abs(ad - base_ad) > max_diff_addr:
                break
            addrs.add(ad)
        print([hex(x) for x in addrs])

        for ad in addrs:
            offsets_to_dis.add(ad)
            loc_key = loc_db.get_or_create_offset_location(ad)
            cur_bloc.addto(AsmConstraintTo(loc_key))


# Callbacks run, in order, by guess_multi_cb
guess_funcs = []


def guess_multi_cb(
        mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db):
    """Disassembly callback forwarding its arguments to each function
    registered in guess_funcs"""
    for f in guess_funcs:
        f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db)
+Basically, this is done through the DSEEngine class, with the following scheme: + +dse = DSEEngine(Machine("x86_32")) +dse.attach(jitter) + +The DSE state can be updated through: + + - .update_state_from_concrete: update the values from the CPU, so the symbolic + execution will be completely concrete from this point (until changes) + - .update_state: inject information, for instance RAX = symbolic_RAX + - .symbolize_memory: symbolize (using .memory_to_expr) memory areas (i.e., + reading from an address in one of these areas yields a symbol) + +The DSE run can be instrumented through: + - .add_handler: register a handler, modifying the state instead of the current + execution. Can be used for stubbing external API + - .add_lib_handler: register handlers for libraries + - .add_instrumentation: register a handler, modifying the state but continuing + the current execution. Can be used for logging facilities + + +On branch, if the decision is symbolic, one can also collect "path constraints" +and invert them to produce new inputs potentially reaching new paths. + +Basically, this is done through DSEPathConstraint. In order to produce a new +solution, one can extend this class, and override 'handle_solution' to produce a +solution which fits its needs. It could avoid computing a new solution by +overriding 'produce_solution'. + +If one is only interested in constraints associated to its path, the option +"produce_solution" should be set to False, to speed up emulation. +The constraints are accumulated in the .cur_solver z3.Solver object. + +Here are a few remaining TODOs: + - handle endianness in check_state / atomic read: currently, but this is also + true for other Miasm2 symbolic engines, the endianness is not taken into + account, and assumed to be Little Endian + + - too many memory dependencies in constraint tracking: in order to let z3 find + new solutions, it does need information on memory values (for instance, a + lookup in a table with a symbolic index). 
class DriftException(Exception):
    """Signal that the emulation diverged from the reference engine

    @info: sequence of records exposing `symbol`, `computed` and `expected`
    attributes; it is kept as-is in the `info` attribute
    """

    def __init__(self, info):
        super(DriftException, self).__init__()
        self.info = info

    def __str__(self):
        if len(self.info) != 1:
            # One indented line per drifting symbol
            lines = [
                "%s: %s instead of %s" % (
                    entry.symbol, entry.computed, entry.expected
                )
                for entry in self.info
            ]
            return "Drift of:\n\t" + "\n\t".join(lines)

        only = self.info[0]
        return "Drift of %s: %s instead of %s" % (
            only.symbol, only.computed, only.expected
        )
def mem_read(self, expr_mem):
    """Read @expr_mem byte by byte, symbolizing the registered memory areas

    @expr_mem: ExprMem to read
    Return @expr_mem itself when its pointer is not an integer. Otherwise
    return the single byte expression for a one byte access, or the
    simplified ExprCompose of all the byte expressions.
    """
    if not expr_mem.ptr.is_int():
        return expr_mem
    dst_addr = int(expr_mem.ptr)
    ptr_size = expr_mem.ptr.size

    # Split access in atomic (one byte) accesses
    out = []
    for addr in range(dst_addr, dst_addr + expr_mem.size // 8):
        if addr in self.dse_memory_range:
            # Symbolize memory access
            out.append(self.dse_memory_to_expr(addr))
            continue
        # Built once and shared by both branches below
        atomic_access = ExprMem(ExprInt(addr, ptr_size), 8)
        if atomic_access in self.symbols:
            # Known symbolically: skip EmulatedSymbExec.mem_read and use the
            # next implementation in the MRO
            out.append(super(EmulatedSymbExec, self).mem_read(atomic_access))
        else:
            # Get concrete value
            out.append(super(ESETrackModif, self).mem_read(atomic_access))

    if len(out) == 1:
        # Trivial case (optimization)
        return out[0]

    # Simplify for constant merging (ex: {ExprInt(1, 8), ExprInt(2, 8)})
    return self.expr_simp(ExprCompose(*out))
def prepare(self):
    """Set up the disassembler, the symbolic engines and the jitter hooks
    for the jitter stored in self.jitter"""
    jitter = self.jitter
    ir_arch = self.ir_arch

    # Disassembler working on the jitter's memory
    self.mdis = self.machine.dis_engine(
        bin_stream_vm(jitter.vm), lines_wd=1, loc_db=self.loc_db
    )

    # Symbolic engines, both starting from an empty state
    self.symb = self.SYMB_ENGINE(jitter.cpu, jitter.vm, ir_arch, {})
    self.symb.enable_emulated_simplifications()
    self.symb_concrete = ESENoVMSideEffects(jitter.cpu, jitter.vm, ir_arch, {})

    # IRDst starts at the jitter's current program counter
    pc_value = getattr(jitter.cpu, ir_arch.pc.name)
    self.symb.symbols[ir_arch.IRDst] = ExprInt(pc_value, ir_arch.IRDst.size)

    # Activate callback on each instr
    jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
    jitter.exec_cb = self.callback

    # Clean jit cache to avoid multi-line basic blocks already jitted
    jitter.jit.clear_jitted_blocks()
def add_lib_handler(self, libimp, namespace):
    """Register handlers for the functions known by @libimp

    For each (address, name) of @libimp.fad2cname, the handler is the
    function named {name}_symb in @namespace. When there is none, a default
    handler raising RuntimeError is registered instead.

    @libimp: libimp instance (its `fad2cname` mapping is read)
    @namespace: dict name -> func(dse instance)
    """
    # Names are compared as bytes on both sides
    namespace = dict(
        (force_bytes(name), func) for name, func in viewitems(namespace)
    )

    # lambda cannot contain statement
    def default_func(dse):
        # force_bytes is needed here as in the loop below: formatting a
        # bytes pattern with a str name raises TypeError on Python 3
        fname = b"%s_symb" % force_bytes(libimp.fad2cname[dse.jitter.pc])
        raise RuntimeError("Symbolic stub '%s' not found" % fname)

    for addr, fname in viewitems(libimp.fad2cname):
        fname = force_bytes(fname)
        fname = b"%s_symb" % fname
        func = namespace.get(fname, None)
        if func is not None:
            self.add_handler(addr, func)
        else:
            self.add_handler(addr, default_func)
def _check_state(self):
    """Compare the modified symbolic values with the concrete engine

    Only the modified expressions evaluating to an integer are checked:
    registers against the jitter cpu attribute of the same name, memory at a
    constant address against the jitter memory.
    Raise DriftException with the list of DriftInfo if any value differs.
    """
    drifts = []  # List of DriftInfo

    for symbol in self.symb.modified_expr:
        # Do not consider PC
        if symbol in [self.ir_arch.pc, self.ir_arch.IRDst]:
            continue

        # Consider only concrete values
        evaluated = self.eval_expr(symbol)
        if not evaluated.is_int():
            continue
        computed = int(evaluated)

        # Fetch the reference value from the concrete engine
        if symbol.is_id():
            if not hasattr(self.jitter.cpu, symbol.name):
                continue
            expected = getattr(self.jitter.cpu, symbol.name)
        elif symbol.is_mem() and symbol.ptr.is_int():
            raw = self.jitter.vm.get_mem(int(symbol.ptr), symbol.size // 8)
            # Bytes are reversed before decoding (little endian value)
            expected = int(encode_hex(raw[::-1]), 16)
        else:
            continue

        if expected != computed:
            drifts.append(DriftInfo(symbol, computed, expected))

    # Check for drift, and act accordingly
    if drifts:
        raise DriftException(drifts)
self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr]) + else: + + ## Reset cache structures + self.ircfg.blocks.clear()# = {} + + ## Update current state + asm_block = self.mdis.dis_block(cur_addr) + self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg) + self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks) + + # Emulate the current instruction + self.symb.reset_modified() + + # Is the symbolic execution going (potentially) to jump on a lbl_gen? + if len(self.ircfg.blocks) == 1: + self.symb.run_at(self.ircfg, cur_addr) + else: + # Emulation could stuck in generated IR blocks + # But concrete execution callback is not enough precise to obtain + # the full IR blocks path + # -> Use a fully concrete execution to get back path + + # Update the concrete execution + self._update_state_from_concrete_symb( + self.symb_concrete, cpu=True, mem=True + ) + while True: + + next_addr_concrete = self.symb_concrete.run_block_at( + self.ircfg, cur_addr + ) + self.symb.run_block_at(self.ircfg, cur_addr) + + if not (isinstance(next_addr_concrete, ExprLoc) and + self.ir_arch.loc_db.get_location_offset( + next_addr_concrete.loc_key + ) is None): + # Not a lbl_gen, exit + break + + # Call handle with lbl_gen state + self.handle(next_addr_concrete) + cur_addr = next_addr_concrete + + + # At this stage, symbolic engine is one instruction after the concrete + # engine + + return True + + def _get_gpregs(self): + """Return a dict of regs: value from the jitter + This version use the regs associated to the attrib (!= cpu.get_gpreg()) + """ + out = {} + regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib] + for reg in regs: + if hasattr(self.jitter.cpu, reg.name): + out[reg.name] = getattr(self.jitter.cpu, reg.name) + return out + + def take_snapshot(self): + """Return a snapshot of the current state (including jitter state)""" + snapshot = { + "mem": self.jitter.vm.get_all_memory(), + "regs": self._get_gpregs(), + "symb": self.symb.symbols.copy(), + } + return 
snapshot + + def restore_snapshot(self, snapshot, memory=True): + """Restore a @snapshot taken with .take_snapshot + @snapshot: .take_snapshot output + @memory: (optional) if set, also restore the memory + """ + # Restore memory + if memory: + self.jitter.vm.reset_memory_page_pool() + self.jitter.vm.reset_code_bloc_pool() + for addr, metadata in viewitems(snapshot["mem"]): + self.jitter.vm.add_memory_page( + addr, + metadata["access"], + metadata["data"] + ) + + # Restore registers + self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name] + for reg, value in viewitems(snapshot["regs"]): + setattr(self.jitter.cpu, reg, value) + + # Reset intern elements + self.jitter.vm.set_exception(0) + self.jitter.cpu.set_exception(0) + self.jitter.bs._atomic_mode = False + + # Reset symb exec + for key, _ in list(viewitems(self.symb.symbols)): + del self.symb.symbols[key] + for expr, value in viewitems(snapshot["symb"]): + self.symb.symbols[expr] = value + + def update_state(self, assignblk): + """From this point, assume @assignblk in the symbolic execution + @assignblk: AssignBlock/{dst -> src} + """ + for dst, src in viewitems(assignblk): + self.symb.apply_change(dst, src) + + def _update_state_from_concrete_symb(self, symbexec, cpu=True, mem=False): + if mem: + # Values will be retrieved from the concrete execution if they are + # not present + symbexec.symbols.symbols_mem.base_to_memarray.clear() + if cpu: + regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib] + for reg in regs: + if hasattr(self.jitter.cpu, reg.name): + value = ExprInt(getattr(self.jitter.cpu, reg.name), + size=reg.size) + symbexec.symbols[reg] = value + + def update_state_from_concrete(self, cpu=True, mem=False): + r"""Update the symbolic state with concrete values from the concrete + engine + + @cpu: (optional) if set, update registers' value + @mem: (optional) if set, update memory value + + /!\ all current states will be loss. 
def symbolize_memory(self, memory_range):
    """Register the memory addresses to symbolize on the symbolic engine

    Addresses are translated to symbols with self.memory_to_expr.
    @memory_range: object supporting the `in` operator, i.e. __contains__
    (intervals, list, ...)
    """
    engine = self.symb
    engine.dse_memory_to_expr = self.memory_to_expr
    engine.dse_memory_range = memory_range
def take_snapshot(self, *args, **kwargs):
    """Return a snapshot of the current state, path constraint data included

    On top of the parent snapshot, the result holds:
    - "new_solutions": copy of the solution identifier -> solution mapping
    - "cur_constraints": assertions of the current solver
    - "_history" (path coverage strategy) or "_previous_addr" (branch
      coverage strategy)
    """
    snap = super(DSEPathConstraint, self).take_snapshot(*args, **kwargs)
    # Copy the mapping so that later handle_solution() calls do not alter
    # the snapshot. The solutions themselves are stored as-is: the previous
    # code stored the `copy` attribute of each solution without calling it.
    # NOTE(review): if a subclass stores mutable containers as solutions,
    # they are shared with the snapshot - confirm this is acceptable.
    snap["new_solutions"] = dict(self.new_solutions)
    snap["cur_constraints"] = self.cur_solver.assertions()
    if self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV:
        snap["_history"] = list(self._history)
    elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV:
        snap["_previous_addr"] = self._previous_addr
    return snap
+ -> They will not appear in 'new_solutions' + """ + super(DSEPathConstraint, self).restore_snapshot(snapshot, **kwargs) + self.new_solutions.clear() + self.new_solutions.update(snapshot["new_solutions"]) + self.cur_solver = z3.Solver() + self.cur_solver.add(snapshot["cur_constraints"]) + if not keep_known_solutions: + self._known_solutions.clear() + if self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: + self._history = list(snapshot["_history"]) + elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: + self._previous_addr = snapshot["_previous_addr"] + + def _key_for_solution_strategy(self, destination): + """Return the associated identifier for the current solution strategy""" + if self._produce_solution_strategy == self.PRODUCE_NO_SOLUTION: + # Never produce a solution + return None + elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_CODE_COV: + # Decision based on code coverage + # -> produce a solution if the destination has never been seen + key = destination + + elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: + # Decision based on branch coverage + # -> produce a solution if the current branch has never been take + key = (self._previous_addr, destination) + + elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: + # Decision based on path coverage + # -> produce a solution if the current path has never been take + key = tuple(self._history + [destination]) + else: + raise ValueError("Unknown produce solution strategy") + + return key + + def produce_solution(self, destination): + """Called to determine if a solution for @destination should be test for + satisfiability and computed + @destination: Expr instance of the target @destination + """ + key = self._key_for_solution_strategy(destination) + if key is None: + return False + return key not in self._known_solutions + + def handle_solution(self, model, destination): + """Called when a new solution for destination 
@destination is founded + @model: z3 model instance + @destination: Expr instance for an addr which is not on the DSE path + """ + key = self._key_for_solution_strategy(destination) + assert key is not None + self.new_solutions[key] = model + self._known_solutions.add(key) + + def handle_correct_destination(self, destination, path_constraints): + """[DEV] Called by handle() to update internal structures giving the + correct destination (the concrete execution one). + """ + + # Update structure used by produce_solution() + if self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: + self._history.append(destination) + elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: + self._previous_addr = destination + + # Update current solver + for cons in path_constraints: + self.cur_solver.add(self.z3_trans.from_expr(cons)) + + def handle(self, cur_addr): + cur_addr = self.ir_arch.loc_db.canonize_to_exprloc(cur_addr) + symb_pc = self.eval_expr(self.ir_arch.IRDst) + possibilities = possible_values(symb_pc) + cur_path_constraint = set() # path_constraint for the concrete path + if len(possibilities) == 1: + dst = next(iter(possibilities)).value + dst = self.ir_arch.loc_db.canonize_to_exprloc(dst) + assert dst == cur_addr + else: + for possibility in possibilities: + target_addr = self.ir_arch.loc_db.canonize_to_exprloc( + possibility.value + ) + path_constraint = set() # Set of ExprAssign for the possible path + + # Get constraint associated to the possible path + memory_to_add = ModularIntervals(symb_pc.size) + for cons in possibility.constraints: + eaff = cons.to_constraint() + # eaff.get_r(mem_read=True) is not enough + # ExprAssign consider a Memory access in dst as a write + mem = eaff.dst.get_r(mem_read=True) + mem.update(eaff.src.get_r(mem_read=True)) + for expr in mem: + if expr.is_mem(): + addr_range = expr_range(expr.ptr) + # At upper bounds, add the size of the memory access + # if addr (- [a, b], then @size[addr] reachables + # 
# Operators for which a result range is computed from the operand ranges.
# Maps the operator name (compared against `expr.op` in expr_range) to a
# binary function applied to two operand ranges; expr_range folds it from
# left to right over the ranges of the operands with reduce().
# Any operator missing from this table (except the unary "-" and "%", which
# expr_range handles on its own) is over-approximated to the full domain.
_op_range_handler = {
    "+": lambda x, y: x + y,
    "&": lambda x, y: x & y,
    "|": lambda x, y: x | y,
    "^": lambda x, y: x ^ y,
    "*": lambda x, y: x * y,
    "a>>": lambda x, y: x.arithmetic_shift_right(y),
    "<<": lambda x, y: x << y,
    ">>": lambda x, y: x >> y,
    ">>>": lambda x, y: x.rotation_right(y),
    "<<<": lambda x, y: x.rotation_left(y),
}
full range interval) + if expr.op in _op_range_handler: + sub_ranges = [expr_range(arg) for arg in expr.args] + return reduce( + _op_range_handler[expr.op], + (sub_range for sub_range in sub_ranges[1:]), + sub_ranges[0] + ) + elif expr.op == "-": + assert len(expr.args) == 1 + return - expr_range(expr.args[0]) + elif expr.op == "%": + assert len(expr.args) == 2 + op, mod = [expr_range(arg) for arg in expr.args] + if mod.intervals.length == 1: + # Modulo intervals is not supported + return op % mod.intervals.hull()[0] + + # Operand not handled, return the full domain + return ModularIntervals(expr.size, [(0, max_bound)]) + elif expr.is_cond(): + return expr_range(expr.src1).union(expr_range(expr.src2)) + else: + raise TypeError("Unsupported type: %s" % expr.__class__) diff --git a/miasm/analysis/gdbserver.py b/miasm/analysis/gdbserver.py new file mode 100644 index 00000000..ac58cdad --- /dev/null +++ b/miasm/analysis/gdbserver.py @@ -0,0 +1,453 @@ +#-*- coding:utf-8 -*- + +from __future__ import print_function +from future.builtins import map, range + +from miasm.core.utils import decode_hex, encode_hex, int_to_byte + +import socket +import struct +import time +import logging +from io import BytesIO +import miasm.analysis.debugging as debugging +from miasm.jitter.jitload import ExceptionHandle + + +class GdbServer(object): + + "Debugguer binding for GDBServer protocol" + + general_registers_order = [] + general_registers_size = {} # RegName : Size in octet + status = b"S05" + + def __init__(self, dbg, port=4455): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('localhost', port)) + server.listen(1) + self.server = server + self.dbg = dbg + + # Communication methods + + def compute_checksum(self, data): + return encode_hex(int_to_byte(sum(map(ord, data)) % 256)) + + def get_messages(self): + all_data = b"" + while True: + data = self.sock.recv(4096) + if not data: + break + 
all_data += data + + logging.debug("<- %r", all_data) + self.recv_queue += self.parse_messages(all_data) + + def parse_messages(self, data): + buf = BytesIO(data) + msgs = [] + + while (buf.tell() < buf.len): + token = buf.read(1) + if token == b"+": + continue + if token == b"-": + raise NotImplementedError("Resend packet") + if token == b"$": + packet_data = b"" + c = buf.read(1) + while c != b"#": + packet_data += c + c = buf.read(1) + checksum = buf.read(2) + if checksum != self.compute_checksum(packet_data): + raise ValueError("Incorrect checksum") + msgs.append(packet_data) + + return msgs + + def send_string(self, s): + self.send_queue.append(b"O" + encode_hex(s)) + + def process_messages(self): + + while self.recv_queue: + msg = self.recv_queue.pop(0) + buf = BytesIO(msg) + msg_type = buf.read(1) + + self.send_queue.append(b"+") + + if msg_type == b"q": + if msg.startswith(b"qSupported"): + self.send_queue.append(b"PacketSize=3fff") + elif msg.startswith(b"qC"): + # Current thread + self.send_queue.append(b"") + elif msg.startswith(b"qAttached"): + # Not supported + self.send_queue.append(b"") + elif msg.startswith(b"qTStatus"): + # Not supported + self.send_queue.append(b"") + elif msg.startswith(b"qfThreadInfo"): + # Not supported + self.send_queue.append(b"") + else: + raise NotImplementedError() + + elif msg_type == b"H": + # Set current thread + self.send_queue.append(b"OK") + + elif msg_type == b"?": + # Report why the target halted + self.send_queue.append(self.status) # TRAP signal + + elif msg_type == b"g": + # Report all general register values + self.send_queue.append(self.report_general_register_values()) + + elif msg_type == b"p": + # Read a specific register + reg_num = int(buf.read(), 16) + self.send_queue.append(self.read_register(reg_num)) + + elif msg_type == b"P": + # Set a specific register + reg_num, value = buf.read().split(b"=") + reg_num = int(reg_num, 16) + value = int(encode_hex(decode_hex(value)[::-1]), 16) + 
self.set_register(reg_num, value) + self.send_queue.append(b"OK") + + elif msg_type == b"m": + # Read memory + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + self.send_queue.append(self.read_memory(addr, size)) + + elif msg_type == b"k": + # Kill + self.sock.close() + self.send_queue = [] + self.sock = None + + elif msg_type == b"!": + # Extending debugging will be used + self.send_queue.append(b"OK") + + elif msg_type == b"v": + if msg == b"vCont?": + # Is vCont supported ? + self.send_queue.append(b"") + + elif msg_type == b"s": + # Step + self.dbg.step() + self.send_queue.append(b"S05") # TRAP signal + + elif msg_type == b"Z": + # Add breakpoint or watchpoint + bp_type = buf.read(1) + if bp_type == b"0": + # Exec breakpoint + assert(buf.read(1) == b",") + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + + if size != 1: + raise NotImplementedError("Bigger size") + self.dbg.add_breakpoint(addr) + self.send_queue.append(b"OK") + + elif bp_type == b"1": + # Hardware BP + assert(buf.read(1) == b",") + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + + self.dbg.add_memory_breakpoint( + addr, + size, + read=True, + write=True + ) + self.send_queue.append(b"OK") + + elif bp_type in [b"2", b"3", b"4"]: + # Memory breakpoint + assert(buf.read(1) == b",") + read = bp_type in [b"3", b"4"] + write = bp_type in [b"2", b"4"] + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + + self.dbg.add_memory_breakpoint( + addr, + size, + read=read, + write=write + ) + self.send_queue.append(b"OK") + + else: + raise ValueError("Impossible value") + + elif msg_type == b"z": + # Remove breakpoint or watchpoint + bp_type = buf.read(1) + if bp_type == b"0": + # Exec breakpoint + assert(buf.read(1) == b",") + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + + if size != 1: + raise NotImplementedError("Bigger size") + dbgsoft = self.dbg.get_breakpoint_by_addr(addr) + assert(len(dbgsoft) == 1) + 
self.dbg.remove_breakpoint(dbgsoft[0]) + self.send_queue.append(b"OK") + + elif bp_type == b"1": + # Hardware BP + assert(buf.read(1) == b",") + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + self.dbg.remove_memory_breakpoint_by_addr_access( + addr, + read=True, + write=True + ) + self.send_queue.append(b"OK") + + elif bp_type in [b"2", b"3", b"4"]: + # Memory breakpoint + assert(buf.read(1) == b",") + read = bp_type in [b"3", b"4"] + write = bp_type in [b"2", b"4"] + addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) + + self.dbg.remove_memory_breakpoint_by_addr_access( + addr, + read=read, + write=write + ) + self.send_queue.append(b"OK") + + else: + raise ValueError("Impossible value") + + elif msg_type == b"c": + # Continue + self.status = b"" + self.send_messages() + ret = self.dbg.run() + if isinstance(ret, debugging.DebugBreakpointSoft): + self.status = b"S05" + self.send_queue.append(b"S05") # TRAP signal + elif isinstance(ret, ExceptionHandle): + if ret == ExceptionHandle.memoryBreakpoint(): + self.status = b"S05" + self.send_queue.append(b"S05") + else: + raise NotImplementedError("Unknown Except") + elif isinstance(ret, debugging.DebugBreakpointTerminate): + # Connexion should close, but keep it running as a TRAP + # The connexion will be close on instance destruction + print(ret) + self.status = b"S05" + self.send_queue.append(b"S05") + else: + raise NotImplementedError() + + else: + raise NotImplementedError( + "Not implemented: message type %r" % msg_type + ) + + def send_messages(self): + for msg in self.send_queue: + if msg == b"+": + data = b"+" + else: + data = b"$%s#%s" % (msg, self.compute_checksum(msg)) + logging.debug("-> %r", data) + self.sock.send(data) + self.send_queue = [] + + def main_loop(self): + self.recv_queue = [] + self.send_queue = [] + + self.send_string(b"Test\n") + + while (self.sock): + self.get_messages() + self.process_messages() + self.send_messages() + + def run(self): + self.sock, self.address = 
self.server.accept() + self.main_loop() + + # Debugguer processing methods + def report_general_register_values(self): + s = b"" + for i in range(len(self.general_registers_order)): + s += self.read_register(i) + return s + + def read_register(self, reg_num): + reg_name = self.general_registers_order[reg_num] + reg_value = self.read_register_by_name(reg_name) + size = self.general_registers_size[reg_name] + + pack_token = "" + if size == 1: + pack_token = "= 0 + if end is not None: + assert end <= self.mask + + # Helpers + + @staticmethod + def size2mask(size): + """Return the bit mask of size @size""" + return (1 << size) - 1 + + def _range2interval(func): + """Convert a function taking 2 ranges to a function taking a ModularIntervals + and applying to the current instance""" + def ret_func(self, target): + ret = interval() + for left_i, right_i in product(self.intervals, target.intervals): + ret += func(self, left_i[0], left_i[1], right_i[0], + right_i[1]) + return self.__class__(self.size, ret) + return ret_func + + def _range2integer(func): + """Convert a function taking 1 range and optional arguments to a function + applying to the current instance""" + def ret_func(self, *args): + ret = interval() + for x_min, x_max in self.intervals: + ret += func(self, x_min, x_max, *args) + return self.__class__(self.size, ret) + return ret_func + + def _promote(func): + """Check and promote the second argument from integer to + ModularIntervals with one value""" + def ret_func(self, target): + if isinstance(target, int_types): + target = ModularIntervals(self.size, interval([(target, target)])) + if not isinstance(target, ModularIntervals): + raise TypeError("Unsupported operation with %s" % target.__class__) + if target.size != self.size: + raise TypeError("Size are not the same: %s vs %s" % (self.size, + target.size)) + return func(self, target) + return ret_func + + def _unsigned2signed(self, value): + """Return the signed value of @value, based on self.size""" + if 
(value & (1 << (self.size - 1))): + return -(self.mask ^ value) - 1 + else: + return value + + def _signed2unsigned(self, value): + """Return the unsigned value of @value, based on self.size""" + return value & self.mask + + # Operation internals + # + # Naming convention: + # _range_{op}: takes 2 interval bounds and apply op + # _range_{op}_uniq: takes 1 interval bounds and apply op + # _interval_{op}: apply op on an ModularIntervals + # _integer_{op}: apply op on itself with possible arguments + + def _range_add(self, x_min, x_max, y_min, y_max): + """Bounds interval for x + y, with + - x, y of size 'self.size' + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + max_bound = self.mask + if (x_min + y_min <= max_bound and + x_max + y_max >= max_bound + 1): + # HD returns 0, max_bound; but this is because it cannot handle multiple + # interval. + # x_max + y_max can only overflow once, so returns + # [result_min, overflow] U [0, overflow_rest] + return interval([(x_min + y_min, max_bound), + (0, (x_max + y_max) & max_bound)]) + else: + return interval([((x_min + y_min) & max_bound, + (x_max + y_max) & max_bound)]) + + _interval_add = _range2interval(_range_add) + + def _range_minus_uniq(self, x_min, x_max): + """Bounds interval for -x, with + - x of size self.size + - @x_min <= x <= @x_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + max_bound = self.mask + if (x_min == 0 and x_max != 0): + # HD returns 0, max_bound; see _range_add + return interval([(0, 0), ((- x_max) & max_bound, max_bound)]) + else: + return interval([((- x_max) & max_bound, (- x_min) & max_bound)]) + + _interval_minus = _range2integer(_range_minus_uniq) + + def _range_or_min(self, x_min, x_max, y_min, y_max): + """Interval min for x | y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's 
Delight: Chapter 4 + """ + max_bit = 1 << (self.size - 1) + while max_bit: + if ~x_min & y_min & max_bit: + temp = (x_min | max_bit) & - max_bit + if temp <= x_max: + x_min = temp + break + elif x_min & ~y_min & max_bit: + temp = (y_min | max_bit) & - max_bit + if temp <= y_max: + y_min = temp + break + max_bit >>= 1 + return x_min | y_min + + def _range_or_max(self, x_min, x_max, y_min, y_max): + """Interval max for x | y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + max_bit = 1 << (self.size - 1) + while max_bit: + if x_max & y_max & max_bit: + temp = (x_max - max_bit) | (max_bit - 1) + if temp >= x_min: + x_max = temp + break + temp = (y_max - max_bit) | (max_bit - 1) + if temp >= y_min: + y_max = temp + break + max_bit >>= 1 + return x_max | y_max + + def _range_or(self, x_min, x_max, y_min, y_max): + """Interval bounds for x | y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + return interval([(self._range_or_min(x_min, x_max, y_min, y_max), + self._range_or_max(x_min, x_max, y_min, y_max))]) + + _interval_or = _range2interval(_range_or) + + def _range_and_min(self, x_min, x_max, y_min, y_max): + """Interval min for x & y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + max_bit = (1 << (self.size - 1)) + while max_bit: + if ~x_min & ~y_min & max_bit: + temp = (x_min | max_bit) & - max_bit + if temp <= x_max: + x_min = temp + break + temp = (y_min | max_bit) & - max_bit + if temp <= y_max: + y_min = temp + break + max_bit >>= 1 + return x_min & y_min + + def _range_and_max(self, x_min, x_max, y_min, y_max): + """Interval max for x & y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= 
@y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + max_bit = (1 << (self.size - 1)) + while max_bit: + if x_max & ~y_max & max_bit: + temp = (x_max & ~max_bit) | (max_bit - 1) + if temp >= x_min: + x_max = temp + break + elif ~x_max & y_max & max_bit: + temp = (y_max & ~max_bit) | (max_bit - 1) + if temp >= y_min: + y_max = temp + break + max_bit >>= 1 + return x_max & y_max + + def _range_and(self, x_min, x_max, y_min, y_max): + """Interval bounds for x & y, with + - x, y of size @size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + return interval([(self._range_and_min(x_min, x_max, y_min, y_max), + self._range_and_max(x_min, x_max, y_min, y_max))]) + + _interval_and = _range2interval(_range_and) + + def _range_xor(self, x_min, x_max, y_min, y_max): + """Interval bounds for x ^ y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + From Hacker's Delight: Chapter 4 + """ + not_size = lambda x: x ^ self.mask + min_xor = self._range_and_min(x_min, x_max, not_size(y_max), not_size(y_min)) | self._range_and_min(not_size(x_max), not_size(x_min), y_min, y_max) + max_xor = self._range_or_max(0, + self._range_and_max(x_min, x_max, not_size(y_max), not_size(y_min)), + 0, + self._range_and_max(not_size(x_max), not_size(x_min), y_min, y_max)) + return interval([(min_xor, max_xor)]) + + _interval_xor = _range2interval(_range_xor) + + def _range_mul(self, x_min, x_max, y_min, y_max): + """Interval bounds for x * y, with + - x, y of size self.size + - @x_min <= x <= @x_max + - @y_min <= y <= @y_max + - operations are considered unsigned + This is a naive version, going to TOP on overflow""" + max_bound = self.mask + if y_max * x_max > max_bound: + return interval([(0, max_bound)]) + else: + return interval([(x_min * y_min, x_max * y_max)]) + + _interval_mul = 
_range2interval(_range_mul) + + def _range_mod_uniq(self, x_min, x_max, mod): + """Interval bounds for x % @mod, with + - x, @mod of size self.size + - @x_min <= x <= @x_max + - operations are considered unsigned + """ + if (x_max - x_min) >= mod: + return interval([(0, mod - 1)]) + x_max = x_max % mod + x_min = x_min % mod + if x_max < x_min: + return interval([(0, x_max), (x_min, mod - 1)]) + else: + return interval([(x_min, x_max)]) + + _integer_modulo = _range2integer(_range_mod_uniq) + + def _range_shift_uniq(self, x_min, x_max, shift, op): + """Bounds interval for x @op @shift with + - x of size self.size + - @x_min <= x <= @x_max + - operations are considered unsigned + - shift <= self.size + """ + assert shift <= self.size + # Shift operations are monotonic, and overflow results in 0 + max_bound = self.mask + + if op == "<<": + obtain_max = x_max << shift + if obtain_max > max_bound: + # Overflow at least on max, best-effort + # result '0' often happen, include it + return interval([(0, 0), ((1 << shift) - 1, max_bound)]) + else: + return interval([(x_min << shift, obtain_max)]) + elif op == ">>": + return interval([((x_min >> shift) & max_bound, + (x_max >> shift) & max_bound)]) + elif op == "a>>": + # The Miasm2 version (Expr or ModInt) could have been used, but + # introduce unnecessary dependencies for this module + # Python >> is the arithmetic one + ashr = lambda x, y: self._signed2unsigned(self._unsigned2signed(x) >> y) + end_min, end_max = ashr(x_min, shift), ashr(x_max, shift) + end_min, end_max = min(end_min, end_max), max(end_min, end_max) + return interval([(end_min, end_max)]) + else: + raise ValueError("%s is not a shifter" % op) + + def _interval_shift(self, operation, shifter): + """Apply the shifting operation @operation with a shifting + ModularIntervals @shifter on the current instance""" + # Work on a copy of shifter intervals + shifter = interval(shifter.intervals) + if (shifter.hull()[1] >= self.size): + shifter += 
interval([(self.size, self.size)]) + shifter &= interval([(0, self.size)]) + ret = interval() + for shift_range in shifter: + for shift in range(shift_range[0], shift_range[1] + 1): + for x_min, x_max in self.intervals: + ret += self._range_shift_uniq(x_min, x_max, shift, operation) + return self.__class__(self.size, ret) + + def _range_rotate_uniq(self, x_min, x_max, shift, op): + """Bounds interval for x @op @shift with + - x of size self.size + - @x_min <= x <= @x_max + - operations are considered unsigned + - shift <= self.size + """ + assert shift <= self.size + # Divide in sub-operations: a op b: a left b | a right (size - b) + if op == ">>>": + left, right = ">>", "<<" + elif op == "<<<": + left, right = "<<", ">>" + else: + raise ValueError("Not a rotator: %s" % op) + + left_intervals = self._range_shift_uniq(x_min, x_max, shift, left) + right_intervals = self._range_shift_uniq(x_min, x_max, + self.size - shift, right) + + result = self.__class__(self.size, left_intervals) | self.__class__(self.size, right_intervals) + return result.intervals + + def _interval_rotate(self, operation, shifter): + """Apply the rotate operation @operation with a shifting + ModularIntervals @shifter on the current instance""" + # Consider only rotation without repetition, and enumerate + # -> apply a '% size' on shifter + shifter %= self.size + ret = interval() + for shift_range in shifter: + for shift in range(shift_range[0], shift_range[1] + 1): + for x_min, x_max in self.intervals: + ret += self._range_rotate_uniq(x_min, x_max, shift, + operation) + + return self.__class__(self.size, ret) + + # Operation wrappers + + @_promote + def __add__(self, to_add): + """Add @to_add to the current intervals + @to_add: ModularInstances or integer + """ + return self._interval_add(to_add) + + @_promote + def __or__(self, to_or): + """Bitwise OR @to_or to the current intervals + @to_or: ModularInstances or integer + """ + return self._interval_or(to_or) + + @_promote + def __and__(self, 
to_and): + """Bitwise AND @to_and to the current intervals + @to_and: ModularInstances or integer + """ + return self._interval_and(to_and) + + @_promote + def __xor__(self, to_xor): + """Bitwise XOR @to_xor to the current intervals + @to_xor: ModularInstances or integer + """ + return self._interval_xor(to_xor) + + @_promote + def __mul__(self, to_mul): + """Multiply @to_mul to the current intervals + @to_mul: ModularInstances or integer + """ + return self._interval_mul(to_mul) + + @_promote + def __rshift__(self, to_shift): + """Logical shift right the current intervals of @to_shift + @to_shift: ModularInstances or integer + """ + return self._interval_shift('>>', to_shift) + + @_promote + def __lshift__(self, to_shift): + """Logical shift left the current intervals of @to_shift + @to_shift: ModularInstances or integer + """ + return self._interval_shift('<<', to_shift) + + @_promote + def arithmetic_shift_right(self, to_shift): + """Arithmetic shift right the current intervals of @to_shift + @to_shift: ModularInstances or integer + """ + return self._interval_shift('a>>', to_shift) + + def __neg__(self): + """Negate the current intervals""" + return self._interval_minus() + + def __mod__(self, modulo): + """Apply % @modulo on the current intervals + @modulo: integer + """ + + if not isinstance(modulo, int_types): + raise TypeError("Modulo with %s is not supported" % modulo.__class__) + return self._integer_modulo(modulo) + + @_promote + def rotation_right(self, to_rotate): + """Right rotate the current intervals of @to_rotate + @to_rotate: ModularInstances or integer + """ + return self._interval_rotate('>>>', to_rotate) + + @_promote + def rotation_left(self, to_rotate): + """Left rotate the current intervals of @to_rotate + @to_rotate: ModularInstances or integer + """ + return self._interval_rotate('<<<', to_rotate) + + # Instance operations + + @property + def mask(self): + """Return the mask corresponding to the instance size""" + return 
ModularIntervals.size2mask(self.size) + + def __iter__(self): + return iter(self.intervals) + + @property + def length(self): + return self.intervals.length + + def __contains__(self, other): + if isinstance(other, ModularIntervals): + other = other.intervals + return other in self.intervals + + def __str__(self): + return "%s (Size: %s)" % (self.intervals, self.size) + + def size_update(self, new_size): + """Update the instance size to @new_size + The size of elements must be <= @new_size""" + + # Increasing size is always safe + if new_size < self.size: + # Check that current values are indeed included in the new range + assert self.intervals.hull()[1] <= ModularIntervals.size2mask(new_size) + + self.size = new_size + + # For easy chainning + return self + + # Mimic Python's set operations + + @_promote + def union(self, to_union): + """Union set operation with @to_union + @to_union: ModularIntervals instance""" + return ModularIntervals(self.size, self.intervals + to_union.intervals) + + @_promote + def update(self, to_union): + """Union set operation in-place with @to_union + @to_union: ModularIntervals instance""" + self.intervals += to_union.intervals + + @_promote + def intersection(self, to_intersect): + """Intersection set operation with @to_intersect + @to_intersect: ModularIntervals instance""" + return ModularIntervals(self.size, self.intervals & to_intersect.intervals) + + @_promote + def intersection_update(self, to_intersect): + """Intersection set operation in-place with @to_intersect + @to_intersect: ModularIntervals instance""" + self.intervals &= to_intersect.intervals diff --git a/miasm/analysis/outofssa.py b/miasm/analysis/outofssa.py new file mode 100644 index 00000000..497956be --- /dev/null +++ b/miasm/analysis/outofssa.py @@ -0,0 +1,413 @@ +from future.utils import viewitems, viewvalues + +from miasm.expression.expression import ExprId +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.analysis.ssa import 
class Varinfo(object):
    """Liveness record of a variable: its live index, the loc_key it is
    attached to and an index"""
    __slots__ = ["live_index", "loc_key", "index"]

    def __init__(self, live_index, loc_key, index):
        self.live_index, self.loc_key, self.index = live_index, loc_key, index
ircfg.blocks[irblock.loc_key] = new_irblock + + # Insert new_var = src in each Phi's parent, at the end of the block + parent_to_parallel_copies = {} + parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in viewitems(parent_to_parallel_copies): + parent = ircfg.blocks[parent] + assignblks = list(parent) + assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) + new_irblock = IRBlock(parent.loc_key, assignblks) + ircfg.blocks[parent.loc_key] = new_irblock + + def create_copy_var(self, var): + """ + Generate a new var standing for @var + @var: variable to replace + """ + new_var = ExprId('var%d' % len(self.copy_vars), var.size) + self.copy_vars.add(new_var) + return new_var + + def isolate_phi_nodes_block(self): + """ + Init structures and virtually insert parallel copy before/after each phi + node + """ + ircfg = self.ssa.graph + for irblock in viewvalues(ircfg.blocks): + if not irblock_has_phi(irblock): + continue + for dst, sources in viewitems(irblock[0]): + assert sources.is_op('Phi') + new_var = self.create_copy_var(dst) + self.phi_new_var[dst] = new_var + + var_to_parents = get_phi_sources_parent_block( + self.ssa.graph, + irblock.loc_key, + sources.args + ) + + for src in sources.args: + parents = var_to_parents[src] + self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) + for parent in parents: + self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) + + self.phi_destinations[irblock.loc_key] = set(irblock[0]) + + def init_phis_merge_state(self): + """ + Generate trivial coalescing of phi variable and itself + """ + for phi_new_var in viewvalues(self.phi_new_var): + self.merge_state.setdefault(phi_new_var, set([phi_new_var])) + + def order_ssa_var_dom(self): + """Compute dominance order of each ssa variable""" + ircfg = self.ssa.graph + + # 
compute dominator tree + dominator_tree = ircfg.compute_dominator_tree(self.head) + + # variable -> Varinfo + self.var_to_varinfo = {} + # live_index can later be used to compare dominance of AssignBlocks + live_index = 0 + + # walk in DFS over the dominator tree + for loc_key in dominator_tree.walk_depth_first_forward(self.head): + irblock = ircfg.blocks[loc_key] + + # Create live index for phi new vars + # They do not exist in the graph yet, so index is set to None + if irblock_has_phi(irblock): + for dst in irblock[0]: + if not dst.is_id(): + continue + new_var = self.phi_new_var[dst] + self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) + + live_index += 1 + + # Create live index for remaining assignments + for index, assignblk in enumerate(irblock): + used = False + for dst in assignblk: + if not dst.is_id(): + continue + if dst in self.ssa.immutable_ids: + # Will not be considered by the current algo, ignore it + # (for instance, IRDst) + continue + + assert dst not in self.var_to_varinfo + self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) + used = True + if used: + live_index += 1 + + + def ssa_def_dominates(self, node_a, node_b): + """ + Return living index order of @node_a and @node_b + @node_a: Varinfo instance + @node_b: Varinfo instance + """ + ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index + return ret + + def merge_set_sort(self, merge_set): + """ + Return a sorted list of (live_index, var) from @merge_set in dominance + order + @merge_set: set of coalescing variables + """ + return sorted( + (self.var_to_varinfo[var].live_index, var) + for var in merge_set + ) + + def ssa_def_is_live_at(self, node_a, node_b, parent): + """ + Return True if @node_a is live during @node_b definition + If @parent is None, this is a liveness test for a post phi variable; + Else, it is a liveness test for a variable source of the phi node + + @node_a: Varinfo instance + @node_b: Varinfo instance + @parent: 
Optional parent location of the phi source + """ + loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index + if parent and index_b is None: + index_b = 0 + if node_a not in self.new_var_to_srcs_parents: + # node_a is not a new var (it is a "classic" var) + # -> use a basic liveness test + liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] + return node_a in liveness_b.var_out + + for def_loc_key in self.new_var_to_srcs_parents[node_a]: + # Consider node_a as defined at the end of its parents blocks + # and compute liveness check accordingly + + if def_loc_key == parent: + # Same path as node_a definition, so SSA ensure b cannot be live + # on this path (otherwise, a Phi would already happen earlier) + continue + liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] + if node_b in liveness_end_block.var_out: + return True + return False + + def merge_nodes_interfere(self, node_a, node_b, parent): + """ + Return True if @node_a and @node_b interfere + @node_a: variable + @node_b: variable + @parent: Optional parent location of the phi source for liveness tests + + Interference check is: is x live at y definition (or reverse) + TODO: add Value-based interference improvement + """ + if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: + # Defined in the same AssignBlock -> interfere + return True + + if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: + return self.ssa_def_is_live_at(node_a, node_b, parent) + return self.ssa_def_is_live_at(node_b, node_a, parent) + + def merge_sets_interfere(self, merge_a, merge_b, parent): + """ + Return True if no variable in @merge_a and @merge_b interferes. 
+ + Implementation of "Algorithm 2: Check intersection in a set of variables" + + @merge_a: a dom ordered list of equivalent variables + @merge_b: a dom ordered list of equivalent variables + @parent: Optional parent location of the phi source for liveness tests + """ + if merge_a == merge_b: + # No need to consider interference if equal + return False + + merge_a_list = self.merge_set_sort(merge_a) + merge_b_list = self.merge_set_sort(merge_b) + dom = [] + while merge_a_list or merge_b_list: + if not merge_a_list: + _, current = merge_b_list.pop(0) + elif not merge_b_list: + _, current = merge_a_list.pop(0) + else: + # compare live_indexes (standing for dominance) + if merge_a_list[-1] < merge_b_list[-1]: + _, current = merge_a_list.pop(0) + else: + _, current = merge_b_list.pop(0) + while dom and not self.ssa_def_dominates(dom[-1], current): + dom.pop() + + # Don't test node in same merge_set + if ( + # Is stack not empty? + dom and + # Trivial non-interference if dom.top() and current come + # from the same merge set + not (dom[-1] in merge_a and current in merge_a) and + not (dom[-1] in merge_b and current in merge_b) and + # Actually test for interference + self.merge_nodes_interfere(current, dom[-1], parent) + ): + return True + dom.append(current) + return False + + def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): + """ + Try to coalesce variables each dst/src couple together from + @parallel_copies + + @parallel_copies: a dictionary representing dst/src parallel + assignments. 
+ @parent: Optional parent location of the phi source for liveness tests + """ + for dst, src in viewitems(parallel_copies): + dst_merge = self.merge_state.setdefault(dst, set([dst])) + src_merge = self.merge_state.setdefault(src, set([src])) + if not self.merge_sets_interfere(dst_merge, src_merge, parent): + dst_merge.update(src_merge) + for node in dst_merge: + self.merge_state[node] = dst_merge + + def aggressive_coalesce_block(self): + """Try to coalesce phi var with their pre/post variables""" + + ircfg = self.ssa.graph + + # Run coalesce on the post phi parallel copy + for irblock in viewvalues(ircfg.blocks): + if not irblock_has_phi(irblock): + continue + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + parallel_copies[dst] = self.phi_new_var[dst] + self.aggressive_coalesce_parallel_copy(parallel_copies, None) + + # Run coalesce on the pre phi parallel copy + + # Stand for the virtual parallel copies at the end of Phi's block + # parents + parent_to_parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in viewitems(parent_to_parallel_copies): + self.aggressive_coalesce_parallel_copy(parallel_copies, parent) + + def get_best_merge_set_name(self, merge_set): + """ + For a given @merge_set, prefer an original SSA variable instead of a + created copy. In other case, take a random name. 
+ @merge_set: set of equivalent expressions + """ + if not merge_set: + raise RuntimeError("Merge set should not be empty") + for var in merge_set: + if var not in self.copy_vars: + return var + # Get random name + return var + + + def replace_merge_sets(self): + """ + In the graph, replace all variables from merge state by their + representative variable + """ + replace = {} + merge_sets = set() + + # Elect representative for merge sets + merge_set_to_name = {} + for merge_set in viewvalues(self.merge_state): + frozen_merge_set = frozenset(merge_set) + merge_sets.add(frozen_merge_set) + var_name = self.get_best_merge_set_name(merge_set) + merge_set_to_name[frozen_merge_set] = var_name + + # Generate replacement of variable by their representative + for merge_set in merge_sets: + var_name = merge_set_to_name[merge_set] + merge_set = list(merge_set) + for var in merge_set: + replace[var] = var_name + + self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) + + def remove_phi(self): + """ + Remove phi operators in @ifcfg + @ircfg: IRDiGraph instance + """ + + for irblock in list(viewvalues(self.ssa.graph.blocks)): + assignblks = list(irblock) + out = {} + for dst, src in viewitems(assignblks[0]): + if src.is_op('Phi'): + assert set([dst]) == set(src.args) + continue + out[dst] = src + assignblks[0] = AssignBlock(out, assignblks[0].instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) + + def remove_assign_eq(self): + """ + Remove trivial expressions (a=a) in the current graph + """ + for irblock in list(viewvalues(self.ssa.graph.blocks)): + assignblks = list(irblock) + for i, assignblk in enumerate(assignblks): + out = {} + for dst, src in viewitems(assignblk): + if dst == src: + continue + out[dst] = src + assignblks[i] = AssignBlock(out, assignblk.instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) diff --git a/miasm/analysis/sandbox.py b/miasm/analysis/sandbox.py new file mode 100644 index 
00000000..e5595071 --- /dev/null +++ b/miasm/analysis/sandbox.py @@ -0,0 +1,1026 @@ +from __future__ import print_function +from builtins import range + +import os +import logging +from argparse import ArgumentParser + +from future.utils import viewitems, viewvalues + +from miasm.core.utils import force_bytes +from miasm.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis import debugging +from miasm.jitter.jitload import log_func + + + +class Sandbox(object): + + """ + Parent class for Sandbox abstraction + """ + + CALL_FINISH_ADDR = 0x13371acc + + @staticmethod + def code_sentinelle(jitter): + jitter.run = False + return False + + @classmethod + def _classes_(cls): + """ + Iterator on parent classes except Sanbox + """ + for base_cls in cls.__bases__: + # Avoid infinite loop + if base_cls == Sandbox: + continue + + yield base_cls + + classes = property(lambda x: x.__class__._classes_()) + + def __init__(self, fname, options, custom_methods=None, **kwargs): + """ + Initialize a sandbox + @fname: str file name + @options: namespace instance of specific options + @custom_methods: { str => func } for custom API implementations + """ + + # Initialize + self.fname = fname + self.options = options + if custom_methods is None: + custom_methods = {} + for cls in self.classes: + if cls == Sandbox: + continue + if issubclass(cls, OS): + cls.__init__(self, custom_methods, **kwargs) + else: + cls.__init__(self, **kwargs) + + # Logging options + self.jitter.set_trace_log( + trace_instr=self.options.singlestep, + trace_regs=self.options.singlestep, + trace_new_blocks=self.options.dumpblocs + ) + + if not self.options.quiet_function_calls: + log_func.setLevel(logging.INFO) + + @classmethod + def parser(cls, *args, **kwargs): + """ + Return instance of instance parser with expecting options. + Extra parameters are passed to parser initialisation. 
+ """ + + parser = ArgumentParser(*args, **kwargs) + parser.add_argument('-a', "--address", + help="Force entry point address", default=None) + parser.add_argument('-b', "--dumpblocs", action="store_true", + help="Log disasm blocks") + parser.add_argument('-z', "--singlestep", action="store_true", + help="Log single step") + parser.add_argument('-d', "--debugging", action="store_true", + help="Debug shell") + parser.add_argument('-g', "--gdbserver", type=int, + help="Listen on port @port") + parser.add_argument("-j", "--jitter", + help="Jitter engine. Possible values are: gcc (default), llvm, python", + default="gcc") + parser.add_argument( + '-q', "--quiet-function-calls", action="store_true", + help="Don't log function calls") + parser.add_argument('-i', "--dependencies", action="store_true", + help="Load PE and its dependencies") + + for base_cls in cls._classes_(): + base_cls.update_parser(parser) + return parser + + def run(self, addr=None): + """ + Launch emulation (gdbserver, debugging, basic JIT). 
+ @addr: (int) start address + """ + if addr is None and self.options.address is not None: + addr = int(self.options.address, 0) + + if any([self.options.debugging, self.options.gdbserver]): + dbg = debugging.Debugguer(self.jitter) + self.dbg = dbg + dbg.init_run(addr) + + if self.options.gdbserver: + port = self.options.gdbserver + print("Listen on port %d" % port) + gdb = self.machine.gdbserver(dbg, port) + self.gdb = gdb + gdb.run() + else: + cmd = debugging.DebugCmd(dbg) + self.cmd = cmd + cmd.cmdloop() + + else: + self.jitter.init_run(addr) + self.jitter.continue_run() + + def call(self, prepare_cb, addr, *args): + """ + Direct call of the function at @addr, with arguments @args prepare in + calling convention implemented by @prepare_cb + @prepare_cb: func(ret_addr, *args) + @addr: address of the target function + @args: arguments + """ + self.jitter.init_run(addr) + self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.code_sentinelle) + prepare_cb(self.CALL_FINISH_ADDR, *args) + self.jitter.continue_run() + + + +class OS(object): + + """ + Parent class for OS abstraction + """ + + def __init__(self, custom_methods, **kwargs): + pass + + @classmethod + def update_parser(cls, parser): + pass + + +class Arch(object): + + """ + Parent class for Arch abstraction + """ + + # Architecture name + _ARCH_ = None + + def __init__(self, **kwargs): + self.machine = Machine(self._ARCH_) + self.jitter = self.machine.jitter(self.options.jitter) + + @classmethod + def update_parser(cls, parser): + pass + + +class OS_Win(OS): + # DLL to import + ALL_IMP_DLL = ["ntdll.dll", "kernel32.dll", "user32.dll", + "ole32.dll", "urlmon.dll", + "ws2_32.dll", 'advapi32.dll', "psapi.dll", + ] + modules_path = "win_dll" + + def __init__(self, custom_methods, *args, **kwargs): + from miasm.jitter.loader.pe import vm_load_pe, vm_load_pe_libs,\ + preload_pe, libimp_pe, vm_load_pe_and_dependencies + from miasm.os_dep import win_api_x86_32, win_api_x86_32_seh + methods = 
dict((name.encode(),func) for name, func in viewitems(win_api_x86_32.__dict__)) + methods.update(custom_methods) + + super(OS_Win, self).__init__(methods, *args, **kwargs) + + # Import manager + libs = libimp_pe() + self.libs = libs + win_api_x86_32.winobjs.runtime_dll = libs + + self.name2module = {} + fname_basename = os.path.basename(self.fname).lower() + + # Load main pe + with open(self.fname, "rb") as fstream: + self.pe = vm_load_pe( + self.jitter.vm, + fstream.read(), + load_hdr=self.options.load_hdr, + name=self.fname, + **kwargs + ) + self.name2module[fname_basename] = self.pe + + # Load library + if self.options.loadbasedll: + + # Load libs in memory + self.name2module.update( + vm_load_pe_libs( + self.jitter.vm, + self.ALL_IMP_DLL, + libs, + self.modules_path, + **kwargs + ) + ) + + # Patch libs imports + for pe in viewvalues(self.name2module): + preload_pe(self.jitter.vm, pe, libs) + + if self.options.dependencies: + vm_load_pe_and_dependencies( + self.jitter.vm, + fname_basename, + self.name2module, + libs, + self.modules_path, + **kwargs + ) + + win_api_x86_32.winobjs.current_pe = self.pe + + # Fix pe imports + preload_pe(self.jitter.vm, self.pe, libs) + + # Library calls handler + self.jitter.add_lib_handler(libs, methods) + + # Manage SEH + if self.options.use_windows_structs: + win_api_x86_32_seh.main_pe_name = fname_basename + win_api_x86_32_seh.main_pe = self.pe + win_api_x86_32.winobjs.hcurmodule = self.pe.NThdr.ImageBase + win_api_x86_32_seh.name2module = self.name2module + win_api_x86_32_seh.set_win_fs_0(self.jitter) + win_api_x86_32_seh.init_seh(self.jitter) + + self.entry_point = self.pe.rva2virt( + self.pe.Opthdr.AddressOfEntryPoint) + + @classmethod + def update_parser(cls, parser): + parser.add_argument('-o', "--load-hdr", action="store_true", + help="Load pe hdr") + parser.add_argument('-y', "--use-windows-structs", action="store_true", + help="Create and use windows structures (peb, ldr, seh, ...)") + parser.add_argument('-l', 
"--loadbasedll", action="store_true", + help="Load base dll (path './win_dll')") + parser.add_argument('-r', "--parse-resources", + action="store_true", help="Load resources") + + +class OS_Linux(OS): + + PROGRAM_PATH = "./program" + + def __init__(self, custom_methods, *args, **kwargs): + from miasm.jitter.loader.elf import vm_load_elf, preload_elf, libimp_elf + from miasm.os_dep import linux_stdlib + methods = linux_stdlib.__dict__ + methods.update(custom_methods) + + super(OS_Linux, self).__init__(methods, *args, **kwargs) + + # Import manager + self.libs = libimp_elf() + + with open(self.fname, "rb") as fstream: + self.elf = vm_load_elf( + self.jitter.vm, + fstream.read(), + name=self.fname, + **kwargs + ) + preload_elf(self.jitter.vm, self.elf, self.libs) + + self.entry_point = self.elf.Ehdr.entry + + # Library calls handler + self.jitter.add_lib_handler(self.libs, methods) + linux_stdlib.ABORT_ADDR = self.CALL_FINISH_ADDR + + # Arguments + self.argv = [self.PROGRAM_PATH] + if self.options.command_line: + self.argv += self.options.command_line + self.envp = self.options.environment_vars + + @classmethod + def update_parser(cls, parser): + parser.add_argument('-c', '--command-line', + action="append", + default=[], + help="Command line arguments") + parser.add_argument('--environment-vars', + action="append", + default=[], + help="Environment variables arguments") + parser.add_argument('--mimic-env', + action="store_true", + help="Mimic the environment of a starting executable") + +class OS_Linux_str(OS): + + PROGRAM_PATH = "./program" + + def __init__(self, custom_methods, *args, **kwargs): + from miasm.jitter.loader.elf import libimp_elf + from miasm.os_dep import linux_stdlib + methods = linux_stdlib.__dict__ + methods.update(custom_methods) + + super(OS_Linux_str, self).__init__(methods, *args, **kwargs) + + # Import manager + libs = libimp_elf() + self.libs = libs + + data = open(self.fname, "rb").read() + self.options.load_base_addr = 
int(self.options.load_base_addr, 0) + self.jitter.vm.add_memory_page( + self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data, + "Initial Str" + ) + + # Library calls handler + self.jitter.add_lib_handler(libs, methods) + linux_stdlib.ABORT_ADDR = self.CALL_FINISH_ADDR + + # Arguments + self.argv = [self.PROGRAM_PATH] + if self.options.command_line: + self.argv += self.options.command_line + self.envp = self.options.environment_vars + + @classmethod + def update_parser(cls, parser): + parser.add_argument('-c', '--command-line', + action="append", + default=[], + help="Command line arguments") + parser.add_argument('--environment-vars', + action="append", + default=[], + help="Environment variables arguments") + parser.add_argument('--mimic-env', + action="store_true", + help="Mimic the environment of a starting executable") + parser.add_argument("load_base_addr", help="load base address") + + +class Arch_x86(Arch): + _ARCH_ = None # Arch name + STACK_SIZE = 0x10000 + STACK_BASE = 0x130000 + + def __init__(self, **kwargs): + super(Arch_x86, self).__init__(**kwargs) + + if self.options.usesegm: + self.jitter.ir_arch.do_stk_segm = True + self.jitter.ir_arch.do_ds_segm = True + self.jitter.ir_arch.do_str_segm = True + self.jitter.ir_arch.do_all_segm = True + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + @classmethod + def update_parser(cls, parser): + parser.add_argument('-s', "--usesegm", action="store_true", + help="Use segments") + + +class Arch_x86_32(Arch_x86): + _ARCH_ = "x86_32" + + +class Arch_x86_64(Arch_x86): + _ARCH_ = "x86_64" + + +class Arch_arml(Arch): + _ARCH_ = "arml" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_arml, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + +class Arch_armb(Arch): + _ARCH_ = "armb" 
+ STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_armb, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + +class Arch_armtl(Arch): + _ARCH_ = "armtl" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_armtl, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + +class Arch_mips32b(Arch): + _ARCH_ = "mips32b" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_mips32b, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + +class Arch_aarch64l(Arch): + _ARCH_ = "aarch64l" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_aarch64l, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + +class Arch_aarch64b(Arch): + _ARCH_ = "aarch64b" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_aarch64b, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + +class Arch_ppc(Arch): + _ARCH_ = None + +class Arch_ppc32(Arch): + _ARCH_ = None + +class Arch_ppc32b(Arch_ppc32): + _ARCH_ = "ppc32b" + +class Sandbox_Win_x86_32(Sandbox, Arch_x86_32, OS_Win): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + self.jitter.push_uint32_t(2) + self.jitter.push_uint32_t(1) + self.jitter.push_uint32_t(0) + self.jitter.push_uint32_t(self.CALL_FINISH_ADDR) + + # Set the runtime guard + 
self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) + + def run(self, addr=None): + """ + If addr is not set, use entrypoint + """ + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Win_x86_32, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_stdcall) + super(self.__class__, self).call(prepare_cb, addr, *args) + + +class Sandbox_Win_x86_64(Sandbox, Arch_x86_64, OS_Win): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # reserve stack for local reg + for _ in range(0x4): + self.jitter.push_uint64_t(0) + + # Pre-stack return address + self.jitter.push_uint64_t(self.CALL_FINISH_ADDR) + + # Set the runtime guard + self.jitter.add_breakpoint( + self.CALL_FINISH_ADDR, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + """ + If addr is not set, use entrypoint + """ + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Win_x86_64, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_stdcall) + super(self.__class__, self).call(prepare_cb, addr, *args) + + +class Sandbox_Linux_x86_32(Sandbox, Arch_x86_32, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.ESP -= len(env) + ptr = self.jitter.cpu.ESP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = 
force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.ESP -= len(arg) + ptr = self.jitter.cpu.ESP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + self.jitter.push_uint32_t(self.CALL_FINISH_ADDR) + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(len(self.argv)) + else: + self.jitter.push_uint32_t(self.CALL_FINISH_ADDR) + + # Set the runtime guard + self.jitter.add_breakpoint( + self.CALL_FINISH_ADDR, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + """ + If addr is not set, use entrypoint + """ + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_x86_32, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + + +class Sandbox_Linux_x86_64(Sandbox, Arch_x86_64, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.RSP -= len(env) + ptr = self.jitter.cpu.RSP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.RSP -= len(arg) + ptr = self.jitter.cpu.RSP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + self.jitter.push_uint64_t(self.CALL_FINISH_ADDR) + self.jitter.push_uint64_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint64_t(ptr) + self.jitter.push_uint64_t(0) + for ptr in reversed(argv_ptrs): + 
self.jitter.push_uint64_t(ptr) + self.jitter.push_uint64_t(len(self.argv)) + else: + self.jitter.push_uint64_t(self.CALL_FINISH_ADDR) + + # Set the runtime guard + self.jitter.add_breakpoint( + self.CALL_FINISH_ADDR, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + """ + If addr is not set, use entrypoint + """ + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_x86_64, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + +class Sandbox_Linux_arml(Sandbox, Arch_arml, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.SP -= len(env) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.SP -= len(arg) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + # Round SP to 4 + self.jitter.cpu.SP = self.jitter.cpu.SP & ~ 3 + + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(len(self.argv)) + + self.jitter.cpu.LR = self.CALL_FINISH_ADDR + + # Set the runtime guard + self.jitter.add_breakpoint( + self.CALL_FINISH_ADDR, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_arml, self).run(addr) 
+ + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + +class Sandbox_Linux_armtl(Sandbox, Arch_armtl, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.SP -= len(env) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.SP -= len(arg) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + # Round SP to 4 + self.jitter.cpu.SP = self.jitter.cpu.SP & ~ 3 + + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(len(self.argv)) + + self.jitter.cpu.LR = self.CALL_FINISH_ADDR + + # Set the runtime guard + self.jitter.add_breakpoint( + self.CALL_FINISH_ADDR, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_armtl, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + + +class Sandbox_Linux_mips32b(Sandbox, Arch_mips32b, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, 
**kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.SP -= len(env) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.SP -= len(arg) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(len(self.argv)) + + self.jitter.cpu.RA = 0x1337beef + + # Set the runtime guard + self.jitter.add_breakpoint( + 0x1337beef, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_mips32b, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + +class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + self.jitter.cpu.LR = self.CALL_FINISH_ADDR + + # Set the runtime guard + self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) + + def run(self, addr=None): + if addr is None and self.options.address is not None: + addr = int(self.options.address, 0) + super(Sandbox_Linux_armb_str, self).run(addr) + + +class Sandbox_Linux_arml_str(Sandbox, Arch_arml, OS_Linux_str): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + self.jitter.cpu.LR = 
self.CALL_FINISH_ADDR + + # Set the runtime guard + self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) + + def run(self, addr=None): + if addr is None and self.options.address is not None: + addr = int(self.options.address, 0) + super(Sandbox_Linux_arml_str, self).run(addr) + + +class Sandbox_Linux_aarch64l(Sandbox, Arch_aarch64l, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.SP -= len(env) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.SP -= len(arg) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + self.jitter.push_uint64_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint64_t(ptr) + self.jitter.push_uint64_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint64_t(ptr) + self.jitter.push_uint64_t(len(self.argv)) + + self.jitter.cpu.LR = self.CALL_FINISH_ADDR + + # Set the runtime guard + self.jitter.add_breakpoint( + self.CALL_FINISH_ADDR, + self.__class__.code_sentinelle + ) + + def run(self, addr=None): + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_aarch64l, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + +class Sandbox_Linux_ppc32b(Sandbox, Arch_ppc32b, OS_Linux): + + STACK_SIZE = 0x10000 + STACK_BASE = 0xbfce0000 + + # The glue between the kernel and the ELF ABI on Linux/PowerPC is + # implemented in 
glibc/sysdeps/powerpc/powerpc32/dl-start.S, so we + # have to play the role of ld.so here. + def __init__(self, *args, **kwargs): + super(Sandbox_Linux_ppc32b, self).__init__(*args, **kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + self.jitter.cpu.R1 -= 8 + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env = force_bytes(env) + env += b"\x00" + self.jitter.cpu.R1 -= len(env) + ptr = self.jitter.cpu.R1 + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg = force_bytes(arg) + arg += b"\x00" + self.jitter.cpu.R1 -= len(arg) + ptr = self.jitter.cpu.R1 + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.cpu.R5 = self.jitter.cpu.R1 # envp + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.cpu.R4 = self.jitter.cpu.R1 # argv + self.jitter.cpu.R3 = len(self.argv) # argc + self.jitter.push_uint32_t(self.jitter.cpu.R3) + + self.jitter.cpu.R6 = 0 # auxp + self.jitter.cpu.R7 = 0 # termination function + + # From the glibc, we should push a 0 here to distinguish a + # dynamically linked executable from a statically linked one. + # We actually do not do it and attempt to be somehow compatible + # with both types of executables. 
+            #self.jitter.push_uint32_t(0)
+
+        self.jitter.cpu.LR = self.CALL_FINISH_ADDR
+
+        # Set the runtime guard
+        self.jitter.add_breakpoint(
+            self.CALL_FINISH_ADDR,
+            self.__class__.code_sentinelle
+        )
+
+    def run(self, addr=None):
+        """
+        If addr is not set, use entrypoint
+        """
+        if addr is None and self.options.address is None:
+            addr = self.entry_point
+        super(Sandbox_Linux_ppc32b, self).run(addr)
+
+    def call(self, addr, *args, **kwargs):
+        """
+        Direct call of the function at @addr, with arguments @args
+        @addr: address of the target function
+        @args: arguments; keyword 'prepare_cb' overrides jitter.func_prepare_systemv
+        """
+        prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv)
+        super(Sandbox_Linux_ppc32b, self).call(prepare_cb, addr, *args)  # not self.__class__: that recurses endlessly when subclassed
diff --git a/miasm/analysis/simplifier.py b/miasm/analysis/simplifier.py
new file mode 100644
index 00000000..870071c8
--- /dev/null
+++ b/miasm/analysis/simplifier.py
@@ -0,0 +1,303 @@
+"""
+Apply simplification passes to an IR cfg
+"""
+
+import logging
+from functools import wraps
+from miasm.analysis.ssa import SSADiGraph
+from miasm.analysis.outofssa import UnSSADiGraph
+from miasm.analysis.data_flow import DiGraphLivenessSSA
+from miasm.expression.simplifications import expr_simp
+from miasm.analysis.data_flow import dead_simp, \
+    merge_blocks, remove_empty_assignblks, \
+    PropagateExprIntThroughExprId, PropagateThroughExprId, \
+    PropagateThroughExprMem, del_unused_edges
+
+
+log = logging.getLogger("simplifier")
+console_handler = logging.StreamHandler()
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
+log.addHandler(console_handler)
+log.setLevel(logging.WARNING)
+
+
+def fix_point(func):
+    @wraps(func)
+    def ret_func(self, ircfg, head):
+        log.debug('[%s]: start', func.__name__)
+        has_been_modified = False
+        modified = True
+        while modified:
+            modified = func(self, ircfg, head)
+            has_been_modified |= modified
+        log.debug(
+            '[%s]: stop %r',
+            func.__name__,
+            has_been_modified
+        )
+        return has_been_modified
+    return ret_func
+ + +class IRCFGSimplifier(object): + """ + Simplify an IRCFG + This class applies passes until reaching a fix point + """ + + def __init__(self, ir_arch): + self.ir_arch = ir_arch + self.init_passes() + + def init_passes(self): + """ + Init the array of simplification passes + """ + self.passes = [] + + @fix_point + def simplify(self, ircfg, head): + """ + Apply passes until reaching a fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = False + for simplify_pass in self.passes: + modified |= simplify_pass(ircfg, head) + return modified + + def __call__(self, ircfg, head): + return self.simplify(ircfg, head) + + +class IRCFGSimplifierCommon(IRCFGSimplifier): + """ + Simplify an IRCFG + This class applies following passes until reaching a fix point: + - simplify_ircfg + - do_dead_simp_ircfg + """ + def __init__(self, ir_arch, expr_simp=expr_simp): + self.expr_simp = expr_simp + super(IRCFGSimplifierCommon, self).__init__(ir_arch) + + def init_passes(self): + self.passes = [ + self.simplify_ircfg, + self.do_dead_simp_ircfg, + ] + + @fix_point + def simplify_ircfg(self, ircfg, _head): + """ + Apply self.expr_simp on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + """ + modified = ircfg.simplify(self.expr_simp) + return modified + + @fix_point + def do_dead_simp_ircfg(self, ircfg, head): + """ + Apply: + - dead_simp + - remove_empty_assignblks + - merge_blocks + on the @ircfg until reaching fix point + Return True if the graph has been modified + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + modified = dead_simp(self.ir_arch, ircfg) + modified |= remove_empty_assignblks(ircfg) + modified |= merge_blocks(ircfg, set([head])) + return modified + + +class IRCFGSimplifierSSA(IRCFGSimplifierCommon): + """ + Simplify an IRCFG. 
+    The IRCFG is first transformed into SSA, then transformation passes are
+    applied, followed by out-of-ssa. Final passes of IRCFGSimplifierCommon are applied
+
+    This class applies simplify_ssa, then the following passes, until reaching a fix point:
+    - do_propagate_int
+    - do_propagate_mem
+    - do_propagate_expr
+    - do_dead_simp_ssa
+    """
+
+    def __init__(self, ir_arch, expr_simp=expr_simp):
+        super(IRCFGSimplifierSSA, self).__init__(ir_arch, expr_simp)
+
+        self.ir_arch.ssa_var = {}
+        self.all_ssa_vars = {}
+
+        self.ssa_forbidden_regs = self.get_forbidden_regs()
+
+        self.propag_int = PropagateExprIntThroughExprId()
+        self.propag_expr = PropagateThroughExprId()
+        self.propag_mem = PropagateThroughExprMem()
+
+    def get_forbidden_regs(self):
+        """
+        Return the set of registers kept immutable during SSA transformation
+        """
+        regs = set(
+            [
+                self.ir_arch.pc,
+                self.ir_arch.IRDst,
+                self.ir_arch.arch.regs.exception_flags
+            ]
+        )
+        return regs
+
+    def init_passes(self):
+        """
+        Init the array of simplification passes
+        """
+        self.passes = [
+            self.simplify_ssa,
+            self.do_propagate_int,
+            self.do_propagate_mem,
+            self.do_propagate_expr,
+            self.do_dead_simp_ssa,
+        ]
+
+    def ircfg_to_ssa(self, ircfg, head):
+        """
+        Apply the SSA transformation to @ircfg using its @head
+
+        @ircfg: IRCFG instance to simplify
+        @head: Location instance of the ircfg head
+        """
+        ssa = SSADiGraph(ircfg)
+        ssa.immutable_ids.update(self.ssa_forbidden_regs)
+        ssa.ssa_variable_to_expr.update(self.all_ssa_vars)
+        ssa.transform(head)
+        self.all_ssa_vars.update(ssa.ssa_variable_to_expr)
+        self.ir_arch.ssa_var.update(ssa.ssa_variable_to_expr)
+        return ssa
+
+    def ssa_to_unssa(self, ssa, head):
+        """
+        Apply the out-of-ssa transformation to @ssa using its @head
+
+        @ssa: SSADiGraph instance
+        @head: Location instance of the graph head
+        """
+        cfg_liveness = DiGraphLivenessSSA(ssa.graph)
+        cfg_liveness.init_var_info(self.ir_arch)
+        cfg_liveness.compute_liveness()
+
+        UnSSADiGraph(ssa, head, cfg_liveness)
+        return ssa.graph
+
+    @fix_point
+    def 
simplify_ssa(self, ssa, _head):
+        """
+        Apply self.expr_simp on the @ssa.graph until reaching fix point
+        Return True if the graph has been modified
+
+        @ssa: SSADiGraph instance
+        """
+        modified = ssa.graph.simplify(self.expr_simp)
+        return modified
+
+    @fix_point
+    def do_propagate_int(self, ssa, head):
+        """
+        Constant propagation in the @ssa graph
+        @head: Location instance of the graph head
+        """
+        modified = self.propag_int.propagate(ssa, head)
+        modified |= ssa.graph.simplify(self.expr_simp)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        return modified
+
+    @fix_point
+    def do_propagate_mem(self, ssa, head):
+        """
+        Propagation of expression based on ExprInt/ExprId in the @ssa graph
+        @head: Location instance of the graph head
+        """
+        modified = self.propag_mem.propagate(ssa, head)
+        modified |= ssa.graph.simplify(self.expr_simp)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        return modified
+
+    @fix_point
+    def do_propagate_expr(self, ssa, head):
+        """
+        Expressions propagation through ExprId in the @ssa graph
+        @head: Location instance of the graph head
+        """
+        modified = self.propag_expr.propagate(ssa, head)
+        modified |= ssa.graph.simplify(self.expr_simp)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        return modified
+
+    @fix_point
+    def do_dead_simp_ssa(self, ssa, head):
+        """
+        Apply:
+        - dead_simp
+        - remove_empty_assignblks
+        - del_unused_edges
+        - merge_blocks
+        on the @ssa graph until reaching fix point
+        Return True if the graph has been modified
+
+        @ssa: SSADiGraph instance whose graph is simplified
+        @head: Location instance of the graph head
+        """
+        modified = dead_simp(self.ir_arch, ssa.graph)
+        modified |= remove_empty_assignblks(ssa.graph)
+        modified |= del_unused_edges(ssa.graph, set([head]))
+        modified |= merge_blocks(ssa.graph, set([head]))
+        return modified
+
+    def do_simplify(self, ssa, head):
+        """
+        Apply passes until reaching a fix point
+        Return True if the graph has been modified
+        """
+        return super(IRCFGSimplifierSSA, 
self).simplify(ssa, head) + + def do_simplify_loop(self, ssa, head): + """ + Apply do_simplify until reaching a fix point + SSA is updated between each do_simplify + Return True if the graph has been modified + """ + modified = True + while modified: + modified = self.do_simplify(ssa, head) + # Update ssa structs + ssa = self.ircfg_to_ssa(ssa.graph, head) + return ssa + + def simplify(self, ircfg, head): + """ + Apply SSA transformation to @ircfg + Apply passes until reaching a fix point + Apply out-of-ssa transformation + Apply post simplification passes + + Updated simplified IRCFG instance and return it + + @ircfg: IRCFG instance to simplify + @head: Location instance of the ircfg head + """ + ssa = self.ircfg_to_ssa(ircfg, head) + ssa = self.do_simplify_loop(ssa, head) + ircfg = self.ssa_to_unssa(ssa, head) + ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) + ircfg_simplifier.simplify(ircfg, head) + return ircfg diff --git a/miasm/analysis/ssa.py b/miasm/analysis/ssa.py new file mode 100644 index 00000000..0aa24286 --- /dev/null +++ b/miasm/analysis/ssa.py @@ -0,0 +1,1118 @@ +from collections import deque +from future.utils import viewitems, viewvalues + +from miasm.expression.expression import ExprId, ExprAssign, ExprOp, \ + ExprLoc, get_expr_ids +from miasm.ir.ir import AssignBlock, IRBlock + + +def sanitize_graph_head(ircfg, head): + """ + In multiple algorithm, the @head of the ircfg may not have predecessors. 
+ The function transform the @ircfg in order to ensure this property + @ircfg: IRCFG instance + @head: the location of the graph's head + """ + + if not ircfg.predecessors(head): + return + original_edges = ircfg.predecessors(head) + sub_head = ircfg.loc_db.add_location() + + # Duplicate graph, replacing references to head by sub_head + replaced_expr = { + ExprLoc(head, ircfg.IRDst.size): + ExprLoc(sub_head, ircfg.IRDst.size) + } + ircfg.simplify( + lambda expr:expr.replace_expr(replaced_expr) + ) + # Duplicate head block + ircfg.add_irblock(IRBlock(sub_head, list(ircfg.blocks[head]))) + + # Remove original head block + ircfg.del_node(head) + + for src in original_edges: + ircfg.add_edge(src, sub_head) + + # Create new head, jumping to sub_head + assignblk = AssignBlock({ircfg.IRDst:ExprLoc(sub_head, ircfg.IRDst.size)}) + new_irblock = IRBlock(head, [assignblk]) + ircfg.add_irblock(new_irblock) + + +class SSA(object): + """ + Generic class for static single assignment (SSA) transformation + + Handling of + - variable generation + - variable renaming + - conversion of an IRCFG block into SSA + + Variables will be renamed to ., whereby the + index will be increased in every definition of . + + Memory expressions are stateless. The addresses are in SSA form, + but memory aliasing will occur. For instance, if it holds + that RAX == RBX.0 + (-0x8) and + + @64[RBX.0 + (-0x8)] = RDX + RCX.0 = @64[RAX], + + then it cannot be tracked that RCX.0 == RDX. 
+ """ + + + def __init__(self, ircfg): + """ + Initialises generic class for SSA + :param ircfg: instance of IRCFG + """ + # IRCFG instance + self.ircfg = ircfg + + # SSA blocks + self.blocks = {} + + # stack for RHS + self._stack_rhs = {} + # stack for LHS + self._stack_lhs = {} + + self.ssa_variable_to_expr = {} + + # dict of SSA expressions + self.expressions = {} + + # dict of SSA to original location + self.ssa_to_location = {} + + # Don't SSA IRDst + self.immutable_ids = set([self.ircfg.IRDst]) + + def get_regs(self, expr): + return get_expr_ids(expr) + + def transform(self, *args, **kwargs): + """Transforms into SSA""" + raise NotImplementedError("Abstract method") + + def get_block(self, loc_key): + """ + Returns an IRBlock + :param loc_key: LocKey instance + :return: IRBlock + """ + irblock = self.ircfg.blocks.get(loc_key, None) + + return irblock + + def reverse_variable(self, ssa_var): + """ + Transforms a variable in SSA form into non-SSA form + :param ssa_var: ExprId, variable in SSA form + :return: ExprId, variable in non-SSA form + """ + expr = self.ssa_variable_to_expr.get(ssa_var, ssa_var) + return expr + + def reset(self): + """Resets SSA transformation""" + self.blocks = {} + self.expressions = {} + self._stack_rhs = {} + self._stack_lhs = {} + self.ssa_to_location = {} + + def _gen_var_expr(self, expr, stack): + """ + Generates a variable expression in SSA form + :param expr: variable expression which will be translated + :param stack: self._stack_rhs or self._stack_lhs + :return: variable expression in SSA form + """ + index = stack[expr] + name = "%s.%d" % (expr.name, index) + ssa_var = ExprId(name, expr.size) + self.ssa_variable_to_expr[ssa_var] = expr + + return ssa_var + + def _transform_var_rhs(self, ssa_var): + """ + Transforms a variable on the right hand side into SSA + :param ssa_var: variable + :return: transformed variable + """ + # variable has never been on the LHS + if ssa_var not in self._stack_rhs: + return ssa_var + # variable 
has been on the LHS + stack = self._stack_rhs + return self._gen_var_expr(ssa_var, stack) + + def _transform_var_lhs(self, expr): + """ + Transforms a variable on the left hand side into SSA + :param expr: variable + :return: transformed variable + """ + # check if variable has already been on the LHS + if expr not in self._stack_lhs: + self._stack_lhs[expr] = 0 + # save last value for RHS transformation + self._stack_rhs[expr] = self._stack_lhs[expr] + + # generate SSA expression + stack = self._stack_lhs + ssa_var = self._gen_var_expr(expr, stack) + + return ssa_var + + def _transform_expression_lhs(self, dst): + """ + Transforms an expression on the left hand side into SSA + :param dst: expression + :return: expression in SSA form + """ + if dst.is_mem(): + # transform with last RHS instance + ssa_var = self._transform_expression_rhs(dst) + else: + # transform LHS + ssa_var = self._transform_var_lhs(dst) + + # increase SSA variable counter + self._stack_lhs[dst] += 1 + + return ssa_var + + def _transform_expression_rhs(self, src): + """ + Transforms an expression on the right hand side into SSA + :param src: expression + :return: expression in SSA form + """ + # dissect expression in variables + variables = self.get_regs(src) + src_ssa = src + # transform variables + for expr in variables: + ssa_var = self._transform_var_rhs(expr) + src_ssa = src_ssa.replace_expr({expr: ssa_var}) + + return src_ssa + + @staticmethod + def _parallel_instructions(assignblk): + """ + Extracts the instruction from a AssignBlock. + + Since instructions in a AssignBlock are evaluated + in parallel, memory instructions on the left hand + side will be inserted into the start of the list. + Then, memory instruction on the LHS will be + transformed firstly. 
+ + :param assignblk: assignblock + :return: sorted list of expressions + """ + instructions = [] + for dst in assignblk: + # dst = src + aff = assignblk.dst2ExprAssign(dst) + # insert memory expression into start of list + if dst.is_mem(): + instructions.insert(0, aff) + else: + instructions.append(aff) + + return instructions + + @staticmethod + def _convert_block(irblock, ssa_list): + """ + Transforms an IRBlock inplace into SSA + :param irblock: IRBlock to be transformed + :param ssa_list: list of SSA expressions + """ + # iterator over SSA expressions + ssa_iter = iter(ssa_list) + new_irs = [] + # walk over IR blocks' assignblocks + for assignblk in irblock.assignblks: + # list of instructions + instructions = [] + # insert SSA instructions + for _ in assignblk: + instructions.append(next(ssa_iter)) + # replace instructions of assignblock in IRBlock + new_irs.append(AssignBlock(instructions, assignblk.instr)) + return IRBlock(irblock.loc_key, new_irs) + + def _rename_expressions(self, loc_key): + """ + Transforms variables and expressions + of an IRBlock into SSA. + + IR representations of an assembly instruction are evaluated + in parallel. Thus, RHS and LHS instructions will be performed + separately. 
+ :param loc_key: IRBlock loc_key + """ + # list of IRBlock's SSA expressions + ssa_expressions_block = [] + + # retrieve IRBlock + irblock = self.get_block(loc_key) + if irblock is None: + # Incomplete graph + return + + # iterate block's IR expressions + for index, assignblk in enumerate(irblock.assignblks): + # list of parallel instructions + instructions = self._parallel_instructions(assignblk) + # list for transformed RHS expressions + rhs = deque() + + # transform RHS + for expr in instructions: + src = expr.src + src_ssa = self._transform_expression_rhs(src) + # save transformed RHS + rhs.append(src_ssa) + + # transform LHS + for expr in instructions: + if expr.dst in self.immutable_ids or expr.dst in self.ssa_variable_to_expr: + dst_ssa = expr.dst + else: + dst_ssa = self._transform_expression_lhs(expr.dst) + + # retrieve corresponding RHS expression + src_ssa = rhs.popleft() + + # rebuild SSA expression + expr = ExprAssign(dst_ssa, src_ssa) + self.expressions[dst_ssa] = src_ssa + self.ssa_to_location[dst_ssa] = (loc_key, index) + + + # append ssa expression to list + ssa_expressions_block.append(expr) + + # replace blocks IR expressions with corresponding SSA transformations + new_irblock = self._convert_block(irblock, ssa_expressions_block) + self.ircfg.blocks[loc_key] = new_irblock + + +class SSABlock(SSA): + """ + SSA transformation on block level + + It handles + - transformation of a single IRBlock into SSA + - reassembling an SSA expression into a non-SSA + expression through iterative resolving of the RHS + """ + + def transform(self, loc_key): + """ + Transforms a block into SSA form + :param loc_key: IRBlock loc_key + """ + self._rename_expressions(loc_key) + + def reassemble_expr(self, expr): + """ + Reassembles an expression in SSA form into a solely non-SSA expression + :param expr: expression + :return: non-SSA expression + """ + # worklist + todo = {expr.copy()} + + while todo: + # current expression + cur = todo.pop() + # RHS of current 
expression + cur_rhs = self.expressions[cur] + + # replace cur with RHS in expr + expr = expr.replace_expr({cur: cur_rhs}) + + # parse ExprIDs on RHS + ids_rhs = self.get_regs(cur_rhs) + + # add RHS ids to worklist + for id_rhs in ids_rhs: + if id_rhs in self.expressions: + todo.add(id_rhs) + return expr + + +class SSAPath(SSABlock): + """ + SSA transformation on path level + + It handles + - transformation of a path of IRBlocks into SSA + """ + + def transform(self, path): + """ + Transforms a path into SSA + :param path: list of IRBlock loc_key + """ + for block in path: + self._rename_expressions(block) + + +class SSADiGraph(SSA): + """ + SSA transformation on DiGraph level + + It handles + - transformation of a DiGraph into SSA + - generation, insertion and filling of phi nodes + + The implemented SSA form is known as minimal SSA. + """ + + PHI_STR = 'Phi' + + + def __init__(self, ircfg): + """ + Initialises SSA class for directed graphs + :param ircfg: instance of IRCFG + """ + super(SSADiGraph, self).__init__(ircfg) + + # variable definitions + self.defs = {} + + # dict of blocks' phi nodes + self._phinodes = {} + + # IRCFG control flow graph + self.graph = ircfg + + + def transform(self, head): + """Transforms into SSA""" + sanitize_graph_head(self.graph, head) + self._init_variable_defs(head) + self._place_phi(head) + self._rename(head) + self._insert_phi() + self._convert_phi() + self._fix_no_def_var(head) + + def reset(self): + """Resets SSA transformation""" + super(SSADiGraph, self).reset() + self.defs = {} + self._phinodes = {} + + def _init_variable_defs(self, head): + """ + Initialises all variable definitions and + assigns the corresponding IRBlocks. 
+ + All variable definitions in self.defs contain + a set of IRBlocks in which the variable gets assigned + """ + + for loc_key in self.graph.walk_depth_first_forward(head): + irblock = self.get_block(loc_key) + if irblock is None: + # Incomplete graph + continue + + # search for block's IR definitions/destinations + for assignblk in irblock.assignblks: + for dst in assignblk: + # enforce ExprId + if dst.is_id(): + # exclude immutable ids + if dst in self.immutable_ids or dst in self.ssa_variable_to_expr: + continue + # map variable definition to blocks + self.defs.setdefault(dst, set()).add(irblock.loc_key) + + def _place_phi(self, head): + """ + For all blocks, empty phi functions will be placed for every + variable in the block's dominance frontier. + + self.phinodes contains a dict for every block in the + dominance frontier. In this dict, each variable + definition maps to its corresponding phi function. + + Source: Cytron, Ron, et al. + "An efficient method of computing static single assignment form" + Proceedings of the 16th ACM SIGPLAN-SIGACT symposium on + Principles of programming languages (1989), p. 
30 + """ + # dominance frontier + frontier = self.graph.compute_dominance_frontier(head) + + for variable in self.defs: + done = set() + todo = set() + intodo = set() + + for loc_key in self.defs[variable]: + todo.add(loc_key) + intodo.add(loc_key) + + while todo: + loc_key = todo.pop() + + # walk through block's dominance frontier + for node in frontier.get(loc_key, []): + if node in done: + continue + # place empty phi functions for a variable + empty_phi = self._gen_empty_phi(variable) + + # add empty phi node for variable in node + self._phinodes.setdefault(node, {})[variable] = empty_phi.src + done.add(node) + + if node not in intodo: + intodo.add(node) + todo.add(node) + + def _gen_empty_phi(self, expr): + """ + Generates an empty phi function for a variable + :param expr: variable + :return: ExprAssign, empty phi function for expr + """ + phi = ExprId(self.PHI_STR, expr.size) + return ExprAssign(expr, phi) + + def _fill_phi(self, *args): + """ + Fills a phi function with variables. + + phi(x.1, x.5, x.6) + + :param args: list of ExprId + :return: ExprOp + """ + return ExprOp(self.PHI_STR, *set(args)) + + def _rename(self, head): + """ + Transforms each variable expression in the CFG into SSA + by traversing the dominator tree in depth-first search. + + 1. Transform variables of phi functions on LHS into SSA + 2. Transform all non-phi expressions into SSA + 3. Update the successor's phi functions' RHS with current SSA variables + 4. Save current SSA variable stack for successors in the dominator tree + + Source: Cytron, Ron, et al. + "An efficient method of computing static single assignment form" + Proceedings of the 16th ACM SIGPLAN-SIGACT symposium on + Principles of programming languages (1989), p. 
31 + """ + # compute dominator tree + dominator_tree = self.graph.compute_dominator_tree(head) + + # init SSA variable stack + stack = [self._stack_rhs] + + # walk in DFS over the dominator tree + for loc_key in dominator_tree.walk_depth_first_forward(head): + # restore SSA variable stack of the predecessor in the dominator tree + self._stack_rhs = stack.pop().copy() + + # Transform variables of phi functions on LHS into SSA + self._rename_phi_lhs(loc_key) + + # Transform all non-phi expressions into SSA + self._rename_expressions(loc_key) + + # Update the successor's phi functions' RHS with current SSA variables + # walk over block's successors in the CFG + for successor in self.graph.successors_iter(loc_key): + self._rename_phi_rhs(successor) + + # Save current SSA variable stack for successors in the dominator tree + for _ in dominator_tree.successors_iter(loc_key): + stack.append(self._stack_rhs) + + def _rename_phi_lhs(self, loc_key): + """ + Transforms phi function's expressions of an IRBlock + on the left hand side into SSA + :param loc_key: IRBlock loc_key + """ + if loc_key in self._phinodes: + # create temporary list of phi function assignments for inplace renaming + tmp = list(self._phinodes[loc_key]) + + # iterate over all block's phi nodes + for dst in tmp: + # transform variables on LHS inplace + self._phinodes[loc_key][self._transform_expression_lhs(dst)] = self._phinodes[loc_key].pop(dst) + + def _rename_phi_rhs(self, successor): + """ + Transforms the right hand side of each successor's phi function + into SSA. Each transformed expression of a phi function's + right hand side is of the form + + phi(., ., ..., .) 
+ + :param successor: loc_key of block's direct successor in the CFG + """ + # if successor is in block's dominance frontier + if successor in self._phinodes: + # walk over all variables on LHS + for dst, src in list(viewitems(self._phinodes[successor])): + # transform variable on RHS in non-SSA form + expr = self.reverse_variable(dst) + # transform expr into it's SSA form using current stack + src_ssa = self._transform_expression_rhs(expr) + + # Add src_ssa to phi args + if src.is_id(self.PHI_STR): + # phi function is empty + expr = self._fill_phi(src_ssa) + else: + # phi function contains at least one value + expr = self._fill_phi(src_ssa, *src.args) + + # update phi function + self._phinodes[successor][dst] = expr + + def _insert_phi(self): + """Inserts phi functions into the list of SSA expressions""" + for loc_key in self._phinodes: + for dst in self._phinodes[loc_key]: + self.expressions[dst] = self._phinodes[loc_key][dst] + + def _convert_phi(self): + """Inserts corresponding phi functions inplace + into IRBlock at the beginning""" + for loc_key in self._phinodes: + irblock = self.get_block(loc_key) + if irblock is None: + continue + assignblk = AssignBlock(self._phinodes[loc_key]) + # insert at the beginning + new_irs = IRBlock(loc_key, [assignblk] + list(irblock.assignblks)) + self.ircfg.blocks[loc_key] = new_irs + + def _fix_no_def_var(self, head): + """ + Replace phi source variables which are not ssa vars by ssa vars. 
+ @head: loc_key of the graph head + """ + var_to_insert = set() + for loc_key in self._phinodes: + for dst, sources in viewitems(self._phinodes[loc_key]): + for src in sources.args: + if src in self.ssa_variable_to_expr: + continue + var_to_insert.add(src) + var_to_newname = {} + newname_to_var = {} + for var in var_to_insert: + new_var = self._transform_var_lhs(var) + var_to_newname[var] = new_var + newname_to_var[new_var] = var + + # Replace non modified node used in phi with new variable + self.ircfg.simplify(lambda expr:expr.replace_expr(var_to_newname)) + + if newname_to_var: + irblock = self.ircfg.blocks[head] + assignblks = list(irblock) + assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] + self.ircfg.blocks[head] = IRBlock(head, assignblks) + + # Updt structure + for loc_key in self._phinodes: + for dst, sources in viewitems(self._phinodes[loc_key]): + self._phinodes[loc_key][dst] = sources.replace_expr(var_to_newname) + + for var, (loc_key, index) in list(viewitems(self.ssa_to_location)): + if loc_key == head: + self.ssa_to_location[var] = loc_key, index + 1 + + for newname, var in viewitems(newname_to_var): + self.ssa_to_location[newname] = head, 0 + self.ssa_variable_to_expr[newname] = var + self.expressions[newname] = var + + +def irblock_has_phi(irblock): + """ + Return True if @irblock has Phi assignments + @irblock: IRBlock instance + """ + if not irblock.assignblks: + return False + for src in viewvalues(irblock[0]): + return src.is_op('Phi') + return False + + +class Varinfo(object): + """Store liveness information for a variable""" + __slots__ = ["live_index", "loc_key", "index"] + + def __init__(self, live_index, loc_key, index): + self.live_index = live_index + self.loc_key = loc_key + self.index = index + + +def get_var_assignment_src(ircfg, node, variables): + """ + Return the variable of @variables which is written by the irblock at @node + @node: Location + @variables: a set of variable to test + """ + irblock = 
ircfg.blocks[node] + for assignblk in irblock: + result = set(assignblk).intersection(variables) + if not result: + continue + assert len(result) == 1 + return list(result)[0] + return None + + +def get_phi_sources_parent_block(ircfg, loc_key, sources): + """ + Return a dictionary linking a variable to it's direct parent label + which belong to a path which affects the node. + @loc_key: the starting node + @sources: set of variables to resolve + """ + source_to_parent = {} + for parent in ircfg.predecessors(loc_key): + done = set() + todo = set([parent]) + found = False + while todo: + node = todo.pop() + if node in done: + continue + done.add(node) + ret = get_var_assignment_src(ircfg, node, sources) + if ret: + source_to_parent.setdefault(ret, set()).add(parent) + found = True + break + for pred in ircfg.predecessors(node): + todo.add(pred) + assert found + return source_to_parent + + +class UnSSADiGraph(object): + """ + Implements unssa algorithm + Revisiting Out-of-SSA Translation for Correctness, Code Quality, and + Efficiency + """ + + def __init__(self, ssa, head, cfg_liveness): + self.cfg_liveness = cfg_liveness + self.ssa = ssa + self.head = head + + # Set of created variables + self.copy_vars = set() + # Virtual parallel copies + + # On loc_key's Phi node dst -> set((parent, src)) + self.phi_parent_sources = {} + # On loc_key's Phi node, loc_key -> set(Phi dsts) + self.phi_destinations = {} + # Phi's dst -> new var + self.phi_new_var = {} + # For a new_var representing dst: + # new_var -> set(parents of Phi's src in dst = Phi(src,...)) + self.new_var_to_srcs_parents = {} + # new_var -> set(variables to be coalesced with, named "merge_set") + self.merge_state = {} + + # Launch the algorithm in several steps + self.isolate_phi_nodes_block() + self.init_phis_merge_state() + self.order_ssa_var_dom() + self.aggressive_coalesce_block() + self.insert_parallel_copy() + self.replace_merge_sets() + self.remove_assign_eq() + + def insert_parallel_copy(self): + """ + 
Naive Out-of-SSA from CSSA (without coalescing for now) + - Replace Phi + - Create room for parallel copies in Phi's parents + """ + ircfg = self.ssa.graph + + for irblock in list(viewvalues(ircfg.blocks)): + if not irblock_has_phi(irblock): + continue + + # Replace Phi with Phi's dst = new_var + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + new_var = self.phi_new_var[dst] + parallel_copies[dst] = new_var + + assignblks = list(irblock) + assignblks[0] = AssignBlock(parallel_copies, irblock[0].instr) + new_irblock = IRBlock(irblock.loc_key, assignblks) + ircfg.blocks[irblock.loc_key] = new_irblock + + # Insert new_var = src in each Phi's parent, at the end of the block + parent_to_parallel_copies = {} + parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in viewitems(parent_to_parallel_copies): + parent = ircfg.blocks[parent] + assignblks = list(parent) + assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) + new_irblock = IRBlock(parent.loc_key, assignblks) + ircfg.blocks[parent.loc_key] = new_irblock + + def create_copy_var(self, var): + """ + Generate a new var standing for @var + @var: variable to replace + """ + new_var = ExprId('var%d' % len(self.copy_vars), var.size) + self.copy_vars.add(new_var) + return new_var + + def isolate_phi_nodes_block(self): + """ + Init structures and virtually insert parallel copy before/after each phi + node + """ + ircfg = self.ssa.graph + for irblock in viewvalues(ircfg.blocks): + if not irblock_has_phi(irblock): + continue + for dst, sources in viewitems(irblock[0]): + assert sources.is_op('Phi') + new_var = self.create_copy_var(dst) + self.phi_new_var[dst] = new_var + + var_to_parents = get_phi_sources_parent_block( + self.ssa.graph, + irblock.loc_key, + sources.args + ) + + for src in sources.args: + parents 
= var_to_parents[src] + self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) + for parent in parents: + self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) + + self.phi_destinations[irblock.loc_key] = set(irblock[0]) + + def init_phis_merge_state(self): + """ + Generate trivial coalescing of phi variable and itself + """ + for phi_new_var in viewvalues(self.phi_new_var): + self.merge_state.setdefault(phi_new_var, set([phi_new_var])) + + def order_ssa_var_dom(self): + """Compute dominance order of each ssa variable""" + ircfg = self.ssa.graph + + # compute dominator tree + dominator_tree = ircfg.compute_dominator_tree(self.head) + + # variable -> Varinfo + self.var_to_varinfo = {} + # live_index can later be used to compare dominance of AssignBlocks + live_index = 0 + + # walk in DFS over the dominator tree + for loc_key in dominator_tree.walk_depth_first_forward(self.head): + irblock = ircfg.blocks[loc_key] + + # Create live index for phi new vars + # They do not exist in the graph yet, so index is set to None + if irblock_has_phi(irblock): + for dst in irblock[0]: + if not dst.is_id(): + continue + new_var = self.phi_new_var[dst] + self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) + + live_index += 1 + + # Create live index for remaining assignments + for index, assignblk in enumerate(irblock): + used = False + for dst in assignblk: + if not dst.is_id(): + continue + if dst in self.ssa.immutable_ids: + # Will not be considered by the current algo, ignore it + # (for instance, IRDst) + continue + + assert dst not in self.var_to_varinfo + self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) + used = True + if used: + live_index += 1 + + + def ssa_def_dominates(self, node_a, node_b): + """ + Return living index order of @node_a and @node_b + @node_a: Varinfo instance + @node_b: Varinfo instance + """ + ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index + return ret + + def 
merge_set_sort(self, merge_set): + """ + Return a sorted list of (live_index, var) from @merge_set in dominance + order + @merge_set: set of coalescing variables + """ + return sorted( + (self.var_to_varinfo[var].live_index, var) + for var in merge_set + ) + + def ssa_def_is_live_at(self, node_a, node_b, parent): + """ + Return True if @node_a is live during @node_b definition + If @parent is None, this is a liveness test for a post phi variable; + Else, it is a liveness test for a variable source of the phi node + + @node_a: Varinfo instance + @node_b: Varinfo instance + @parent: Optional parent location of the phi source + """ + loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index + if parent and index_b is None: + index_b = 0 + if node_a not in self.new_var_to_srcs_parents: + # node_a is not a new var (it is a "classic" var) + # -> use a basic liveness test + liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] + return node_a in liveness_b.var_out + + for def_loc_key in self.new_var_to_srcs_parents[node_a]: + # Consider node_a as defined at the end of its parents blocks + # and compute liveness check accordingly + + if def_loc_key == parent: + # Same path as node_a definition, so SSA ensure b cannot be live + # on this path (otherwise, a Phi would already happen earlier) + continue + liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] + if node_b in liveness_end_block.var_out: + return True + return False + + def merge_nodes_interfere(self, node_a, node_b, parent): + """ + Return True if @node_a and @node_b interfere + @node_a: variable + @node_b: variable + @parent: Optional parent location of the phi source for liveness tests + + Interference check is: is x live at y definition (or reverse) + TODO: add Value-based interference improvement + """ + if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: + # Defined in the same AssignBlock -> interfere + return True + 
+ if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: + return self.ssa_def_is_live_at(node_a, node_b, parent) + return self.ssa_def_is_live_at(node_b, node_a, parent) + + def merge_sets_interfere(self, merge_a, merge_b, parent): + """ + Return True if no variable in @merge_a and @merge_b interferes. + + Implementation of "Algorithm 2: Check intersection in a set of variables" + + @merge_a: a dom ordered list of equivalent variables + @merge_b: a dom ordered list of equivalent variables + @parent: Optional parent location of the phi source for liveness tests + """ + if merge_a == merge_b: + # No need to consider interference if equal + return False + + merge_a_list = self.merge_set_sort(merge_a) + merge_b_list = self.merge_set_sort(merge_b) + dom = [] + while merge_a_list or merge_b_list: + if not merge_a_list: + _, current = merge_b_list.pop(0) + elif not merge_b_list: + _, current = merge_a_list.pop(0) + else: + # compare live_indexes (standing for dominance) + if merge_a_list[-1] < merge_b_list[-1]: + _, current = merge_a_list.pop(0) + else: + _, current = merge_b_list.pop(0) + while dom and not self.ssa_def_dominates(dom[-1], current): + dom.pop() + + # Don't test node in same merge_set + if ( + # Is stack not empty? + dom and + # Trivial non-interference if dom.top() and current come + # from the same merge set + not (dom[-1] in merge_a and current in merge_a) and + not (dom[-1] in merge_b and current in merge_b) and + # Actually test for interference + self.merge_nodes_interfere(current, dom[-1], parent) + ): + return True + dom.append(current) + return False + + def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): + """ + Try to coalesce variables each dst/src couple together from + @parallel_copies + + @parallel_copies: a dictionary representing dst/src parallel + assignments. 
+ @parent: Optional parent location of the phi source for liveness tests + """ + for dst, src in viewitems(parallel_copies): + dst_merge = self.merge_state.setdefault(dst, set([dst])) + src_merge = self.merge_state.setdefault(src, set([src])) + if not self.merge_sets_interfere(dst_merge, src_merge, parent): + dst_merge.update(src_merge) + for node in dst_merge: + self.merge_state[node] = dst_merge + + def aggressive_coalesce_block(self): + """Try to coalesce phi var with their pre/post variables""" + + ircfg = self.ssa.graph + + # Run coalesce on the post phi parallel copy + for irblock in viewvalues(ircfg.blocks): + if not irblock_has_phi(irblock): + continue + parallel_copies = {} + for dst in self.phi_destinations[irblock.loc_key]: + parallel_copies[dst] = self.phi_new_var[dst] + self.aggressive_coalesce_parallel_copy(parallel_copies, None) + + # Run coalesce on the pre phi parallel copy + + # Stand for the virtual parallel copies at the end of Phi's block + # parents + parent_to_parallel_copies = {} + for dst in irblock[0]: + new_var = self.phi_new_var[dst] + for parent, src in self.phi_parent_sources[dst]: + parent_to_parallel_copies.setdefault(parent, {})[new_var] = src + + for parent, parallel_copies in viewitems(parent_to_parallel_copies): + self.aggressive_coalesce_parallel_copy(parallel_copies, parent) + + def get_best_merge_set_name(self, merge_set): + """ + For a given @merge_set, prefer an original SSA variable instead of a + created copy. In other case, take a random name. 
+ @merge_set: set of equivalent expressions + """ + if not merge_set: + raise RuntimeError("Merge set should not be empty") + for var in merge_set: + if var not in self.copy_vars: + return var + # Get random name + return var + + + def replace_merge_sets(self): + """ + In the graph, replace all variables from merge state by their + representative variable + """ + replace = {} + merge_sets = set() + + # Elect representative for merge sets + merge_set_to_name = {} + for merge_set in viewvalues(self.merge_state): + frozen_merge_set = frozenset(merge_set) + merge_sets.add(frozen_merge_set) + var_name = self.get_best_merge_set_name(merge_set) + merge_set_to_name[frozen_merge_set] = var_name + + # Generate replacement of variable by their representative + for merge_set in merge_sets: + var_name = merge_set_to_name[merge_set] + merge_set = list(merge_set) + for var in merge_set: + replace[var] = var_name + + self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) + + def remove_phi(self): + """ + Remove phi operators in @ifcfg + @ircfg: IRDiGraph instance + """ + + for irblock in list(viewvalues(self.ssa.graph.blocks)): + assignblks = list(irblock) + out = {} + for dst, src in viewitems(assignblks[0]): + if src.is_op('Phi'): + assert set([dst]) == set(src.args) + continue + out[dst] = src + assignblks[0] = AssignBlock(out, assignblks[0].instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) + + def remove_assign_eq(self): + """ + Remove trivial expressions (a=a) in the current graph + """ + for irblock in list(viewvalues(self.ssa.graph.blocks)): + assignblks = list(irblock) + for i, assignblk in enumerate(assignblks): + out = {} + for dst, src in viewitems(assignblk): + if dst == src: + continue + out[dst] = src + assignblks[i] = AssignBlock(out, assignblk.instr) + self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) diff --git a/miasm/arch/__init__.py b/miasm/arch/__init__.py new file mode 100644 index 
00000000..78e2dd3c --- /dev/null +++ b/miasm/arch/__init__.py @@ -0,0 +1 @@ +"Architecture implementations" diff --git a/miasm/arch/aarch64/__init__.py b/miasm/arch/aarch64/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm/arch/aarch64/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm/arch/aarch64/arch.py b/miasm/arch/aarch64/arch.py new file mode 100644 index 00000000..d12fbe72 --- /dev/null +++ b/miasm/arch/aarch64/arch.py @@ -0,0 +1,2175 @@ +#-*- coding:utf-8 -*- + +from builtins import range +from future.utils import viewitems, viewvalues + +import logging +from pyparsing import * +from miasm.expression import expression as m2_expr +from miasm.core.cpu import * +from collections import defaultdict +from miasm.core.bin_stream import bin_stream +from miasm.arch.aarch64 import regs as regs_module +from miasm.arch.aarch64.regs import * +from miasm.core.cpu import log as log_cpu +from miasm.expression.modint import uint32, uint64, mod_size2int +from miasm.core.asm_ast import AstInt, AstId, AstMem, AstOp + +log = logging.getLogger("aarch64dis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + +# refs from A_e_armv8_arm.pdf + +# log_cpu.setLevel(logging.DEBUG) + + +replace_regs = { + W0: X0[:32], + W1: X1[:32], + W2: X2[:32], + W3: X3[:32], + W4: X4[:32], + W5: X5[:32], + W6: X6[:32], + W7: X7[:32], + W8: X8[:32], + W9: X9[:32], + + W10: X10[:32], + W11: X11[:32], + W12: X12[:32], + W13: X13[:32], + W14: X14[:32], + W15: X15[:32], + W16: X16[:32], + W17: X17[:32], + W18: X18[:32], + W19: X19[:32], + + W20: X20[:32], + W21: X21[:32], + W22: X22[:32], + W23: X23[:32], + W24: X24[:32], + W25: X25[:32], + W26: X26[:32], + W27: X27[:32], + W28: X28[:32], + W29: X29[:32], + + W30: LR[:32], + + WSP: SP[:32], + + WZR: m2_expr.ExprInt(0, 32), + XZR: m2_expr.ExprInt(0, 
64), + +} + + + + +shift2expr_dct = {'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', 'ROR': '>>>'} +shift_str = ["LSL", "LSR", "ASR", "ROR"] +shift_expr = ["<<", ">>", "a>>", '>>>'] + + +def cb_shift(tokens): + return shift2expr_dct[tokens[0]] + + +def cb_extreg(tokens): + return tokens[0] + + +def cb_shiftreg(tokens): + if len(tokens) == 1: + return tokens[0] + elif len(tokens) == 3: + result = AstOp(tokens[1], tokens[0], tokens[2]) + return result + else: + raise ValueError('bad string') + + +def cb_shift_sc(tokens): + if len(tokens) == 1: + return tokens[0] + elif len(tokens) == 3: + if tokens[1] != '<<': + raise ValueError('bad op') + result = AstOp("slice_at", tokens[0], tokens[2]) + return result + else: + raise ValueError('bad string') + + +def cb_extend(tokens): + if len(tokens) == 1: + return tokens[0] + result = AstOp(tokens[1], tokens[0], tokens[2]) + return result + + +def cb_deref_pc_off(tokens): + if len(tokens) == 2 and tokens[0] == "PC": + result = AstOp('preinc', AstId(ExprId('PC', 64)), tokens[1]) + return result + raise ValueError('bad string') + +def cb_deref_pc_nooff(tokens): + if len(tokens) == 1 and tokens[0] == "PC": + result = AstOp('preinc', AstId(PC)) + return result + raise ValueError('bad string') + +all_binaryop_lsl_t = literal_list(shift_str).setParseAction(cb_shift) + +all_binaryop_shiftleft_t = literal_list(["LSL"]).setParseAction(cb_shift) + +extend_lst = ['UXTB', 'UXTH', 'UXTW', 'UXTX', 'SXTB', 'SXTH', 'SXTW', 'SXTX'] +extend2_lst = ['UXTW', 'LSL', 'SXTW', 'SXTX'] + +all_extend_t = literal_list(extend_lst).setParseAction(cb_extreg) +all_extend2_t = literal_list(extend2_lst).setParseAction(cb_extreg) + + +gpregz32_extend = (gpregsz32_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) +gpregz64_extend = (gpregsz64_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) + + +shift32_off = (gpregsz32_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) +shift64_off = 
(gpregsz64_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) + + +shiftimm_imm_sc = (base_expr + all_binaryop_shiftleft_t + base_expr).setParseAction(cb_shift_sc) + +shiftimm_off_sc = shiftimm_imm_sc | base_expr + + +shift_off = (shift32_off | shift64_off) +reg_ext_off = (gpregz32_extend | gpregz64_extend) + +gpregs_32_64 = (gpregs32_info.parser | gpregs64_info.parser) +gpregsz_32_64 = (gpregsz32_info.parser | gpregsz64_info.parser | base_expr) + +simdregs = (simd08_info.parser | simd16_info.parser | simd32_info.parser | simd64_info.parser) +simdregs_h = (simd32_info.parser | simd64_info.parser | simd128_info.parser) + +simdregs_h_zero = (simd32_info.parser | simd64_info.parser | simd128_info.parser | base_expr) + + +gpregs_info = {32: gpregs32_info, + 64: gpregs64_info} +gpregsz_info = {32: gpregsz32_info, + 64: gpregsz64_info} + + +simds_info = {8: simd08_info, + 16: simd16_info, + 32: simd32_info, + 64: simd64_info, + 128: simd128_info} + + + +def cb_deref_nooff(t): + # XXX default + result = AstOp("preinc", t[0], AstInt(0)) + return result + + +def cb_deref_post(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): + return + result = AstOp("postinc", *t) + return result + + +def cb_deref_pre(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): + return + result = AstOp("preinc", *t) + return result + + +def cb_deref_pre_wb(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): + return + result = AstOp("preinc_wb", *t) + return result + + +LBRACK = Suppress("[") +RBRACK = Suppress("]") +COMMA = Suppress(",") +POSTINC = Suppress("!") + +deref_nooff = (LBRACK + gpregs64_info.parser + RBRACK).setParseAction(cb_deref_nooff) +deref_off_post = (LBRACK + gpregs64_info.parser + RBRACK + COMMA + base_expr).setParseAction(cb_deref_post) +deref_off_pre = (LBRACK + gpregs64_info.parser + COMMA + base_expr + 
RBRACK).setParseAction(cb_deref_pre) +deref_off_pre_wb = (LBRACK + gpregs64_info.parser + COMMA + base_expr + RBRACK + POSTINC).setParseAction(cb_deref_pre_wb) + +deref = (deref_off_post | deref_off_pre_wb | deref_off_pre | deref_nooff) + + +deref_pc_off = (LBRACK + Literal("PC") + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pc_off) +deref_pc_nooff = (LBRACK + Literal("PC") + RBRACK).setParseAction(cb_deref_pc_nooff) + +deref_pc = (deref_pc_off | deref_pc_nooff) + +def cb_deref_ext2op(t): + if len(t) == 4: + result = AstOp('segm', t[0], AstOp(t[2], t[1], t[3])) + return result + elif len(t) == 2: + result = AstOp('segm', *t) + return result + + raise ValueError("cad deref") + +deref_ext2 = (LBRACK + gpregs_32_64 + COMMA + gpregs_32_64 + Optional(all_extend2_t + base_expr) + RBRACK).setParseAction(cb_deref_ext2op) + + +class additional_info(object): + + def __init__(self): + self.except_on_instr = False + self.lnk = None + self.cond = None + +CONDS = [ + 'EQ', 'NE', 'CS', 'CC', + 'MI', 'PL', 'VS', 'VC', + 'HI', 'LS', 'GE', 'LT', + 'GT', 'LE', 'AL', 'NV'] + +CONDS_INV = [ + 'NE', 'EQ', 'CC', 'CS', + 'PL', 'MI', 'VC', 'VS', + 'LS', 'HI', 'LT', 'GE', + 'LE', 'GT', 'NV', 'AL'] + +BRCOND = ['B.' 
+ cond for cond in CONDS] + ['CBZ', 'CBNZ', 'TBZ', 'TBNZ'] + +# for conditional selec +conds_expr, _, conds_info = gen_regs(CONDS, {}) +conds_inv_expr, _, conds_inv_info = gen_regs(CONDS_INV, {}) + + + +class aarch64_arg(m_arg): + def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): + if size_hint is None: + size_hint = 64 + if fixed_size is None: + fixed_size = set() + if isinstance(value, AstId): + if value.name in all_regs_ids_byname: + reg = all_regs_ids_byname[value.name] + fixed_size.add(reg.size) + return reg + if isinstance(value.name, ExprId): + fixed_size.add(value.name.size) + return value.name + loc_key = loc_db.get_or_create_name_location(value.name.encode()) + return m2_expr.ExprLoc(loc_key, size_hint) + if isinstance(value, AstInt): + assert size_hint is not None + return m2_expr.ExprInt(value.value, size_hint) + if isinstance(value, AstOp): + if value.op == "segm": + segm = self.asm_ast_to_expr(value.args[0], loc_db) + ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) + return m2_expr.ExprOp('segm', segm, ptr) + + args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] + if len(fixed_size) == 0: + # No fixed size + pass + elif len(fixed_size) == 1: + # One fixed size, regen all + size = list(fixed_size)[0] + args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] + else: + raise ValueError("Size conflict") + + return m2_expr.ExprOp(value.op, *args) + return None + + +class instruction_aarch64(instruction): + __slots__ = [] + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_aarch64, self).__init__(*args, **kargs) + + @staticmethod + def arg2str(expr, index=None, loc_db=None): + wb = False + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: + op_str = 
shift_str[shift_expr.index(expr.op)] + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "slice_at": + return "%s LSL %s" % (expr.args[0], expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in extend_lst: + op_str = expr.op + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "postinc": + if expr.args[1].arg != 0: + return "[%s], %s" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc_wb": + if expr.args[1].arg != 0: + return "[%s, %s]!" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc": + if len(expr.args) == 1: + return "[%s]" % (expr.args[0]) + elif not isinstance(expr.args[1], m2_expr.ExprInt) or expr.args[1].arg != 0: + return "[%s, %s]" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == 'segm': + arg = expr.args[1] + if isinstance(arg, m2_expr.ExprId): + arg = str(arg) + elif arg.op == 'LSL' and arg.args[1].arg == 0: + arg = str(arg.args[0]) + else: + arg = "%s %s %s" % (arg.args[0], arg.op, arg.args[1]) + return '[%s, %s]' % (expr.args[0], arg) + + else: + raise NotImplementedError("bad op") + + def dstflow(self): + return self.name in BRCOND + ["B", "BL", "BR", "BLR"] + + def mnemo_flow_to_dst_index(self, name): + if self.name in ['CBZ', 'CBNZ']: + return 1 + elif self.name in ['TBZ', 'TBNZ']: + return 2 + else: + return 0 + + def dstflow2label(self, loc_db): + index = self.mnemo_flow_to_dst_index(self.name) + expr = self.args[index] + if not expr.is_int(): + return + addr = expr.arg + self.offset + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[index] = m2_expr.ExprLoc(loc_key, expr.size) + + def breakflow(self): + return self.name in BRCOND + ["BR", "BLR", 
"RET", "ERET", "DRPS", "B", "BL"] + + def is_subcall(self): + return self.name in ["BLR", "BL"] + + def getdstflow(self, loc_db): + index = self.mnemo_flow_to_dst_index(self.name) + return [self.args[index]] + + def splitflow(self): + return self.name in BRCOND + ["BLR", "BL"] + + def get_symbol_size(self, symbol, loc_db): + return 64 + + def fixDstOffset(self): + index = self.mnemo_flow_to_dst_index(self.name) + e = self.args[index] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, m2_expr.ExprInt): + log.debug('dyn dst %r', e) + return + off = e.arg - self.offset + if int(off % 4): + raise ValueError('strange offset! %r' % off) + self.args[index] = m2_expr.ExprInt(int(off), 64) + + + +class mn_aarch64(cls_mn): + delayslot = 0 + name = "aarch64" + regs = regs_module + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = {'l': PC, 'b': PC} + sp = {'l': SP, 'b': SP} + instruction = instruction_aarch64 + max_instruction_len = 4 + alignment = 4 + + @classmethod + def getpc(cls, attrib=None): + return PC + + @classmethod + def getsp(cls, attrib=None): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + return info + + @classmethod + def getbits(cls, bs, attrib, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) + while n: + offset = start // 8 + n_offset = cls.endian_offset(attrib, offset) + c = cls.getbytes(bs, n_offset, 1) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def endian_offset(cls, attrib, offset): + if attrib == "l": + return (offset & ~3) + 3 - offset % 4 + elif attrib == "b": + return offset + else: + 
raise NotImplementedError('bad attrib') + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 32, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + l = sum([x.l for x in fields]) + if l == 32: + return fields + return fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_aarch64, self).value(mode) + if mode == 'l': + return [x[::-1] for x in v] + elif mode == 'b': + return [x for x in v] + else: + raise NotImplementedError('bad attrib') + + def get_symbol_size(self, symbol, loc_db, mode): + return 32 + + def reset_class(self): + super(mn_aarch64, self).reset_class() + if hasattr(self, "sf"): + self.sf.value = None + + +def aarch64op(name, fields, args=None, alias=False): + dct = {"fields": fields, "alias":alias} + if args is not None: + dct['args'] = args + type(name, (mn_aarch64,), dct) + + +class aarch64_gpreg_noarg(reg_noarg): + parser = gpregs_32_64 + gpregs_info = gpregs_info + + def decode(self, v): + size = 64 if self.parent.sf.value else 32 + self.expr = self.gpregs_info[size].expr[v] + return True + + def encode(self): + if not test_set_sf(self.parent, self.expr.size): + return False + if not self.expr.size in self.gpregs_info: + return False + if not self.expr in self.gpregs_info[self.expr.size].expr: + return False + self.value = self.gpregs_info[self.expr.size].expr.index(self.expr) + return True + + +class aarch64_simdreg(reg_noarg, aarch64_arg): + parser = simdregs + simd_size = [8, 16, 32, 64] + + def decode(self, v): + if self.parent.size.value > len(self.simd_size): + return False + size = self.simd_size[self.parent.size.value] + self.expr = simds_info[size].expr[v] + return True + + def encode(self): + if not self.expr.size in self.simd_size: + return False + if not self.expr in 
simds_info[self.expr.size].expr: + return False + self.value = simds_info[self.expr.size].expr.index(self.expr) + self.parent.size.value = self.simd_size.index(self.expr.size) + return True + + +class aarch64_simdreg_h(aarch64_simdreg): + parser = simdregs_h + simd_size = [32, 64, 128] + + +class aarch64_simdreg_32_64(aarch64_simdreg): + parser = simdregs_h + simd_size = [32, 64] + + +class aarch64_simdreg_32_64_zero(aarch64_simdreg_32_64): + parser = simdregs_h_zero + + def decode(self, v): + if v == 0 and self.parent.opc.value == 1: + size = 64 if self.parent.size.value else 32 + self.expr = m2_expr.ExprInt(0, size) + return True + else: + return super(aarch64_simdreg_32_64_zero, self).decode(v) + + def encode(self): + if isinstance(self.expr, m2_expr.ExprInt): + self.parent.opc.value = 1 + self.value = 0 + return True + else: + self.parent.opc.value = 0 + return super(aarch64_simdreg_32_64_zero, self).encode() + + +class aarch64_gpreg_isf(reg_noarg, aarch64_arg): + parser = gpregs_32_64 + + def decode(self, v): + size = 32 if self.parent.sf.value else 64 + self.expr = gpregs_info[size].expr[v] + return True + + def encode(self): + if not self.expr in gpregs_info[self.expr.size].expr: + return False + self.value = gpregs_info[self.expr.size].expr.index(self.expr) + self.parent.sf.value = 1 if self.expr.size == 32 else 0 + return True + + +class aarch64_gpreg(aarch64_gpreg_noarg, aarch64_arg): + pass + + +class aarch64_gpreg_n1(aarch64_gpreg): + + def decode(self, v): + if v == 0b11111: + return False + return super(aarch64_gpreg_n1, self).decode(v) + + def encode(self): + super(aarch64_gpreg_n1, self).encode() + return self.value != 0b11111 + + +class aarch64_gpregz(aarch64_gpreg_noarg, aarch64_arg): + parser = gpregsz_32_64 + gpregs_info = gpregsz_info + + +class aarch64_gpreg0(bsi, aarch64_arg): + parser = gpregsz_32_64 + gpregs_info = gpregsz_info + + def decode(self, v): + size = 64 if self.parent.sf.value else 32 + if v == 0x1F: + self.expr = 
m2_expr.ExprInt(0, size) + else: + self.expr = self.gpregs_info[size].expr[v] + return True + + def encode(self): + if isinstance(self.expr, m2_expr.ExprInt): + if self.expr.arg == 0: + self.value = 0x1F + return True + return False + if not self.expr.size in self.gpregs_info: + return False + if not test_set_sf(self.parent, self.expr.size): + return False + if not self.expr in self.gpregs_info[self.expr.size].expr: + return False + self.value = self.gpregs_info[self.expr.size].expr.index(self.expr) + return True + + +class aarch64_crreg(reg_noarg, aarch64_arg): + reg_info = cr_info + parser = reg_info.parser + + +class aarch64_gpreg32_nodec(bsi): + reg_info = gpregs32_info + + +class aarch64_gpreg64_nodec(bsi): + reg_info = gpregs64_info + + +class aarch64_gpreg32_noarg(reg_noarg): + reg_info = gpregs32_info + parser = reg_info.parser + + +class aarch64_gpreg32(aarch64_gpreg32_noarg, aarch64_arg): + reg_info = gpregs32_info + parser = reg_info.parser + + +class aarch64_gpreg64_noarg(reg_noarg): + reg_info = gpregs64_info + parser = reg_info.parser + + +class aarch64_gpreg64(reg_noarg, aarch64_arg): + reg_info = gpregs64_info + parser = reg_info.parser + + +class aarch64_gpregz32_noarg(reg_noarg): + reg_info = gpregsz32_info + parser = reg_info.parser + + +class aarch64_gpregz32(aarch64_gpreg32_noarg, aarch64_arg): + reg_info = gpregsz32_info + parser = reg_info.parser + + +class aarch64_gpregz64_noarg(reg_noarg): + reg_info = gpregsz64_info + parser = reg_info.parser + + +class aarch64_gpregz64(reg_noarg, aarch64_arg): + reg_info = gpregsz64_info + parser = reg_info.parser + + +class aarch64_simd08_noarg(reg_noarg): + reg_info = simd08_info + parser = reg_info.parser + + +class aarch64_simd08(aarch64_simd08_noarg, aarch64_arg): + reg_info = simd08_info + parser = reg_info.parser + + +class aarch64_simd16_noarg(reg_noarg): + reg_info = simd16_info + parser = reg_info.parser + + +class aarch64_simd16(aarch64_simd16_noarg, aarch64_arg): + reg_info = simd16_info + 
parser = reg_info.parser + + +class aarch64_simd32_noarg(reg_noarg): + reg_info = simd32_info + parser = reg_info.parser + + +class aarch64_simd32(aarch64_simd32_noarg, aarch64_arg): + reg_info = simd32_info + parser = reg_info.parser + + +class aarch64_simd64_noarg(reg_noarg): + reg_info = simd64_info + parser = reg_info.parser + + +class aarch64_simd64(aarch64_simd64_noarg, aarch64_arg): + reg_info = simd64_info + parser = reg_info.parser + + +class aarch64_simd128_noarg(reg_noarg): + reg_info = simd128_info + parser = reg_info.parser + + +class aarch64_simd128(aarch64_simd128_noarg, aarch64_arg): + reg_info = simd128_info + parser = reg_info.parser + + +class aarch64_imm_32(imm_noarg, aarch64_arg): + parser = base_expr + + +class aarch64_imm_64(aarch64_imm_32): + parser = base_expr + + +class aarch64_int64_noarg(int32_noarg): + parser = base_expr + intsize = 64 + intmask = (1 << intsize) - 1 + int2expr = lambda self, x: m2_expr.ExprInt( + sign_ext(x, self.l, self.intsize), 64) + + +class aarch64_uint64_noarg(imm_noarg): + parser = base_expr + intsize = 64 + intmask = (1 << intsize) - 1 + int2expr = lambda self, x: m2_expr.ExprInt(x, 64) + + +class aarch64_uint64(aarch64_uint64_noarg, aarch64_arg): + parser = base_expr + + +def set_imm_to_size(size, expr): + if size == expr.size: + return expr + if size > expr.size: + expr = m2_expr.ExprInt(int(expr), size) + else: + if expr.arg > (1 << size) - 1: + return None + expr = m2_expr.ExprInt(int(expr), size) + return expr + + +class aarch64_imm_sf(imm_noarg): + parser = base_expr + + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(aarch64_imm_sf, self).fromstring(text, loc_db, parser_result) + if start is None: + return start, stop + size = self.parent.args[0].expr.size + if self.expr in gpregs64_info.expr + gpregs32_info.expr: + return None, None + if isinstance(self.expr, m2_expr.ExprOp): + return False + expr = set_imm_to_size(size, self.expr) + if expr is None: + return None, None + 
self.expr = expr + return start, stop + + def encode(self): + if not isinstance(self.expr, m2_expr.ExprInt): + return False + if not test_set_sf(self.parent, self.expr.size): + return False + value = int(self.expr) + if value >= 1 << self.l: + return False + self.value = value + return True + + def decode(self, v): + size = 64 if self.parent.sf.value else 32 + self.expr = m2_expr.ExprInt(v, size) + return True + + +class aarch64_imm_sft(aarch64_imm_sf, aarch64_arg): + + def encode(self): + if not isinstance(self.expr, m2_expr.ExprInt): + return False + if not test_set_sf(self.parent, self.expr.size): + return False + value = int(self.expr) + if value < 1 << self.l: + self.parent.shift.value = 0 + else: + if value & 0xFFF: + return False + value >>= 12 + if value >= 1 << self.l: + return False + self.parent.shift.value = 1 + self.value = value + return True + + def decode(self, v): + size = 64 if self.parent.sf.value else 32 + if self.parent.shift.value == 0: + self.expr = m2_expr.ExprInt(v, size) + elif self.parent.shift.value == 1: + self.expr = m2_expr.ExprInt(v << 12, size) + else: + return False + return True + +OPTION2SIZE = [32, 32, 32, 64, + 32, 32, 32, 64] + + +class aarch64_gpreg_ext(reg_noarg, aarch64_arg): + parser = reg_ext_off + + def encode(self): + if not isinstance(self.expr, m2_expr.ExprOp): + return False + if self.expr.op not in extend_lst: + return False + reg, amount = self.expr.args + + if not reg in gpregsz_info[self.expr.size].expr: + return False + self.value = gpregsz_info[self.expr.size].expr.index(reg) + option = extend_lst.index(self.expr.op) + if self.expr.size != OPTION2SIZE[option]: + if not test_set_sf(self.parent, self.expr.size): + return False + self.parent.option.value = option + self.parent.imm.value = int(amount) + return True + + def decode(self, v): + if self.parent.sf.value == 0: + size = 64 if self.parent.sf.value else 32 + else: + size = OPTION2SIZE[self.parent.option.value] + reg = gpregsz_info[size].expr[v] + + 
# Operation name of the extend/shift applied to the offset register, indexed
# by the value of the 3-bit 'option' field.
EXT2_OP = {
    0b010: 'UXTW',
    0b011: 'LSL',
    0b110: 'SXTW',
    0b111: 'SXTX',
}

# Reverse table: operation name -> 'option' field value.
EXT2_OP_INV = {name: option for option, name in viewitems(EXT2_OP)}
class aarch64_gpreg_sftimm(reg_noarg, aarch64_arg):
    """General purpose register operand with an optional immediate shift.

    The register index is stored in this field, the shift kind in the
    parent's 'shift' field and the shift amount in the parent's 'imm' field.
    """
    reg_info = gpregsz_info
    parser = shift_off

    def encode(self):
        """Fill the register, 'shift' and 'imm' fields from self.expr.

        Return False when the expression is neither a known register nor a
        known shift operation of a known register by an integer amount.
        """
        size = self.expr.size
        # Check the size against the table which is actually indexed below,
        # for both the plain register and the shifted register forms.
        if size not in self.reg_info:
            return False
        if not test_set_sf(self.parent, size):
            return False
        regs = self.reg_info[size].expr

        if isinstance(self.expr, m2_expr.ExprId):
            # Plain register: no shift
            if self.expr not in regs:
                return False
            self.parent.shift.value = 0
            self.parent.imm.value = 0
            self.value = regs.index(self.expr)
            return True

        if not isinstance(self.expr, m2_expr.ExprOp):
            return False
        if self.expr.op not in shift_expr:
            return False
        args = self.expr.args
        # The operands are read as (register, amount)
        if len(args) != 2:
            return False
        if args[0] not in regs:
            return False
        if not isinstance(args[1], m2_expr.ExprInt):
            return False
        self.parent.shift.value = shift_expr.index(self.expr.op)
        self.parent.imm.value = int(args[1])
        self.value = regs.index(args[0])
        return True

    def decode(self, v):
        """Build the (possibly shifted) register expression from index @v"""
        size = 64 if self.parent.sf.value else 32
        e = self.reg_info[size].expr[v]
        amount = self.parent.imm.value
        # A null amount is rendered as the bare register
        if amount != 0:
            e = m2_expr.ExprOp(
                shift_expr[self.parent.shift.value], e, m2_expr.ExprInt(amount, e.size))
        self.expr = e
        return True
class bits(object):
    """Stand for ARM ASL 'bits' type, ie. a bit vector"""

    __slots__ = ["size", "value"]

    def __init__(self, size, value):
        """Instantiate a bitvector of size @size with value @value

        Raise ValueError if @value does not fit in @size bits (negative
        values never fit).
        """
        value = int(value)
        self.size = int(size)
        if value & self.mask != value:
            # The arguments must be interpolated explicitly: ValueError does
            # not apply '%' formatting by itself.
            raise ValueError(
                "Value %r is too large for %r bits (mask %r)" % (
                    value,
                    size,
                    self.mask
                )
            )
        self.value = value

    def concat_left(self, other_bits):
        """Return a new bits instance for @other_bits . self"""
        return bits(self.size + other_bits.size,
                    self.value | (other_bits.value << self.size))

    @property
    def mask(self):
        """Integer with the @size lowest bits set"""
        return (1 << self.size) - 1

    def __invert__(self):
        return bits(self.size, self.value ^ self.mask)

    def __int__(self):
        return self.value

    def __and__(self, other_bits):
        assert other_bits.size == self.size
        return bits(self.size, self.value & other_bits.value)

    def __eq__(self, other_bits):
        """Two bit vectors are equal if both their size and value match"""
        if not isinstance(other_bits, bits):
            return NotImplemented
        return (self.size == other_bits.size and
                self.value == other_bits.value)

    def __ne__(self, other_bits):
        # Python 2 does not derive '!=' from __eq__
        result = self.__eq__(other_bits)
        if result is NotImplemented:
            return result
        return not result

    def __getitem__(self, info):
        """Return the sub bit vector [start:stop], bit 0 being the lowest bit

        Only slices without step are supported; a missing bound defaults to
        0 (start) or to the vector size (stop).
        """
        if not isinstance(info, slice):
            raise RuntimeError("Not implemented")
        if info.step is not None:
            raise RuntimeError("Not implemented")
        start = 0 if info.start is None else info.start
        stop = self.size if info.stop is None else info.stop
        # The result holds (stop - start) bits: mask on that width, not on
        # the absolute upper bound.
        width = stop - start
        return bits(width, (self.value >> start) & ((1 << width) - 1))

    @property
    def pop_count(self):
        "Population count: number of bit set"
        # self.value is never negative (enforced by the constructor)
        return bin(self.value).count('1')

    def __str__(self):
        return "'%s'" % "".join('1' if self.value & (1 << i) else '0'
                                for i in reversed(range(self.size)))
# EncodeBitMasks doesn't have any equivalent in ARM ASL shared functions
# This implementation "reverses" DecodeBitMasks flow
def EncodeBitMasks(wmask):
    """Return the (immr, imms, immn) fields encoding the bit mask @wmask

    @wmask: bits instance holding the expected mask
    Raise NotEncodable if no field combination produces @wmask.
    """
    # Find replicate: smallest element whose replication gives back wmask
    M = wmask.size
    for esize in range(1, M + 1):
        if M % esize != 0:
            continue
        if wmask == Replicate(wmask[:esize], M):
            break
    else:
        raise NotEncodable

    # A 1-bit element means the mask is only '0's or only '1's. It would be
    # encoded with a null 'len', which DecodeBitMasks rejects as reserved.
    if esize < 2:
        raise NotEncodable

    # Find ROR value: welem is only '1's
    welem_after_ror = wmask[:esize]
    S = welem_after_ror.pop_count - 1
    welem = ZeroExtend(Ones(S + 1), esize)
    for R in range(esize):
        if ROR(welem, R) == welem_after_ror:
            break
    else:
        raise NotEncodable

    # Find len value: esize == 1 << len_
    for len_ in range(M):
        if (1 << len_) == esize:
            break
    else:
        raise NotEncodable
    # immn:imms only offers 7 bits to describe the element size
    if len_ > 6:
        raise NotEncodable

    if len_ == 6:
        # N = 1
        immn = 1
        imms = S
    else:
        # N = 0, NOT(imms) have to be considered
        immn = 0
        mask = (1 << ((6 - len_ - 1))) - 1
        mask <<= (len_ + 1)
        imms = S | mask
    immr = R
    return immr, imms, immn
class aarch64_imm_hw_sc(aarch64_arg):
    """16-bit immediate optionally positioned at a multiple of 16 bits.

    The immediate is stored in this field and the position, divided by 16,
    in the parent's 'hw' field. A non null position is represented as the
    operation @shift_op applied on (immediate, position).
    """
    parser = shiftimm_off_sc
    shift_op = 'slice_at'

    def decode(self, v):
        """Build the immediate expression from field value @v"""
        size = 64 if self.parent.sf.value else 32
        expr = m2_expr.ExprInt(v, size)
        amount = m2_expr.ExprInt(16 * self.parent.hw.value, size)
        if self.parent.hw.value:
            self.expr = m2_expr.ExprOp(self.shift_op, expr, amount)
        else:
            self.expr = expr
        return True

    def encode(self):
        """Fill this field and the parent's 'hw' field from self.expr

        Return False when the expression is not a 16-bit integer, optionally
        positioned at a multiple of 16 lower than the first operand's size.
        """
        if isinstance(self.expr, m2_expr.ExprInt):
            # Bare immediate: position 0
            if self.expr.arg > 0xFFFF:
                return False
            self.value = int(self.expr)
            self.parent.hw.value = 0
            return True

        if not (isinstance(self.expr, m2_expr.ExprOp) and
                self.expr.op == self.shift_op and
                len(self.expr.args) == 2 and
                isinstance(self.expr.args[0], m2_expr.ExprInt) and
                isinstance(self.expr.args[1], m2_expr.ExprInt)):
            return False
        size = self.parent.args[0].expr.size
        if set_imm_to_size(size, self.expr.args[0]) is None:
            return False
        if set_imm_to_size(size, self.expr.args[1]) is None:
            return False
        arg, amount = [int(arg) for arg in self.expr.args]
        if arg > 0xFFFF:
            return False
        # The position must stay inside the destination: this also keeps
        # amount // 16 within the 'hw' field (a position of 64 would need
        # the value 4).
        if amount % 16 or amount >= size:
            return False
        self.value = arg
        self.parent.hw.value = amount // 16
        return True
def get_size(parent):
    """Return the size information carried by the 'size' field of @parent

    The 'amount' attribute of the field is returned when it exists, its
    'value' otherwise. 0 is returned if @parent has no 'size' field.
    """
    if hasattr(parent, "size"):
        field = parent.size
        return field.amount if hasattr(field, "amount") else field.value
    return 0
class aarch64_sf_scale(aarch64_deref):
    """Memory operand whose offset is scaled according to the operand size"""

    # Operand size (in bits) -> shift applied to the encoded offset
    size2scale = {32: 2, 64: 3}

    def decode_w_size(self, off):
        """Return the real offset corresponding to the encoded offset @off"""
        size = 2 + self.parent.sf.value
        return off << size

    def encode_w_size(self, off):
        """Return the encoded form of offset @off

        Return False if the size of the first operand has no known scale, or
        if @off is not a multiple of the scale (such an offset cannot be
        represented).
        """
        size = self.parent.args[0].expr.size
        if not size in self.size2scale:
            return False
        scale = self.size2scale[size]
        # Reject misaligned offsets instead of silently dropping their low
        # bits, as aarch64_deref_size.encode_w_size does.
        if off & ((1 << scale) - 1):
            return False
        off = int(mod_size2int[size](off) >> scale)
        return off
aarch64_b40(aarch64_arg): + parser = base_expr + + def decode(self, v): + self.expr = m2_expr.ExprInt( + (self.parent.sf.value << self.l) | v, self.parent.rt.expr.size) + return True + + def encode(self): + if not isinstance(self.expr, m2_expr.ExprInt): + return False + size = self.parent.args[0].expr.size + value = int(self.expr) + self.value = value & self.lmask + if self.parent.sf.value is None: + self.parent.sf.value = value >> self.l + return True + else: + return value >> self.l == self.parent.sf.value + + +shift = bs(l=2, fname='shift') + +shiftb = bs(l=1, fname='shift', order=-1) + + +rn64_v = bs(l=5, cls=(aarch64_gpreg64_nodec,), fname='rn', order=-1) + +rn = bs(l=5, cls=(aarch64_gpreg,), fname="rn") +rs = bs(l=5, cls=(aarch64_gpreg,), fname="rs") +rm = bs(l=5, cls=(aarch64_gpreg,), fname="rm") +rd = bs(l=5, cls=(aarch64_gpreg,), fname="rd") +ra = bs(l=5, cls=(aarch64_gpregz,), fname="ra") +rt = bs(l=5, cls=(aarch64_gpregz,), fname="rt") +rt2 = bs(l=5, cls=(aarch64_gpregz,), fname="rt2") +rn0 = bs(l=5, cls=(aarch64_gpreg0,), fname="rn") + +rmz = bs(l=5, cls=(aarch64_gpregz,), fname="rm") +rnz = bs(l=5, cls=(aarch64_gpregz,), fname="rn") +rdz = bs(l=5, cls=(aarch64_gpregz,), fname="rd") + + +rn_n1 = bs(l=5, cls=(aarch64_gpreg_n1,), fname="rn") +rm_n1 = bs(l=5, cls=(aarch64_gpreg_n1,), fname="rm") + + +rn_na = bs(l=5, cls=(aarch64_gpreg_noarg,), fname="rn", order=-1) +rn32_na = bs(l=5, cls=(aarch64_gpreg32_noarg,), fname="rn", order=-1) +rn64_na = bs(l=5, cls=(aarch64_gpreg64_noarg,), fname="rn", order=-1) + +sd1 = bs(l=5, cls=(aarch64_simdreg_h,), fname="rt") +sd2 = bs(l=5, cls=(aarch64_simdreg_h,), fname="rt2") + +sdn_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="rn") +sdd_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="rd") +sdm_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="rm") +sda_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="ra") + + +sdm_32_64_zero = bs(l=5, cls=(aarch64_simdreg_32_64_zero,), fname="rm") + +crn = bs(l=4, 
cls=(aarch64_crreg,), fname="crn") +crm = bs(l=4, cls=(aarch64_crreg,), fname="crm") + + +rn64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rn") +rs64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rs") +rm64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rm") +rd64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rd") +rt64 = bs(l=5, cls=(aarch64_gpregz64,), fname="rt") +ra64 = bs(l=5, cls=(aarch64_gpregz64,), fname="ra") + +rn32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rn") +rm32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rm") +rd32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rd") +rs32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rs") + +sd08 = bs(l=5, cls=(aarch64_simd08,), fname="rd") +sd16 = bs(l=5, cls=(aarch64_simd16,), fname="rd") +sd32 = bs(l=5, cls=(aarch64_simd32,), fname="rd") +sd64 = bs(l=5, cls=(aarch64_simd64,), fname="rd") +sd128 = bs(l=5, cls=(aarch64_simd128,), fname="rd") + +sn08 = bs(l=5, cls=(aarch64_simd08,), fname="rn") +sn16 = bs(l=5, cls=(aarch64_simd16,), fname="rn") +sn32 = bs(l=5, cls=(aarch64_simd32,), fname="rn") +sn64 = bs(l=5, cls=(aarch64_simd64,), fname="rn") +sn128 = bs(l=5, cls=(aarch64_simd128,), fname="rn") + + +rt32 = bs(l=5, cls=(aarch64_gpregz32,), fname="rt") + +rt_isf = bs(l=5, cls=(aarch64_gpreg_isf,), fname="rt") + +rn64_deref = bs(l=5, cls=(aarch64_deref,), fname="rn") +rn64_deref_sz = bs(l=5, cls=(aarch64_deref_size,), fname="rn") +rn64_deref_sf = bs(l=5, cls=(aarch64_sf_scale,), fname="rn") +rn64_deref_sd = bs(l=5, cls=(aarch64_sd_scale,), fname="rn") + +rn64_deref_nooff = bs(l=5, cls=(aarch64_deref_nooff,), fname="rn") + +imm_sft_12 = bs(l=12, cls=(aarch64_imm_sft,)) + +# imm32_3 = bs(l=3, cls=(aarch64_imm_32,)) +imm32_3 = bs(l=3, fname="imm") +imm6 = bs(l=6, fname="imm", order=-1) +imm3 = bs(l=3, fname="imm", order=-1) +simm6 = bs(l=6, cls=(aarch64_int64_noarg, aarch64_arg), fname="imm", order=-1) +simm9 = bs(l=9, cls=(aarch64_int64_noarg,), fname="imm", order=-1) +simm7 = bs(l=7, cls=(aarch64_int64_noarg,), fname="imm", order=-1) +nzcv = bs(l=4, 
cls=(aarch64_uint64_noarg, aarch64_arg), fname="nzcv", order=-1) +uimm5 = bs(l=5, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) +uimm12 = bs(l=12, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) +uimm16 = bs(l=16, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) +uimm7 = bs(l=7, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) + +uimm8 = bs(l=8, cls=(aarch64_uint64,), fname="imm", order=-1) + +op1 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op1") +op2 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op2") + + +imm16 = bs(l=16, fname="imm", order=-1) + + +immlo = bs(l=2, fname='immlo') +immhip = bs(l=19, cls=(aarch64_immhip_page,)) +immhi = bs(l=19, cls=(aarch64_immhi_page,)) + +option = bs(l=3, fname='option', order=-1) + + +rm_ext = bs(l=5, cls=(aarch64_gpreg_ext,), fname="rm") +rm_sft = bs(l=5, cls=(aarch64_gpreg_sftimm,), fname="rm") + +rm_ext2 = bs(l=5, cls=(aarch64_gpreg_ext2,), fname="rm") +rm_ext2_128 = bs(l=5, cls=(aarch64_gpreg_ext2_128,), fname="rm") + + +imms = bs(l=6, cls=(aarch64_imm_nsr,), fname='imms') +immr = bs(l=6, fname='immr') +immn = bs(l=1, fname='immn') + + +imm16_hw = bs(l=16, cls=(aarch64_imm_hw,), fname='imm') +imm16_hw_sc = bs(l=16, cls=(aarch64_imm_hw_sc,), fname='imm') +hw = bs(l=2, fname='hw') + + +a_imms = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) +a_immr = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) + + + +adsu_name = {'ADD': 0, 'SUB': 1} +bs_adsu_name = bs_name(l=1, name=adsu_name) + + +offs19 = bs(l=19, cls=(aarch64_offs,), fname='off') +offs19pc = bs(l=19, cls=(aarch64_offs_pc,), fname='off') + +offs26 = bs(l=26, cls=(aarch64_offs,), fname='off') +offs14 = bs(l=14, cls=(aarch64_offs,), fname='off') + +b40 = bs(l=5, cls=(aarch64_b40,), fname='b40', order=1) + +sdsize1 = bs(l=1, fname="size") + +sdsize = bs(l=2, fname="size") +opsize = bs(l=2, fname="size") +sd = bs(l=5, cls=(aarch64_simdreg,), fname='sd') + +opc = bs(l=1, fname='opc', 
order=-1) + +# add/sub (imm) +aarch64op("addsub", [sf, bs_adsu_name, modf, bs('10001'), shift, imm_sft_12, rn, rd], [rd, rn, imm_sft_12]) +aarch64op("cmp", [sf, bs('1'), bs('1'), bs('10001'), shift, imm_sft_12, rn, bs('11111')], [rn, imm_sft_12], alias=True) +aarch64op("cmn", [sf, bs('0'), bs('1'), bs('10001'), shift, imm_sft_12, rn, bs('11111')], [rn, imm_sft_12], alias=True) + +aarch64op("adrp", [bs('1'), immlo, bs('10000'), immhip, rd64], [rd64, immhip]) +aarch64op("adr", [bs('0'), immlo, bs('10000'), immhi, rd64], [rd64, immhi]) + +# add/sub (reg shift) +aarch64op("addsub", [sf, bs_adsu_name, modf, bs('01011'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("cmp", [sf, bs('1'), bs('1'), bs('01011'), shift, bs('0'), rm_sft, imm6, rn, bs('11111')], [rn, rm_sft], alias=True) +# add/sub (reg ext) +aarch64op("addsub", [sf, bs_adsu_name, modf, bs('01011'), bs('00'), bs('1'), rm_ext, option, imm3, rn, rd], [rd, rn, rm_ext]) +#aarch64op("cmp", [sf, bs('1'), bs('1'), bs('01011'), bs('00'), bs('1'), rm_ext, option, imm3, rn, bs('11111')], [rn, rm_ext], alias=True) + + +aarch64op("neg", [sf, bs('1'), modf, bs('01011'), shift, bs('0'), rm_sft, imm6, bs('11111'), rd], [rd, rm_sft], alias=True) + + +logic_name = {'AND': 0, 'ORR': 1, 'EOR': 2} +bs_logic_name = bs_name(l=2, name=logic_name) +# logical (imm) +aarch64op("logic", [sf, bs_logic_name, bs('100100'), immn, immr, imms, rn0, rd], [rd, rn0, imms]) +# ANDS +aarch64op("ands", [sf, bs('11'), bs('100100'), immn, immr, imms, rn0, rdz], [rdz, rn0, imms]) +aarch64op("tst", [sf, bs('11'), bs('100100'), immn, immr, imms, rn0, bs('11111')], [rn0, imms], alias=True) + + +# bitfield move p.149 +logicbf_name = {'SBFM': 0b00, 'BFM': 0b01, 'UBFM': 0b10} +bs_logicbf_name = bs_name(l=2, name=logicbf_name) +aarch64op("logic", [sf, bs_logicbf_name, bs('100110'), bs(l=1, cls=(aarch64_eq,), ref="sf"), a_immr, a_imms, rn, rd], [rd, rn, a_immr, a_imms]) + + +# logical (reg shift) +aarch64op("and", [sf, bs('00'), 
bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("bic", [sf, bs('00'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("orr", [sf, bs('01'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("orn", [sf, bs('01'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("mvn", [sf, bs('01'), bs('01010'), shift, bs('1'), rm_sft, imm6, bs('11111'), rd], [rd, rm_sft], alias=True) +aarch64op("eor", [sf, bs('10'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("eon", [sf, bs('10'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("ands", [sf, bs('11'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) +aarch64op("tst", [sf, bs('11'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, bs('11111')], [rn, rm_sft], alias=True) +aarch64op("bics", [sf, bs('11'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) + +# move reg +aarch64op("mov", [sf, bs('01'), bs('01010'), bs('00'), bs('0'), rmz, bs('000000'), bs('11111'), rd], [rd, rmz], alias=True) + + +aarch64op("adc", [sf, bs('00'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) +aarch64op("adcs", [sf, bs('01'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) + + +aarch64op("sbc", [sf, bs('10'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) +aarch64op("sbcs", [sf, bs('11'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) + + + +bcond = bs_mod_name(l=4, fname='cond', mn_mod=['EQ', 'NE', 'CS', 'CC', + 'MI', 'PL', 'VS', 'VC', + 'HI', 'LS', 'GE', 'LT', + 'GT', 'LE', 'AL', 'NV']) + +cond_arg = bs(l=4, cls=(aarch64_cond_arg,), fname="cond") +cond_inv_arg = bs(l=4, cls=(aarch64_cond_inv_arg,), fname="cond") +# unconditional branch (ret) +aarch64op("br", [bs('1101011'), bs('0000'), bs('11111'), bs('000000'), rn64, bs('00000')], [rn64]) +aarch64op("blr", [bs('1101011'), 
bs('0001'), bs('11111'), bs('000000'), rn64, bs('00000')], [rn64]) +aarch64op("ret", [bs('1101011'), bs('0010'), bs('11111'), bs('000000'), rn64, bs('00000')], [rn64]) +aarch64op("eret", [bs('1101011'), bs('0100'), bs('11111'), bs('000000'), bs('11111'), bs('00000')]) +aarch64op("drps", [bs('1101011'), bs('0101'), bs('11111'), bs('000000'), bs('11111'), bs('00000')]) + +# unconditional branch (imm) +aarch64op("b", [bs('0'), bs('00101'), offs26], [offs26]) +aarch64op("bl", [bs('1'), bs('00101'), offs26], [offs26]) + + +post_pre = bs(l=1, order=-1, fname='postpre') + +# conditional compare (imm) p.158 +ccmp_name = {'CCMN': 0, 'CCMP': 1} +bs_ccmp_name = bs_name(l=1, name=ccmp_name) +aarch64op("condcmp", [sf, bs_ccmp_name, bs('1'), bs('11010010'), uimm5, cond_arg, bs('1'), bs('0'), rn, bs('0'), nzcv], [rn, uimm5, nzcv, cond_arg]) +aarch64op("condcmp", [sf, bs_ccmp_name, bs('1'), bs('11010010'), rm, cond_arg, bs('0'), bs('0'), rn, bs('0'), nzcv], [rn, rm, nzcv, cond_arg]) + +ldst_b_name = {'STRB': 0, 'LDRB': 1} +bs_ldst_b_name = bs_name(l=1, name=ldst_b_name) +ldst_name = {'STR': 0, 'LDR': 1} +bs_ldst_name = bs_name(l=1, name=ldst_name) +ldst_h_name = {'STRH': 0, 'LDRH': 1} +bs_ldst_h_name = bs_name(l=1, name=ldst_h_name) + +ldst_tb_name = {'STTRB': 0, 'LDTRB': 1} +bs_ldst_tb_name = bs_name(l=1, name=ldst_tb_name) + +ldst_th_name = {'STTRH': 0, 'LDTRH': 1} +bs_ldst_th_name = bs_name(l=1, name=ldst_th_name) + +ldst_ub_name = {'STURB': 0, 'LDURB': 1} +bs_ldst_ub_name = bs_name(l=1, name=ldst_ub_name) +ldst_u_name = {'STUR': 0, 'LDUR': 1} +bs_ldst_u_name = bs_name(l=1, name=ldst_u_name) + +ldst_t_name = {'STTR': 0, 'LDTR': 1} +bs_ldst_st_name = bs_name(l=1, name=ldst_t_name) + +ldst_1u_name = {'STUR': 0b0, 'LDUR': 0b1} +bs_ldst_1u_name = bs_name(l=1, name=ldst_1u_name) + +ldst_uh_name = {'STURH': 0, 'LDURH': 1} +bs_ldst_uh_name = bs_name(l=1, name=ldst_uh_name) + + +ldst_sw_name = {'STRSW': 0, 'LDRSW': 1} +bs_ldst_sw_name = bs_name(l=1, name=ldst_sw_name) + +# load/store 
register (imm post index) +aarch64op("ldst", [bs('00'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_b_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldrsb", [bs('00'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) +aarch64op("ldrsh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) +aarch64op("ldst", [bs('01'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_h_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldst", [bs('10'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldrsw", [bs('10'), bs('111'), bs('0'), bs('00'), bs('10'), bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt64], [rt64, rn64_deref ]) +aarch64op("ldst", [bs('11'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt64], [rt64, rn64_deref ]) + +aarch64op("ldst", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, sd], [sd, rn64_deref ]) +aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, sd128], [sd128, rn64_deref ]) + +# load/store register (unsigned imm) +aarch64op("ldst", [bs('00', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), bs_ldst_b_name, uimm12, rn64_deref_sz, rt32], [rt32, rn64_deref_sz ]) +aarch64op("ldrsb", [bs('00', fname="size"), bs('111'), bs('0'), bs('01'), bs('1'), sf, uimm12, rn64_deref_sz, rt_isf], [rt_isf, rn64_deref_sz ]) +aarch64op("ldrsh", [bs('01', fname="size"), bs('111'), bs('0'), bs('01'), bs('1'), sf, uimm12, rn64_deref_sz, rt_isf], [rt_isf, rn64_deref_sz ]) +aarch64op("ldst", [bs('01', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), 
bs_ldst_h_name, uimm12, rn64_deref_sz, rt32], [rt32, rn64_deref_sz ]) +aarch64op("ldst", [bs('10', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), bs_ldst_name, uimm12, rn64_deref_sz, rt32], [rt32, rn64_deref_sz ]) +aarch64op("ldrsw", [bs('10', fname="size"), bs('111'), bs('0'), bs('01'), bs('10'), uimm12, rn64_deref_sz, rt64], [rt64, rn64_deref_sz ]) +aarch64op("ldst", [bs('11', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), bs_ldst_name, uimm12, rn64_deref_sz, rt64], [rt64, rn64_deref_sz ]) + +aarch64op("ldst", [sdsize, bs('111'), bs('1'), bs('01'), bs('0'), bs_ldst_name, uimm12, rn64_deref_sz, sd], [sd, rn64_deref_sz ]) +aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('01'), bs('1', fname='size', amount=4), bs_ldst_name, uimm12, rn64_deref_sz, sd128], [sd128, rn64_deref_sz ]) + +# load/store register (unp) +aarch64op("ldst", [bs('00'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_tb_name, bs('0'), simm9, bs('10'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldtrsb", [bs('00'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('10'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) +aarch64op("ldtrsh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('10'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) +aarch64op("ldsttrh",[bs('01'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_th_name, bs('0'), simm9, bs('10'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldtrsw", [bs('10'), bs('111'), bs('0'), bs('00'), bs('10'), bs('0'), simm9, bs('10'), rn64_deref, rt64], [rt64, rn64_deref ]) +aarch64op("ldstt", [bs('1'), sf, bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_st_name, bs('0'), simm9, bs('10'), rn64_deref, rt], [rt, rn64_deref ]) + +aarch64op("ldstt", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_st_name, bs('0'), simm9, bs('10'), rn64_deref, sd], [sd, rn64_deref ]) +aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_st_name, bs('0'), simm9, bs('10'), rn64_deref, 
sd128], [sd128, rn64_deref ]) + +# load/store register (unscaled imm) +aarch64op("ldst", [bs('00'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_ub_name, bs('0'), simm9, bs('00'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldursb", [bs('00'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('00'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) +aarch64op("ldstuh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_uh_name, bs('0'), simm9, bs('00'), rn64_deref, rt32], [rt32, rn64_deref ]) +aarch64op("ldursh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('00'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) +aarch64op("ldursw", [bs('10'), bs('111'), bs('0'), bs('00'), bs('10'), bs('0'), simm9, bs('00'), rn64_deref, rt64], [rt64, rn64_deref ]) +aarch64op("ldst", [bs('1'), sf, bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_u_name, bs('0'), simm9, bs('00'), rn64_deref, rt], [rt, rn64_deref ]) + +aarch64op("ldstu", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_u_name, bs('0'), simm9, bs('00'), rn64_deref, sd], [sd, rn64_deref ]) +aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_1u_name, bs('0'), simm9, bs('00'), rn64_deref, sd128], [sd128, rn64_deref ]) + +# load/store (register) p.728 + +aarch64op("ldstrb",[bs('00', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_b_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt32], [rt32, rm_ext2]) + +aarch64op("ldstrh",[bs('01', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_h_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt32], [rt32, rm_ext2]) + +aarch64op("ldrsb", [bs('00', fname="size"), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt_isf], [rt_isf, rm_ext2]) + +aarch64op("ldrsh", [bs('01', fname="size"), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt_isf], [rt_isf, rm_ext2]) + 
+aarch64op("ldst", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, sd], [sd, rm_ext2]) +aarch64op("ldst", [bs('00', fname="size"), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_name, bs('1'), rm_ext2_128, option, shiftb, bs('10'), rn64_v, sd128], [sd128, rm_ext2_128]) + +aarch64op("str", [bs('10', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt32], [rt32, rm_ext2]) + +aarch64op("ldrsw", [bs('10', fname="size"), bs('111'), bs('0'), bs('00'), bs('10'), bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt64], [rt64, rm_ext2]) + +aarch64op("ldst", [bs('11', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt64], [rt64, rm_ext2]) + +# load/store literal p.137 +aarch64op("ldr", [bs('0'), sf, bs('011'), bs('0'), bs('00'), offs19pc, rt], [rt, offs19pc]) +aarch64op("ldrsw", [bs('10'), bs('011'), bs('0'), bs('00'), offs19pc, rt64], [rt64, offs19pc]) + +# load/store simd literal p.142 +aarch64op("ldr", [sdsize, bs('011'), bs('1'), bs('00'), offs19pc, sd1], [sd1, offs19pc]) + + +# move wide p.203 +movwide_name = {'MOVN': 0b00, 'MOVZ': 0b10} +bs_movwide_name = bs_name(l=2, name=movwide_name) +# mov wide (imm) +aarch64op("mov", [sf, bs_movwide_name, bs('100101'), hw, imm16_hw, rd], [rd, imm16_hw]) +aarch64op("movk", [sf, bs('11'), bs('100101'), hw, imm16_hw_sc, rd], [rd, imm16_hw_sc]) + +# stp/ldp p.139 +ldstp_name = {'STP': 0b0, 'LDP': 0b1} +bs_ldstp_name = bs_name(l=1, name=ldstp_name) +aarch64op("ldstp", [sf, bs('0'), bs('101'), bs('0'), bs('0'), post_pre, bs('1'), bs_ldstp_name, simm7, rt2, rn64_deref_sf, rt], [rt, rt2, rn64_deref_sf]) +aarch64op("ldstp", [sf, bs('0'), bs('101'), bs('0'), bs('0'), bs('1'), bs('0'), bs_ldstp_name, simm7, rt2, rn64_deref_sf, rt], [rt, rt2, rn64_deref_sf]) + +aarch64op("ldstp", [sdsize, bs('101'), bs('1'), bs('0'), post_pre, bs('1'), 
bs_ldstp_name, uimm7, sd2, rn64_deref_sd, sd1], [sd1, sd2, rn64_deref_sd]) +aarch64op("ldstp", [sdsize, bs('101'), bs('1'), bs('0'), bs('1'), bs('0'), bs_ldstp_name, uimm7, sd2, rn64_deref_sd, sd1], [sd1, sd2, rn64_deref_sd]) + + +# data process p.207 +datap0_name = {'RBIT': 0b000000, 'REV16': 0b000001, + 'REV': 0b000010, + 'CLZ': 0b000100, 'CLS': 0b000101} +bs_datap0_name = bs_name(l=6, name=datap0_name) +aarch64op("ldstp", [bs('0', fname='sf'), bs('1'), modf, bs('11010110'), bs('00000'), bs_datap0_name, rn, rd]) +datap1_name = {'RBIT': 0b000000, 'REV16': 0b000001, + 'REV32': 0b000010, 'REV': 0b000011, + 'CLZ': 0b000100, 'CLS': 0b000101} +bs_datap1_name = bs_name(l=6, name=datap1_name) +aarch64op("ldstp", [bs('1', fname='sf'), bs('1'), modf, bs('11010110'), bs('00000'), bs_datap1_name, rn, rd]) + + +# conditional branch p.132 +aarch64op("b.", [bs('0101010'), bs('0'), offs19, bs('0'), bcond], [offs19]) +aarch64op("cbnz", [sf, bs('011010'), bs('1'), offs19, rt], [rt, offs19]) +aarch64op("cbz", [sf, bs('011010'), bs('0'), offs19, rt], [rt, offs19]) +aarch64op("tbnz", [sf, bs('011011'), bs('1'), b40, offs14, rt], [rt, b40, offs14]) +aarch64op("tbz", [sf, bs('011011'), bs('0'), b40, offs14, rt], [rt, b40, offs14]) + + +# fmov register p.160 +aarch64op("fmov", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('00'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) +# fmov scalar imm p.160 +aarch64op("fmov", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), uimm8, bs('100'), bs('00000'), sdd_32_64], [sdd_32_64, uimm8]) +# floating point comparison p.164 +aarch64op("fcmp", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64_zero, bs('00'), bs('1000'), sdn_32_64, bs('0'), opc, bs('000')], [sdn_32_64, sdm_32_64_zero]) +aarch64op("fcmpe", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64_zero, bs('00'), bs('1000'), sdn_32_64, bs('1'), opc, bs('000')], [sdn_32_64, sdm_32_64_zero]) +# floating point convert p.161 
+aarch64op("fcvtas",[sf, bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('00'), bs('100'), bs('000000'), sdn_32_64, rd], [rd, sdn_32_64]) +aarch64op("fcvtzu",[sf, bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('11'), bs('001'), bs('000000'), sdn_32_64, rd], [rd, sdn_32_64]) +aarch64op("fcvtzs",[sf, bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('11'), bs('000'), bs('000000'), sdn_32_64, rd], [rd, sdn_32_64]) + +aarch64op("fcvt", [bs('000'), bs('11110'), bs('11'), bs('1'), bs('0001'), bs('00'), bs('10000'), sn16, sd32], [sd32, sn16]) +aarch64op("fcvt", [bs('000'), bs('11110'), bs('11'), bs('1'), bs('0001'), bs('01'), bs('10000'), sn16, sd64], [sd64, sn16]) +aarch64op("fcvt", [bs('000'), bs('11110'), bs('00'), bs('1'), bs('0001'), bs('11'), bs('10000'), sn32, sd16], [sd16, sn32]) +aarch64op("fcvt", [bs('000'), bs('11110'), bs('00'), bs('1'), bs('0001'), bs('01'), bs('10000'), sn32, sd64], [sd64, sn32]) +aarch64op("fcvt", [bs('000'), bs('11110'), bs('01'), bs('1'), bs('0001'), bs('11'), bs('10000'), sn64, sd16], [sd16, sn64]) +aarch64op("fcvt", [bs('000'), bs('11110'), bs('01'), bs('1'), bs('0001'), bs('00'), bs('10000'), sn64, sd32], [sd32, sn64]) + + + +swapargs = bs_swapargs(l=1, fname="swap", mn_mod=list(range(1 << 1))) + +aarch64op("fmov", [bs('0'), bs('00'), bs('11110'), bs('00'), bs('1'), bs('00'), bs('110'), bs('000000'), sn32, rd32], [rd32, sn32]) +aarch64op("fmov", [bs('0'), bs('00'), bs('11110'), bs('00'), bs('1'), bs('00'), bs('111'), bs('000000'), rn32, sd32], [sd32, rn32]) +aarch64op("fmov", [bs('1'), bs('00'), bs('11110'), bs('00'), bs('1'), bs('00'), bs('110'), bs('000000'), sd32, rd32], [rd32, sd32]) +aarch64op("fmov", [bs('1'), bs('00'), bs('11110'), bs('01'), bs('1'), bs('00'), bs('111'), bs('000000'), rd64, sd64], [sd64, rd64]) +aarch64op("fmov", [bs('1'), bs('00'), bs('11110'), bs('01'), bs('1'), bs('00'), bs('110'), bs('000000'), sd64, rd64], [rd64, sd64]) + + + +# floating point arith p.163 +aarch64op("fsub", [bs('0'), bs('00'), 
bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('001'), bs('1'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) +aarch64op("fadd", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('001'), bs('0'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) +aarch64op("fdiv", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('000'), bs('1'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) +aarch64op("fmul", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('000'), bs('0'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) +aarch64op("fnmul", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('100'), bs('0'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) + +aarch64op("fabs", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('01'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) +aarch64op("fneg", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('10'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) +aarch64op("fsqrt", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('11'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) + + +# floating point multiply add p.163 +aarch64op("fmadd", [bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('0'), sdm_32_64, bs('0'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) +aarch64op("fmsub", [bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('0'), sdm_32_64, bs('1'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) +aarch64op("fnmadd",[bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('0'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) +aarch64op("fnmsub",[bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('1'), 
sdm_32_64, bs('1'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) + +# conversion float integer p.235 +aarch64op("scvtf", [sf, bs('0'), bs('0'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('00'), bs('010'), bs('000000'), rn, sdd_32_64], [sdd_32_64, rn]) +aarch64op("ucvtf", [sf, bs('0'), bs('0'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('00'), bs('011'), bs('000000'), rn, sdd_32_64], [sdd_32_64, rn]) + + + +# conditional select p.158 +aarch64op("csel", [sf, bs('0'), bs('0'), bs('11010100'), rmz, cond_arg, bs('00'), rnz, rd], [rd, rnz, rmz, cond_arg]) +aarch64op("csinc", [sf, bs('0'), bs('0'), bs('11010100'), rmz, cond_arg, bs('01'), rnz, rd], [rd, rnz, rmz, cond_arg]) +aarch64op("csinv", [sf, bs('1'), bs('0'), bs('11010100'), rmz, cond_arg, bs('00'), rnz, rd], [rd, rnz, rmz, cond_arg]) +aarch64op("csneg", [sf, bs('1'), bs('0'), bs('11010100'), rmz, cond_arg, bs('01'), rnz, rd], [rd, rnz, rmz, cond_arg]) +aarch64op("cset", [sf, bs('0'), bs('0'), bs('11010100'), bs('11111'), cond_inv_arg, bs('01'), bs('11111'), rd], [rd, cond_inv_arg], alias=True) +aarch64op("csetm", [sf, bs('1'), bs('0'), bs('11010100'), bs('11111'), cond_inv_arg, bs('00'), bs('11111'), rd], [rd, cond_inv_arg], alias=True) + + +# multiply p.156 +aarch64op("madd", [sf, bs('00'), bs('11011'), bs('000'), rm, bs('0'), ra, rn, rd], [rd, rn, rm, ra]) +aarch64op("msub", [sf, bs('00'), bs('11011'), bs('000'), rm, bs('1'), ra, rn, rd], [rd, rn, rm, ra]) + +aarch64op("umulh", [bs('1'), bs('00'), bs('11011'), bs('110'), rm64, bs('0'), bs('11111'), rn64, rd64], [rd64, rn64, rm64]) +aarch64op("smulh", [bs('1'), bs('00'), bs('11011'), bs('010'), rm64, bs('0'), bs('11111'), rn64, rd64], [rd64, rn64, rm64]) +aarch64op("umsubh",[bs('1'), bs('00'), bs('11011'), bs('101'), rm32, bs('1'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) + + +aarch64op("smaddl",[bs('1'), bs('00'), bs('11011'), bs('001'), rm32, bs('0'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) 
+aarch64op("umaddl",[bs('1'), bs('00'), bs('11011'), bs('101'), rm32, bs('0'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) + +aarch64op("smsubl",[bs('1'), bs('00'), bs('11011'), bs('001'), rm32, bs('1'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) +aarch64op("umsubl",[bs('1'), bs('00'), bs('11011'), bs('101'), rm32, bs('1'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) + +# division p.156 +aarch64op("sdiv", [sf, bs('0'), bs('0'), bs('11010110'), rm, bs('00001'), bs('1'), rn, rd], [rd, rn, rm]) +aarch64op("udiv", [sf, bs('0'), bs('0'), bs('11010110'), rm, bs('00001'), bs('0'), rn, rd], [rd, rn, rm]) + + +# extract register p.150 +aarch64op("extr", [sf, bs('00100111'), bs(l=1, cls=(aarch64_eq,), ref="sf"), bs('0'), rm, simm6, rn, rd], [rd, rn, rm, simm6]) + +# shift reg p.155 +shiftr_name = {'LSL': 0b00, 'LSR': 0b01, 'ASR': 0b10, 'ROR': 0b11} +bs_shiftr_name = bs_name(l=2, name=shiftr_name) + +aarch64op("shiftr", [sf, bs('0'), bs('0'), bs('11010110'), rm, bs('0010'), bs_shiftr_name, rn, rd], [rd, rn, rm]) + +# +aarch64op("NOP", [bs('11010101000000110010000000011111')]) + +# exception p.133 +aarch64op("brk", [bs('11010100'), bs('001'), uimm16, bs('000'), bs('00')], [uimm16]) +aarch64op("hlt", [bs('11010100'), bs('010'), uimm16, bs('000'), bs('00')], [uimm16]) +aarch64op("svc", [bs('11010100'), bs('000'), uimm16, bs('000'), bs('01')], [uimm16]) +aarch64op("hvc", [bs('11010100'), bs('000'), uimm16, bs('000'), bs('10')], [uimm16]) +aarch64op("smc", [bs('11010100'), bs('000'), uimm16, bs('000'), bs('11')], [uimm16]) + +# msr p.631 +msr_name = {'MSR': 0b0, 'MRS': 0b1} +bs_msr_name = bs_name(l=1, name=msr_name) +aarch64op("mrs", [bs('1101010100'), bs('1'), bs('1'), bs('1'), op1, crn, crm, op2, rt64], [rt64, op1, crn, crm, op2]) +aarch64op("msr", [bs('1101010100'), bs('0'), bs('1'), bs('1'), op1, crn, crm, op2, rt64], [op1, crn, crm, op2, rt64]) + +# load/store exclusive p.140 +aarch64op("stxr", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('0'), 
bs('11111'), rn64_deref_nooff, rt], [rs32, rt, rn64_deref_nooff]) +aarch64op("ldxr", [bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('0'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) + + +aarch64op("stxrb", [bs('0'), bs('0'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) +aarch64op("ldxrb", [bs('0'), bs('0'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rt32, rn64_deref_nooff]) + +aarch64op("stxrb", [bs('0'), bs('1'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) +aarch64op("ldxrh", [bs('0'), bs('1'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rt32, rn64_deref_nooff]) + +aarch64op("stxp", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('1'), rs32, bs('0'), rt2, rn64_deref_nooff, rt], [rs32, rt, rt2, rn64_deref_nooff]) +aarch64op("ldxp", [bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('1'), bs('11111'), bs('0'), rt2, rn64_deref_nooff, rt], [rt, rt2, rn64_deref_nooff]) + +# load acquire/store release p.141 +aarch64op("ldar", [bs('1'), sf, bs('001000'), bs('1'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) +aarch64op("ldarb",[bs('0'), bs('0'), bs('001000'), bs('1'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) +aarch64op("ldarh",[bs('0'), bs('1'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) +aarch64op("ldaxp",[bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('1'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) +aarch64op("ldaxr",[bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, 
rt], [rt, rn64_deref_nooff]) + +aarch64op("stlxr", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('1'), bs('11111'), rn64_deref_nooff, rt], [rs32, rt, rn64_deref_nooff]) +aarch64op("stlxrb",[bs('0'), bs('0'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('1'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) +aarch64op("stlxrh",[bs('0'), bs('1'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('1'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) +aarch64op("stlxp", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('1'), rs32, bs('1'), rt2, rn64_deref_nooff, rt], [rs32, rt, rt2, rn64_deref_nooff]) + +# barriers p.135 +aarch64op("dsb", [bs('1101010100'), bs('0000110011'), crm, bs('1'), bs('00'), bs('11111')], [crm]) +aarch64op("dmb", [bs('1101010100'), bs('0000110011'), crm, bs('1'), bs('01'), bs('11111')], [crm]) +aarch64op("isb", [bs('1101010100'), bs('0000110011'), crm, bs('1'), bs('10'), bs('11111')], [crm]) + +stacctype = bs_mod_name(l=1, fname='order', mn_mod=['', 'L']) +ltacctype = bs_mod_name(l=1, fname='order', mn_mod=['', 'A']) + + +aarch64op("casp", [bs('0'), sf, bs('001000'), bs('0'), ltacctype, bs('1'), rs, stacctype, bs('11111'), rn64_deref_nooff, rt], [rs, rt, rn64_deref_nooff]) +aarch64op("ldaxrb", [bs('00'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt32], [rt32, rn64_deref_nooff]) diff --git a/miasm/arch/aarch64/disasm.py b/miasm/arch/aarch64/disasm.py new file mode 100644 index 00000000..c4ad9181 --- /dev/null +++ b/miasm/arch/aarch64/disasm.py @@ -0,0 +1,27 @@ +from miasm.core.asmblock import disasmEngine +from miasm.arch.aarch64.arch import mn_aarch64 + +cb_aarch64_funcs = [] + + +def cb_aarch64_disasm(*args, **kwargs): + for func in cb_aarch64_funcs: + func(*args, **kwargs) + + +class dis_aarch64b(disasmEngine): + attrib = "b" + def __init__(self, bs=None, **kwargs): + super(dis_aarch64b, self).__init__( + mn_aarch64, 
self.attrib, bs, + dis_block_callback = cb_aarch64_disasm, + **kwargs) + + +class dis_aarch64l(disasmEngine): + attrib = "l" + def __init__(self, bs=None, **kwargs): + super(dis_aarch64l, self).__init__( + mn_aarch64, self.attrib, bs, + dis_block_callback = cb_aarch64_disasm, + **kwargs) diff --git a/miasm/arch/aarch64/ira.py b/miasm/arch/aarch64/ira.py new file mode 100644 index 00000000..e20a0943 --- /dev/null +++ b/miasm/arch/aarch64/ira.py @@ -0,0 +1,50 @@ +#-*- coding:utf-8 -*- + +from miasm.ir.analysis import ira +from miasm.arch.aarch64.sem import ir_aarch64l, ir_aarch64b + + +class ir_a_aarch64l_base(ir_aarch64l, ira): + + def __init__(self, loc_db=None): + ir_aarch64l.__init__(self, loc_db) + self.ret_reg = self.arch.regs.X0 + + +class ir_a_aarch64b_base(ir_aarch64b, ira): + + def __init__(self, loc_db=None): + ir_aarch64b.__init__(self, loc_db) + self.ret_reg = self.arch.regs.X0 + + +class ir_a_aarch64l(ir_a_aarch64l_base): + + def __init__(self, loc_db=None): + ir_a_aarch64l_base.__init__(self, loc_db) + self.ret_reg = self.arch.regs.X0 + + def get_out_regs(self, _): + return set([self.ret_reg, self.sp]) + + def sizeof_char(self): + return 8 + + def sizeof_short(self): + return 16 + + def sizeof_int(self): + return 32 + + def sizeof_long(self): + return 32 + + def sizeof_pointer(self): + return 32 + + +class ir_a_aarch64b(ir_a_aarch64b_base, ir_a_aarch64l): + + def __init__(self, loc_db=None): + ir_a_aarch64b_base.__init__(self, loc_db) + self.ret_reg = self.arch.regs.X0 diff --git a/miasm/arch/aarch64/jit.py b/miasm/arch/aarch64/jit.py new file mode 100644 index 00000000..0754f5be --- /dev/null +++ b/miasm/arch/aarch64/jit.py @@ -0,0 +1,80 @@ +from builtins import range +import logging + +from miasm.jitter.jitload import Jitter, named_arguments +from miasm.core.locationdb import LocationDB +from miasm.core.utils import pck64, upck64 +from miasm.arch.aarch64.sem import ir_aarch64b, ir_aarch64l + +log = logging.getLogger('jit_aarch64') +hnd = 
logging.StreamHandler()
hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
log.addHandler(hnd)
log.setLevel(logging.CRITICAL)


class jitter_aarch64l(Jitter):
    """Jitter for little-endian AArch64 code.

    Provides 64-bit stack helpers and the "stdcall"/"systemv" calling
    convention hooks: the first `max_reg_arg` arguments live in X0, X1, ...
    and the remaining ones are read from / written to the stack at SP.
    """

    # Number of arguments carried in X registers before spilling to the stack.
    max_reg_arg = 8

    def __init__(self, *args, **kwargs):
        Jitter.__init__(self, ir_aarch64l(LocationDB()), *args, **kwargs)
        self.vm.set_little_endian()

    def push_uint64_t(self, value):
        """Decrement SP by 8 and store @value (packed with pck64) at SP."""
        self.cpu.SP -= 8
        self.vm.set_mem(self.cpu.SP, pck64(value))

    def pop_uint64_t(self):
        """Read the 64-bit value at SP, increment SP by 8 and return it."""
        value = self.vm.get_u64(self.cpu.SP)
        self.cpu.SP += 8
        return value

    def get_stack_arg(self, index):
        """Return the 64-bit value stored at SP + 8 * @index."""
        return self.vm.get_u64(self.cpu.SP + 8 * index)

    # calling conventions

    @named_arguments
    def func_args_stdcall(self, n_args):
        """Collect @n_args call arguments.

        Returns a (return address, argument list) tuple; the return address
        is taken from LR.
        """
        args = []
        # Register-passed arguments: X0 .. X(max_reg_arg - 1)
        for i in range(min(n_args, self.max_reg_arg)):
            args.append(getattr(self.cpu, 'X%d' % i))
        # Remaining arguments are read from the stack, starting at SP
        for i in range(max(0, n_args - self.max_reg_arg)):
            args.append(self.get_stack_arg(i))
        ret_ad = self.cpu.LR
        return ret_ad, args

    def func_ret_stdcall(self, ret_addr, ret_value=None):
        """Return to @ret_addr, placing @ret_value in X0 when it is given."""
        self.pc = self.cpu.PC = ret_addr
        if ret_value is not None:
            self.cpu.X0 = ret_value
        return True

    def get_arg_n_stdcall(self, index):
        """Return argument number @index (register first, then stack)."""
        if index < self.max_reg_arg:
            arg = self.cpu.get_gpreg()['X%d' % index]
        else:
            arg = self.get_stack_arg(index - self.max_reg_arg)
        return arg

    def func_prepare_stdcall(self, ret_addr, *args):
        """Set up a call: place @args and store @ret_addr in LR.

        The register/stack split uses `max_reg_arg`, the same limit the
        readers (`func_args_stdcall`, `get_arg_n_stdcall`) apply, so that
        arguments written here are found again by them. The previous
        hard-coded limit of 4 pushed arguments 4..7 to the stack while the
        readers looked for them in X4..X7.
        """
        n_reg = self.max_reg_arg
        for index, value in enumerate(args[:n_reg]):
            setattr(self.cpu, 'X%d' % index, value)
        # Stack slot i is the one get_stack_arg(i) reads back
        for index, value in enumerate(args[n_reg:]):
            self.vm.set_mem(self.cpu.SP + 8 * index, pck64(value))
        self.cpu.LR = ret_addr

    # The "systemv" hooks reuse the stdcall implementations unchanged.
    func_args_systemv = func_args_stdcall
    func_ret_systemv = func_ret_stdcall
    get_arg_n_systemv = get_arg_n_stdcall
    func_prepare_systemv = func_prepare_stdcall

    def init_run(self, *args, **kwargs):
        """Run Jitter.init_run, then copy the jitter pc into the CPU PC."""
        Jitter.init_run(self, *args, **kwargs)
        self.cpu.PC = self.pc


class jitter_aarch64b(jitter_aarch64l):

    def __init__(self, *args, **kwargs):
        Jitter.__init__(self, ir_aarch64b(LocationDB()), *args, **kwargs)
self.vm.set_big_endian() diff --git a/miasm/arch/aarch64/regs.py b/miasm/arch/aarch64/regs.py new file mode 100644 index 00000000..2732323f --- /dev/null +++ b/miasm/arch/aarch64/regs.py @@ -0,0 +1,120 @@ +#-*- coding:utf-8 -*- + +from builtins import range +from miasm.expression.expression import ExprId +from miasm.core.cpu import gen_reg, gen_regs + +exception_flags = ExprId('exception_flags', 32) +interrupt_num = ExprId('interrupt_num', 32) + + +gpregs32_str = ["W%d" % i for i in range(0x1f)] + ["WSP"] +gpregs32_expr, gpregs32_init, gpregs32_info = gen_regs( + gpregs32_str, globals(), 32) + +gpregs64_str = ["X%d" % i for i in range(0x1E)] + ["LR", "SP"] +gpregs64_expr, gpregs64_init, gpregs64_info = gen_regs( + gpregs64_str, globals(), 64) + + +gpregsz32_str = ["W%d" % i for i in range(0x1f)] + ["WZR"] +gpregsz32_expr, gpregsz32_init, gpregsz32_info = gen_regs( + gpregsz32_str, globals(), 32) + +gpregsz64_str = ["X%d" % i for i in range(0x1e)] + ["LR", "XZR"] +gpregsz64_expr, gpregsz64_init, gpregsz64_info = gen_regs( + gpregsz64_str, globals(), 64) + +cr_str = ["c%d" % i for i in range(0xf)] +cr_expr, cr_init, cr_info = gen_regs(cr_str, globals(), 32) + + +simd08_str = ["B%d" % i for i in range(0x20)] +simd08_expr, simd08_init, simd08_info = gen_regs(simd08_str, globals(), 8) + +simd16_str = ["H%d" % i for i in range(0x20)] +simd16_expr, simd16_init, simd16_info = gen_regs(simd16_str, globals(), 16) + +simd32_str = ["S%d" % i for i in range(0x20)] +simd32_expr, simd32_init, simd32_info = gen_regs(simd32_str, globals(), 32) + +simd64_str = ["D%d" % i for i in range(0x20)] +simd64_expr, simd64_init, simd64_info = gen_regs(simd64_str, globals(), 64) + +simd128_str = ["Q%d" % i for i in range(0x20)] +simd128_expr, simd128_init, simd128_info = gen_regs( + simd128_str, globals(), 128) + + +PC, _ = gen_reg("PC", 64) +WZR, _ = gen_reg("WZR", 32) +XZR, _ = gen_reg("XZR", 64) + +PC_init = ExprId("PC_init", 64) +WZR_init = ExprId("WZR_init", 32) +XZR_init = 
ExprId("XZR_init", 64) + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +zf_init = ExprId("zf_init", size=1) +nf_init = ExprId("nf_init", size=1) +of_init = ExprId("of_init", size=1) +cf_init = ExprId("cf_init", size=1) + + +all_regs_ids = [ + B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, B15, B16, + B17, B18, B19, B20, B21, B22, B23, B24, B25, B26, B27, B28, B29, B30, B31, + + H0, H1, H2, H3, H4, H5, H6, H7, H8, H9, H10, H11, H12, H13, H14, H15, H16, + H17, H18, H19, H20, H21, H22, H23, H24, H25, H26, H27, H28, H29, H30, H31, + + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, + S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, + + D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, + D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, + + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, Q16, + Q17, Q18, Q19, Q20, Q21, Q22, Q23, Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31, + + W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, W16, + W17, W18, W19, W20, W21, W22, W23, W24, W25, W26, W27, W28, W29, W30, WSP, + + X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, + X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, LR, SP, + + exception_flags, + interrupt_num, + PC, + WZR, + XZR, + zf, nf, of, cf, + +] + + +all_regs_ids_no_alias = all_regs_ids + +attrib_to_regs = { + 'l': all_regs_ids_no_alias, + 'b': all_regs_ids_no_alias, +} + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] + +regs_flt_expr = [] diff --git a/miasm/arch/aarch64/sem.py 
b/miasm/arch/aarch64/sem.py new file mode 100644 index 00000000..ce77aa2c --- /dev/null +++ b/miasm/arch/aarch64/sem.py @@ -0,0 +1,1502 @@ +from builtins import range +from future.utils import viewitems + +from miasm.expression.expression import ExprId, ExprInt, ExprLoc, ExprMem, \ + ExprCond, ExprCompose, ExprOp, ExprAssign +from miasm.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock +from miasm.arch.aarch64.arch import mn_aarch64, conds_expr, replace_regs +from miasm.arch.aarch64.regs import * +from miasm.core.sembuilder import SemBuilder +from miasm.jitter.csts import EXCEPT_DIV_BY_ZERO, EXCEPT_INT_XX + + +# CPSR: N Z C V + + +def update_flag_zf(a): + return [ExprAssign(zf, ExprOp("FLAG_EQ", a))] + + +def update_flag_zf_eq(a, b): + return [ExprAssign(zf, ExprOp("FLAG_EQ_CMP", a, b))] + + +def update_flag_nf(arg): + return [ + ExprAssign( + nf, + ExprOp("FLAG_SIGN_SUB", arg, ExprInt(0, arg.size)) + ) + ] + + +def update_flag_zn(a): + e = [] + e += update_flag_zf(a) + e += update_flag_nf(a) + return e + + +def check_ops_msb(a, b, c): + if not a or not b or not c or a != b or a != c: + raise ValueError('bad ops size %s %s %s' % (a, b, c)) + + +def update_flag_add_cf(op1, op2): + "Compute cf in @op1 + @op2" + return [ExprAssign(cf, ExprOp("FLAG_ADD_CF", op1, op2))] + + +def update_flag_add_of(op1, op2): + "Compute of in @op1 + @op2" + return [ExprAssign(of, ExprOp("FLAG_ADD_OF", op1, op2))] + + +def update_flag_sub_cf(op1, op2): + "Compote CF in @op1 - @op2" + return [ExprAssign(cf, ExprOp("FLAG_SUB_CF", op1, op2) ^ ExprInt(1, 1))] + + +def update_flag_sub_of(op1, op2): + "Compote OF in @op1 - @op2" + return [ExprAssign(of, ExprOp("FLAG_SUB_OF", op1, op2))] + + +def update_flag_arith_add_co(arg1, arg2): + e = [] + e += update_flag_add_cf(arg1, arg2) + e += update_flag_add_of(arg1, arg2) + return e + + +def update_flag_arith_add_zn(arg1, arg2): + """ + Compute zf and nf flags for (arg1 + arg2) + """ + e = [] + e += update_flag_zf_eq(arg1, -arg2) + e += 
[ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] + return e + + +def update_flag_arith_sub_co(arg1, arg2): + """ + Compute cf and of flags for (arg1 - arg2) + """ + e = [] + e += update_flag_sub_cf(arg1, arg2) + e += update_flag_sub_of(arg1, arg2) + return e + + +def update_flag_arith_sub_zn(arg1, arg2): + """ + Compute zf and nf flags for (arg1 - arg2) + """ + e = [] + e += update_flag_zf_eq(arg1, arg2) + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, arg2))] + return e + + + + +def update_flag_zfaddwc_eq(arg1, arg2, arg3): + return [ExprAssign(zf, ExprOp("FLAG_EQ_ADDWC", arg1, arg2, arg3))] + +def update_flag_zfsubwc_eq(arg1, arg2, arg3): + return [ExprAssign(zf, ExprOp("FLAG_EQ_SUBWC", arg1, arg2, arg3))] + + +def update_flag_arith_addwc_zn(arg1, arg2, arg3): + """ + Compute znp flags for (arg1 + arg2 + cf) + """ + e = [] + e += update_flag_zfaddwc_eq(arg1, arg2, arg3) + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_ADDWC", arg1, arg2, arg3))] + return e + + +def update_flag_arith_subwc_zn(arg1, arg2, arg3): + """ + Compute znp flags for (arg1 - (arg2 + cf)) + """ + e = [] + e += update_flag_zfsubwc_eq(arg1, arg2, arg3) + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUBWC", arg1, arg2, arg3))] + return e + + +def update_flag_addwc_cf(op1, op2, op3): + "Compute cf in @res = @op1 + @op2 + @op3" + return [ExprAssign(cf, ExprOp("FLAG_ADDWC_CF", op1, op2, op3))] + + +def update_flag_addwc_of(op1, op2, op3): + "Compute of in @res = @op1 + @op2 + @op3" + return [ExprAssign(of, ExprOp("FLAG_ADDWC_OF", op1, op2, op3))] + + +def update_flag_arith_addwc_co(arg1, arg2, arg3): + e = [] + e += update_flag_addwc_cf(arg1, arg2, arg3) + e += update_flag_addwc_of(arg1, arg2, arg3) + return e + + + +def update_flag_subwc_cf(op1, op2, op3): + "Compute cf in @res = @op1 + @op2 + @op3" + return [ExprAssign(cf, ExprOp("FLAG_SUBWC_CF", op1, op2, op3) ^ ExprInt(1, 1))] + + +def update_flag_subwc_of(op1, op2, op3): + "Compute of in @res = @op1 + @op2 + @op3" + return [ExprAssign(of, 
ExprOp("FLAG_SUBWC_OF", op1, op2, op3))] + + +def update_flag_arith_subwc_co(arg1, arg2, arg3): + e = [] + e += update_flag_subwc_cf(arg1, arg2, arg3) + e += update_flag_subwc_of(arg1, arg2, arg3) + return e + + +cond2expr = {'EQ': ExprOp("CC_EQ", zf), + 'NE': ExprOp("CC_NE", zf), + 'CS': ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), # inv cf + 'CC': ExprOp("CC_U<", cf ^ ExprInt(1, 1)), # inv cf + 'MI': ExprOp("CC_NEG", nf), + 'PL': ExprOp("CC_POS", nf), + 'VS': ExprOp("CC_sOVR", of), + 'VC': ExprOp("CC_sNOOVR", of), + 'HI': ExprOp("CC_U>", cf ^ ExprInt(1, 1), zf), # inv cf + 'LS': ExprOp("CC_U<=", cf ^ ExprInt(1, 1), zf), # inv cf + 'GE': ExprOp("CC_S>=", nf, of), + 'LT': ExprOp("CC_S<", nf, of), + 'GT': ExprOp("CC_S>", nf, of, zf), + 'LE': ExprOp("CC_S<=", nf, of, zf), + 'AL': ExprInt(1, 1), + 'NV': ExprInt(0, 1) + } + + +def extend_arg(dst, arg): + if not isinstance(arg, ExprOp): + return arg + + op, (reg, shift) = arg.op, arg.args + if op == "SXTB": + base = reg[:8].signExtend(dst.size) + op = "<<" + elif op == "SXTH": + base = reg[:16].signExtend(dst.size) + op = "<<" + elif op == 'SXTW': + base = reg[:32].signExtend(dst.size) + op = "<<" + elif op == "SXTX": + base = reg.signExtend(dst.size) + op = "<<" + + elif op == "UXTB": + base = reg[:8].zeroExtend(dst.size) + op = "<<" + elif op == "UXTH": + base = reg[:16].zeroExtend(dst.size) + op = "<<" + elif op == 'UXTW': + base = reg[:32].zeroExtend(dst.size) + op = "<<" + elif op == "UXTX": + base = reg.zeroExtend(dst.size) + op = "<<" + + elif op in ['<<', '>>', '<>', '<<<', '>>>']: + base = reg.zeroExtend(dst.size) + else: + raise NotImplementedError('Unknown shifter operator') + + out = ExprOp(op, base, (shift.zeroExtend(dst.size) + & ExprInt(dst.size - 1, dst.size))) + return out + + +# SemBuilder context +ctx = {"PC": PC, + "LR": LR, + "nf": nf, + "zf": zf, + "cf": cf, + "of": of, + "cond2expr": cond2expr, + "extend_arg": extend_arg, + "ExprId":ExprId, + "exception_flags": exception_flags, + "interrupt_num": 
interrupt_num, + "EXCEPT_DIV_BY_ZERO": EXCEPT_DIV_BY_ZERO, + "EXCEPT_INT_XX": EXCEPT_INT_XX, + } + +sbuild = SemBuilder(ctx) + + +# instruction definition ############## + +@sbuild.parse +def add(arg1, arg2, arg3): + arg1 = arg2 + extend_arg(arg2, arg3) + + +@sbuild.parse +def sub(arg1, arg2, arg3): + arg1 = arg2 - extend_arg(arg2, arg3) + + +@sbuild.parse +def neg(arg1, arg2): + arg1 = - arg2 + + +@sbuild.parse +def and_l(arg1, arg2, arg3): + arg1 = arg2 & extend_arg(arg2, arg3) + + +@sbuild.parse +def eor(arg1, arg2, arg3): + arg1 = arg2 ^ extend_arg(arg2, arg3) + + +@sbuild.parse +def eon(arg1, arg2, arg3): + arg1 = arg2 ^ (~extend_arg(arg2, arg3)) + + +@sbuild.parse +def orr(arg1, arg2, arg3): + arg1 = arg2 | extend_arg(arg2, arg3) + + +@sbuild.parse +def orn(arg1, arg2, arg3): + arg1 = arg2 | (~extend_arg(arg2, arg3)) + + +@sbuild.parse +def bic(arg1, arg2, arg3): + arg1 = arg2 & (~extend_arg(arg2, arg3)) + + +def bics(ir, instr, arg1, arg2, arg3): + e = [] + tmp1, tmp2 = arg2, (~extend_arg(arg2, arg3)) + res = tmp1 & tmp2 + + e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', tmp1, tmp2))] + e += update_flag_nf(res) + + e.append(ExprAssign(arg1, res)) + return e, [] + + +@sbuild.parse +def mvn(arg1, arg2): + arg1 = (~extend_arg(arg1, arg2)) + + +def adds(ir, instr, arg1, arg2, arg3): + e = [] + arg3 = extend_arg(arg2, arg3) + res = arg2 + arg3 + + e += update_flag_arith_add_zn(arg2, arg3) + e += update_flag_arith_add_co(arg2, arg3) + + e.append(ExprAssign(arg1, res)) + + return e, [] + + +def subs(ir, instr, arg1, arg2, arg3): + e = [] + arg3 = extend_arg(arg2, arg3) + res = arg2 - arg3 + + + e += update_flag_arith_sub_zn(arg2, arg3) + e += update_flag_arith_sub_co(arg2, arg3) + + e.append(ExprAssign(arg1, res)) + return e, [] + + +def cmp(ir, instr, arg1, arg2): + e = [] + arg2 = extend_arg(arg1, arg2) + + e += update_flag_arith_sub_zn(arg1, arg2) + e += update_flag_arith_sub_co(arg1, arg2) + + return e, [] + + +def cmn(ir, instr, arg1, arg2): + e = [] + arg2 = 
def ccmp(ir, instr, arg1, arg2, arg3, arg4):
    """Conditional compare.

    When the condition named by ``arg4`` holds, the flags are those of the
    comparison ``arg1 - arg2``; otherwise they are loaded from the 4-bit
    immediate ``arg3``.

    Args:
        arg1: first operand.
        arg2: second operand; an integer operand is rebuilt at arg1's size.
        arg3: ``#nzcv`` immediate holding the fallback flag values.
        arg4: condition identifier, looked up in ``cond2expr`` by name.

    Returns:
        (assignments, []) -- one assignment per flag, no extra IR block.
    """
    e = []
    if arg2.is_int():
        # Resize the immediate so that it matches the first operand
        arg2 = ExprInt(int(arg2), arg1.size)

    # The #nzcv immediate is laid out as N:Z:C:V, i.e. N is bit 3 and V bit 0
    default_nf = arg3[3:4]
    default_zf = arg3[2:3]
    default_cf = arg3[1:2]
    default_of = arg3[0:1]

    cond_expr = cond2expr[arg4.name]
    res = arg1 - arg2
    # Flags of the comparison; nf must come from the result, not from the
    # flag value that was live before the instruction
    new_nf = update_flag_nf(res)[0].src
    new_zf = update_flag_zf(res)[0].src
    new_cf = update_flag_sub_cf(arg1, arg2)[0].src
    new_of = update_flag_sub_of(arg1, arg2)[0].src

    e.append(ExprAssign(nf, ExprCond(cond_expr, new_nf, default_nf)))
    e.append(ExprAssign(zf, ExprCond(cond_expr, new_zf, default_zf)))
    e.append(ExprAssign(cf, ExprCond(cond_expr, new_cf, default_cf)))
    e.append(ExprAssign(of, ExprCond(cond_expr, new_of, default_of)))
    return e, []
def ldr_size(ir, instr, arg1, arg2, size):
    """Load `size` bits from memory and zero-extend them into arg1.

    The address, and the optional base-register update, come from
    get_mem_access(arg2).  Returns the assignment list and an empty list of
    extra IR blocks.
    """
    addr, updt = get_mem_access(arg2)
    loaded = ExprMem(addr, size).zeroExtend(arg1.size)
    assignments = [ExprAssign(arg1, loaded)]
    if updt:
        # Addressing mode with write-back: also update the base register
        assignments.append(updt)
    return assignments, []
def stp(ir, instr, arg1, arg2, arg3):
    """Store the register pair (arg1, arg2) at the address described by arg3.

    arg1 is written at the computed address and arg2 immediately after it,
    ``arg1.size // 8`` bytes further.  Any base-register update reported by
    get_mem_access() is appended last.
    """
    addr, updt = get_mem_access(arg3)
    second_addr = addr + ExprInt(arg1.size // 8, addr.size)
    stores = [
        ExprAssign(ExprMem(addr, arg1.size), arg1),
        ExprAssign(ExprMem(second_addr, arg2.size), arg2),
    ]
    if updt:
        stores.append(updt)
    return stores, []
def bfm(ir, instr, arg1, arg2, arg3, arg4):
    """Bitfield move: copy a bit range of arg2 into arg1, other bits untouched.

    Args:
        arg1: destination; only a slice of it is assigned.
        arg2: source register.
        arg3: integer operand, used as the rotate amount ``rim``.
        arg4: integer operand; ``sim`` is its value plus one.

    Returns:
        (assignments, []) -- a single slice assignment, no extra IR block.
    """
    e = []
    rim, sim = int(arg3.arg), int(arg4) + 1
    if sim > rim:
        # Extract form: bits [rim, sim) of the source land at bit 0
        res = arg2[rim:sim]
        e.append(ExprAssign(arg1[:sim-rim], res))
    else:
        # Insert form: the low `sim` bits land at bit (size - rim)
        shift_i = arg2.size - rim
        res = arg2[:sim]
        e.append(ExprAssign(arg1[shift_i:shift_i+sim], res))
    return e, []
# UDIV: unsigned integer division of arg2 by arg3 into arg1.
# The body is handed to the SemBuilder through the ``sbuild.parse`` decorator.
@sbuild.parse
def udiv(arg1, arg2, arg3):
    if arg3:
        # Non-zero divisor: the destination receives the 'udiv' operation
        arg1 = ExprOp('udiv', arg2, arg3)
    else:
        # Zero divisor: the destination is not written; the divide-by-zero
        # value is stored in exception_flags instead.
        # NOTE(review): architecturally AArch64 UDIV yields 0 for a zero
        # divisor without trapping -- confirm that raising is intentional.
        exception_flags = ExprInt(EXCEPT_DIV_BY_ZERO,
                                  exception_flags.size)
# ADRP: arg1 receives the page base of PC plus the offset arg2.
# The body is handed to the SemBuilder through the ``sbuild.parse`` decorator.
@sbuild.parse
def adrp(arg1, arg2):
    # The mask clears the low 12 bits of PC (4096-byte alignment) before the
    # offset is added
    arg1 = (PC & ExprInt(0xfffffffffffff000, 64)) + arg2
arg2.size, 8): + out.append(arg2[i:i+8]) + out.reverse() + e = [] + result = ExprCompose(*out) + e.append(ExprAssign(arg1, result)) + return e, [] + + +def rev16(ir, instr, arg1, arg2): + out = [] + for i in range(0, arg2.size // 8): + index = (i & ~1) + (1 - (i & 1)) + out.append(arg2[index * 8:(index + 1) * 8]) + e = [] + result = ExprCompose(*out) + e.append(ExprAssign(arg1, result)) + return e, [] + + +@sbuild.parse +def extr(arg1, arg2, arg3, arg4): + compose = ExprCompose(arg2, arg3) + arg1 = compose[int(arg4.arg):int(arg4)+arg1.size] + + +@sbuild.parse +def svc(arg1): + exception_flags = ExprInt(EXCEPT_INT_XX, exception_flags.size) + interrupt_num = ExprInt(int(arg1), interrupt_num.size) + + +def fmov(ir, instr, arg1, arg2): + if arg2.is_int(): + # Transform int to signed floating-point constant with 3-bit exponent + # and normalized 4 bits of precision + # VFPExpandImm() of ARM Architecture Reference Manual + imm8 = int(arg2) + N = arg1.size + assert N in [32, 64] + E = 8 if N == 32 else 11 + F = N - E - 1; + # sign = imm8<7>; + sign = (imm8 >> 7) & 1; + # exp = NOT(imm8<6>):Replicate(imm8<6>,E-3):imm8<5:4>; + exp = (((imm8 >> 6) & 1) ^ 1) << (E - 3 + 2) + if (imm8 >> 6) & 1: + tmp = (1 << (E - 3)) - 1 + else: + tmp = 0 + exp |= tmp << 2 + exp |= (imm8 >> 4) & 3 + # frac = imm8<3:0>:Zeros(F-4); + frac = (imm8 & 0xf) << (F - 4) + value = frac + value |= exp << (4 + F - 4) + value |= sign << (4 + F - 4 + 1 + E - 3 + 2) + arg2 = ExprInt(value, N) + e = [ExprAssign(arg1, arg2)] + return e, [] + + +def fadd(ir, instr, arg1, arg2, arg3): + e = [] + e.append(ExprAssign(arg1, ExprOp('fadd', arg2, arg3))) + return e, [] + + +def fsub(ir, instr, arg1, arg2, arg3): + e = [] + e.append(ExprAssign(arg1, ExprOp('fsub', arg2, arg3))) + return e, [] + + +def fmul(ir, instr, arg1, arg2, arg3): + e = [] + e.append(ExprAssign(arg1, ExprOp('fmul', arg2, arg3))) + return e, [] + + +def fdiv(ir, instr, arg1, arg2, arg3): + e = [] + e.append(ExprAssign(arg1, ExprOp('fdiv', arg2, 
def fcmpe(ir, instr, arg1, arg2):
    """Floating-point compare of arg1 with arg2, result written to the flags.

    nf receives 'fcom_c0', cf its complement, zf receives 'fcom_c3' and of
    is cleared.  Returns the four assignments and no extra IR block.
    """
    c0 = ExprOp('fcom_c0', arg1, arg2)
    c3 = ExprOp('fcom_c3', arg1, arg2)
    assignments = [
        ExprAssign(nf, c0),
        ExprAssign(cf, ~c0),
        ExprAssign(zf, c3),
        ExprAssign(of, ExprInt(0, 1)),
    ]
    return assignments, []
# SMULL: signed multiply long.
# Both sources are sign-extended to 64 bits and the whole 64-bit product is
# written to arg1.  The product is exactly 64 bits wide, so it must not be
# sliced from bit 64 (that form only applies to the 128-bit SMULH product).
@sbuild.parse
def smull(arg1, arg2, arg3):
    arg1 = (arg2.signExtend(64) * arg3.signExtend(64))
def get_mnemo_expr(ir, instr, *args):
    """Dispatch an instruction to its semantic handler.

    The handler is looked up in ``mnemo_func`` under the lower-cased
    instruction name and called with ``(ir, instr, *args)``.

    Returns:
        The ``(instr_ir, extra_ir)`` pair produced by the handler.

    Raises:
        NotImplementedError: if no handler is registered for the mnemonic.
    """
    mnemo = instr.name.lower()
    if mnemo not in mnemo_func:
        raise NotImplementedError('unknown mnemo %s' % instr)
    instr_ir, extra_ir = mnemo_func[mnemo](ir, instr, *args)
    return instr_ir, extra_ir
['<<', '>>', '<>', '<<<', '>>>'] and + isinstance(args[-1].args[-1], ExprId)): + args[-1] = ExprOp(args[-1].op, + args[-1].args[0], + args[-1].args[-1][:8].zeroExtend(32)) + instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) + self.mod_pc(instr, instr_ir, extra_ir) + instr_ir, extra_ir = self.del_dst_zr(instr, instr_ir, extra_ir) + return instr_ir, extra_ir + + def expr_fix_regs_for_mode(self, e): + return e.replace_expr(replace_regs) + + def expraff_fix_regs_for_mode(self, e): + dst = self.expr_fix_regs_for_mode(e.dst) + src = self.expr_fix_regs_for_mode(e.src) + return ExprAssign(dst, src) + + def irbloc_fix_regs_for_mode(self, irblock, mode=64): + irs = [] + for assignblk in irblock: + new_assignblk = dict(assignblk) + for dst, src in viewitems(assignblk): + del(new_assignblk[dst]) + # Special case for 64 bits: + # If destination is a 32 bit reg, zero extend the 64 bit reg + if (isinstance(dst, ExprId) and + dst.size == 32 and + dst in replace_regs): + src = src.zeroExtend(64) + dst = replace_regs[dst].arg + + dst = self.expr_fix_regs_for_mode(dst) + src = self.expr_fix_regs_for_mode(src) + new_assignblk[dst] = src + irs.append(AssignBlock(new_assignblk, assignblk.instr)) + return IRBlock(irblock.loc_key, irs) + + def mod_pc(self, instr, instr_ir, extra_ir): + "Replace PC by the instruction's offset" + cur_offset = ExprInt(instr.offset, 64) + pc_fixed = {self.pc: cur_offset} + for i, expr in enumerate(instr_ir): + dst, src = expr.dst, expr.src + if dst != self.pc: + dst = dst.replace_expr(pc_fixed) + src = src.replace_expr(pc_fixed) + instr_ir[i] = ExprAssign(dst, src) + + for idx, irblock in enumerate(extra_ir): + extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ + if expr != self.pc else expr, + lambda expr: expr.replace_expr(pc_fixed)) + + + def del_dst_zr(self, instr, instr_ir, extra_ir): + "Writes to zero register are discarded" + regs_to_fix = [WZR, XZR] + instr_ir = [expr for expr in instr_ir if expr.dst not in 
class ir_aarch64b(ir_aarch64l):
    """Variant of ir_aarch64l initialised with the "b" attribute.

    NOTE(review): "b"/"l" presumably select big/little endianness -- confirm
    against mn_aarch64.
    """

    def __init__(self, loc_db=None):
        # The base initialiser is called directly so that the "b" attribute
        # is used instead of the "l" one hard-coded in ir_aarch64l.__init__
        IntermediateRepresentation.__init__(self, mn_aarch64, "b", loc_db)
        self.pc = PC
        self.sp = SP
        self.IRDst = ExprId('IRDst', 64)
        # Same address size as ir_aarch64l, whose __init__ is bypassed above
        self.addrsize = 64
# psr
# Names of the CPSR/SPSR field-mask registers: one per 4-bit mask value, the
# suffix listing the letter of every bit set in the mask (bit j -> sr_flags[j])
sr_flags = "cxsf"
cpsr_regs_str = []
spsr_regs_str = []
for i in range(0x10):
    o = "".join(sr_flags[j] for j in range(4) if i & (1 << j))
    cpsr_regs_str.append("CPSR_%s" % o)
    spsr_regs_str.append("SPSR_%s" % o)
def check_bounds(left_bound, right_bound, value):
    """Wrap `value` in an AstInt when left_bound <= value <= right_bound.

    Raises:
        ValueError: if `value` lies outside the inclusive range.
    """
    if not left_bound <= value <= right_bound:
        raise ValueError('shift operator immediate value out of bound')
    return AstInt(value)
def cb_shift(tokens):
    """Parse action building the AST of a register with an optional shift.

    * one token: the token itself is returned;
    * two tokens ``(reg, op)``: ``AstOp(op, reg)``;
    * three tokens ``(reg, op, amount)``: ``AstOp(op, reg, amount)``.

    Raises:
        ValueError: for any other number of tokens.
    """
    count = len(tokens)
    if count == 1:
        return tokens[0]
    if count == 2:
        return AstOp(tokens[1], tokens[0])
    if count == 3:
        return AstOp(tokens[1], tokens[0], tokens[2])
    raise ValueError("Bad arg")
def permut_args(order, args):
    """Reorder `args` so that each one sits where its class appears in `order`.

    The class of every element of `order` is mapped to its index (the last
    index wins when a class occurs several times); each argument is then
    stored at the index associated with its own class.  Slots that receive
    no argument stay None.

    Raises:
        KeyError: if an argument's class does not occur in `order`.
    """
    slot_of = {item.__class__: pos for pos, item in enumerate(order)}
    out = [None] * len(args)
    for arg in args:
        out[slot_of[arg.__class__]] = arg
    return out
+ return out + + if not isinstance(expr, ExprMem): + return str(expr) + + expr = expr.ptr + if isinstance(expr, ExprOp) and expr.op == 'wback': + wb = True + expr = expr.args[0] + + + if isinstance(expr, ExprId): + r, s = expr, None + elif len(expr.args) == 1 and isinstance(expr.args[0], ExprId): + r, s = expr.args[0], None + elif isinstance(expr.args[0], ExprId): + r, s = expr.args[0], expr.args[1] + else: + r, s = expr.args[0].args + if isinstance(s, ExprOp) and s.op in expr2shift_dct: + s = ' '.join( + str(x) + for x in (s.args[0], expr2shift_dct[s.op], s.args[1]) + ) + + if isinstance(expr, ExprOp) and expr.op == 'postinc': + o = '[%s]' % r + if s and not (isinstance(s, ExprInt) and s.arg == 0): + o += ', %s' % s + else: + if s and not (isinstance(s, ExprInt) and s.arg == 0): + o = '[%s, %s]' % (r, s) + else: + o = '[%s]' % (r) + + + if wb: + o += "!" + return o + + + def dstflow(self): + if self.is_subcall(): + return True + return self.name in conditional_branch + unconditional_branch + + def dstflow2label(self, loc_db): + expr = self.args[0] + if not isinstance(expr, ExprInt): + return + if self.name == 'BLX': + addr = expr.arg + self.offset + else: + addr = expr.arg + self.offset + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) + + def breakflow(self): + if self.is_subcall(): + return True + if self.name in conditional_branch + unconditional_branch: + return True + if self.name.startswith("LDM") and PC in self.args[1].args: + return True + if self.args and PC in self.args[0].get_r(): + return True + return False + + def is_subcall(self): + if self.name == 'BLX': + return True + return self.additional_info.lnk + + def getdstflow(self, loc_db): + return [self.args[0]] + + def splitflow(self): + if self.additional_info.lnk: + return True + if self.name == 'BLX': + return True + if self.name == 'BX': + return False + return self.breakflow() and self.additional_info.cond != 14 + + def get_symbol_size(self, 
symbol, loc_db): + return 32 + + def fixDstOffset(self): + e = self.args[0] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + log.debug('dyn dst %r', e) + return + off = e.arg - self.offset + if int(off % 4): + raise ValueError('strange offset! %r' % off) + self.args[0] = ExprInt(off, 32) + + def get_args_expr(self): + args = [a for a in self.args] + return args + + def get_asm_offset(self, expr): + # LDR XXX, [PC, offset] => PC is self.offset+8 + return ExprInt(self.offset+8, expr.size) + +class instruction_armt(instruction_arm): + __slots__ = [] + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_armt, self).__init__(*args, **kargs) + + def dstflow(self): + if self.name in ["CBZ", "CBNZ"]: + return True + return self.name in conditional_branch + unconditional_branch + + def dstflow2label(self, loc_db): + if self.name in ["CBZ", "CBNZ"]: + expr = self.args[1] + else: + expr = self.args[0] + if not isinstance(expr, ExprInt): + return + if self.name == 'BLX': + addr = expr.arg + (self.offset & 0xfffffffc) + elif self.name == 'BL': + addr = expr.arg + self.offset + elif self.name.startswith('BP'): + addr = expr.arg + self.offset + elif self.name.startswith('CB'): + addr = expr.arg + self.offset + self.l + 2 + else: + addr = expr.arg + self.offset + + loc_key = loc_db.get_or_create_offset_location(addr) + dst = ExprLoc(loc_key, expr.size) + + if self.name in ["CBZ", "CBNZ"]: + self.args[1] = dst + else: + self.args[0] = dst + + def breakflow(self): + if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: + return True + if self.name.startswith("LDM") and PC in self.args[1].args: + return True + if self.args and PC in self.args[0].get_r(): + return True + return False + + def getdstflow(self, loc_db): + if self.name in ['CBZ', 'CBNZ']: + return [self.args[1]] + return [self.args[0]] + + def splitflow(self): + if self.name in conditional_branch + ['BL', 
'BLX', 'CBZ', 'CBNZ']: + return True + return False + + def is_subcall(self): + return self.name in ['BL', 'BLX'] + + def fixDstOffset(self): + e = self.args[0] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + log.debug('dyn dst %r', e) + return + # The first +2 is to compensate instruction len, but strangely, 32 bits + # thumb2 instructions len is 2... For the second +2, didn't find it in + # the doc. + off = e.arg - self.offset + if int(off % 2): + raise ValueError('strange offset! %r' % off) + self.args[0] = ExprInt(off, 32) + + def get_asm_offset(self, expr): + # ADR XXX, PC, imm => PC is 4 aligned + imm + new_offset = ((self.offset + self.l) // 4) * 4 + return ExprInt(new_offset, expr.size) + + +class mn_arm(cls_mn): + delayslot = 0 + name = "arm" + regs = regs_module + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = {'l':PC, 'b':PC} + sp = {'l':SP, 'b':SP} + instruction = instruction_arm + max_instruction_len = 4 + alignment = 4 + + @classmethod + def getpc(cls, attrib = None): + return PC + + @classmethod + def getsp(cls, attrib = None): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + if hasattr(self, "cond"): + info.cond = self.cond.value + else: + info.cond = None + return info + + @classmethod + def getbits(cls, bs, attrib, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) + while n: + offset = start // 8 + n_offset = cls.endian_offset(attrib, offset) + c = cls.getbytes(bs, n_offset, 1) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def endian_offset(cls, attrib, offset): + if 
attrib == "l": + return (offset & ~3) + 3 - offset % 4 + elif attrib == "b": + return offset + else: + raise NotImplementedError('bad attrib') + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 32, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + l = sum([x.l for x in fields]) + if l == 32: + return fields + return [bm_cond] + fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_arm, self).value(mode) + if mode == 'l': + return [x[::-1] for x in v] + elif mode == 'b': + return [x for x in v] + else: + raise NotImplementedError('bad attrib') + + + def get_symbol_size(self, symbol, loc_db, mode): + return 32 + + +class mn_armt(cls_mn): + name = "armt" + regs = regs_module + delayslot = 0 + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = PC + sp = SP + instruction = instruction_armt + max_instruction_len = 4 + alignment = 4 + + @classmethod + def getpc(cls, attrib = None): + return PC + + @classmethod + def getsp(cls, attrib = None): + return SP + + def additional_info(self): + info = additional_info() + info.lnk = False + if hasattr(self, "lnk"): + info.lnk = self.lnk.value != 0 + info.cond = 14 # COND_ALWAYS + return info + + + @classmethod + def getbits(cls, bs, attrib, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) + while n: + offset = start // 8 + n_offset = cls.endian_offset(attrib, offset) + c = cls.getbytes(bs, n_offset, 1) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def 
endian_offset(cls, attrib, offset): + if attrib == "l": + return (offset & ~1) + 1 - offset % 2 + elif attrib == "b": + return offset + else: + raise NotImplementedError('bad attrib') + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l in [16, 32], "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + return list(fields) + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_armt, self).value(mode) + if mode == 'l': + out = [] + for x in v: + if len(x) == 2: + out.append(x[::-1]) + elif len(x) == 4: + out.append(x[:2][::-1] + x[2:4][::-1]) + return out + elif mode == 'b': + return [x for x in v] + else: + raise NotImplementedError('bad attrib') + + def get_args_expr(self): + args = [a.expr for a in self.args] + return args + + def get_symbol_size(self, symbol, loc_db, mode): + return 32 + + +class arm_arg(m_arg): + def asm_ast_to_expr(self, arg, loc_db): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + loc_key = loc_db.get_or_create_name_location(arg.name.encode()) + return ExprLoc(loc_key, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class arm_reg(reg_noarg, arm_arg): + pass + + +class arm_gpreg_noarg(reg_noarg): + reg_info = gpregs + parser = reg_info.parser + + +class arm_gpreg(arm_reg): + reg_info = gpregs + parser = reg_info.parser + + +class arm_reg_wb(arm_reg): + reg_info = gpregs + parser = gpregs_wb + + def 
decode(self, v): + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.wback.value: + e = ExprOp('wback', e) + self.expr = e + return True + + def encode(self): + e = self.expr + self.parent.wback.value = 0 + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + if isinstance(e, ExprId): + self.value = self.reg_info.expr.index(e) + else: + self.parent.wback.value = 1 + self.value = self.reg_info.expr.index(e.args[0]) + return True + + +class arm_psr(arm_arg): + parser = psr_p + + def decode(self, v): + v = v & self.lmask + if self.parent.psr.value == 0: + e = cpsr_regs.expr[v] + else: + e = spsr_regs.expr[v] + self.expr = e + return True + + def encode(self): + e = self.expr + if e in spsr_regs.expr: + self.parent.psr.value = 1 + v = spsr_regs.expr.index(e) + elif e in cpsr_regs.expr: + self.parent.psr.value = 0 + v = cpsr_regs.expr.index(e) + else: + return False + self.value = v + return True + + +class arm_cpreg(arm_reg): + reg_info = cp_regs + parser = reg_info.parser + + +class arm_preg(arm_reg): + reg_info = p_regs + parser = reg_info.parser + + +class arm_imm(imm_noarg, arm_arg): + parser = base_expr + + +class arm_offs(arm_imm): + parser = base_expr + + def int2expr(self, v): + if v & ~self.intmask != 0: + return None + return ExprInt(v, self.intsize) + + def decodeval(self, v): + v <<= 2 + # Add pipeline offset + v += 8 + return v + + def encodeval(self, v): + if v%4 != 0: + return False + # Remove pipeline offset + v -= 8 + return v >> 2 + + def decode(self, v): + v = v & self.lmask + if (1 << (self.l - 1)) & v: + v |= ~0 ^ self.lmask + v = self.decodeval(v) + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr) + if (1 << (self.l - 1)) & v: + v = -((0xffffffff ^ v) + 1) + v = self.encodeval(v) + if v is False: + return False + self.value = (v & 0xffffffff) & self.lmask + return True + + +class arm_imm8_12(arm_arg): + 
parser = deref + + def decode(self, v): + v = v & self.lmask + if self.parent.updown.value: + e = ExprInt(v << 2, 32) + else: + e = ExprInt(-v << 2, 32) + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, e) + else: + e = ExprOp('postinc', self.parent.rn.expr, e) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = ExprMem(e, 32) + return True + + def encode(self): + self.parent.updown.value = 1 + e = self.expr + if not isinstance(e, ExprMem): + return False + e = e.ptr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.value = 0 + return True + e = e.args[1] + if not isinstance(e, ExprInt): + log.debug('should be int %r', e) + return False + v = int(e) + if v < 0 or v & (1 << 31): + self.parent.updown.value = 0 + v = -v & 0xFFFFFFFF + if v & 0x3: + log.debug('arg should be 4 aligned') + return False + v >>= 2 + self.value = v + return True + + +class arm_imm_4_12(arm_arg): + parser = reg_or_base + + def decode(self, v): + v = v & self.lmask + imm = (self.parent.imm4.value << 12) | v + self.expr = ExprInt(imm, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr) + if v > 0xffff: + return False + self.parent.imm4.value = v >> 12 + self.value = v & 0xfff + return True + + +class arm_imm_12_4(arm_arg): + parser = base_expr + + def decode(self, v): + v = v & self.lmask + imm = (self.parent.imm.value << 4) | v + self.expr = ExprInt(imm, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr) + if v > 0xffff: + return False + self.parent.imm.value = (v >> 4) & 0xfff + self.value = v & 0xf + return True + + 
class arm_op2(arm_arg):
    """12-bit second operand of the data-processing instructions.

    The parent's ``immop`` flag selects the layout of the field:

    * ``immop`` set: an 8-bit immediate (bits 0-7) rotated right by twice
      the 4-bit rotation (bits 8-11);
    * ``immop`` clear: a register ``rm`` (bits 0-3), optionally shifted
      either by an immediate amount or by a second register.
    """
    parser = shift_off

    def str_to_imm_rot_form(self, s, neg=False):
        """Encode integer *s* as a rotated 8-bit immediate.

        Every even left rotation of *s* is tried in increasing order; the
        first one that fits in 8 bits wins.

        :param s: integer to encode
        :param neg: when true, encode ``-s`` truncated to 32 bits instead
        :return: ``((rotation // 2) << 8) | imm8``, or ``None`` when no even
            rotation of the value fits in 8 bits
        """
        if neg:
            s = -s & 0xffffffff
        for i in range(0, 32, 2):
            v = myrol32(s, i)
            if 0 <= v < 0x100:
                return ((i // 2) << 8) | v
        return None

    def decode(self, v):
        """Build ``self.expr`` from the raw field value *v*.

        :return: ``True`` on success, ``False`` for the register-shift
            encodings this decoder rejects (low bit of the register-shift
            field set, or shift register number 0xf).
        """
        val = v & self.lmask
        if self.parent.immop.value:
            # Immediate form: imm8 rotated right by 2 * rot.
            rot = val >> 8
            imm = val & 0xff
            imm = myror32(imm, rot * 2)
            self.expr = ExprInt(imm, 32)
            return True

        # Register form: split the field into rm / kind / type / amount.
        rm = val & 0xf
        shift = val >> 4
        shift_kind = shift & 1
        shift_type = (shift >> 1) & 3
        shift >>= 3
        if shift_kind:
            # Shift amount comes from a register.
            if shift & 1:
                return False
            rs = shift >> 1
            if rs == 0xf:
                return False
            shift_op = regs_expr[rs]
        else:
            # Shift amount is an immediate.
            amount = shift
            shift_op = ExprInt(amount, 32)

        a = regs_expr[rm]
        if shift_op == ExprInt(0, 32):
            if shift_type == 3:
                # Zero amount with shift type 3 maps to allshifts[4]
                # (presumably 'rrx', which encode() stores as type 3 with
                # a zero amount).
                self.expr = ExprOp(allshifts[4], a)
            elif allshifts[shift_type] in ('>>', 'a>>'):
                # encode() stores a '>>' / 'a>>' shift by 32 as a zero
                # amount, so a zero amount must decode back to a shift by
                # 32 rather than to the bare register.
                self.expr = ExprOp(
                    allshifts[shift_type], a, ExprInt(32, 32)
                )
            else:
                # Remaining shift type with a zero amount: plain register.
                self.expr = a
        else:
            self.expr = ExprOp(allshifts[shift_type], a, shift_op)
        return True

    def encode(self):
        """Compute ``self.value`` (and the parent's ``immop``) from
        ``self.expr``.

        :return: ``True`` on success, ``False`` when the expression is an
            integer with no rotated 8-bit form or is neither an integer, a
            register nor an operation.
        :raises ValueError: from ``list.index`` when a register or an
            operator is not found in ``gpregs.expr`` / ``allshifts``.
        """
        e = self.expr
        # Pure immediate.
        if isinstance(e, ExprInt):
            val = self.str_to_imm_rot_form(int(e))
            if val is None:
                return False
            self.parent.immop.value = 1
            self.value = val
            return True

        self.parent.immop.value = 0
        # Pure register: every shift field is zero.
        if isinstance(e, ExprId):
            rm = gpregs.expr.index(e)
            shift_kind = 0
            shift_type = 0
            amount = 0
            self.value = (
                ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm
            return True

        # Shifted register.
        if not isinstance(e, ExprOp):
            log.debug('bad reg rot1 %r', e)
            return False
        rm = gpregs.expr.index(e.args[0])
        shift_type = allshifts.index(e.op)
        if e.op == 'rrx':
            # 'rrx' has no amount operand: stored as type 3, amount 0.
            shift_kind = 0
            amount = 0
            shift_type = 3
        elif isinstance(e.args[1], ExprInt):
            shift_kind = 0
            amount = int(e.args[1])
            # LSR/ASR of 32 => 0
            if amount == 32 and e.op in ['>>', 'a>>']:
                amount = 0
        else:
            # Shift by register: register number sits above a zero bit.
            shift_kind = 1
            amount = gpregs.expr.index(e.args[1]) << 1
        self.value = (
            ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm
        return True
+# op2imm + rn + + +class arm_op2imm(arm_imm8_12): + parser = deref + + def str_to_imm_rot_form(self, s, neg=False): + if neg: + s = -s & 0xffffffff + if 0 <= s < (1 << 12): + return s + return None + + def decode(self, v): + val = v & self.lmask + if self.parent.immop.value == 0: + imm = val + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, ExprInt(imm, 32)) + else: + e = ExprOp('postinc', self.parent.rn.expr, ExprInt(imm, 32)) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = ExprMem(e, 32) + return True + rm = val & 0xf + shift = val >> 4 + shift_kind = shift & 1 + shift_type = (shift >> 1) & 3 + shift >>= 3 + # print self.parent.immop.value, hex(shift), hex(shift_kind), + # hex(shift_type) + if shift_kind: + # log.debug('error in disasm xx') + return False + else: + # shift kind is imm + amount = shift + shift_op = ExprInt(amount, 32) + a = regs_expr[rm] + if shift_op == ExprInt(0, 32): + pass + else: + a = ExprOp(allshifts[shift_type], a, shift_op) + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, a) + else: + e = ExprOp('postinc', self.parent.rn.expr, a) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = ExprMem(e, 32) + return True + + def encode(self): + self.parent.immop.value = 1 + self.parent.updown.value = 1 + + e = self.expr + assert(isinstance(e, ExprMem)) + e = e.ptr + if e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + + # if len(v) <1: + # raise ValueError('cannot parse', s) + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.parent.immop.value = 0 + self.value = 0 + return True + # pure imm + if isinstance(e.args[1], ExprInt): + self.parent.immop.value = 0 + val = 
self.str_to_imm_rot_form(int(e.args[1])) + if val is None: + val = self.str_to_imm_rot_form(int(e.args[1]), True) + if val is None: + log.debug('cannot encode inm') + return False + self.parent.updown.value = 0 + self.value = val + return True + # pure reg + if isinstance(e.args[1], ExprId): + rm = gpregs.expr.index(e.args[1]) + shift_kind = 0 + shift_type = 0 + amount = 0 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + # rot reg + if not isinstance(e.args[1], ExprOp): + log.debug('bad reg rot2 %r', e) + return False + e = e.args[1] + rm = gpregs.expr.index(e.args[0]) + shift_type = allshifts.index(e.op) + if isinstance(e.args[1], ExprInt): + shift_kind = 0 + amount = int(e.args[1]) + else: + shift_kind = 1 + amount = gpregs.expr.index(e.args[1]) << 1 + self.value = ( + ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + return True + + +def reglist2str(rlist): + out = [] + i = 0 + while i < len(rlist): + j = i + 1 + while j < len(rlist) and rlist[j] < 13 and rlist[j] == rlist[j - 1] + 1: + j += 1 + j -= 1 + if j < i + 2: + out.append(regs_str[rlist[i]]) + i += 1 + else: + out.append(regs_str[rlist[i]] + '-' + regs_str[rlist[j]]) + i = j + 1 + return "{" + ", ".join(out) + '}' + + +class arm_rlist(arm_arg): + parser = gpreg_list + + def encode(self): + self.parent.sbit.value = 0 + e = self.expr + if isinstance(e, ExprOp) and e.op == "sbit": + e = e.args[0] + self.parent.sbit.value = 1 + rlist = [gpregs.expr.index(x) for x in e.args] + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in range(0x10): + if 1 << i & v: + out.append(gpregs.expr[i]) + if not out: + return False + e = ExprOp('reglist', *out) + if self.parent.sbit.value == 1: + e = ExprOp('sbit', e) + self.expr = e + return True + + +class updown_b_nosp_mn(bs_mod_name): + mn_mod = ['D', 'I'] + + def modname(self, name, f_i): + return name + 
self.args['mn_mod'][f_i] + + +class ppi_b_nosp_mn(bs_mod_name): + prio = 5 + mn_mod = ['A', 'B'] + + +class updown_b_sp_mn(bs_mod_name): + mn_mod = ['A', 'D'] + + def modname(self, name, f_i): + if name.startswith("STM"): + f_i = [1, 0][f_i] + return name + self.args['mn_mod'][f_i] + + +class ppi_b_sp_mn(bs_mod_name): + mn_mod = ['F', 'E'] + + def modname(self, name, f_i): + if name.startswith("STM"): + f_i = [1, 0][f_i] + return name + self.args['mn_mod'][f_i] + + +class arm_reg_wb_nosp(arm_reg_wb): + + def decode(self, v): + v = v & self.lmask + if v == 13: + return False + e = self.reg_info.expr[v] + if self.parent.wback.value: + e = ExprOp('wback', e) + self.expr = e + return True + + +class arm_offs_blx(arm_imm): + + def decode(self, v): + v = v & self.lmask + v = (v << 2) + (self.parent.lowb.value << 1) + v = sign_ext(v, 26, 32) + # Add pipeline offset + v += 8 + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + # Remove pipeline offset + v = int(self.expr.arg - 8) + if v & 0x80000000: + v &= (1 << 26) - 1 + self.parent.lowb.value = (v >> 1) & 1 + self.value = v >> 2 + return True + + +class bs_lnk(bs_mod_name): + + def modname(self, name, i): + return name[:1] + self.args['mn_mod'][i] + name[1:] + + +class armt_rm_cp(bsi): + + def decode(self, v): + if v != gpregs.expr.index(self.parent.rm.expr): + return False + return True + + def encode(self): + self.value = gpregs.expr.index(self.parent.rm.expr) + return True + + +accum = bs(l=1) +scc = bs_mod_name(l=1, fname='scc', mn_mod=['', 'S']) +dumscc = bs("1") +rd = bs(l=4, cls=(arm_gpreg,)) +rdl = bs(l=4, cls=(arm_gpreg,)) + +rn = bs(l=4, cls=(arm_gpreg,), fname="rn") +rs = bs(l=4, cls=(arm_gpreg,)) +rm = bs(l=4, cls=(arm_gpreg,), fname='rm') +ra = bs(l=4, cls=(arm_gpreg,)) +rt = bs(l=4, cls=(arm_gpreg,), fname='rt') +rt2 = bs(l=4, cls=(arm_gpreg,)) + +rm_cp = bs(l=4, cls=(armt_rm_cp,)) + +op2 = bs(l=12, cls=(arm_op2,)) +lnk = bs_lnk(l=1, 
fname='lnk', mn_mod=['', 'L']) +offs = bs(l=24, cls=(arm_offs,), fname="offs") + +rn_noarg = bs(l=4, cls=(arm_gpreg_noarg,), fname="rn") +rm_noarg = bs(l=4, cls=(arm_gpreg_noarg,), fname="rm", order = -1) + +immop = bs(l=1, fname='immop') +dumr = bs(l=4, default_val="0000", fname="dumr") +# psr = bs(l=1, cls=(arm_psr,), fname="psr") + +psr = bs(l=1, fname="psr") +psr_field = bs(l=4, cls=(arm_psr,)) + +ppi = bs(l=1, fname='ppi') +updown = bs(l=1, fname='updown') +trb = bs_mod_name(l=1, fname='trb', mn_mod=['', 'B']) +wback = bs_mod_name(l=1, fname="wback", mn_mod=['', 'T']) +wback_no_t = bs(l=1, fname="wback") + +op2imm = bs(l=12, cls=(arm_op2imm,)) + +updown_b_nosp = updown_b_nosp_mn(l=1, mn_mod=['D', 'I'], fname='updown') +ppi_b_nosp = ppi_b_nosp_mn(l=1, mn_mod=['A', 'B'], fname='ppi') +updown_b_sp = updown_b_sp_mn(l=1, mn_mod=['A', 'D'], fname='updown') +ppi_b_sp = ppi_b_sp_mn(l=1, mn_mod=['F', 'E'], fname='ppi') + +sbit = bs(l=1, fname="sbit") +rn_sp = bs("1101", cls=(arm_reg_wb,), fname='rnsp') +rn_wb = bs(l=4, cls=(arm_reg_wb_nosp,), fname='rn') +rlist = bs(l=16, cls=(arm_rlist,), fname='rlist') + +swi_i = bs(l=24, cls=(arm_imm,), fname="swi_i") + +opc = bs(l=4, cls=(arm_imm, m_arg), fname='opc') +crn = bs(l=4, cls=(arm_cpreg,), fname='crn') +crd = bs(l=4, cls=(arm_cpreg,), fname='crd') +crm = bs(l=4, cls=(arm_cpreg,), fname='crm') +cpnum = bs(l=4, cls=(arm_preg,), fname='cpnum') +cp = bs(l=3, cls=(arm_imm, m_arg), fname='cp') + +imm8_12 = bs(l=8, cls=(arm_imm8_12, m_arg), fname='imm') +tl = bs_mod_name(l=1, fname="tl", mn_mod=['', 'L']) + +cpopc = bs(l=3, cls=(arm_imm, m_arg), fname='cpopc') +imm20 = bs(l=20, cls=(arm_imm, m_arg)) +imm4 = bs(l=4, cls=(arm_imm, m_arg)) +imm12 = bs(l=12, cls=(arm_imm, m_arg)) +imm16 = bs(l=16, cls=(arm_imm, m_arg)) + +imm12_off = bs(l=12, fname="imm") + +imm2_noarg = bs(l=2, fname="imm") +imm4_noarg = bs(l=4, fname="imm4") + + +imm_4_12 = bs(l=12, cls=(arm_imm_4_12,)) + +imm12_noarg = bs(l=12, fname="imm") +imm_12_4 = bs(l=4, 
cls=(arm_imm_12_4,)) + +lowb = bs(l=1, fname='lowb') +offs_blx = bs(l=24, cls=(arm_offs_blx,), fname="offs") + +fix_cond = bs("1111", fname="cond") + +class mul_part_x(bs_mod_name): + prio = 5 + mn_mod = ['B', 'T'] + +class mul_part_y(bs_mod_name): + prio = 6 + mn_mod = ['B', 'T'] + +mul_x = mul_part_x(l=1, fname='x', mn_mod=['B', 'T']) +mul_y = mul_part_y(l=1, fname='y', mn_mod=['B', 'T']) + +class arm_immed(arm_arg): + parser = deref + + def decode(self, v): + if self.parent.immop.value == 1: + imm = ExprInt((self.parent.immedH.value << 4) | v, 32) + else: + imm = gpregs.expr[v] + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, imm) + else: + e = ExprOp('postinc', self.parent.rn.expr, imm) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + self.expr = ExprMem(e, 32) + + return True + + def encode(self): + self.parent.immop.value = 1 + self.parent.updown.value = 1 + e = self.expr + if not isinstance(e, ExprMem): + return False + e = e.ptr + if isinstance(e, ExprOp) and e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + else: + self.parent.wback.value = 0 + if e.op == "postinc": + self.parent.ppi.value = 0 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: + self.value = 0 + self.parent.immedH.value = 0 + return True + e = e.args[1] + if isinstance(e, ExprInt): + v = int(e) + if v < 0 or v & (1 << 31): + self.parent.updown.value = 0 + v = (-v) & 0xFFFFFFFF + if v > 0xff: + log.debug('cannot encode imm XXX') + return False + self.value = v & 0xF + self.parent.immedH.value = v >> 4 + return True + + self.parent.immop.value = 0 + if isinstance(e, ExprOp) and len(e.args) == 1 and e.op == "-": + self.parent.updown.value = 0 + e = e.args[0] + if e in gpregs.expr: + self.value = gpregs.expr.index(e) + self.parent.immedH.value = 0x0 + return True + else: + raise 
ValueError('e should be int: %r' % e) + +immedH = bs(l=4, fname='immedH') +immedL = bs(l=4, cls=(arm_immed, m_arg), fname='immedL') +hb = bs(l=1) + + +class armt2_rot_rm(arm_arg): + parser = shift_off + def decode(self, v): + r = self.parent.rm.expr + if v == 00: + e = r + else: + raise NotImplementedError('rotation') + self.expr = e + return True + def encode(self): + e = self.expr + if isinstance(e, ExprId): + self.value = 0 + else: + raise NotImplementedError('rotation') + return True + +rot_rm = bs(l=2, cls=(armt2_rot_rm,), fname="rot_rm") + + +class arm_mem_rn_imm(arm_arg): + parser = deref + def decode(self, v): + value = self.parent.imm.value + if self.parent.rw.value == 0: + value = -value + imm = ExprInt(value, 32) + reg = gpregs.expr[v] + if value: + expr = ExprMem(reg + imm, 32) + else: + expr = ExprMem(reg, 32) + self.expr = expr + return True + + def encode(self): + self.parent.add_imm.value = 1 + self.parent.imm.value = 0 + expr = self.expr + if not isinstance(expr, ExprMem): + return False + ptr = expr.ptr + if ptr in gpregs.expr: + self.value = gpregs.expr.index(ptr) + elif (isinstance(ptr, ExprOp) and + len(ptr.args) == 2 and + ptr.op == 'preinc'): + reg, imm = ptr.args + if not reg in gpregs.expr: + return False + self.value = gpregs.expr.index(reg) + if not isinstance(imm, ExprInt): + return False + value = int(imm) + if value & 0x80000000: + value = -value + self.parent.add_imm.value = 0 + self.parent.imm.value = value + else: + return False + return True + +mem_rn_imm = bs(l=4, cls=(arm_mem_rn_imm,), order=1) + +def armop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_arm,), dct) + + +def armtop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_armt,), dct) + + +op_list = ['AND', 'EOR', 'SUB', 'RSB', 'ADD', 'ADC', 'SBC', 'RSC', + 'TST', 'TEQ', 
'CMP', 'CMN', 'ORR', 'MOV', 'BIC', 'MVN'] +data_mov_name = {'MOV': 13, 'MVN': 15} +data_test_name = {'TST': 8, 'TEQ': 9, 'CMP': 10, 'CMN': 11} + +data_name = {} +for i, n in enumerate(op_list): + if n in list(data_mov_name) + list(data_test_name): + continue + data_name[n] = i +bs_data_name = bs_name(l=4, name=data_name) + +bs_data_mov_name = bs_name(l=4, name=data_mov_name) + +bs_data_test_name = bs_name(l=4, name=data_test_name) + + +transfer_name = {'STR': 0, 'LDR': 1} +bs_transfer_name = bs_name(l=1, name=transfer_name) + +transferh_name = {'STRH': 0, 'LDRH': 1} +bs_transferh_name = bs_name(l=1, name=transferh_name) + + +transfer_ldr_name = {'LDRD': 0, 'LDRSB': 1} +bs_transfer_ldr_name = bs_name(l=1, name=transfer_ldr_name) + +btransfer_name = {'STM': 0, 'LDM': 1} +bs_btransfer_name = bs_name(l=1, name=btransfer_name) + +ctransfer_name = {'STC': 0, 'LDC': 1} +bs_ctransfer_name = bs_name(l=1, name=ctransfer_name) + +mr_name = {'MCR': 0, 'MRC': 1} +bs_mr_name = bs_name(l=1, name=mr_name) + + +bs_addi = bs(l=1, fname="add_imm") +bs_rw = bs_mod_name(l=1, fname='rw', mn_mod=['W', '']) + +armop("mul", [bs('000000'), bs('0'), scc, rd, bs('0000'), rs, bs('1001'), rm], [rd, rm, rs]) +armop("umull", [bs('000010'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("umlal", [bs('000010'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smull", [bs('000011'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smlal", [bs('000011'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("mla", [bs('000000'), bs('1'), scc, rd, rn, rs, bs('1001'), rm], [rd, rm, rs, rn]) +armop("mrs", [bs('00010'), psr, bs('00'), psr_field, rd, bs('000000000000')], [rd, psr]) +armop("msr", [bs('00010'), psr, bs('10'), psr_field, bs('1111'), bs('0000'), bs('0000'), rm], [psr_field, rm]) +armop("data", [bs('00'), immop, bs_data_name, scc, rn, rd, op2], [rd, rn, op2]) +armop("data_mov", [bs('00'), immop, 
bs_data_mov_name, scc, bs('0000'), rd, op2], [rd, op2]) +armop("data_test", [bs('00'), immop, bs_data_test_name, dumscc, rn, dumr, op2]) +armop("b", [bs('101'), lnk, offs]) + +armop("smul", [bs('00010110'), rd, bs('0000'), rs, bs('1'), mul_y, mul_x, bs('0'), rm], [rd, rm, rs]) + +# TODO TEST +#armop("und", [bs('011'), imm20, bs('1'), imm4]) +armop("transfer", [bs('01'), immop, ppi, updown, trb, wback_no_t, bs_transfer_name, rn_noarg, rd, op2imm], [rd, op2imm]) +armop("transferh", [bs('000'), ppi, updown, immop, wback_no_t, bs_transferh_name, rn_noarg, rd, immedH, bs('1011'), immedL], [rd, immedL]) +armop("ldrd", [bs('000'), ppi, updown, immop, wback_no_t, bs_transfer_ldr_name, rn_noarg, rd, immedH, bs('1101'), immedL], [rd, immedL]) +armop("ldrsh", [bs('000'), ppi, updown, immop, wback_no_t, bs('1'), rn_noarg, rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("strd", [bs('000'), ppi, updown, immop, wback_no_t, bs('0'), rn_noarg, rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("btransfersp", [bs('100'), ppi_b_sp, updown_b_sp, sbit, wback_no_t, bs_btransfer_name, rn_sp, rlist]) +armop("btransfer", [bs('100'), ppi_b_nosp, updown_b_nosp, sbit, wback_no_t, bs_btransfer_name, rn_wb, rlist]) +# TODO: TEST +armop("swp", [bs('00010'), trb, bs('00'), rn, rd, bs('0000'), bs('1001'), rm]) +armop("svc", [bs('1111'), swi_i]) +armop("cdp", [bs('1110'), opc, crn, crd, cpnum, cp, bs('0'), crm], [cpnum, opc, crd, crn, crm, cp]) +armop("cdata", [bs('110'), ppi, updown, tl, wback_no_t, bs_ctransfer_name, rn_noarg, crd, cpnum, imm8_12], [cpnum, crd, imm8_12]) +armop("mr", [bs('1110'), cpopc, bs_mr_name, crn, rd, cpnum, cp, bs('1'), crm], [cpnum, cpopc, rd, crn, crm, cp]) +armop("bkpt", [bs('00010010'), imm12_noarg, bs('0111'), imm_12_4]) +armop("bx", [bs('000100101111111111110001'), rn]) +armop("mov", [bs('00110000'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) +armop("movt", [bs('00110100'), imm4_noarg, rd, imm_4_12], [rd, 
class arm_rm_rot2(arm_arg):
    """Register operand, optionally wrapped in the ``allshifts[3]`` operator.

    The register index is stored in this field.  The shift amount, divided
    by 8, is stored in the ``rot2`` field of the parent instruction, so only
    the amounts 8, 16 and 24 (or no shift at all) can be represented.
    """
    parser = rot2_expr

    def decode(self, v):
        """Build the operand from register index ``v`` and the parent's rot2."""
        expr = gpregs.expr[v]
        shift_value = self.parent.rot2.value
        if shift_value:
            expr = ExprOp(allshifts[3], expr, ExprInt(shift_value * 8, 32))
        self.expr = expr
        return True

    def encode(self):
        """Fill ``self.value`` and the parent's rot2; return False if the
        operand cannot be represented."""
        expr = self.expr

        # Bare register: no shift.
        if expr in gpregs.expr:
            self.value = gpregs.expr.index(expr)
            self.parent.rot2.value = 0
            return True

        # Anything that is not the expected shift operator is unencodable.
        # (The previous code fell through here and reported success without
        # having set any field.)
        if not (isinstance(expr, ExprOp) and expr.op == allshifts[3]):
            return False
        if len(expr.args) != 2:
            return False

        reg, amount = expr.args
        if reg not in gpregs.expr:
            return False
        if not isinstance(amount, ExprInt):
            return False
        amount = int(amount)
        if amount not in (8, 16, 24):
            return False

        self.value = gpregs.expr.index(reg)
        self.parent.rot2.value = amount // 8
        return True
class arm_offpc(arm_offreg):
    """32-bit memory operand addressed as ``off_reg`` plus an immediate.

    The field stores the byte offset divided by 4, so only word-aligned
    offsets that fit in the field can be encoded.
    """
    off_reg = regs_expr[15]

    def decode(self, v):
        """Build ``ExprMem(off_reg [+ offset])`` from the raw field value."""
        v = (v & self.lmask) << 2
        if v:
            self.expr = ExprMem(self.off_reg + ExprInt(v, 32), 32)
        else:
            self.expr = ExprMem(self.off_reg, 32)
        # The pointer built above is either off_reg itself or a '+'
        # operation, never a 'wback' one, so there is no write-back flag to
        # propagate to the parent here.
        return True

    def encode(self):
        """Set ``self.value`` from a ``[off_reg, #imm]`` operand.

        Returns False when the operand is not a 'preinc' access on
        ``off_reg``, or when the offset is not a word-aligned integer that
        fits in the field.
        """
        e = self.expr
        if not isinstance(e, ExprMem):
            return False
        e = e.ptr
        if not (isinstance(e, ExprOp) and e.op == "preinc"):
            log.debug('cannot encode %r', e)
            return False
        if e.args[0] != self.off_reg:
            log.debug('cannot encode reg %r', e.args[0])
            return False

        if len(e.args) == 1:
            # 'preinc' carrying only the base register: same address as an
            # explicit zero offset.
            self.value = 0
            return True

        offset = e.args[1]
        if not isinstance(offset, ExprInt):
            # A register offset cannot be stored in an immediate field.
            log.debug('cannot encode off %r', offset)
            return False
        v = int(offset)
        if v & 3:
            return False
        v >>= 2
        if v > self.lmask:
            return False
        self.value = v
        return True
class arm_deref_reg_imm(arm_arg):
    """Memory operand ``[base, off]`` whose offset lives in a sibling field.

    This field stores only the base register index.  The offset expression
    is exchanged with the parent's ``off`` field: it is read from there in
    decode() and written there in encode().
    """
    parser = deref_reg_imm

    def decode(self, v):
        """Combine the base register ``v`` with the parent's offset."""
        v = v & self.lmask
        rbase = regs_expr[v]
        e = ExprOp('preinc', rbase, self.parent.off.expr)
        self.expr = ExprMem(e, 32)
        return True

    def encode(self):
        """Store the base register index and hand the offset to the parent.

        Returns False when the operand is not a two-argument 'preinc'
        access, when the offset is neither a register nor an integer, or
        when the base register does not fit in this field.
        """
        self.parent.off.expr = None
        e = self.expr
        if not isinstance(e, ExprMem):
            return False
        e = e.ptr
        if not (isinstance(e, ExprOp) and e.op == 'preinc'):
            log.debug('cannot encode %r', e)
            return False
        if len(e.args) != 2:
            # No offset operand to hand over to the 'off' field.
            log.debug('cannot encode %r', e)
            return False

        base, off = e.args
        if not isinstance(off, (ExprId, ExprInt)):
            log.debug('cannot encode off %r', off)
            return False
        self.parent.off.expr = off

        # Report an unknown base register as "cannot encode" instead of
        # letting list.index() raise ValueError.
        if base not in gpregs.expr:
            log.debug('cannot encode reg %r', base)
            return False
        index = gpregs.expr.index(base)
        if index >= 1 << self.l:
            log.debug('cannot encode reg %r', base)
            return False
        self.value = index
        return True
class armt_rlist(arm_arg):
    """Register list stored as a bitmask (bit ``i`` set means register ``i``)."""
    parser = gpreg_list

    def encode(self):
        """Turn the operand's registers into a bitmask.

        Returns False when a register is not part of ``gpregs_l``, matching
        how the other register-list classes in this file report a register
        they cannot hold.
        """
        v = 0
        for reg in self.expr.args:
            if reg not in gpregs_l.expr:
                return False
            v |= 1 << gpregs_l.expr.index(reg)
        self.value = v
        return True

    def decode(self, v):
        """Build a 'reglist' operation from the set bits of ``v``.

        An empty mask is rejected.
        """
        v = v & self.lmask
        out = [gpregs.expr[i] for i in range(0x10) if (1 << i) & v]
        if not out:
            return False
        self.expr = ExprOp('reglist', *out)
        return True
class armt_rlist_pclr(armt_rlist):
    """Register list whose extra register is carried by the ``pclr`` bit.

    When the parent's ``pp`` field is 0 the extra register is
    ``regs_expr[14]``, otherwise it is ``regs_expr[15]``.  That register is
    never stored in the bitmask itself.
    """

    def _extra_reg(self):
        # Register represented by the parent's 'pclr' bit.
        if self.parent.pp.value == 0:
            return regs_expr[14]
        return regs_expr[15]

    def encode(self):
        """Set the bitmask and the parent's ``pclr`` bit from the operand.

        Returns False when a register is unknown or does not fit in the
        field.
        """
        reg_l = list(self.expr.args)
        extra = self._extra_reg()

        self.parent.pclr.value = 0
        if extra in reg_l:
            reg_l.remove(extra)
            self.parent.pclr.value = 1

        v = 0
        for reg in reg_l:
            # Unknown register: report failure rather than letting
            # list.index() raise ValueError.
            if reg not in gpregs.expr:
                return False
            v |= 1 << gpregs.expr.index(reg)
        if v > self.lmask:
            return False
        self.value = v
        return True

    def decode(self, v):
        """Build a 'reglist' operation from the bitmask and the ``pclr`` bit.

        An empty list is rejected.
        """
        v = v & self.lmask
        out = [gpregs.expr[i] for i in range(0x10) if (1 << i) & v]

        if self.parent.pclr.value == 1:
            out.append(self._extra_reg())
        if not out:
            return False
        self.expr = ExprOp('reglist', *out)
        return True
gpregs_l + parser = reg_info.parser + + +class arm_gpreg_h(arm_reg): + reg_info = gpregs_h + parser = reg_info.parser + + +class arm_gpreg_l_noarg(arm_gpreg_noarg): + reg_info = gpregs_l + parser = reg_info.parser + + +class arm_sppc(arm_reg): + reg_info = gpregs_sppc + parser = reg_info.parser + + +class arm_sp(arm_reg): + reg_info = gpregs_sp + parser = reg_info.parser + + +off5 = bs(l=5, cls=(arm_imm,), fname="off") +off3 = bs(l=3, cls=(arm_imm,), fname="off") +off8 = bs(l=8, cls=(arm_imm,), fname="off") +off7 = bs(l=7, cls=(arm_off7,), fname="off") + +rdl = bs(l=3, cls=(arm_gpreg_l,), fname="rd") +rnl = bs(l=3, cls=(arm_gpreg_l,), fname="rn") +rsl = bs(l=3, cls=(arm_gpreg_l,), fname="rs") +rml = bs(l=3, cls=(arm_gpreg_l,), fname="rm") +rol = bs(l=3, cls=(arm_gpreg_l,), fname="ro") +rbl = bs(l=3, cls=(arm_gpreg_l,), fname="rb") +rbl_deref = bs(l=3, cls=(arm_derefl,), fname="rb") +dumrh = bs(l=3, default_val="000") + +rdh = bs(l=3, cls=(arm_gpreg_h,), fname="rd") +rsh = bs(l=3, cls=(arm_gpreg_h,), fname="rs") + +offpc8 = bs(l=8, cls=(arm_offpc,), fname="offs") +offsp8 = bs(l=8, cls=(arm_offsp,), fname="offs") +rol_noarg = bs(l=3, cls=(arm_gpreg_l_noarg,), fname="off") + +off5bw = bs(l=5, cls=(arm_offbw,), fname="off") +off5h = bs(l=5, cls=(arm_offh,), fname="off") +sppc = bs(l=1, cls=(arm_sppc,)) + +off12 = bs(l=12, cls=(arm_off,), fname="off", order=-1) +rn_deref = bs(l=4, cls=(arm_deref_reg_imm,), fname="rt") + + + +pclr = bs(l=1, fname='pclr', order=-2) + + +pc_in = bs(l=1, fname='pc_in', order=-2) +lr_in = bs(l=1, fname='lr_in', order=-2) + + +sp = bs(l=0, cls=(arm_sp,)) + + +off8s = bs(l=8, cls=(arm_offs,), fname="offs") +trlistpclr = bs(l=8, cls=(armt_rlist_pclr,)) +trlist = bs(l=8, cls=(armt_rlist,), fname="trlist", order = -1) +trlist13 = bs(l=13, cls=(armt_rlist13,), fname="trlist", order = -1) +trlist13pclr = bs(l=13, cls=(armt_rlist13_pc_lr,), fname="trlist", order = -1) + + +rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') + +offs8 = bs(l=8, 
cls=(arm_offspc,), fname="offs") +offs11 = bs(l=11, cls=(arm_offspc,), fname="offs") + +hl = bs(l=1, prio=default_prio + 1, fname='hl') +off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off") + +imm8_d1 = bs(l=8, default_val="00000001") +imm8 = bs(l=8, cls=(arm_imm,), default_val = "00000001") + + +mshift_name = {'LSLS': 0, 'LSRS': 1, 'ASRS': 2} +bs_mshift_name = bs_name(l=2, name=mshift_name) + + +addsub_name = {'ADDS': 0, 'SUBS': 1} +bs_addsub_name = bs_name(l=1, name=addsub_name) + +mov_cmp_add_sub_name = {'MOVS': 0, 'CMP': 1, 'ADDS': 2, 'SUBS': 3} +bs_mov_cmp_add_sub_name = bs_name(l=2, name=mov_cmp_add_sub_name) + +alu_name = {'ANDS': 0, 'EORS': 1, 'LSLS': 2, 'LSRS': 3, + 'ASRS': 4, 'ADCS': 5, 'SBCS': 6, 'RORS': 7, + 'TST': 8, 'NEGS': 9, 'CMP': 10, 'CMN': 11, + 'ORRS': 12, 'MULS': 13, 'BICS': 14, 'MVNS': 15} +bs_alu_name = bs_name(l=4, name=alu_name) + +hiregop_name = {'ADDS': 0, 'CMP': 1, 'MOV': 2} +bs_hiregop_name = bs_name(l=2, name=hiregop_name) + +ldr_str_name = {'STR': 0, 'LDR': 1} +bs_ldr_str_name = bs_name(l=1, name=ldr_str_name) + +ldrh_strh_name = {'STRH': 0, 'LDRH': 1} +bs_ldrh_strh_name = bs_name(l=1, name=ldrh_strh_name) + +ldstsp_name = {'STR': 0, 'LDR': 1} +bs_ldstsp_name = bs_name(l=1, name=ldstsp_name) + +addsubsp_name = {'ADD': 0, 'SUB': 1} +bs_addsubsp_name = bs_name(l=1, name=addsubsp_name) + +pushpop_name = {'PUSH': 0, 'POP': 1} +bs_pushpop_name = bs_name(l=1, name=pushpop_name, fname='pp') + +tbtransfer_name = {'STMIA': 0, 'LDMIA': 1} +bs_tbtransfer_name = bs_name(l=1, name=tbtransfer_name) + +br_name = {'BEQ': 0, 'BNE': 1, 'BCS': 2, 'BCC': 3, 'BMI': 4, + 'BPL': 5, 'BVS': 6, 'BVC': 7, 'BHI': 8, 'BLS': 9, + 'BGE': 10, 'BLT': 11, 'BGT': 12, 'BLE': 13} +bs_br_name = bs_name(l=4, name=br_name) + + +armtop("mshift", [bs('000'), bs_mshift_name, off5, rsl, rdl], [rdl, rsl, off5]) +armtop("addsubr", [bs('000110'), bs_addsub_name, rnl, rsl, rdl], [rdl, rsl, rnl]) +armtop("addsubi", [bs('000111'), bs_addsub_name, off3, rsl, rdl], [rdl, rsl, off3]) 
+armtop("mcas", [bs('001'), bs_mov_cmp_add_sub_name, rnl, off8]) +armtop("alu", [bs('010000'), bs_alu_name, rsl, rdl], [rdl, rsl]) + # should not be used ?? +armtop("hiregop00", [bs('010001'), bs_hiregop_name, bs('00'), rsl, rdl], [rdl, rsl]) +armtop("hiregop01", [bs('010001'), bs_hiregop_name, bs('01'), rsh, rdl], [rdl, rsh]) +armtop("hiregop10", [bs('010001'), bs_hiregop_name, bs('10'), rsl, rdh], [rdh, rsl]) +armtop("hiregop11", [bs('010001'), bs_hiregop_name, bs('11'), rsh, rdh], [rdh, rsh]) +armtop("bx", [bs('010001'), bs('11'), bs('00'), rsl, dumrh]) +armtop("bx", [bs('010001'), bs('11'), bs('01'), rsh, dumrh]) +armtop("ldr", [bs('01001'), rdl, offpc8]) +armtop("ldrstr", [bs('0101'), bs_ldr_str_name, trb, bs('0'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("strh", [bs('0101'), bs('00'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldrh", [bs('0101'), bs('10'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsb", [bs('0101'), bs('01'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsh", [bs('0101'), bs('11'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldst", [bs('011'), trb, bs_ldr_str_name, off5bw, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldhsth", [bs('1000'), bs_ldrh_strh_name, off5h, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldstsp", [bs('1001'), bs_ldstsp_name, rdl, offsp8], [rdl, offsp8]) +armtop("add", [bs('1010'), sppc, rdl, off8sppc], [rdl, sppc, off8sppc]) +armtop("addsp", [bs('10110000'), bs_addsubsp_name, sp, off7], [sp, off7]) +armtop("pushpop", [bs('1011'), bs_pushpop_name, bs('10'), pclr, trlistpclr], [trlistpclr]) +armtop("btransfersp", [bs('1100'), bs_tbtransfer_name, rbl_wb, trlist]) +armtop("br", [bs('1101'), bs_br_name, offs8]) +armtop("blx", [bs("01000111"), bs('1'), rm, bs('000')]) +armtop("svc", [bs('11011111'), imm8]) +armtop("b", [bs('11100'), offs11]) +armtop("und", [bs('1101'), bs('1110'), imm8_d1]) + +armtop("rev", [bs('10111010'), bs('00'), 
class arm_gpreg_nosppc(arm_reg):
    # Register operand taken from the gpregs_nosppc table.  Table slots that
    # hold the reg_dum placeholder are refused when decoding.
    reg_info = gpregs_nosppc
    parser = reg_info.parser

    def decode(self, v):
        # Let the generic register decoder resolve the index first.
        if super(arm_gpreg_nosppc, self).decode(v) is False:
            return False
        # The placeholder entry is not a usable register.
        return not (self.expr == reg_dum)
class armt2_imm12(arm_imm):
    """12-bit "modified immediate" spread over three fields.

    Bits 7..0 are this field, bits 10..8 the parent's ``imm12_3`` field and
    bit 11 the parent's ``imm12_1`` field.  The 12-bit code expands to a
    32-bit constant as follows:

    * code < 0x100: the value itself;
    * code >> 8 == 1: 0x00XY00XY;
    * code >> 8 == 2: 0xXY00XY00;
    * code >> 8 == 3: 0xXYXYXYXY;
    * otherwise: ``0x80 | (code & 0x7F)`` rotated right by ``code >> 7``.
    """

    def decode(self, v):
        """Expand the 12-bit code into a 32-bit ExprInt."""
        v = v & self.lmask
        v |= int(self.parent.imm12_3.value) << 8
        v |= int(self.parent.imm12_1.value) << 11

        # simple encoding
        if 0 <= v < 0x100:
            self.expr = ExprInt(v, 32)
            return True
        # 00XY00XY form
        if v >> 8 == 1:
            v &= 0xFF
            self.expr = ExprInt((v << 16) | v, 32)
            return True
        # XY00XY00 form
        if v >> 8 == 2:
            v &= 0xFF
            self.expr = ExprInt((v << 24) | (v << 8), 32)
            return True
        # XYXYXYXY
        if v >> 8 == 3:
            v &= 0xFF
            self.expr = ExprInt((v << 24) | (v << 16) | (v << 8) | v, 32)
            return True
        r = v >> 7
        v = 0x80 | (v & 0x7F)
        self.expr = ExprInt(myror32(v, r), 32)
        return True

    def encode(self):
        """Find a 12-bit code for the constant; return False if none exists."""
        if not self.expr.is_int():
            return False
        v = int(self.expr)
        value = None
        # simple encoding
        if 0 <= v < 0x100:
            value = v
        elif v & 0xFF00FF00 == 0 and v & 0xFF == (v >> 16) & 0xff:
            # 00XY00XY form
            value = (1 << 8) | (v & 0xFF)
        elif v & 0x00FF00FF == 0 and (v >> 8) & 0xff == (v >> 24) & 0xff:
            # XY00XY00 form
            value = (2 << 8) | ((v >> 8) & 0xff)
        elif (v & 0xFF ==
              (v >> 8) & 0xFF ==
              (v >> 16) & 0xFF ==
              (v >> 24) & 0xFF):
            # XYXYXYXY form
            value = (3 << 8) | ((v >> 16) & 0xff)
        else:
            # rol encoding.  Only rotations 8..31 are usable: a smaller
            # rotation gives a code below 0x400, which decode() reads as one
            # of the plain/pattern forms above rather than as a rotation.
            for i in range(8, 32):
                o = myrol32(v, i)
                if 0x80 <= o <= 0xFF:
                    value = (i << 7) | (o & 0x7F)
                    break
        if value is None:
            log.debug('cannot encode imm12')
            return False
        self.value = value & self.lmask
        self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask
        self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask
        return True


class armt4_imm12(arm_imm):
    """Plain 12-bit immediate spread over three fields.

    Same field layout as ``armt2_imm12`` (bits 7..0 here, bits 10..8 in
    ``imm12_3``, bit 11 in ``imm12_1``) but the value is used as is.
    """

    def decode(self, v):
        """Reassemble the 12-bit value into a 32-bit ExprInt."""
        v = v & self.lmask
        v |= int(self.parent.imm12_3.value) << 8
        v |= int(self.parent.imm12_1.value) << 11
        self.expr = ExprInt(v, 32)
        return True

    def encode(self):
        """Split the value over the three fields; False if it needs more
        than 12 bits."""
        if not self.expr.is_int():
            return False
        value = int(self.expr)
        # Only 12 bits are stored (8 + 3 + 1).  The former limit of 1 << 16
        # let larger values through with their upper bits silently dropped.
        if value >= (1 << 12):
            return False
        self.value = value & self.lmask
        self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask
        self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask
        return True
class armt2_imm10l(arm_imm):
    """Low 10 bits of a signed 25-bit offset split over several fields.

    The offset is laid out as ``s:i1:i2:imm10h:imm10l:00`` where ``s``,
    ``j1``, ``j2`` and ``imm10h`` are sibling fields of the parent and
    ``i1 = j1 ^ s ^ 1``, ``i2 = j2 ^ s ^ 1``.
    """

    def decode(self, v):
        """Rebuild the sign-extended 32-bit offset from all the fields."""
        v = v & self.lmask
        s = self.parent.sign.value
        j1 = self.parent.j1.value
        j2 = self.parent.j2.value
        imm10h = self.parent.imm10h.value
        imm10l = v

        i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1

        v = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10h << 12) | (imm10l << 2)
        v = sign_ext(v, 25, 32)
        self.expr = ExprInt(v, 32)
        return True

    def encode(self):
        """Split the offset over the fields.

        Returns False when the operand is not an integer or does not fit in
        a signed 25-bit value.  Bits 1..0 of the offset are not stored.
        """
        if not isinstance(self.expr, ExprInt):
            return False
        v = int(self.expr) & 0xFFFFFFFF
        # A signed 25-bit value has bits 31..24 all equal to its sign bit.
        # The previous range test ran after masking to 26 bits and therefore
        # could never reject anything.
        if (v >> 24) not in (0x00, 0xFF):
            return False
        s = v >> 31
        i1 = (v >> 23) & 1
        i2 = (v >> 22) & 1
        imm10h = (v >> 12) & 0x3ff
        imm10l = (v >> 2) & 0x3ff
        j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1
        self.parent.sign.value = s
        self.parent.j1.value = j1
        self.parent.j2.value = j2
        self.parent.imm10h.value = imm10h
        self.value = imm10l
        return True
class armt2_imm6_11l(arm_imm):
    """Low 11 bits of a signed 21-bit offset split over several fields.

    The stored offset is laid out as ``s:j2:j1:imm6h:imm11l:0`` where ``s``,
    ``j1``, ``j2`` and ``imm6h`` are sibling fields of the parent.  The
    operand value is the stored offset plus 4.
    """

    def decode(self, v):
        """Rebuild the operand (stored offset + 4) from all the fields."""
        v = v & self.lmask
        s = self.parent.sign.value
        j1 = self.parent.j1.value
        j2 = self.parent.j2.value
        imm6h = self.parent.imm6h.value
        imm11l = v

        v = (s << 20) | (j2 << 19) | (j1 << 18) | (imm6h << 12) | (imm11l << 1)
        v = sign_ext(v, 21, 32)
        self.expr = ExprInt(v + 4, 32)
        return True

    def encode(self):
        """Split ``operand - 4`` over the fields.

        Returns False when the operand is not an integer, when the offset
        does not fit in the signed field, or when it is odd.
        """
        if not isinstance(self.expr, ExprInt):
            return False
        # Keep the offset as a 32-bit pattern.  For operands 0..3 the
        # subtraction goes negative, and a negative Python int can never
        # equal the pattern compared against below.
        # NOTE(review): assumes sign_ext() returns a non-negative 32-bit
        # pattern, as its use with ExprInt(..., 32) in decode() suggests.
        v = (int(self.expr) - 4) & 0xFFFFFFFF
        if v != sign_ext(v & ((1 << 22) - 1), 21, 32):
            return False
        s = 0
        if v & 0x80000000:
            s = 1
            v &= (1 << 22) - 1
        if v & 1:
            return False
        i2, i1, imm6h, imm11l = (v >> 19) & 1, (v >> 18) & 1, (v >> 12) & 0x3f, (v >> 1) & 0x7ff
        self.parent.sign.value = s
        self.parent.j1.value = i1
        self.parent.j2.value = i2
        self.parent.imm6h.value = imm6h
        self.value = imm11l
        return True
class armt_it_arg(arm_arg):
    """One-bit operand rendered as the identifier ``E`` (1) or ``NE`` (0).

    Follows the same protocol as the other argument classes in this file:
    decode() stores the operand in ``self.expr``, encode() stores the bit in
    ``self.value``, and both return a success flag.  The previous version
    returned the expression or the bit directly, so ``self.expr`` and
    ``self.value`` were never set and encoding ``NE`` returned 0.
    """
    arg_E = ExprId('E', 1)
    arg_NE = ExprId('NE', 1)

    def decode(self, v):
        """Set ``self.expr`` to ``E`` for a non-zero bit, ``NE`` otherwise."""
        self.expr = self.arg_E if v else self.arg_NE
        return True

    def encode(self):
        """Set ``self.value`` from the operand; False if it is neither
        ``E`` nor ``NE``."""
        if self.expr == self.arg_E:
            self.value = 1
        elif self.expr == self.arg_NE:
            self.value = 0
        else:
            return False
        return True
class armt_cond_arg(arm_arg):
    """Condition operand whose lowest bit lives in the ``condlsb`` field.

    The index into ``cond_list_full`` is ``(field value << 1) | condlsb``.
    """
    parser = cond_info.parser

    def decode(self, v):
        """Look up the condition name from the field and the parent's lsb."""
        v = (v << 1) | self.parent.condlsb.value
        self.expr = ExprId(cond_list_full[v], 32)
        return True

    def encode(self):
        """Store the upper bits of the condition index.

        Returns False when the operand is not a known condition identifier
        or when its lowest bit disagrees with the parent's ``condlsb``.
        """
        expr = self.expr
        # Reject a foreign operand instead of raising AttributeError or
        # ValueError on the lookup below.
        if not isinstance(expr, ExprId) or expr.name not in cond_list_full:
            return False
        index = cond_list_full.index(expr.name)
        if index & 1 != self.parent.condlsb.value:
            return False
        self.value = index >> 1
        return True
ExprOp('postinc', self.parent.rn.expr, ExprInt(imm, 32)) + self.expr = ExprMem(e, 32) + return True + + def encode(self): + self.parent.updown.value = 1 + self.parent.wback.value = 0 + + e = self.expr + assert(isinstance(e, ExprMem)) + e = e.ptr + if e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + if e.op == "postinc": + self.parent.ppi.value = 0 + self.parent.wback.value = 1 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + + self.parent.rn.expr = e.args[0] + + if len(e.args) == 1: + self.value = 0 + return True + # pure imm + if isinstance(e.args[1], ExprInt): + val = self.str_to_imm_rot_form(int(e.args[1])) + if val is None: + val = self.str_to_imm_rot_form(int(e.args[1]), True) + if val is None: + log.debug('cannot encode inm') + return False + self.parent.updown.value = 0 + val = self.encodeval(val) + if val is False: + return False + self.value = val + return True + # pure reg + if isinstance(e.args[1], ExprId): + rm = gpregs.expr.index(e.args[1]) + shift_kind = 0 + shift_type = 0 + amount = 0 + val = (((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + val = self.encodeval(val) + if val is False: + return False + self.value = val + return True + return False + + +class armt_op2imm00(armt_op2imm): + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + if v & 3: + return False + return v >> 2 + + +class armt_deref_reg(arm_imm8_12): + parser = deref + + def decode(self, v): + base = self.parent.rn.expr + off = gpregs.expr[v] + if self.parent.imm.value != 0: + off = off << ExprInt(self.parent.imm.value, 32) + e = ExprMem(ExprOp('preinc', base, off), 8) + self.expr = e + return True + + def encode(self): + if not isinstance(self.expr, ExprMem): + return False + ptr = self.expr.ptr + if not ptr.is_op('preinc'): + return False + if len(ptr.args) != 2: + return False + base, off = ptr.args + if base.is_id() and off.is_id(): + self.parent.rn.expr = base + 
class armt_deref_reg_reg(arm_arg):
    """8-bit memory operand addressed by rn + reg (both taken from gpregs)."""
    parser = deref_reg_reg
    reg_info = gpregs

    def decode(self, v):
        """Build the 8-bit ExprMem at parent.rn + register number @v."""
        index_reg = self.reg_info.expr[v]
        self.expr = ExprMem(self.parent.rn.expr + index_reg, 8)
        return True

    def encode(self):
        """Split self.expr into parent.rn and this field's register number.

        Returns False (and leaves parent.rn untouched) when the expression
        is not a memory access of the form reg1 + reg2, or when reg2 is
        not one of the registers of reg_info.
        """
        expr = self.expr
        if not expr.is_mem():
            return False
        ptr = expr.ptr
        if not ptr.is_op('+') or len(ptr.args) != 2:
            return False
        reg1, reg2 = ptr.args
        # list.index() would raise ValueError on an unknown register;
        # report "cannot encode" instead.
        if reg2 not in self.reg_info.expr:
            log.debug("cannot encode reg %r", reg2)
            return False
        self.parent.rn.expr = reg1
        self.value = self.reg_info.expr.index(reg2)
        return True
class armt_barrier_option(reg_noarg, arm_arg):
    """Operand whose legal values are the entries of barrier_info."""
    reg_info = barrier_info
    parser = reg_info.parser

    def decode(self, v):
        """Map the masked field value to its expression; False if unknown."""
        masked = v & self.lmask
        known = self.reg_info.dct_expr
        if masked in known:
            self.expr = known[masked]
            return True
        return False

    def encode(self):
        """Store the field value of self.expr; False if it cannot be encoded."""
        table = self.reg_info.dct_expr_inv
        if self.expr not in table:
            log.debug("cannot encode reg %r", self.expr)
            return False
        self.value = table[self.expr]
        if self.value > self.lmask:
            # The value is known but does not fit in the field
            log.debug("cannot encode field value %x %x",
                      self.value, self.lmask)
            return False
        return True

    def check_fbits(self, v):
        """Tell whether the fixed bits selected by fmask in @v equal fbits."""
        return (v & self.fmask) == self.fbits
[bs8(0xB6),bs('0110'), bs('0'), aif], [aif]) + +armtop("push", [bs('1110100'), bs('10'), bs('0'), bs('1'), bs('0'), bs('1101'), bs('0'), pclr, bs('0'), trlist13], [trlist13]) +armtop("pop", [bs('1110100'), bs('01'), bs('0'), bs('1'), bs('1'), bs('1101'), pc_in, lr_in, bs('0'), trlist13pclr], [trlist13pclr]) +armtop("mov", [bs('11110'), imm12_1, bs('00010'), scc, bs('1111'), bs('0'), imm12_3, rd_nosppc, imm12_8]) +armtop("asr", [bs('11111010'), bs('0100'), rm, bs('1111'), rd, bs('0000'), rs], [rd, rm, rs]) +armtop("lsl", [bs('11111010'), bs('0000'), rm, bs('1111'), rd, bs('0000'), rs], [rd, rm, rs]) +armtop("sel", [bs('11111010'), bs('1010'), rm, bs('1111'), rd, bs('1000'), rs], [rd, rm, rs]) +armtop("rev", [bs('11111010'), bs('1001'), rm, bs('1111'), rd, bs('1000'), rm_cp], [rd, rm]) +armtop("uadd8", [bs('111110101000'), rn, bs('1111'), rd, bs('0100'), rm], [rd, rn, rm]) +armtop("mvn", [bs('11101010011'), scc, bs('11110'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh] ) +armtop("and", [bs('11101010000'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("orr", [bs('11101010010'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("bic", [bs('11101010001'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("add", [bs('11101011000'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("sub", [bs('11101011101'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("eor", [bs('11101010100'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("rsb", [bs('11101011110'), scc, rn, bs('0'), imm5_3, rd, imm5_2, imm_stype, rm_sh], [rd, rn, rm_sh] ) +armtop("orn", [bs('11101010011'), scc, rn_nopc, bs('0'), imm5_3, rd, imm5_2, 
imm_stype, rm_sh], [rd, rn_nopc, rm_sh] ) +# lsl +armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_00, rm_sh], [rd_nosppc, rm_sh] ) +armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_01, rm_sh], [rd_nosppc, rm_sh] ) +armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_11, rm_sh], [rd_nosppc, rm_sh] ) + + +armtop("orr", [bs('11110'), imm12_1, bs('00010'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8] ) +armtop("add", [bs('11110'), imm12_1, bs('01000'), bs('0'), rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("adds",[bs('11110'), imm12_1, bs('01000'), bs('1'), rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("bic", [bs('11110'), imm12_1, bs('00001'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8], [rd, rn_nosppc, imm12_8]) +armtop("and", [bs('11110'), imm12_1, bs('00000'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("sub", [bs('11110'), imm12_1, bs('01101'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("eor", [bs('11110'), imm12_1, bs('00100'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("add", [bs('11110'), imm12_1, bs('10000'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8_t4], [rd, rn_nosppc, imm12_8_t4]) +armtop("cmp", [bs('11110'), imm12_1, bs('01101'), bs('1'), rn, bs('0'), imm12_3, bs('1111'), imm12_8] ) + +armtop("cmp", [bs('11101011101'), bs('1'), rn, bs('0'), imm5_3, bs('1111'), imm5_2, imm_stype, rm_sh], [rn, rm_sh] ) + +armtop("cmn", [bs('11110'), imm12_1, bs('01000'), bs('1'), rn, bs('0'), imm12_3, bs('1111'), imm12_8], [rn, imm12_8]) + + +armtop("mvn", [bs('11110'), imm12_1, bs('00011'), scc, bs('1111'), bs('0'), imm12_3, rd, imm12_8]) +armtop("rsb", [bs('11110'), imm12_1, bs('01110'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8], [rd, rn_nosppc, imm12_8]) 
+armtop("sub", [bs('11110'), imm12_1, bs('101010'), rn_nosppc, bs('0'), imm12_3, rd, imm12_8_t4], [rd, rn_nosppc, imm12_8_t4]) +armtop("tst", [bs('11110'), imm12_1, bs('000001'), rn, bs('0'), imm12_3, bs('1111'), imm12_8], [rn, imm12_8]) + +armtop("mov", [bs('11110'), imm16_1, bs('100100'), imm16_4, bs('0'), imm16_3, rd, imm16_8] ) +armtop("movt", [bs('11110'), imm16_1, bs('101100'), imm16_4, bs('0'), imm16_3, rd, imm16_8] ) + +armtop("sdiv", [bs('111110111001'), rn, bs('1111'), rd, bs('1111'), rm], [rd, rn, rm] ) +armtop("udiv", [bs('111110111011'), rn, bs('1111'), rd, bs('1111'), rm], [rd, rn, rm] ) +armtop("mls", [bs('111110110000'), rn, ra, rd, bs('0001'), rm], [rd, rn, rm, ra] ) +armtop("mla", [bs('111110110000'), rn, ra_nopc, rd, bs('0000'), rm], [rd, rn, rm, ra_nopc] ) +armtop("mul", [bs('111110110000'), rn, bs('1111'), rd, bs('0000'), rm], [rd, rn, rm] ) + +armtop("smlabb", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('00'), rm], [rd, rn, rm, ra_nopc]) +armtop("smlabt", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('01'), rm], [rd, rn, rm, ra_nopc]) +armtop("smlatb", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('10'), rm], [rd, rn, rm, ra_nopc]) +armtop("smlatt", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('11'), rm], [rd, rn, rm, ra_nopc]) + +armtop("b", [bs('11110'), tsign, bm_cond_barmt, timm6h, bs('10'), tj1, bs('0'), tj2, timm6h11l], [timm6h11l]) +armtop("b", [bs('11110'), tsign, timm10H, bs('10'), tj1, bs('1'), tj2, timm11L], [timm11L]) + +armtop("ubfx", [bs('111100111100'), rn, bs('0'), lsb5_3, rd, lsb5_2, bs('0'), widthm1], [rd, rn, lsb5_2, widthm1]) +armtop("uxth", [bs('111110100001'), bs('1111'), bs('1111'), rd, bs('10'), rot2, rm_rot2], [rd, rm_rot2]) + + + +armtop("str", [bs('111110001100'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("str", [bs('111110000100'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) +armtop("str", [bs('111110000100'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, 
deref_immpuw], [rt, deref_immpuw]) +armtop("strb", [bs('111110001000'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("strb", [bs('111110000000'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("strh", [bs('111110001010'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("strh", [bs('111110000010'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) + +armtop("strd", [bs('1110100'), ppi, updown, bs('1'), wback_no_t, bs('0'), rn_nopc_noarg, rt, rt2, deref_immpuw00], [rt, rt2, deref_immpuw00]) +armtop("ldrd", [bs('1110100'), ppi, updown, bs('1'), wback_no_t, bs('1'), rn_nopc_noarg, rt, rt2, deref_immpuw00], [rt, rt2, deref_immpuw00]) + + +armtop("ldr", [bs('111110001101'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldr", [bs('111110000101'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("ldr", [bs('111110000101'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) +armtop("ldrb", [bs('111110000001'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) +armtop("ldrb", [bs('111110000001'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("ldrb", [bs('111110001001'), rn_deref, rt_nopc, off12], [rt_nopc, rn_deref]) +armtop("ldrsb",[bs('111110011001'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldrsh",[bs('111110011011'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldrh", [bs('111110001011'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldrh", [bs('111110000011'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) + +armtop("pld", [bs('111110001001'), rn_deref, bs('1111'), off12], [rn_deref]) +armtop("pldw", [bs('111110001011'), rn_deref, bs('1111'), off12], [rn_deref]) + +armtop("clz", [bs('111110101011'), rm, bs('1111'), rd, bs('1000'), rm_cp], [rd, rm]) +armtop("tbb", [bs('111010001101'), rn_noarg, bs('11110000000'), 
def cb_arm_fix_call(mn, cur_bloc, loc_db, offsets_to_dis, *args, **kwargs):
    """
    Disassembly callback: when a block ends with
        MOV LR, PC
        LDR PC, [R5, 0x14]
    consider it a sub-call: the offset following the LDR (offset + 4) is
    added as a 'next' constraint of @cur_bloc and queued in
    @offsets_to_dis.

    Only the mnemonics and the PC operands (first argument of the LDR,
    second argument of the MOV) are checked.
    """
    lines = cur_bloc.lines
    if len(lines) < 2:
        return
    last, before_last = lines[-1], lines[-2]
    if last.name != "LDR" or before_last.name != "MOV":
        return

    pc_regs = viewvalues(mn.pc)
    if last.args[0] not in pc_regs or before_last.args[1] not in pc_regs:
        return

    ret_offset = last.offset + 4
    loc_key_cst = loc_db.get_or_create_offset_location(ret_offset)
    cur_bloc.add_cst(loc_key_cst, AsmConstraint.c_next)
    offsets_to_dis.add(ret_offset)
class ir_a_arml(ir_a_arml_base):
    """Analysis IR for ARM (little endian): call model and C type sizes."""

    def __init__(self, loc_db=None):
        ir_a_arml_base.__init__(self, loc_db)
        self.ret_reg = self.arch.regs.R0

    def call_effects(self, ad, instr):
        """Model the effects of the call @instr to address @ad.

        The call writes ret_reg with 'call_func_ret'(ad, R0, R1, R2, R3)
        and sp with 'call_func_stack'(ad, sp). Returns the tuple
        (assignment blocks, extra IR blocks); for a conditional call the
        effects live in an extra block reached only when the condition
        holds.
        """
        regs = self.arch.regs
        ret_value = ExprOp(
            'call_func_ret', ad, regs.R0, regs.R1, regs.R2, regs.R3
        )
        new_sp = ExprOp('call_func_stack', ad, self.sp)
        call_assignblk = AssignBlock(
            [ExprAssign(self.ret_reg, ret_value), ExprAssign(self.sp, new_sp)],
            instr
        )

        cond_index = instr.additional_info.cond
        if cond_index == 14:  # COND_ALWAYS
            return [call_assignblk], []

        # Call is a conditional instruction
        cond = tab_cond[cond_index]
        loc_next_expr = ExprLoc(self.get_next_loc_key(instr), 32)
        loc_do = self.loc_db.add_location()
        dst_cond = ExprCond(cond, ExprLoc(loc_do, 32), loc_next_expr)

        # Block doing the call, then resuming at the next instruction
        resume = AssignBlock([ExprAssign(self.IRDst, loc_next_expr)], instr)
        e_do = IRBlock(loc_do, [call_assignblk, resume])

        # Head: only selects between the call block and the next instruction
        head = AssignBlock([ExprAssign(self.IRDst, dst_cond)], instr)
        return [head], [e_do]

    def get_out_regs(self, _):
        """Return the set made of ret_reg and sp."""
        return {self.ret_reg, self.sp}

    # Sizes, in bits, of the C types

    def sizeof_char(self):
        return 8

    def sizeof_short(self):
        return 16

    def sizeof_int(self):
        return 32

    def sizeof_long(self):
        return 32

    def sizeof_pointer(self):
        return 32
class jitter_arml(Jitter):
    """Jitter for ARM, little endian, lifting code with ir_arml."""
    C_Gen = arm_CGen

    def __init__(self, *args, **kwargs):
        loc_db = LocationDB()
        Jitter.__init__(self, ir_arml(loc_db), *args, **kwargs)
        self.vm.set_little_endian()

    def push_uint32_t(self, value):
        """Decrement SP by 4 and store @value (packed by pck32) at SP."""
        self.cpu.SP -= 4
        self.vm.set_mem(self.cpu.SP, pck32(value))

    def pop_uint32_t(self):
        """Read the 32-bit value at SP, increment SP by 4 and return it."""
        top = self.vm.get_u32(self.cpu.SP)
        self.cpu.SP += 4
        return top

    def get_stack_arg(self, index):
        """Return the 32-bit value stored at SP + 4 * @index."""
        address = self.cpu.SP + 4 * index
        return self.vm.get_u32(address)

    # calling conventions

    @named_arguments
    def func_args_stdcall(self, n_args):
        """Return (return address read from LR, list of @n_args arguments)."""
        values = [self.get_arg_n_stdcall(num) for num in range(n_args)]
        return self.cpu.LR, values

    def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None):
        """Jump to @ret_addr; the given return values go to R0 and R1."""
        self.pc = ret_addr
        self.cpu.PC = ret_addr
        if ret_value1 is not None:
            self.cpu.R0 = ret_value1
        if ret_value2 is not None:
            self.cpu.R1 = ret_value2
        return True

    def func_prepare_stdcall(self, ret_addr, *args):
        """Set up a call: 4 first @args in R0-R3, the rest pushed, LR set."""
        for index, value in enumerate(args[:4]):
            setattr(self.cpu, 'R%d' % index, value)
        # Push from the last argument down to the fifth one
        for value in reversed(args[4:]):
            self.push_uint32_t(value)
        self.cpu.LR = ret_addr

    def get_arg_n_stdcall(self, index):
        """Return argument @index: R0-R3 first, then the stack slots."""
        if index >= 4:
            return self.get_stack_arg(index - 4)
        return getattr(self.cpu, 'R%d' % index)

    func_args_systemv = func_args_stdcall
    func_ret_systemv = func_ret_stdcall
    func_prepare_systemv = func_prepare_stdcall
    get_arg_n_systemv = get_arg_n_stdcall

    def syscall_args_systemv(self, n_args):
        """Return the @n_args first values among R0..R5."""
        cpu = self.cpu
        candidates = [cpu.R0, cpu.R1, cpu.R2, cpu.R3, cpu.R4, cpu.R5]
        return candidates[:n_args]

    def syscall_ret_systemv(self, value):
        """Store the syscall result @value in R0."""
        self.cpu.R0 = value

    def init_run(self, *args, **kwargs):
        """Run Jitter.init_run, then copy self.pc into the PC register."""
        Jitter.init_run(self, *args, **kwargs)
        self.cpu.PC = self.pc
+R4_init = ExprId("R4_init", 32) +R5_init = ExprId("R5_init", 32) +R6_init = ExprId("R6_init", 32) +R7_init = ExprId("R7_init", 32) +R8_init = ExprId("R8_init", 32) +R9_init = ExprId("R9_init", 32) +R10_init = ExprId("R10_init", 32) +R11_init = ExprId("R11_init", 32) +R12_init = ExprId("R12_init", 32) +SP_init = ExprId("SP_init", 32) +LR_init = ExprId("LR_init", 32) +PC_init = ExprId("PC_init", 32) + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +zf_init = ExprId("zf_init", size=1) +nf_init = ExprId("nf_init", size=1) +of_init = ExprId("of_init", size=1) +cf_init = ExprId("cf_init", size=1) + + +reg_ge0 = 'ge0' +reg_ge1 = 'ge1' +reg_ge2 = 'ge2' +reg_ge3 = 'ge3' + +ge0 = ExprId(reg_ge0, size=1) +ge1 = ExprId(reg_ge1, size=1) +ge2 = ExprId(reg_ge2, size=1) +ge3 = ExprId(reg_ge3, size=1) + +ge0_init = ExprId("ge0_init", size=1) +ge1_init = ExprId("ge1_init", size=1) +ge2_init = ExprId("ge2_init", size=1) +ge3_init = ExprId("ge3_init", size=1) + +ge_regs = [ge0, ge1, ge2, ge3] + +all_regs_ids = [ + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, LR, PC, + zf, nf, of, cf, + ge0, ge1, ge2, ge3, + exception_flags, interrupt_num, bp_num +] + +all_regs_ids_no_alias = all_regs_ids + +attrib_to_regs = { + 'l': all_regs_ids_no_alias, + 'b': all_regs_ids_no_alias, +} + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [R0_init, R1_init, R2_init, R3_init, + R4_init, R5_init, R6_init, R7_init, + R8_init, R9_init, R10_init, R11_init, + R12_init, SP_init, LR_init, PC_init, + zf_init, nf_init, of_init, cf_init, + ge0_init, ge1_init, ge2_init, ge3_init, + ExprInt(0, 32), ExprInt(0, 32), ExprInt(0, 32) + ] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] + +regs_flt_expr = [] diff --git a/miasm/arch/arm/sem.py b/miasm/arch/arm/sem.py new file mode 100644 
def check_ops_msb(a, b, c):
    """Raise ValueError unless @a, @b and @c are all truthy and @a equals
    both @b and @c."""
    sizes = (a, b, c)
    if all(sizes) and not (a != b or a != c):
        return
    raise ValueError('bad ops size %s %s %s' % sizes)
def update_flag_subwc_cf(op1, op2, op3):
    """Compute cf in @res = @op1 - (@op2 + @op3)

    The value of the FLAG_SUBWC_CF operator is inverted (XOR 1) before
    being assigned to cf, as update_flag_sub_cf does for FLAG_SUB_CF.
    """
    # NOTE(review): presumably because the ARM carry flag is the inverse of
    # the borrow after a subtraction - confirm
    return [ExprAssign(cf, ExprOp("FLAG_SUBWC_CF", op1, op2, op3) ^ ExprInt(1, 1))]
def adc(ir, instr, a, b, c=None):
    """Semantic of ADC/ADCS: @a = @b + @c + cf.

    With two operands only, @a is also the first source. Flags are updated
    only for 'ADCS' when @a is not PC. Writing PC also sets ir.IRDst.
    Returns (list of assignments, no extra IR block).
    """
    if c is None:
        # Two-operand form: a = a + b + cf
        b, c = a, b
    result = b + c + cf.zeroExtend(32)

    assignments = []
    if instr.name == 'ADCS' and a != PC:
        assignments.extend(update_flag_arith_addwc_zn(b, c, cf))
        assignments.extend(update_flag_arith_addwc_co(b, c, cf))
    assignments.append(ExprAssign(a, result))
    if get_dst(a) is not None:
        assignments.append(ExprAssign(ir.IRDst, result))
    return assignments, []
def sbc(ir, instr, a, b, c=None):
    """Semantic of SBC: @a = @b - (@c + ~cf), flags untouched.

    With two operands only, @a is also the first source. Writing PC also
    sets ir.IRDst. Returns (list of assignments, no extra IR block).
    """
    if c is None:
        # Two-operand form: a = a - (b + ~cf)
        b, c = a, b
    not_carry = (~cf).zeroExtend(32)
    result = b - (c + not_carry)

    assignments = [ExprAssign(a, result)]
    if get_dst(a) is not None:
        assignments.append(ExprAssign(ir.IRDst, result))
    return assignments, []
def orn(ir, instr, a, b, c=None):
    """Semantic of ORN: @a = @b | ~@c (OR with the complemented operand).

    With two operands only, @a is also the first source. Writing PC also
    sets ir.IRDst. Returns (list of assignments, no extra IR block).
    """
    e = []
    if c is None:
        b, c = a, b
    # Only the second operand is complemented; ~(b | c) would be a NOR
    r = b | (~c)
    e.append(ExprAssign(a, r))
    dst = get_dst(a)
    if dst is not None:
        e.append(ExprAssign(ir.IRDst, r))
    return e, []
a | b << ExprInt(16, 32) + e = [ExprAssign(a, r)] + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def movs(ir, instr, a, b): + e = [] + e.append(ExprAssign(a, b)) + # XXX TODO check + e += [ExprAssign(zf, ExprOp('FLAG_EQ', b))] + e += update_flag_nf(b) + + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, b)) + return e, [] + + +def mvn(ir, instr, a, b): + r = b ^ ExprInt(-1, 32) + e = [ExprAssign(a, r)] + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def mvns(ir, instr, a, b): + e = [] + r = b ^ ExprInt(-1, 32) + e.append(ExprAssign(a, r)) + # XXX TODO check + e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] + e += update_flag_nf(r) + + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + + +def mrs(ir, instr, a, b): + e = [] + if b.is_id('CPSR_cxsf'): + out = [] + out.append(ExprInt(0x10, 28)) + out.append(of) + out.append(cf) + out.append(zf) + out.append(nf) + e.append(ExprAssign(a, ExprCompose(*out))) + else: + raise NotImplementedError("MRS not implemented") + return e, [] + +def msr(ir, instr, a, b): + e = [] + if a.is_id('CPSR_cf'): + e.append(ExprAssign(nf, b[31:32])) + e.append(ExprAssign(zf, b[30:31])) + e.append(ExprAssign(cf, b[29:30])) + e.append(ExprAssign(of, b[28:29])) + else: + raise NotImplementedError("MSR not implemented") + return e, [] + + +def neg(ir, instr, a, b): + e = [] + r = - b + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def negs(ir, instr, a, b): + return subs(ir, instr, a, ExprInt(0, b.size), b) + +def bic(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b & (c ^ ExprInt(-1, 32)) + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def bics(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + 
tmp1, tmp2 = b, ~c + r = tmp1 & tmp2 + + e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', tmp1, tmp2))] + e += update_flag_nf(r) + + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def sdiv(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + + loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_except = ExprId(ir.loc_db.add_location(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + e.append(ExprAssign(ir.IRDst, ExprCond(c, loc_div, loc_except))) + + do_except = [] + do_except.append(ExprAssign(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(ExprAssign(ir.IRDst, loc_next)) + blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) + + + + r = ExprOp("sdiv", b, c) + do_div = [] + do_div.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + do_div.append(ExprAssign(ir.IRDst, r)) + + do_div.append(ExprAssign(ir.IRDst, loc_next)) + blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) + + return e, [blk_div, blk_except] + + +def udiv(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + + + + loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_except = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + e.append(ExprAssign(ir.IRDst, ExprCond(c, loc_div, loc_except))) + + do_except = [] + do_except.append(ExprAssign(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(ExprAssign(ir.IRDst, loc_next)) + blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) + + + r = ExprOp("udiv", b, c) + do_div = [] + do_div.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + do_div.append(ExprAssign(ir.IRDst, r)) + + do_div.append(ExprAssign(ir.IRDst, loc_next)) + blk_div = IRBlock(loc_div.loc_key, 
[AssignBlock(do_div, instr)]) + + return e, [blk_div, blk_except] + + +def mla(ir, instr, a, b, c, d): + e = [] + r = (b * c) + d + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def mlas(ir, instr, a, b, c, d): + e = [] + r = (b * c) + d + e += update_flag_zn(r) + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def mls(ir, instr, a, b, c, d): + e = [] + r = d - (b * c) + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def mul(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b * c + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def muls(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b * c + e += update_flag_zn(r) + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def umull(ir, instr, a, b, c, d): + e = [] + r = c.zeroExtend(64) * d.zeroExtend(64) + e.append(ExprAssign(a, r[0:32])) + e.append(ExprAssign(b, r[32:64])) + # r15/IRDst not allowed as output + return e, [] + +def umlal(ir, instr, a, b, c, d): + e = [] + r = c.zeroExtend(64) * d.zeroExtend(64) + ExprCompose(a, b) + e.append(ExprAssign(a, r[0:32])) + e.append(ExprAssign(b, r[32:64])) + # r15/IRDst not allowed as output + return e, [] + +def smull(ir, instr, a, b, c, d): + e = [] + r = c.signExtend(64) * d.signExtend(64) + e.append(ExprAssign(a, r[0:32])) + e.append(ExprAssign(b, r[32:64])) + # r15/IRDst not allowed as output + return e, [] + +def smlal(ir, instr, a, b, c, d): + e = [] + r = c.signExtend(64) * d.signExtend(64) + ExprCompose(a, b) + e.append(ExprAssign(a, r[0:32])) + e.append(ExprAssign(b, r[32:64])) + # r15/IRDst not allowed as output + return e, [] + +def b(ir, instr, a): 
+ e = [] + e.append(ExprAssign(PC, a)) + e.append(ExprAssign(ir.IRDst, a)) + return e, [] + + +def bl(ir, instr, a): + e = [] + l = ExprInt(instr.offset + instr.l, 32) + e.append(ExprAssign(PC, a)) + e.append(ExprAssign(ir.IRDst, a)) + e.append(ExprAssign(LR, l)) + return e, [] + + +def bx(ir, instr, a): + e = [] + e.append(ExprAssign(PC, a)) + e.append(ExprAssign(ir.IRDst, a)) + return e, [] + + +def blx(ir, instr, a): + e = [] + l = ExprInt(instr.offset + instr.l, 32) + e.append(ExprAssign(PC, a)) + e.append(ExprAssign(ir.IRDst, a)) + e.append(ExprAssign(LR, l)) + return e, [] + + +def st_ld_r(ir, instr, a, a2, b, store=False, size=32, s_ext=False, z_ext=False): + e = [] + wb = False + b = b.copy() + postinc = False + b = b.ptr + if isinstance(b, ExprOp): + if b.op == "wback": + wb = True + b = b.args[0] + if b.op == "postinc": + postinc = True + if isinstance(b, ExprOp) and b.op in ["postinc", 'preinc']: + # XXX TODO CHECK + base, off = b.args[0], b.args[1] # ExprInt(size/8, 32) + else: + base, off = b, ExprInt(0, 32) + if postinc: + ad = base + else: + ad = base + off + + # PC base lookup uses PC 4 byte alignment + ad = ad.replace_expr({PC: PC & ExprInt(0xFFFFFFFC, 32)}) + + dmem = False + if size in [8, 16]: + if store: + a = a[:size] + m = ExprMem(ad, size=size) + elif s_ext: + m = ExprMem(ad, size=size).signExtend(a.size) + elif z_ext: + m = ExprMem(ad, size=size).zeroExtend(a.size) + else: + raise ValueError('unhandled case') + elif size == 32: + m = ExprMem(ad, size=size) + elif size == 64: + assert a2 is not None + m = ExprMem(ad, size=32) + dmem = True + size = 32 + else: + raise ValueError('the size DOES matter') + dst = None + + if store: + e.append(ExprAssign(m, a)) + if dmem: + e.append(ExprAssign(ExprMem(ad + ExprInt(4, 32), size=size), a2)) + else: + if a == PC: + dst = PC + e.append(ExprAssign(ir.IRDst, m)) + e.append(ExprAssign(a, m)) + if dmem: + e.append(ExprAssign(a2, ExprMem(ad + ExprInt(4, 32), size=size))) + + # XXX TODO check multiple 
write cause by wb + if wb or postinc: + e.append(ExprAssign(base, base + off)) + return e, [] + + +def ldr(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=False) + + +def ldrd(ir, instr, a, b, c=None): + if c is None: + a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] + else: + a2 = b + b = c + return st_ld_r(ir, instr, a, a2, b, store=False, size=64) + + +def l_str(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=True) + + +def l_strd(ir, instr, a, b, c=None): + if c is None: + a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] + else: + a2 = b + b = c + return st_ld_r(ir, instr, a, a2, b, store=True, size=64) + +def ldrb(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=False, size=8, z_ext=True) + +def ldrsb(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=False, size=8, s_ext=True, z_ext=False) + +def strb(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=True, size=8) + +def ldrh(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=False, size=16, z_ext=True) + + +def strh(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=True, size=16, z_ext=True) + + +def ldrsh(ir, instr, a, b): + return st_ld_r(ir, instr, a, None, b, store=False, size=16, s_ext=True, z_ext=False) + + +def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): + e = [] + wb = False + dst = None + if isinstance(a, ExprOp) and a.op == 'wback': + wb = True + a = a.args[0] + if isinstance(b, ExprOp) and b.op == 'sbit': + b = b.args[0] + regs = b.args + base = a + if updown: + step = 4 + else: + step = -4 + regs = regs[::-1] + if postinc: + pass + else: + base += ExprInt(step, 32) + for i, r in enumerate(regs): + ad = base + ExprInt(i * step, 32) + if store: + e.append(ExprAssign(ExprMem(ad, 32), r)) + else: + e.append(ExprAssign(r, ExprMem(ad, 32))) + if r == PC: + e.append(ExprAssign(ir.IRDst, ExprMem(ad, 32))) + # XXX TODO check multiple 
write cause by wb + if wb: + if postinc: + e.append(ExprAssign(a, base + ExprInt(len(regs) * step, 32))) + else: + e.append(ExprAssign(a, base + ExprInt((len(regs) - 1) * step, 32))) + if store: + pass + else: + assert(isinstance(b, ExprOp) and b.op == "reglist") + + return e, [] + + +def ldmia(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=True, updown=True) + + +def ldmib(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=True) + + +def ldmda(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=True, updown=False) + + +def ldmdb(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False) + + +def stmia(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=True, updown=True) + + +def stmib(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=False, updown=True) + + +def stmda(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=True, updown=False) + + +def stmdb(ir, instr, a, b): + return st_ld_m(ir, instr, a, b, store=True, postinc=False, updown=False) + + +def svc(ir, instr, a): + e = [] + except_int = EXCEPT_INT_XX + e.append(ExprAssign(exception_flags, ExprInt(except_int, 32))) + e.append(ExprAssign(interrupt_num, a)) + return e, [] + + +def und(ir, instr, a, b): + # XXX TODO implement + e = [] + return e, [] + +# TODO XXX implement correct CF for shifters +def lsr(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b >> c + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def lsrs(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b >> c + e.append(ExprAssign(a, r)) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] + e += update_flag_nf(r) + + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def asr(ir, instr, a, b, c=None): + e = [] + 
if c is None: + b, c = a, b + r = ExprOp("a>>", b, c) + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def asrs(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = ExprOp("a>>", b, c) + e.append(ExprAssign(a, r)) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] + e += update_flag_nf(r) + + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def lsl(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b << c + e.append(ExprAssign(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def lsls(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = b << c + e.append(ExprAssign(a, r)) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] + e += update_flag_nf(r) + + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def rors(ir, instr, a, b): + e = [] + r = ExprOp(">>>", a, b) + e.append(ExprAssign(a, r)) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] + e += update_flag_nf(r) + + dst = get_dst(a) + if dst is not None: + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def push(ir, instr, a): + e = [] + regs = list(a.args) + for i in range(len(regs)): + r = SP + ExprInt(-4 * len(regs) + 4 * i, 32) + e.append(ExprAssign(ExprMem(r, 32), regs[i])) + r = SP + ExprInt(-4 * len(regs), 32) + e.append(ExprAssign(SP, r)) + return e, [] + + +def pop(ir, instr, a): + e = [] + regs = list(a.args) + dst = None + for i in range(len(regs)): + r = SP + ExprInt(4 * i, 32) + e.append(ExprAssign(regs[i], ExprMem(r, 32))) + if regs[i] == ir.pc: + dst = ExprMem(r, 32) + r = SP + ExprInt(4 * len(regs), 32) + e.append(ExprAssign(SP, r)) + if dst is not None: + e.append(ExprAssign(ir.IRDst, dst)) + return e, [] + + +def cbz(ir, instr, a, b): + e = [] + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + 
e.append(ExprAssign(ir.IRDst, ExprCond(a, loc_next_expr, b))) + return e, [] + + +def cbnz(ir, instr, a, b): + e = [] + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + e.append(ExprAssign(ir.IRDst, ExprCond(a, b, loc_next_expr))) + return e, [] + + +def uxtb(ir, instr, a, b): + e = [] + r = b[:8].zeroExtend(32) + e.append(ExprAssign(a, r)) + dst = None + if PC in a.get_r(): + dst = PC + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def uxth(ir, instr, a, b): + e = [] + r = b[:16].zeroExtend(32) + e.append(ExprAssign(a, r)) + dst = None + if PC in a.get_r(): + dst = PC + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def sxtb(ir, instr, a, b): + e = [] + r = b[:8].signExtend(32) + e.append(ExprAssign(a, r)) + dst = None + if PC in a.get_r(): + dst = PC + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def sxth(ir, instr, a, b): + e = [] + r = b[:16].signExtend(32) + e.append(ExprAssign(a, r)) + dst = None + if PC in a.get_r(): + dst = PC + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def ubfx(ir, instr, a, b, c, d): + e = [] + c = int(c) + d = int(d) + r = b[c:c+d].zeroExtend(32) + e.append(ExprAssign(a, r)) + dst = None + if PC in a.get_r(): + dst = PC + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + +def bfc(ir, instr, a, b, c): + e = [] + start = int(b) + stop = start + int(c) + out = [] + last = 0 + if start: + out.append(a[:start]) + last = start + if stop - start: + out.append(ExprInt(0, 32)[last:stop]) + last = stop + if last < 32: + out.append(a[last:]) + r = ExprCompose(*out) + e.append(ExprAssign(a, r)) + dst = None + if PC in a.get_r(): + dst = PC + e.append(ExprAssign(ir.IRDst, r)) + return e, [] + + +def pld(ir, instr, a): + e = [] + return e, [] + + +def pldw(ir, instr, a): + e = [] + return e, [] + + +def clz(ir, instr, a, b): + e = [] + e.append(ExprAssign(a, ExprOp('cntleadzeros', b))) + return e, [] + +def uxtab(ir, instr, a, b, c): + e = [] + e.append(ExprAssign(a, b + (c & ExprInt(0xff, 
32)))) + return e, [] + + +def uxtah(ir, instr, a, b, c): + e = [] + e.append(ExprAssign(a, b + (c & ExprInt(0xffff, 32)))) + return e, [] + + +def bkpt(ir, instr, a): + e = [] + e.append(ExprAssign(exception_flags, ExprInt(EXCEPT_SOFT_BP, 32))) + e.append(ExprAssign(bp_num, a)) + return e, [] + + +def _extract_s16(arg, part): + if part == 'B': # bottom 16 bits + return arg[0:16] + elif part == 'T': # top 16 bits + return arg[16:32] + + +def smul(ir, instr, a, b, c): + e = [] + e.append(ExprAssign(a, _extract_s16(b, instr.name[4]).signExtend(32) * _extract_s16(c, instr.name[5]).signExtend(32))) + return e, [] + + +def smulw(ir, instr, a, b, c): + e = [] + prod = b.signExtend(48) * _extract_s16(c, instr.name[5]).signExtend(48) + e.append(ExprAssign(a, prod[16:48])) + return e, [] # signed most significant 32 bits of the 48-bit result + + +def tbb(ir, instr, a): + e = [] + dst = PC + ExprInt(2, 32) * a.zeroExtend(32) + e.append(ExprAssign(PC, dst)) + e.append(ExprAssign(ir.IRDst, dst)) + return e, [] + + +def tbh(ir, instr, a): + e = [] + dst = PC + ExprInt(2, 32) * a.zeroExtend(32) + e.append(ExprAssign(PC, dst)) + e.append(ExprAssign(ir.IRDst, dst)) + return e, [] + + +def smlabb(ir, instr, a, b, c, d): + e = [] + result = (b[:16].signExtend(32) * c[:16].signExtend(32)) + d + e.append(ExprAssign(a, result)) + return e, [] + + +def smlabt(ir, instr, a, b, c, d): + e = [] + result = (b[:16].signExtend(32) * c[16:32].signExtend(32)) + d + e.append(ExprAssign(a, result)) + return e, [] + + +def smlatb(ir, instr, a, b, c, d): + e = [] + result = (b[16:32].signExtend(32) * c[:16].signExtend(32)) + d + e.append(ExprAssign(a, result)) + return e, [] + + +def smlatt(ir, instr, a, b, c, d): + e = [] + result = (b[16:32].signExtend(32) * c[16:32].signExtend(32)) + d + e.append(ExprAssign(a, result)) + return e, [] + + +def uadd8(ir, instr, a, b, c): + e = [] + sums = [] + ges = [] + for i in range(0, 32, 8): + sums.append(b[i:i+8] + c[i:i+8]) + 
ges.append((b[i:i+8].zeroExtend(9) + c[i:i+8].zeroExtend(9))[8:9]) + + e.append(ExprAssign(a, ExprCompose(*sums))) + + for i, value in enumerate(ges): + e.append(ExprAssign(ge_regs[i], value)) + return e, [] + + +def sel(ir, instr, a, b, c): + e = [] + cond = nf ^ of ^ ExprInt(1, 1) + parts = [] + for i in range(4): + parts.append(ExprCond(ge_regs[i], b[i*8:(i+1)*8], c[i*8:(i+1)*8])) + result = ExprCompose(*parts) + e.append(ExprAssign(a, result)) + return e, [] + + +def rev(ir, instr, a, b): + e = [] + result = ExprCompose(b[24:32], b[16:24], b[8:16], b[:8]) + e.append(ExprAssign(a, result)) + return e, [] + + +def rev16(ir, instr, a, b): + e = [] + result = ExprCompose(b[8:16], b[:8], b[24:32], b[16:24]) + e.append(ExprAssign(a, result)) + return e, [] + + +def nop(ir, instr): + e = [] + return e, [] + + +def dsb(ir, instr, a): + # XXX TODO + e = [] + return e, [] + + +def cpsie(ir, instr, a): + # XXX TODO + e = [] + return e, [] + + +def cpsid(ir, instr, a): + # XXX TODO + e = [] + return e, [] + + +def wfe(ir, instr): + # XXX TODO + e = [] + return e, [] + + +def wfi(ir, instr): + # XXX TODO + e = [] + return e, [] + +def adr(ir, instr, arg1, arg2): + e = [] + e.append(ExprAssign(arg1, (PC & ExprInt(0xfffffffc, 32)) + arg2)) + return e, [] + +COND_EQ = 0 +COND_NE = 1 +COND_CS = 2 +COND_CC = 3 +COND_MI = 4 +COND_PL = 5 +COND_VS = 6 +COND_VC = 7 +COND_HI = 8 +COND_LS = 9 +COND_GE = 10 +COND_LT = 11 +COND_GT = 12 +COND_LE = 13 +COND_AL = 14 +COND_NV = 15 + +cond_dct = { + COND_EQ: "EQ", + COND_NE: "NE", + COND_CS: "CS", + COND_CC: "CC", + COND_MI: "MI", + COND_PL: "PL", + COND_VS: "VS", + COND_VC: "VC", + COND_HI: "HI", + COND_LS: "LS", + COND_GE: "GE", + COND_LT: "LT", + COND_GT: "GT", + COND_LE: "LE", + COND_AL: "AL", + # COND_NV: "NV", +} + +cond_dct_inv = dict((name, num) for num, name in viewitems(cond_dct)) + + +""" +Code Meaning (for cmp or subs) Flags Tested +eq Equal. Z==1 +ne Not equal. Z==0 +cs or hs Unsigned higher or same (or carry set). 
C==1 +cc or lo Unsigned lower (or carry clear). C==0 +mi Negative. The mnemonic stands for "minus". N==1 +pl Positive or zero. The mnemonic stands for "plus". N==0 +vs Signed overflow. The mnemonic stands for "V set". V==1 +vc No signed overflow. The mnemonic stands for "V clear". V==0 +hi Unsigned higher. (C==1) && (Z==0) +ls Unsigned lower or same. (C==0) || (Z==1) +ge Signed greater than or equal. N==V +lt Signed less than. N!=V +gt Signed greater than. (Z==0) && (N==V) +le Signed less than or equal. (Z==1) || (N!=V) +al (or omitted) Always executed. None tested. +""" + +tab_cond = {COND_EQ: ExprOp("CC_EQ", zf), + COND_NE: ExprOp("CC_NE", zf), + COND_CS: ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), # inv cf + COND_CC: ExprOp("CC_U<", cf ^ ExprInt(1, 1)), # inv cf + COND_MI: ExprOp("CC_NEG", nf), + COND_PL: ExprOp("CC_POS", nf), + COND_VS: ExprOp("CC_sOVR", of), + COND_VC: ExprOp("CC_sNOOVR", of), + COND_HI: ExprOp("CC_U>", cf ^ ExprInt(1, 1), zf), # inv cf + COND_LS: ExprOp("CC_U<=", cf ^ ExprInt(1, 1), zf), # inv cf + COND_GE: ExprOp("CC_S>=", nf, of), + COND_LT: ExprOp("CC_S<", nf, of), + COND_GT: ExprOp("CC_S>", nf, of, zf), + COND_LE: ExprOp("CC_S<=", nf, of, zf), + } + + + + + +def is_pc_written(ir, instr_ir): + all_pc = viewvalues(ir.mn.pc) + for ir in instr_ir: + if ir.dst in all_pc: + return True, ir.dst + return False, None + + +def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): + if cond == COND_AL: + return instr_ir, extra_ir + if not cond in tab_cond: + raise ValueError('unknown condition %r' % cond) + cond = tab_cond[cond] + + + + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + loc_do = ir.loc_db.add_location() + loc_do_expr = ExprLoc(loc_do, 32) + + dst_cond = ExprCond(cond, loc_do_expr, loc_next_expr) + assert(isinstance(instr_ir, list)) + + has_irdst = False + for e in instr_ir: + if e.dst == ir.IRDst: + has_irdst = True + break + if not has_irdst: + instr_ir.append(ExprAssign(ir.IRDst, loc_next_expr)) + e_do = 
IRBlock(loc_do, [AssignBlock(instr_ir, instr)]) + e = [ExprAssign(ir.IRDst, dst_cond)] + return e, [e_do] + extra_ir + +mnemo_func = {} +mnemo_func_cond = {} +mnemo_condm0 = {'add': add, + 'sub': sub, + 'eor': eor, + 'and': l_and, + 'rsb': rsb, + 'adc': adc, + 'sbc': sbc, + 'rsc': rsc, + + 'tst': tst, + 'teq': teq, + 'cmp': l_cmp, + 'cmn': cmn, + 'orr': orr, + 'mov': mov, + 'movt': movt, + 'bic': bic, + 'mvn': mvn, + 'neg': neg, + + 'sdiv': sdiv, + 'udiv': udiv, + + 'mul': mul, + 'umull': umull, + 'umlal': umlal, + 'smull': smull, + 'smlal': smlal, + 'mla': mla, + 'ldr': ldr, + 'ldrd': ldrd, + 'ldrsb': ldrsb, + 'str': l_str, + 'strd': l_strd, + 'b': b, + 'bl': bl, + 'svc': svc, + 'und': und, + 'bx': bx, + 'ldrh': ldrh, + 'strh': strh, + 'ldrsh': ldrsh, + 'ldsh': ldrsh, + 'uxtb': uxtb, + 'uxth': uxth, + 'sxtb': sxtb, + 'sxth': sxth, + 'ubfx': ubfx, + 'bfc': bfc, + 'rev': rev, + 'rev16': rev16, + 'clz': clz, + 'uxtab': uxtab, + 'uxtah': uxtah, + 'bkpt': bkpt, + 'smulbb': smul, + 'smulbt': smul, + 'smultb': smul, + 'smultt': smul, + 'smulwt': smulw, + 'smulwb': smulw, + } + +mnemo_condm1 = {'adds': add, + 'subs': subs, + 'eors': eors, + 'ands': l_and, + 'rsbs': rsbs, + 'adcs': adc, + 'sbcs': sbcs, + 'rscs': rscs, + + 'orrs': orrs, + 'movs': movs, + 'bics': bics, + 'mvns': mvns, + + 'mrs': mrs, + 'msr': msr, + + 'negs': negs, + + 'muls': muls, + 'mls': mls, + 'mlas': mlas, + 'blx': blx, + + 'ldrb': ldrb, + 'ldsb': ldrsb, + 'strb': strb, + } + +mnemo_condm2 = {'ldmia': ldmia, + 'ldmib': ldmib, + 'ldmda': ldmda, + 'ldmdb': ldmdb, + + 'ldmfa': ldmda, + 'ldmfd': ldmia, + 'ldmea': ldmdb, + 'ldmed': ldmib, # XXX + + + 'stmia': stmia, + 'stmib': stmib, + 'stmda': stmda, + 'stmdb': stmdb, + + 'stmfa': stmib, + 'stmed': stmda, + 'stmfd': stmdb, + 'stmea': stmia, + } + + +mnemo_nocond = {'lsr': lsr, + 'lsrs': lsrs, + 'lsl': lsl, + 'lsls': lsls, + 'rors': rors, + 'push': push, + 'pop': pop, + 'asr': asr, + 'asrs': asrs, + 'cbz': cbz, + 'cbnz': cbnz, + 'pld': pld, + 'pldw': pldw, 
+ 'tbb': tbb, + 'tbh': tbh, + 'nop': nop, + 'dsb': dsb, + 'cpsie': cpsie, + 'cpsid': cpsid, + 'wfe': wfe, + 'wfi': wfi, + 'adr': adr, + 'orn': orn, + 'smlabb': smlabb, + 'smlabt': smlabt, + 'smlatb': smlatb, + 'smlatt': smlatt, + 'uadd8': uadd8, + 'sel': sel, + } + +mn_cond_x = [mnemo_condm0, + mnemo_condm1, + mnemo_condm2] + +for index, mn_base in enumerate(mn_cond_x): + for mn, mf in viewitems(mn_base): + for cond, cn in viewitems(cond_dct): + if cond == COND_AL: + cn = "" + cn = cn.lower() + if index == 0: + mn_mod = mn + cn + else: + mn_mod = mn[:-index] + cn + mn[-index:] + # print mn_mod + mnemo_func_cond[mn_mod] = cond, mf + +for name, mf in viewitems(mnemo_nocond): + mnemo_func_cond[name] = COND_AL, mf + + +def split_expr_dst(ir, instr_ir): + out = [] + dst = None + for i in instr_ir: + if i.dst == ir.pc: + out.append(i) + dst = ir.pc # i.src + else: + out.append(i) + return out, dst + + +def get_mnemo_expr(ir, instr, *args): + if not instr.name.lower() in mnemo_func_cond: + raise ValueError('unknown mnemo %s' % instr) + cond, mf = mnemo_func_cond[instr.name.lower()] + instr_ir, extra_ir = mf(ir, instr, *args) + instr, extra_ir = add_condition_expr(ir, instr, cond, instr_ir, extra_ir) + return instr, extra_ir + +get_arm_instr_expr = get_mnemo_expr + + +class arminfo(object): + mode = "arm" + # offset + + +class ir_arml(IntermediateRepresentation): + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_arm, "l", loc_db) + self.pc = PC + self.sp = SP + self.IRDst = ExprId('IRDst', 32) + self.addrsize = 32 + + + + def mod_pc(self, instr, instr_ir, extra_ir): + # fix PC (+8 for arm) + pc_fixed = {self.pc: ExprInt(instr.offset + 8, 32)} + + for i, expr in enumerate(instr_ir): + dst, src = expr.dst, expr.src + if dst != self.pc: + dst = dst.replace_expr(pc_fixed) + src = src.replace_expr(pc_fixed) + instr_ir[i] = ExprAssign(dst, src) + + for idx, irblock in enumerate(extra_ir): + extra_ir[idx] = irblock.modify_exprs(lambda expr: 
expr.replace_expr(pc_fixed) \ + if expr != self.pc else expr, + lambda expr: expr.replace_expr(pc_fixed)) + + def get_ir(self, instr): + args = instr.args + # ir = get_mnemo_expr(self, self.name.lower(), *args) + if len(args) and isinstance(args[-1], ExprOp): + if args[-1].op == 'rrx': + args[-1] = ExprCompose(args[-1].args[0][1:], cf) + elif (args[-1].op in ['<<', '>>', '<>', '<<<', '>>>'] and + isinstance(args[-1].args[-1], ExprId)): + args[-1] = ExprOp(args[-1].op, + args[-1].args[0], + args[-1].args[-1][:8].zeroExtend(32)) + instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) + + self.mod_pc(instr, instr_ir, extra_ir) + return instr_ir, extra_ir + + def parse_itt(self, instr): + name = instr.name + assert name.startswith('IT') + name = name[1:] + out = [] + for hint in name: + if hint == 'T': + out.append(0) + elif hint == "E": + out.append(1) + else: + raise ValueError("IT name invalid %s" % instr) + return out, instr.args[0] + + def do_it_block(self, loc, index, block, assignments, gen_pc_updt): + instr = block.lines[index] + it_hints, it_cond = self.parse_itt(instr) + cond_num = cond_dct_inv[it_cond.name] + cond_eq = tab_cond[cond_num] + + if not index + len(it_hints) <= len(block.lines): + raise NotImplementedError("Split IT block non supported yet") + + ir_blocks_all = [] + + # Gen dummy irblock for IT instr + loc_next = self.get_next_loc_key(instr) + dst = ExprAssign(self.IRDst, ExprLoc(loc_next, 32)) + dst_blk = AssignBlock([dst], instr) + assignments.append(dst_blk) + irblock = IRBlock(loc, assignments) + ir_blocks_all.append([irblock]) + + loc = loc_next + assignments = [] + for hint in it_hints: + irblocks = [] + index += 1 + instr = block.lines[index] + + # Add conditionnal jump to current irblock + loc_do = self.loc_db.add_location() + loc_next = self.get_next_loc_key(instr) + + if hint: + local_cond = ~cond_eq + else: + local_cond = cond_eq + dst = ExprAssign(self.IRDst, ExprCond(local_cond, ExprLoc(loc_do, 32), ExprLoc(loc_next, 32))) + 
dst_blk = AssignBlock([dst], instr) + assignments.append(dst_blk) + irblock = IRBlock(loc, assignments) + + irblocks.append(irblock) + + it_instr_irblocks = [] + assignments = [] + loc = loc_do + + split = self.add_instr_to_current_state( + instr, block, assignments, + it_instr_irblocks, gen_pc_updt + ) + if split: + raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) + + if it_instr_irblocks: + assert len(it_instr_irblocks) == 1 + it_instr_irblocks = it_instr_irblocks.pop() + # Remove flags assignment if instr != [CMP, CMN, TST] + if instr.name not in ["CMP", "CMN", "TST"]: + # Fix assignments + out = [] + for assignment in assignments: + assignment = AssignBlock( + { + dst: src for (dst, src) in viewitems(assignment) + if dst not in [zf, nf, of, cf] + }, + assignment.instr + ) + out.append(assignment) + assignments = out + # Fix extra irblocksx + new_irblocks = [] + for irblock in it_instr_irblocks: + out = [] + for tmp_assignment in irblock: + assignment = AssignBlock( + { + dst: src for (dst, src) in viewitems(assignment) + if dst not in [zf, nf, of, cf] + }, + assignment.instr + ) + out.append(assignment) + new_irblock = IRBlock(irblock.loc_key, out) + new_irblocks.append(new_irblock) + it_instr_irblocks = new_irblocks + + irblocks += it_instr_irblocks + dst = ExprAssign(self.IRDst, ExprLoc(loc_next, 32)) + dst_blk = AssignBlock([dst], instr) + assignments.append(dst_blk) + irblock = IRBlock(loc, assignments) + irblocks.append(irblock) + loc = loc_next + assignments = [] + ir_blocks_all.append(irblocks) + return index, ir_blocks_all + + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): + """ + Add a native block to the current IR + @block: native assembly block + @gen_pc_updt: insert PC update effects between instructions + """ + + it_hints = None + it_cond = None + label = block.loc_key + assignments = [] + ir_blocks_all = [] + index = -1 + while index + 1 < len(block.lines): + index += 1 + instr = block.lines[index] + if 
label is None: + assignments = [] + label = self.get_loc_key_for_instr(instr) + if instr.name.startswith("IT"): + index, irblocks_it = self.do_it_block(label, index, block, assignments, gen_pc_updt) + for irblocks in irblocks_it: + ir_blocks_all += irblocks + label = None + continue + + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) + if split: + ir_blocks_all.append(IRBlock(label, assignments)) + label = None + assignments = [] + if label is not None: + ir_blocks_all.append(IRBlock(label, assignments)) + + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) + for irblock in new_ir_blocks_all: + ircfg.add_irblock(irblock) + return new_ir_blocks_all + + + +class ir_armb(ir_arml): + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_arm, "b", loc_db) + self.pc = PC + self.sp = SP + self.IRDst = ExprId('IRDst', 32) + self.addrsize = 32 + + +class ir_armtl(ir_arml): + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_armt, "l", loc_db) + self.pc = PC + self.sp = SP + self.IRDst = ExprId('IRDst', 32) + self.addrsize = 32 + + + def mod_pc(self, instr, instr_ir, extra_ir): + # fix PC (+4 for thumb) + pc_fixed = {self.pc: ExprInt(instr.offset + 4, 32)} + + for i, expr in enumerate(instr_ir): + dst, src = expr.dst, expr.src + if dst != self.pc: + dst = dst.replace_expr(pc_fixed) + src = src.replace_expr(pc_fixed) + instr_ir[i] = ExprAssign(dst, src) + + for idx, irblock in enumerate(extra_ir): + extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ + if expr != self.pc else expr, + lambda expr: expr.replace_expr(pc_fixed)) + + +class ir_armtb(ir_armtl): + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_armt, "b", loc_db) + self.pc = PC + self.sp = SP + self.IRDst = ExprId('IRDst', 32) + self.addrsize = 32 + diff --git a/miasm/arch/mep/__init__.py b/miasm/arch/mep/__init__.py 
new file mode 100644 index 00000000..e69de29b diff --git a/miasm/arch/mep/arch.py b/miasm/arch/mep/arch.py new file mode 100644 index 00000000..171f5fab --- /dev/null +++ b/miasm/arch/mep/arch.py @@ -0,0 +1,2052 @@ +# Toshiba MeP-c4 - miasm architecture definition +# Guillaume Valadon + +from builtins import range +from miasm.core.cpu import * +from miasm.core.utils import Disasm_Exception +from miasm.expression.expression import Expr, ExprId, ExprInt, ExprLoc, \ + ExprMem, ExprOp +from miasm.core.asm_ast import AstId, AstMem + +from miasm.arch.mep.regs import * +import miasm.arch.mep.regs as mep_regs_module # will be used to set mn_mep.regs + + +# Note: pyparsing is used to alter the way special operands are parsed +from pyparsing import Literal, Group, Word, hexnums + + +# These definitions will help parsing dereferencing instructions (i.e. that uses +# parenthesis) with pyparsing +LPARENTHESIS = Literal("(") +RPARENTHESIS = Literal(")") +PLUSSIGN = Literal("+") +HEX_INTEGER = str_int_pos | str_int_neg + + +def ExprInt2SignedString(expr, pos_fmt="%d", neg_fmt="%d", size=None, offset=0): + """Return the signed string corresponding to an ExprInt + + Note: this function is only useful to mimic objdump output""" + + # Apply a mask to the integer + if size is None: + mask_length = expr.size + else: + mask_length = size + mask = (1 << mask_length) - 1 + value = int(expr.arg) & mask + + # Return a signed integer if necessary + if (value >> mask_length - 1) == 1: + value = offset - ((value ^ mask) + 1) + if value < 0: + return "-" + neg_fmt % -value + else: + value += offset + + return pos_fmt % value + + +class instruction_mep(instruction): + """Generic MeP-c4 instruction + + Notes: + - this object is used to build internal miasm instructions based + on mnemonics + - it must be implemented ! 
@staticmethod
def arg2str(expr, pos=None, loc_db=None):
    """Convert a mnemonic argument into a readable string, following the
    MeP-c4 architecture manual notation (used to mimic objdump output).

    Args:
        expr: argument as a miasm expression
        pos: position index in the arguments list (unused here)
        loc_db: optional location database used to pretty-print ExprLoc

    Returns:
        the string form of the argument

    Raises:
        Disasm_Exception: if the expression type is not handled
    """

    if isinstance(expr, (ExprId, ExprInt)):
        return str(expr)

    if isinstance(expr, ExprLoc):
        if loc_db is not None:
            return loc_db.pretty_str(expr.loc_key)
        return str(expr)

    if isinstance(expr, ExprMem):
        if isinstance(expr.ptr, (ExprId, ExprInt)):
            # Plain dereference: (Rm) or (imm)
            return "(%s)" % expr.ptr
        if isinstance(expr.ptr, ExprOp):
            # Register plus displacement: disp(Rm)
            return "0x%X(%s)" % (expr.ptr.args[1].arg, expr.ptr.args[0])

    # Raise an exception if the expression type was not processed.
    # The message is built from adjacent literals so that no source
    # indentation leaks into it.
    message = ("instruction_mep.arg2str(): don't know what "
               "to do with a '%s' instance." % type(expr))
    raise Disasm_Exception(message)
+ - most of this code is copied from miasm/core/cpu.py + """ + + o = "%s" % self.name + + if self.name == "SSARB": + # The first operand is displayed in decimal, not in hex + o += " %d" % self.args[0].arg + o += self.arg2str(self.args[1]) + + elif self.name in ["MOV", "ADD"] and isinstance(self.args[1], ExprInt): + # The second operand is displayed in decimal, not in hex + o += " " + self.arg2str(self.args[0]) + o += ", %s" % ExprInt2SignedString(self.args[1].arg) + + elif "CPI" in self.name: + # The second operand ends with the '+' sign + o += " " + self.arg2str(self.args[0]) + deref_reg_str = self.arg2str(self.args[1]) + o += ", %s+)" % deref_reg_str[:-1] # GV: looks ugly + + elif self.name[0] in ["S", "L"] and self.name[-3:] in ["CPA", "PM0", "PM1"]: + # The second operand ends with the '+' sign + o += " " + self.arg2str(self.args[0]) + deref_reg_str = self.arg2str(self.args[1]) + o += ", %s+)" % deref_reg_str[:-1] # GV: looks ugly + # The third operand is displayed in decimal, not in hex + o += ", %s" % ExprInt2SignedString(self.args[2].arg) + + elif len(self.args) == 2 and self.name in ["SB", "SH", "LBU", "LB", "LH", "LW"] and \ + isinstance(self.args[1], ExprMem) and isinstance(self.args[1].ptr, ExprOp): # Major Opcodes #12 + # The second operand is an offset to a register + o += " " + self.arg2str(self.args[0]) + o += ", %s" % ExprInt2SignedString(self.args[1].ptr.args[1], "0x%X") + o += "(%s)" % self.arg2str(self.args[1].ptr.args[0]) + + elif len(self.args) == 2 and self.name in ["SWCP", "LWCP", "SMCP", "LMCP"] \ + and isinstance(self.args[1], ExprMem) and isinstance(self.args[1].ptr, ExprOp): # Major Opcodes #12 + # The second operand is an offset to a register + o += " " + self.arg2str(self.args[0]) + o += ", %s" % ExprInt2SignedString(self.args[1].ptr.args[1]) + o += "(%s)" % self.arg2str(self.args[1].ptr.args[0]) + + elif self.name == "SLL" and isinstance(self.args[1], ExprInt): # Major Opcodes #6 + # The second operand is displayed in hex, not in 
def breakflow(self):
    """Tell whether this instruction stops a basic bloc."""

    branches = ("BRA", "BEQZ", "BNEZ", "BEQI", "BNEI", "BLTI", "BGEI",
                "BEQ", "BNE", "BSR")
    jumps = ("JMP", "JSR", "RET")
    stops = ("RETI", "HALT", "SLEEP")

    return self.name in branches + jumps + stops
def get_dst_num(self):
    """Return the index of the argument holding the instruction destination."""

    mnemonic = self.name

    # Mnemonics ending in 'Z' carry the destination as second argument
    if mnemonic[-1] == "Z":
        return 1

    # Three-operand compare-and-branch: destination is the last argument
    if mnemonic in ("BEQI", "BNEI", "BLTI", "BGEI", "BEQ", "BNE"):
        return 2

    return 0
class mep_additional_info(object):
    """Additional MeP instructions information

    Plain attribute holder; mn_mep.additional_info() returns a fresh
    instance of this class.
    """

    def __init__(self):
        # Defaults to False; nothing in this class ever changes it
        self.except_on_instr = False
@classmethod
def gen_modes(cls, subcls, name, bases, dct, fields):
    """Produce the list of (subcls, name, bases, dct, fields) variants to
    register for a mnemonic.

    MeP has a single variant: dct receives a "mode" entry set to None
    (dct is modified in place) and one tuple is returned.

    Args:
        cls: the class this classmethod is bound to
        subcls: passed through unchanged
        name: mnemonic name
        bases: passed through unchanged
        dct: dictionary updated with "mode"
        fields: passed through unchanged

    Returns:
        a one-element list holding the 5-tuple
    """

    dct.update(mode=None)
    single_variant = (subcls, name, bases, dct, fields)
    return [single_variant]
def value(self, mode):
    """Adjust the assembled instruction based on the endianness

    Note: code inspired by miasm/arch/mips32/arch.py

    Args:
        mode: "l" (little endian) or "b" (big endian)

    Returns:
        the list of candidate encodings; for "l" the bytes of every
        16-bit word are swapped (the list is updated in place)

    Raises:
        NotImplementedError: if mode is neither "l" nor "b"
    """

    # Get the candidates
    candidates = super(mn_mep, self).value(mode)

    if mode == "b":
        return candidates

    if mode != "l":
        raise NotImplementedError("Bad MeP endianness (%s)" % mode)

    # Invert bytes per 16-bits.
    # One-byte slices are used instead of indexing: indexing a Python 3
    # bytes object yields integers, and '+' would then add them instead
    # of concatenating. Slices behave the same on Python 2 str.
    for i, candidate in enumerate(candidates):
        swapped = candidate[1:2] + candidate[0:1]
        if len(candidate) == 4:
            swapped += candidate[3:4] + candidate[2:3]
        candidates[i] = swapped
    return candidates
def deref2expr(s, l, parse_results):
    """Parse action turning a parsed dereference into an AstMem.

    Only the first element of parse_results (the group) is used.
    """

    tokens = parse_results[0]
    head = tokens[0]
    head_type = type(head)

    # offset(register) with an AstInt offset: tokens 1 and 3 are "(" and ")"
    if head_type == AstInt and isinstance(tokens[2], AstId):
        return AstMem(tokens[2] + head, 32)

    # offset(register) with a plain int offset
    if head_type == int and isinstance(tokens[2], AstId):
        base = tokens[2]
        return AstMem(base + AstOp('-', AstInt(-head)), 32)

    # (something): tokens 0 and 2 are "(" and ")"
    return AstMem(tokens[1], 32)
class mep_deref_reg(mep_arg):
    """Generic Toshiba MeP-c4 dereferenced register, i.e. (Rm)

    Note:
        - the arg2str() method could be defined to change the output string
    """
    parser = deref_reg_parser

    def decode(self, v):
        """Build ExprMem(ExprId()) from the decoded register number."""
        self.expr = ExprMem(gpr_infos.expr[v], 32)
        return True

    def encode(self):
        """Store the register number if the expression is
        ExprMem(ExprId()); return False otherwise."""

        target = self.expr
        if isinstance(target, ExprMem) and isinstance(target.ptr, ExprId):
            # The register number is its position in gpr_exprs
            self.value = gpr_exprs.index(target.ptr)
            return True
        return False
class mep_deref_reg_offset(mep_arg):
    """Toshiba MeP-c4 dereferenced general purpose register plus a 16-bit
    sign-extended offset.

    The register number lives in the sibling reg04_deref field, the offset
    in this field.
    """
    parser = offset_deref_reg_parser

    def decode(self, v):
        """Modify the decoded value using the previously decoded
        register id.
        """

        # Apply the immediate mask and sign-extend to 32 bits
        se = sign_ext(v & 0xFFFF, 16, 32)
        int_id = ExprInt(se, 32)

        # Get the register expression
        reg_id = gpr_infos.expr[self.parent.reg04_deref.value]

        # Build the internal expression
        self.expr = ExprMem(reg_id + int_id, 32)

        return True

    def encode(self):
        """Modify the encoded value. One part is stored in this object, and
        the other one in reg04_deref.

        Returns False (instead of raising) when the expression is not a
        'register + integer' dereference.
        """

        # Verify the expression shape: ExprMem(reg + int)
        if not isinstance(self.expr, ExprMem):
            return False
        ptr = self.expr.ptr
        if not isinstance(ptr, ExprOp):
            return False
        if ptr.op != "+" or len(ptr.args) != 2:
            return False

        reg, disp = ptr.args
        if not isinstance(disp, ExprInt):
            return False
        if reg not in gpr_exprs:
            # Not a general purpose register: let another candidate match
            return False

        # Encode the values; the offset is truncated to 16 bits
        self.parent.reg04_deref.value = gpr_exprs.index(reg)
        self.value = int(disp.arg) & 0xFFFF
        return True
def encode(self):
    """Modify the encoded value. One part is stored in this object, and
    the other one in a parent immediate.

    The displacement is written to the first sibling field found among
    imm7_align4, imm7 and disp7_align2, after dropping the alignment bits
    that the field does not store.

    Returns:
        True when the displacement was stored, False when the expression
        is not 'implicit_reg + displacement', when the displacement needs
        more than 7 bits, or when no sibling field exists.
    """

    # Verify the expression
    if not isinstance(self.expr, ExprMem):
        return False
    if not isinstance(self.expr.ptr, ExprOp):
        return False
    if self.expr.ptr.args[0] != self.implicit_reg:
        return False

    # (sibling field name, number of implied low bits)
    layouts = (("imm7_align4", 2), ("imm7", 0), ("disp7_align2", 1))

    for field_name, shift in layouts:
        field = getattr(self.parent, field_name, False)
        if not field:
            continue

        # Get the integer and check the upper bound: a 7-bit displacement
        # cannot exceed 0x7F (0x80 would overflow the stored field)
        v = int(self.expr.ptr.args[1].arg)
        if v > 0x7F:
            return False

        # Encode the value
        field.value = v >> shift
        return True

    return False
class mep_copro_reg_split(mep_copro_reg):
    """Generic Toshiba MeP-c4 coprocessor register encoded into two fields:
    the high bits in this field, the low four bits in the sibling imm4.
    """

    def decode(self, v):
        """Modify the decoded value using the previously decoded imm4_noarg.
        """

        # Apply the immediate mask
        v = v & self.lmask

        # Shift values such as:
        #   CRn=NNnnnn
        crn = (v << 4) + (self.parent.imm4.value & 0xF)

        # Build the internal expression
        self.expr = ExprId("C%d" % crn, 32)
        return True

    def encode(self):
        """Modify the encoded value. One part is stored in this object, and
        the other one in imm4_noarg.

        Returns False when the expression is not a coprocessor register or
        when its number does not fit in this field plus imm4.
        """

        if not isinstance(self.expr, ExprId):
            return False

        # Get the register and check the upper bound
        reg_name = self.expr.name
        if not reg_name.startswith("C"):
            return False
        if reg_name not in copro_gpr_names:
            # Other registers may start with "C" too: refuse them instead
            # of letting index() raise ValueError
            return False
        reg_value = copro_gpr_names.index(reg_name)
        if reg_value > 0x3f:
            return False

        # The high part must fit in this field's own width (self.lmask),
        # which is narrower than 2 bits for the 5-bit register variant
        high = reg_value >> 4
        if high > self.lmask:
            return False

        # Encode the value into two parts
        self.parent.imm4.value = reg_value & 0xF
        self.value = high
        return True
class mep_imm6(mep_int32_noarg):
    """Toshiba MeP-c4 signed 6 bits immediate."""
    parser = base_expr
    intsize = 6
    intmask = (1 << intsize) - 1

    def int2expr(self, x):
        """Sign-extend x from the field width (self.l) to a 32-bit ExprInt."""
        extended = sign_ext(x, self.l, 32)
        return ExprInt(extended, 32)
class mep_code20(mep_imm):
    """Toshiba MeP-c4 code20 immediate, as used in DSP1

    The 20-bit value is split as mmmm||cccc_cccc_cccc_cccc: the upper four
    bits live in the sibling imm4 field, the lower sixteen in this field.
    """

    def decode(self, v):
        """Modify the decoded value using the previously decoded imm4_noarg.
        """

        # Apply the immediate mask
        v = v & self.lmask

        # Shift values such as:
        #   code20=mmmm_cccc_cccc_cccc_cccc
        code20 = v + ((self.parent.imm4.value & 0xF) << 16)

        # Build the internal expression
        self.expr = ExprInt(code20, 32)
        return True

    def encode(self):
        """Modify the encoded value. One part is stored in this object, and
        the other one in imm4_noarg.

        Returns False when the expression is not an integer or does not
        fit in 20 bits.
        """

        if not isinstance(self.expr, ExprInt):
            return False

        # Get the integer and check the upper bound (20 bits)
        v = int(self.expr.arg)
        if v > 0xFFFFF:
            return False

        # Encode the value into two parts. The sibling field's .value is
        # set; assigning to self.parent.imm4 itself would replace the field
        # object and leave its encoded value untouched.
        self.parent.imm4.value = (v >> 16) & 0xF
        self.value = v & 0xFFFF
        return True
class mep_imm7_align4(mep_imm):
    """Toshiba MeP-c4 imm7.align4 immediate, as used in Major #4 opcodes

    The two low bits are implied zero and are not stored in the field.
    """

    def decode(self, v):
        """Modify the decoded value.
        """

        # Apply the immediate mask
        v = v & self.lmask

        # Shift value such as:
        #   imm7=iii_ii||00
        imm7_align4 = v << 2

        # Build the internal expression
        self.expr = ExprInt(imm7_align4, 32)
        return True

    def encode(self):
        """Modify the encoded value.

        Returns False when the expression is not an integer or needs more
        than 7 bits.
        """

        if not isinstance(self.expr, ExprInt):
            return False

        # Get the integer and check the upper bound: 0x80 >> 2 == 0x20
        # would already need one bit more than the stored iii_ii pattern
        v = int(self.expr.arg)
        if v > 0x7F:
            return False

        # Encode the value
        self.value = v >> 2
        return True
class mep_disp7_align2(mep_imm):
    """Toshiba MeP-c4 disp7.align2 immediate, as used in Major #8 opcodes

    Subclasses only override upper_bound (mask applied to the value before
    encoding) and bits_shift (number of implied low zero bits).
    """
    upper_bound = 0x7F
    bits_shift = 1

    def decode(self, v):
        """Modify the decoded value.
        """

        # Apply the immediate mask
        v = v & self.lmask

        # Shift value such as:
        #   disp7 = ddd_ddd||0
        disp7_align2 = (v << self.bits_shift)

        # Sign extension
        disp7_align2 = sign_ext(disp7_align2, self.l + self.bits_shift, 32)

        # Build the internal expression
        self.expr = ExprInt(disp7_align2, 32)
        return True

    def encode(self):
        """Modify the encoded value.

        Returns False when the expression is not an integer.
        """

        if not isinstance(self.expr, ExprInt):
            return False

        # Get the integer, truncated to the displacement width
        v = int(self.expr.arg) & self.upper_bound

        # Encode the value: drop the implied low bits (v is already masked,
        # so a single assignment is enough)
        self.value = v >> self.bits_shift
        return True
class mep_abs24(mep_imm):
    """Toshiba MeP-c4 abs24 immediate: a dereferenced absolute address whose
    bits are split between this field and the sibling imm6 field.
    """
    parser = abs24_deref_parser

    def decode(self, v):
        """Rebuild the address from this field and the decoded imm6."""

        # abs24=dddd_dddd_dddd_dddd||DDDD_DD||00
        high_part = (v & self.lmask) << 8
        low_part = (self.parent.imm6.value & 0x3F) << 2

        self.expr = ExprMem(ExprInt(high_part + low_part, 32), 32)
        return True

    def encode(self):
        """Split the address between this field and imm6.

        Returns False unless the expression is ExprMem(ExprInt) with an
        address of at most 0xffffff.
        """

        if not isinstance(self.expr, ExprMem):
            return False
        if not isinstance(self.expr.ptr, ExprInt):
            return False

        address = int(self.expr.ptr.arg)
        if address > 0xffffff:
            return False

        # Low byte (minus its two implied zero bits) goes to imm6,
        # the remaining upper bits stay here
        self.parent.imm6.value = (address & 0xFF) >> 2
        self.value = address >> 8
        return True
By default, the bs class fills + # fname with an hex string compute from + # arguments passed to __init__ + +imm7_align4 = bs(l=5, cls=(mep_imm7_align4,)) + +imm7_align4_noarg = bs(l=5, fname="imm7_align4") + +disp7_align2 = bs(l=6, cls=(mep_disp7_align2,)) + +disp7_align2_noarg = bs(l=6, fname="disp7_align2") + +imm8 = bs(l=8, cls=(mep_imm8, mep_arg)) + +imm8_noarg = bs(l=8, fname="imm8_CCCC_CCCC") + +disp8 = bs(l=7, cls=(mep_disp8_align2, )) + +imm8_align2 = bs(l=7, cls=(mep_disp8_align2, )) + +imm8_align4 = bs(l=6, cls=(mep_disp8_align4, )) + +imm8_align8 = bs(l=5, cls=(mep_imm8_align8, )) + +imm12 = bs(l=12, cls=(mep_imm, )) + +disp12_signed = bs(l=11, cls=(mep_disp12_align2_signed, )) + +imm16 = bs(l=16, cls=(mep_imm16, mep_arg)) +imm16_signed = bs(l=16, cls=(mep_imm16_signed, mep_arg)) + +disp16_reg_deref = bs(l=16, cls=(mep_deref_reg_offset,)) + +disp17 = bs(l=16, cls=(mep_disp17, )) + +imm18 = bs(l=19, cls=(mep_imm, )) + +imm_code20 = bs(l=16, cls=(mep_code20, )) + +imm24 = bs(l=24, cls=(mep_imm24, )) + +imm_target24 = bs(l=16, cls=(mep_target24, )) +imm_target24_signed = bs(l=16, cls=(mep_target24_signed, )) + +imm_code24 = bs(l=16, cls=(mep_code24, )) + +abs24 = bs(l=16, cls=(mep_abs24, )) + + +# MeP-c4 mnemonics objects + +### + +# MOV Rn,Rm - 0000_nnnn_mmmm_0000 +addop("MOV", [bs("0000"), reg04, reg04, bs("0000")]) + +# NEG Rn,Rm - 0000_nnnn_mmmm_0001 +addop("NEG", [bs("0000"), reg04, reg04, bs("0001")]) + +# SLT3 R0,Rn,Rm - 0000_nnnn_mmmm_0010 +addop("SLT3", [bs("0000"), reg00, reg04, reg04, bs("0010")]) + +# SLTU3 R0,Rn,Rm - 0000_nnnn_mmmm_0011 +addop("SLTU3", [bs("0000"), reg00, reg04, reg04, bs("0011")]) + +# SUB Rn,Rm - 0000_nnnn_mmmm_0100 +addop("SUB", [bs("0000"), reg04, reg04, bs("0100")]) + +# SBVCK3 R0,Rn,Rm - 0000_nnnn_mmmm_0101 +addop("SBVCK3", [bs("0000"), reg00, reg04, reg04, bs("0101")]) + +# (RI) - 0000_xxxx_xxxx_0110 +addop("(RI)", [bs("0000"), reg04, reg04, bs("0110")]) + +# ADVCK3 R0,Rn,Rm - 0000_nnnn_mmmm_0111 +addop("ADVCK3", 
[bs("0000"), reg00, reg04, reg04, bs("0111")]) + +# SB Rn,(Rm) - 0000_nnnn_mmmm_1000 +addop("SB", [bs("0000"), reg04, reg04_deref, bs("1000")]) + +# SH Rn,(Rm) - 0000_nnnn_mmmm_1001 +addop("SH", [bs("0000"), reg04, reg04_deref, bs("1001")]) + +# SW Rn,(Rm) - 0000_nnnn_mmmm_1010 +addop("SW", [bs("0000"), reg04, reg04_deref, bs("1010")]) + +# LBU Rn,(Rm) - 0000_nnnn_mmmm_1011 +addop("LBU", [bs("0000"), reg04, reg04_deref, bs("1011")]) + +# LB Rn,(Rm) - 0000_nnnn_mmmm_1100 +addop("LB", [bs("0000"), reg04, reg04_deref, bs("1100")]) + +# LH Rn,(Rm) - 0000_nnnn_mmmm_1101 +addop("LH", [bs("0000"), reg04, reg04_deref, bs("1101")]) + +# LW Rn,(Rm) - 0000_nnnn_mmmm_1110 +addop("LW", [bs("0000"), reg04, reg04_deref, bs("1110")]) + +# LHU Rn,(Rm) - 0000_nnnn_mmmm_1111 +addop("LHU", [bs("0000"), reg04, reg04_deref, bs("1111")]) + + +### + +# OR Rn,Rm - 0001_nnnn_mmmm_0000 +addop("OR", [bs("0001"), reg04, reg04, bs("0000")]) + +# AND Rn,Rm - 0001_nnnn_mmmm_0001 +addop("AND", [bs("0001"), reg04, reg04, bs("0001")]) + +# XOR Rn,Rm - 0001_nnnn_mmmm_0010 +addop("XOR", [bs("0001"), reg04, reg04, bs("0010")]) + +# NOR Rn,Rm - 0001_nnnn_mmmm_0011 +addop("NOR", [bs("0001"), reg04, reg04, bs("0011")]) + +# MUL Rn,Rm - 0001_nnnn_mmmm_0100 +addop("MUL", [bs("0001"), reg04, reg04, bs("0100")]) + +# MULU Rn,Rm - 0001_nnnn_mmmm_0101 +addop("MULU", [bs("0001"), reg04, reg04, bs("0101")]) + +# MULR Rn,Rm - 0001_nnnn_mmmm_0110 +addop("MULR", [bs("0001"), reg04, reg04, bs("0110")]) + +# MULRU Rn,Rm - 0001_nnnn_mmmm_0111 +addop("MULRU", [bs("0001"), reg04, reg04, bs("0111")]) + +# DIV Rn,Rm - 0001_nnnn_mmmm_1000 +addop("DIV", [bs("0001"), reg04, reg04, bs("1000")]) + +# DIVU Rn,Rm - 0001_nnnn_mmmm_1001 +addop("DIVU", [bs("0001"), reg04, reg04, bs("1001")]) + +# (RI) - 0001_xxxx_xxxx_1010 +addop("(RI)", [bs("0001"), reg04, reg04, bs("1010")]) + +# (RI) - 0001_xxxx_xxxx_1011 +addop("(RI)", [bs("0001"), reg04, reg04, bs("1011")]) + +# SSARB disp2(Rm) - 0001_00dd_mmmm_1100 +addop("SSARB", 
[bs("000100"), disp2, reg04_deref, bs("1100")]) + +# EXTB Rn - 0001_nnnn_0000_1101 +addop("EXTB", [bs("0001"), reg04, bs("00001101")]) + +# EXTH Rn - 0001_nnnn_0010_1101 +addop("EXTH", [bs("0001"), reg04, bs("00101101")]) + +# EXTUB Rn - 0001_nnnn_1000_1101 +addop("EXTUB", [bs("0001"), reg04, bs("10001101")]) + +# EXTUH Rn - 0001_nnnn_1010_1101 +addop("EXTUH", [bs("0001"), reg04, bs("10101101")]) + +# JMP Rm - 0001_0000_mmmm_1110 +addop("JMP", [bs("00010000"), reg04, bs("1110")]) + +# JSR Rm - 0001_0000_mmmm_1111 +addop("JSR", [bs("00010000"), reg04, bs("1111")]) + +# JSRV Rm - 0001_1000_mmmm_1111 +addop("JSRV", [bs("00011000"), reg04, bs("1111")]) + + +### + +# BSETM (Rm),imm3 - 0010_0iii_mmmm_0000 +addop("BSETM", [bs("00100"), imm3, reg04_deref, bs("0000")], [reg04_deref, imm3]) + +# BCLRM (Rn),imm3 - 0010_0iii_mmmm_0001 +addop("BCLRM", [bs("00100"), imm3, reg04_deref, bs("0001")], [reg04_deref, imm3]) + +# BNOTM (Rm),imm3 - 0010_0iii_mmmm_0010 +addop("BNOTM", [bs("00100"), imm3, reg04_deref, bs("0010")], [reg04_deref, imm3]) + +# BTSTM R0,(Rm),imm3 - 0010_0iii_mmmm_0011 +addop("BTSTM", [bs("00100"), reg00, imm3, reg04_deref, bs("0011")], [reg00, reg04_deref, imm3]) + +# TAS Rn,(Rm) - 0010_nnnn_mmmm_0100 +addop("TAS", [bs("0010"), reg04, reg04_deref, bs("0100")]) + +# (RI) - 0010_xxxx_xxxx_0101 +addop("(RI)", [bs("0010"), reg04, reg04, bs("0101")]) + +# SL1AD3 R0,Rn,Rm - 0010_nnnn_mmmm_0110 +addop("SL1AD3", [bs("0010"), reg00, reg04, reg04, bs("0110")]) + +# SL2AD3 R0,Rn,Rm - 0010_nnnn_mmmm_0111 +addop("SL2AD3", [bs("0010"), reg00, reg04, reg04, bs("0111")]) + +# (RI) - 0010_xxxx_xxxx_1000 +addop("(RI)", [bs("0010"), reg04, reg04, bs("1000")]) + +# (RI) - 0010_xxxx_xxxx_1001 +addop("(RI)", [bs("0010"), reg04, reg04, bs("1001")]) + +# (RI) - 0010_xxxx_xxxx_1010 +addop("(RI)", [bs("0010"), reg04, reg04, bs("1010")]) + +# (RI) - 0010_xxxx_xxxx_1011 +addop("(RI)", [bs("0010"), reg04, reg04, bs("1011")]) + +# SRL Rn,Rm - 0010_nnnn_mmmm_1100 +addop("SRL", [bs("0010"), 
reg04, reg04, bs("1100")]) + +# SRA Rn,Rm - 0010_nnnn_mmmm_1101 +addop("SRA", [bs("0010"), reg04, reg04, bs("1101")]) + +# SLL Rn,Rm - 0010_nnnn_mmmm_1110 +addop("SLL", [bs("0010"), reg04, reg04, bs("1110")]) + +# FSFT Rn,Rm - 0010_nnnn_mmmm_1111 +addop("FSFT", [bs("0010"), reg04, reg04, bs("1111")]) + + +### + +# SWCPI CRn,(Rm+) - 0011_nnnn_mmmm_0000 +addop("SWCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0000")]) + +# LWCPI CRn,(Rm+) - 0011_nnnn_mmmm_0001 +addop("LWCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0001")]) + +# SMCPI CRn,(Rm+) - 0011_nnnn_mmmm_0010 +addop("SMCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0010")]) + +# LMCPI CRn,(Rm+) - 0011_nnnn_mmmm_0011 +addop("LMCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0011")]) + +# SWCP CRn,(Rm) - 0011_nnnn_mmmm_1000 +addop("SWCP", [bs("0011"), copro_reg04, reg04_deref, bs("1000")]) + +# LWCP CRn,(Rm) - 0011_nnnn_mmmm_1001 +addop("LWCP", [bs("0011"), copro_reg04, reg04_deref, bs("1001")]) + +# SMCP CRn,(Rm) - 0011_nnnn_mmmm_1010 +addop("SMCP", [bs("0011"), copro_reg04, reg04_deref, bs("1010")]) + +# LMCP CRn,(Rm) - 0011_nnnn_mmmm_1011 +addop("LMCP", [bs("0011"), copro_reg04, reg04_deref, bs("1011")]) + + +### + +# ADD3 Rn,SP,imm7.align4 - 0100_nnnn_0iii_ii00 +addop("ADD3", [bs("0100"), reg04, reg00_sp, bs("0"), imm7_align4, bs("00")]) + +# SW Rn,disp7.align4(SP) - 0100_nnnn_0ddd_dd10 +# Note: disp7.align4 is the same as imm7.align4 +addop("SW", [bs("0100"), reg04, bs("0"), imm7_align4_noarg, reg00_deref_sp, bs("10")]) + +# LW Rn,disp7.align4(SP) - 0100_nnnn_0ddd_dd11 +addop("LW", [bs("0100"), reg04, bs("0"), imm7_align4_noarg, reg00_deref_sp, bs("11")]) + +# SW Rn[0-7],disp7.align4(TP) - 0100_0nnn_1ddd_dd10 +addop("SW", [bs("01000"), reg03, bs("1"), imm7_align4_noarg, reg00_deref_tp, bs("10")]) + +# LW Rn[0-7],disp7.align4(TP) - 0100_0nnn_1ddd_dd11 +addop("LW", [bs("01000"), reg03, bs("1"), imm7_align4_noarg, reg00_deref_tp, bs("11")]) + +# LBU Rn[0-7],disp7(TP) - 
0100_1nnn_1ddd_dddd +addop("LBU", [bs("01001"), reg03, bs("1"), imm7_noarg, reg00_deref_tp], [reg03, reg00_deref_tp]) + +### + +# MOV Rn,imm8 - 0101_nnnn_iiii_iiii +addop("MOV", [bs("0101"), reg04, imm8]) + + +### + +# ADD Rn,imm6 - 0110_nnnn_iiii_ii00 +addop("ADD", # mnemonic name + [bs("0110"), reg04, imm6, bs("00")]) # mnemonic description + +# SLT3 R0,Rn,imm5 - 0110_nnnn_iiii_i001 +addop("SLT3", [bs("0110"), reg00, reg04, imm5, bs("001")]) + +# SRL Rn,imm5 - 0110_nnnn_iiii_i010 +addop("SRL", [bs("0110"), reg04, imm5, bs("010")]) + +# SRA Rn,imm5 - 0110_nnnn_iiii_i011 +addop("SRA", [bs("0110"), reg04, imm5, bs("011")]) + +# SLTU3 R0,Rn,imm5 - 0110_nnnn_iiii_i101 +addop("SLTU3", [bs("0110"), reg00, reg04, imm5, bs("101")]) + +# SLL Rn,imm5 - 0110_nnnn_iiii_i110 +addop("SLL", [bs("0110"), reg04, imm5, bs("110")]) + +# SLL3 R0,Rn,imm5 - 0110_nnnn_iiii_i111 +addop("SLL3", [bs("0110"), reg00, reg04, imm5, bs("111")]) + + +### + +# DI - 0111_0000_0000_0000 +addop("DI", [bs("0111000000000000")]) + +# EI - 0111_0000_0001_0000 +addop("EI", [bs("0111000000010000")]) + +# SYNCM - 0111_0000_0001_0001 +addop("SYNCM", [bs("0111000000010001")]) + +# SYNCCP - 0111_0000_0010_0001 +addop("SYNCCP", [bs("0111000000100001")]) + +# RET - 0111_0000_0000_0010 +addop("RET", [bs("0111000000000010")]) + +# RETI - 0111_0000_0001_0010 +addop("RETI", [bs("0111000000010010")]) + +# HALT - 0111_0000_0010_0010 +addop("HALT", [bs("0111000000100010")]) + +# BREAK - 0111_0000_0011_0010 +addop("BREAK", [bs("0111000000110010")]) + +# SLEEP - 0111_0000_0110_0010 +addop("SLEEP", [bs("0111000001100010")]) + +# DRET - 0111_0000_0001_0011 +addop("DRET", [bs("0111000000010011")]) + +# DBREAK - 0111_0000_0011_0011 +addop("DBREAK", [bs("0111000000110011")]) + +# CACHE imm4,(Rm) - 0111_iiii_mmmm_0100 +addop("CACHE", [bs("0111"), imm4, reg04_deref, bs("0100")]) + +# (RI) - 0111_xxxx_xxxx_0101 +addop("(RI)", [bs("0111"), reg04, reg04, bs("0101")]) + +# SWI imm2 - 0111_0000_00ii_0110 +addop("SWI", 
[bs("0111000000"), imm2, bs("0110")]) + +# (RI) - 0111_xxxx_xxxx_0111 +addop("(RI)", [bs("0111"), reg04, reg04, bs("0111")]) + +# STC Rn,imm5 - 0111_nnnn_iiii_100I +addop("STC", [bs("0111"), reg04, imm4_iiii_noarg, bs("100"), imm5_Iiiii]) + +# LDC Rn,imm5 - 0111_nnnn_iiii_101I +addop("LDC", [bs("0111"), reg04, imm4_iiii_noarg, bs("101"), imm5_Iiiii]) + +# (RI) - 0111_xxxx_xxxx_1100 +addop("(RI)", [bs("0111"), reg04, reg04, bs("1100")]) + +# (RI) - 0111_xxxx_xxxx_1101 +addop("(RI)", [bs("0111"), reg04, reg04, bs("1101")]) + +# (RI) - 0111_xxxx_xxxx_1110 +addop("(RI)", [bs("0111"), reg04, reg04, bs("1110")]) + +# (RI) - 0111_xxxx_xxxx_1111 +addop("(RI)", [bs("0111"), reg04, reg04, bs("1111")]) + + +### + +# SB Rn[0-7],disp7(TP) - 1000_0nnn_0ddd_dddd +addop("SB", [bs("10000"), reg03, bs("0"), imm7_noarg, reg00_deref_tp]) + +# SH Rn[0-7],disp7.align2(TP) - 1000_0nnn_1ddd_ddd0 +# (disp7.align2 = ddd_ddd||0) +addop("SH", [bs("10000"), reg03, bs("1"), disp7_align2_noarg, bs("0"), reg00_deref_tp]) + +# LB Rn[0-7],disp7(TP) - 1000_1nnn_0ddd_dddd +addop("LB", [bs("10001"), reg03, bs("0"), imm7_noarg, reg00_deref_tp]) + +# LH Rn[0-7],disp7.align2(TP) - 1000_1nnn_1ddd_ddd0 +addop("LH", [bs("10001"), reg03, bs("1"), disp7_align2_noarg, bs("0"), reg00_deref_tp]) + +# LHU Rn[0-7],disp7.align2(TP) - 1000_1nnn_1ddd_ddd1 +addop("LHU", [bs("10001"), reg03, bs("1"), disp7_align2_noarg, bs("1"), reg00_deref_tp]) + + +### + +# ADD3 Rl,Rn,Rm - 1001_nnnn_mmmm_llll +addop("ADD3", [bs("1001"), reg04_n, reg04_m, reg04_l], [reg04_l, reg04_n, reg04_m]) + + +### + +# BEQZ Rn,disp8.align2 - 1010_nnnn_dddd_ddd0 +# (disp8=dddd_ddd||0) +addop("BEQZ", [bs("1010"), reg04, disp8, bs("0")]) + +# BNEZ Rn,disp8.align2 - 1010_nnnn_dddd_ddd1 +addop("BNEZ", [bs("1010"), reg04, disp8, bs("1")]) + + +### + +# BRA disp12.align2 - 1011_dddd_dddd_ddd0 +# (disp12=dddd_dddd_ddd||0) +addop("BRA", [bs("1011"), disp12_signed, bs("0")]) + +# BSR disp12.align2 - 1011_dddd_dddd_ddd1 +addop("BSR", [bs("1011"), 
disp12_signed, bs("1")]) + + +### + +# ADD3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0000 iiii_iiii_iiii_iiii +addop("ADD3", [bs("1100"), reg04, reg04, bs("0000"), imm16_signed]) + +# MOV Rn,imm16 - 1100_nnnn_0000_0001 iiii_iiii_iiii_iiii +addop("MOV", [bs("1100"), reg04, bs("00000001"), imm16]) + +# MOVU Rn,imm16 - 1100_nnnn_0001_0001 iiii_iiii_iiii_iiii +addop("MOVU", [bs("1100"), reg04, bs("00010001"), imm16]) + +# MOVH Rn,imm16 - 1100_nnnn_0010_0001 iiii_iiii_iiii_iiii +addop("MOVH", [bs("1100"), reg04, bs("00100001"), imm16]) + +# SLT3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0010 iiii_iiii_iiii_iiii +addop("SLT3", [bs("1100"), reg04, reg04, bs("0010"), imm16_signed]) + +# SLTU3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0011 iiii_iiii_iiii_iiii +addop("SLTU3", [bs("1100"), reg04, reg04, bs("0011"), imm16]) + +# OR3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0100 iiii_iiii_iiii_iiii +addop("OR3", [bs("1100"), reg04, reg04, bs("0100"), imm16]) + +# AND3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0101 iiii_iiii_iiii_iiii +addop("AND3", [bs("1100"), reg04, reg04, bs("0101"), imm16]) + +# XOR3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0110 iiii_iiii_iiii_iiii +addop("XOR3", [bs("1100"), reg04, reg04, bs("0110"), imm16]) + +# (RI) - 1100_xxxx_xxxx_0111 xxxx_xxxx_xxxx_xxxx +addop("(RI)", [bs("1100"), imm8, bs("0111"), imm16]) + +# SB Rn,disp16(Rm) - 1100_nnnn_mmmm_1000 dddd_dddd_dddd_dddd +addop("SB", [bs("1100"), reg04, reg04_deref_noarg, bs("1000"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# SH Rn,disp16(Rm) - 1100_nnnn_mmmm_1001 dddd_dddd_dddd_dddd +addop("SH", [bs("1100"), reg04, reg04_deref_noarg, bs("1001"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# SW Rn,disp16(Rm) - 1100_nnnn_mmmm_1010 dddd_dddd_dddd_dddd +addop("SW", [bs("1100"), reg04, reg04_deref_noarg, bs("1010"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# LBU Rn,disp16(Rm) - 1100_nnnn_mmmm_1011 dddd_dddd_dddd_dddd +addop("LBU", [bs("1100"), reg04, reg04_deref_noarg, bs("1011"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# LB Rn,disp16(Rm) - 
1100_nnnn_mmmm_1100 dddd_dddd_dddd_dddd +addop("LB", [bs("1100"), reg04, reg04_deref_noarg, bs("1100"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# LH Rn,disp16(Rm) - 1100_nnnn_mmmm_1101 dddd_dddd_dddd_dddd +addop("LH", [bs("1100"), reg04, reg04_deref_noarg, bs("1101"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# LW Rn,disp16(Rm) - 1100_nnnn_mmmm_1110 dddd_dddd_dddd_dddd +addop("LW", [bs("1100"), reg04, reg04_deref_noarg, bs("1110"), disp16_reg_deref], [reg04, disp16_reg_deref]) + +# LHU Rn,disp16(Rm) - 1100_nnnn_mmmm_1111 dddd_dddd_dddd_dddd +addop("LHU", [bs("1100"), reg04, reg04_deref_noarg, bs("1111"), disp16_reg_deref], [reg04, disp16_reg_deref]) + + +### + +# MOVU Rn[0-7],imm24 - 1101_0nnn_IIII_IIII iiii_iiii_iiii_iiii +addop("MOVU", [bs("11010"), reg03, imm24]) + +# BCPEQ cccc,disp17 - 1101_1000_cccc_0100 dddd_dddd_dddd_dddd +addop("BCPEQ", [bs("11011000"), imm4, bs("0100"), disp17]) + +# BCPNE cccc,disp17 - 1101_1000_cccc_0101 dddd_dddd_dddd_dddd +addop("BCPNE", [bs("11011000"), imm4, bs("0101"), disp17]) + +# BCPAT cccc,disp17 - 1101_1000_cccc_0110 dddd_dddd_dddd_dddd +addop("BCPAT", [bs("11011000"), imm4, bs("0110"), disp17]) + +# BCPAF cccc,disp17 - 1101_1000_cccc_0111 dddd_dddd_dddd_dddd +addop("BCPAF", [bs("11011000"), imm4, bs("0111"), disp17]) + +# JMP target24 - 1101_1TTT_TTTT_1000 tttt_tttt_tttt_tttt +addop("JMP", [bs("11011"), imm7_noarg, bs("1000"), imm_target24], + [imm_target24]) # the only interesting operand is imm_target24 + +# BSR disp24 - 1101_1DDD_DDDD_1001 dddd_dddd_dddd_dddd +addop("BSR", [bs("11011"), imm7_noarg, bs("1001"), imm_target24_signed], [imm_target24_signed]) + +# BSRV disp24 1101_1DDD_DDDD_1011 dddd_dddd_dddd_dddd +addop("BSRV", [bs("11011"), imm7_noarg, bs("1011"), imm_target24], [imm_target24]) + + +### + +# BEQI Rn,imm4,disp17 - 1110_nnnn_iiii_0000 dddd_dddd_dddd_dddd +addop("BEQI", [bs("1110"), reg04, imm4, bs("0000"), disp17]) + +# BEQ Rn,Rm,disp17 - 1110_nnnn_mmmm_0001 dddd_dddd_dddd_dddd +addop("BEQ", 
[bs("1110"), reg04, reg04, bs("0001"), disp17]) + +# BNEI Rn,imm4,disp17 - 1110_nnnn_iiii_0100 dddd_dddd_dddd_dddd +addop("BNEI", [bs("1110"), reg04, imm4, bs("0100"), disp17]) + +# BNE Rn,Rm,disp17 - 1110_nnnn_mmmm_0101 dddd_dddd_dddd_dddd +addop("BNE", [bs("1110"), reg04, reg04, bs("0101"), disp17]) + +# BGEI Rn,imm4,disp17 - 1110_nnnn_iiii_1000 dddd_dddd_dddd_dddd +addop("BGEI", [bs("1110"), reg04, imm4, bs("1000"), disp17]) + +# REPEAT Rn,disp17 - 1110_nnnn_0000_1001 dddd_dddd_dddd_dddd +addop("REPEAT", [bs("1110"), reg04, bs("00001001"), disp17]) + +# EREPEAT disp17 - 1110_0000_0001_1001 dddd_dddd_dddd_dddd +addop("EREPEAT", [bs("1110000000011001"), disp17]) + +# BLTI Rn,imm4,disp17 - 1110_nnnn_iiii_1100 dddd_dddd_dddd_dddd +addop("BLTI", [bs("1110"), reg04, imm4, bs("1100"), disp17]) + +# (RI) - 1110_xxxx_xxxx_1101 xxxx_xxxx_xxxx_xxxx +addop("(RI)", [bs("1110"), imm8, bs("1101"), imm16]) + +# SW Rn,(abs24) - 1110_nnnn_DDDD_DD10 dddd_dddd_dddd_dddd +addop("SW", [bs("1110"), reg04, imm6_noarg, bs("10"), abs24]) + +# LW Rn,(abs24) - 1110_nnnn_DDDD_DD11 dddd_dddd_dddd_dddd +addop("LW", [bs("1110"), reg04, imm6_noarg, bs("11"), abs24]) + + +### + +# DSP Rn,Rm,code16 - 1111_nnnn_mmmm_0000 cccc_cccc_cccc_cccc +addop("DSP", [bs("1111"), reg04, reg04, bs("0000"), imm16]) + +# Note: DSP, DSP0 & DSP1 look exactly the same. This is ambiguous, and prevent +# them for being correctly disassembled. DSP0 & DSP1 are arbitrarily +# disabled. 
+ +# DSP0 code24 - 1111_nnnn_mmmm_0000 cccc_cccc_cccc_cccc +#addop("DSP0", [bs("1111"), imm8_noarg, bs("0000"), imm_code24], [imm_code24]) + +# DSP1 Rn,code20 - 1111_nnnn_mmmm_0000 cccc_cccc_cccc_cccc +#addop("DSP1", [bs("1111"), reg04, imm4_noarg, bs("0000"), imm_code20]) + +# LDZ Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0000 +addop("LDZ", [bs("1111"), reg04, reg04, bs("00010000000000000000")]) + +# AVE Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0010 +addop("AVE", [bs("1111"), reg04, reg04, bs("00010000000000000010")]) + +# ABS Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0011 +addop("ABS", [bs("1111"), reg04, reg04, bs("00010000000000000011")]) + +# MIN Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0100 +addop("MIN", [bs("1111"), reg04, reg04, bs("00010000000000000100")]) + +# MAX Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0101 +addop("MAX", [bs("1111"), reg04, reg04, bs("00010000000000000101")]) + +# MINU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0110 +addop("MINU", [bs("1111"), reg04, reg04, bs("00010000000000000110")]) + +# MAXU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0111 +addop("MAXU", [bs("1111"), reg04, reg04, bs("00010000000000000111")]) + +# SADD Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1000 +addop("SADD", [bs("1111"), reg04, reg04, bs("00010000000000001000")]) + +# SADDU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1001 +addop("SADDU", [bs("1111"), reg04, reg04, bs("00010000000000001001")]) + +# SSUB Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1010 +addop("SSUB", [bs("1111"), reg04, reg04, bs("00010000000000001010")]) + +# SSUBU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1011 +addop("SSUBU", [bs("1111"), reg04, reg04, bs("00010000000000001011")]) + +# CLIP Rn,imm5 - 1111_nnnn_0000_0001 0001_0000_iiii_i000 +addop("CLIP", [bs("1111"), reg04, bs("0000000100010000"), imm5, bs("000")]) + +# CLIPU Rn,imm5 - 1111_nnnn_0000_0001 0001_0000_iiii_i001 +addop("CLIPU", [bs("1111"), reg04, bs("0000000100010000"), imm5, bs("001")]) + +# (RI) - 1111_xxxx_xxxx_0001 
0010_xxxx_xxxx_xxxx +addop("(RI)", [bs("1111"), imm8, bs("00010010"), imm12]) + +# MADD Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0100 +addop("MADD", [bs("1111"), reg04, reg04, bs("00010011000000000100")]) + +# MADDU Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0101 +addop("MADDU", [bs("1111"), reg04, reg04, bs("00010011000000000101")]) + +# MADDR Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0110 +addop("MADDR", [bs("1111"), reg04, reg04, bs("00010011000000000110")]) + +# MADDRU Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0111 +addop("MADDRU", [bs("1111"), reg04, reg04, bs("00010011000000000111")]) + +# UCI Rn,Rm,code16 - 1111_nnnn_mmmm_0010 cccc_cccc_cccc_cccc +addop("UCI", [bs("1111"), reg04, reg04, bs("0010"), imm16]) + +# (RI) - 1111_xxxx_xxxx_0011 xxxx_xxxx_xxxx_xxxx +addop("(RI)", [bs("1111"), imm8, bs("0011"), imm16]) + +# STCB Rn,abs16 - 1111_nnnn_0000_0100 aaaa_aaaa_aaaa_aaaa +addop("STCB", [bs("1111"), reg04, bs("00000100"), imm16]) + +# LDCB Rn,abs16 - 1111_nnnn_0001_0100 aaaa_aaaa_aaaa_aaaa +addop("LDCB", [bs("1111"), reg04, bs("00010100"), imm16]) + +# SBCPA CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0000_0000_iiii_iiii +addop("SBCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100000000"), imm8]) + +# SHCPA CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0001_0000_iiii_iii0 +addop("SHCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100010000"), imm8_align2, bs("0")]) + +# SWCPA CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0010_0000_iiii_ii00 +addop("SWCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100100000"), imm8_align4, bs("00")]) + +# SMCPA CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0011_0000_iiii_i000 +addop("SMCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100110000"), imm8_align8, bs("000")]) + +# LBCPA CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0100_0000_iiii_iiii +addop("LBCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101000000"), imm8]) + +# LHCPA CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0101_0000_iiii_iii0 
+addop("LHCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101010000"), imm8_align2, bs("0")]) + +# LWCPA CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0110_0000_iiii_ii00 +addop("LWCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101100000"), imm8_align4, bs("00")]) + +# LMCPA CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0111_0000_iiii_i000 +addop("LMCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101110000"), imm8_align8, bs("000")]) + +# SBCPM0 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0000_1000_iiii_iiii +addop("SBCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100001000"), imm8]) + +# SHCPM0 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0001_1000_iiii_iii0 +addop("SHCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100011000"), imm8_align2, bs("0")]) + +# SWCPM0 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0010_1000_iiii_ii00 +addop("SWCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100101000"), imm8_align4, bs("00")]) + +# SMCPM0 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0011_1000_iiii_i000 +addop("SMCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100111000"), imm8_align8, bs("000")]) + +# LBCPM0 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0100_1000_iiii_iiii +addop("LBCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101001000"), imm8]) + +# LHCPM0 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0101_1000_iiii_iii0 +addop("LHCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101011000"), imm8_align2, bs("0")]) + +# LWCPM0 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0110_1000_iiii_ii00 +addop("LWCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101101000"), imm8_align4, bs("00")]) + +# LMCPM0 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0111_1000_iiii_i000 +addop("LMCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101111000"), imm8_align8, bs("000")]) + +# SBCPM1 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0000_1100_iiii_iiii +addop("SBCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100001100"), 
imm8]) + +# SHCPM1 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0001_1100_iiii_iii0 +addop("SHCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100011100"), imm8_align2, bs("0")]) + +# SWCPM1 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0010_1100_iiii_ii00 +addop("SWCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100101100"), imm8_align4, bs("00")]) + +# SMCPM1 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0011_1100_iiii_i000 +addop("SMCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100111100"), imm8_align8, bs("000")]) + +# LBCPM1 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0100_1100_iiii_iiii +addop("LBCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101001100"), imm8]) + +# LHCPM1 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0101_1100_iiii_iii0 +addop("LHCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101011100"), imm8_align2, bs("0")]) + +# LWCPM1 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0110_1100_iiii_ii00 +addop("LWCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101101100"), imm8_align4, bs("00")]) + +# LMCPM1 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0111_1100_iiii_i000 +addop("LMCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101111100"), imm8_align8, bs("000")]) + +# (RI) - 1111_xxxx_xxxx_0110 xxxx_xxxx_xxxx_xxxx +addop("(RI)", [bs("1111"), imm8, bs("0110"), imm16]) + +# CP code24 - 1111_CCCC_CCCC_0111 cccc_cccc_cccc_cccc +#addop("CP", [bs("1111"), imm8_noarg, bs("0111"), imm_code24], [imm_code24]) +# Note: CP & CMOV* look exactly the same. This is ambiguous, and prevent +# them for being correctly disassembled. CP was arbitrarily disabled. 
+ +# CP code56 - 1111_CCCC_CCCC_0111 cccc_cccc_cccc_cccc cccc_cccc_cccc_cccc +# 64-bit VLIW operation mode - not implemented + +# CMOV CRn,Rm - 1111_nnnn_mmmm_0111 1111_0000_0000_0000 +#addop("CMOV", [bs("1111"), copro_reg04, reg04, bs("01111111000000000000")]) + +# CMOV Rm,CRn - 1111_nnnn_mmmm_0111 1111_0000_0000_0001 +#addop("CMOV", [bs("1111"), copro_reg04, reg04, bs("01111111000000000001")], [reg04, copro_reg04]) + +# CMOVC CCRn,Rm - 1111_nnnn_mmmm_0111 1111_0000_0000_NN10 +# CRn=NNnnnn +addop("CMOVC", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg06, bs("10")], [copro_reg06, reg04]) + +# CMOVC Rm,CCRn - 1111_nnnn_mmmm_0111 1111_0000_0000_NN11 +# CRn=NNnnnn +addop("CMOVC", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg06, bs("11")], [reg04, copro_reg06]) + +# CMOVH CRn,Rm - 1111_nnnn_mmmm_0111 1111_0001_0000_0000 +#addop("CMOVH", [bs("1111"), copro_reg04, reg04, bs("01111111000100000000")]) + +# CMOVH Rm,CRn - 1111_nnnn_mmmm_0111 1111_0001_0000_0001 +#addop("CMOVH", [bs("1111"), copro_reg04, reg04, bs("01111111000100000001")], [reg04, copro_reg04]) + +# Note: the following CMOV* instructions are extensions used when the processor +# has more than 16 coprocessor general-purpose registers. They can be +# used to assemble and disassemble both CMOV* instruction sets. 
+ +# CMOV CRn,Rm - 1111_nnnn_mmmm_0111 1111_0000_0000_N000 +# CRn=Nnnnn +addop("CMOV", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg05, bs("000")], [copro_reg05, reg04]) + +# CMOV Rm,CRn - 1111_nnnn_mmmm_0111 1111_0000_0000_N001 +addop("CMOV", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg05, bs("001")], [reg04, copro_reg05]) + +# CMOVH CRn,Rm - 1111_nnnn_mmmm_0111 1111_0001_0000_N000 +addop("CMOVH", [bs("1111"), imm4_noarg, reg04, bs("0111111100010000"), copro_reg05, bs("000")], [copro_reg05, reg04]) + +# CMOVH Rm,CRn - 1111_nnnn_mmmm_0111 1111_0001_0000_N001 +addop("CMOVH", [bs("1111"), imm4_noarg, reg04, bs("0111111100010000"), copro_reg05, bs("001")], [reg04, copro_reg05]) + +# (RI) - 1111_xxxx_xxxx_10xx xxxx_xxxx_xxxx_xxxx +addop("(RI)", [bs("1111"), imm8, bs("10"), imm18]) + +# SWCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1100 dddd_dddd_dddd_dddd +addop("SWCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1100"), disp16_reg_deref], [copro_reg04, disp16_reg_deref]) + +# LWCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1101 dddd_dddd_dddd_dddd +addop("LWCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1101"), disp16_reg_deref], [copro_reg04, disp16_reg_deref, reg04_deref]) + +# SMCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1110 dddd_dddd_dddd_dddd +addop("SMCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1110"), disp16_reg_deref], [copro_reg04, disp16_reg_deref, reg04_deref]) + +# LMCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1111 dddd_dddd_dddd_dddd +addop("LMCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1111"), disp16_reg_deref], [copro_reg04, disp16_reg_deref]) diff --git a/miasm/arch/mep/disasm.py b/miasm/arch/mep/disasm.py new file mode 100644 index 00000000..0260c01d --- /dev/null +++ b/miasm/arch/mep/disasm.py @@ -0,0 +1,23 @@ +# Toshiba MeP-c4 - miasm disassembly engine +# Guillaume Valadon + +from miasm.core.asmblock import disasmEngine +from miasm.arch.mep.arch import mn_mep + + +class dis_mepb(disasmEngine): + """MeP miasm 
class ir_a_mepb(ir_mepb, ira):
    """IR analysis layer for MeP - Big Endian

    Combines the MeP instruction semantics (ir_mepb) with the generic
    miasm `ira` interface.

    Notes:
        - it is mandatory for symbolic execution.
    """

    def __init__(self, loc_db=None):
        # The semantic parent is initialized explicitly (no super() chain)
        ir_mepb.__init__(self, loc_db)

        # R0 is declared as the register that carries return values
        self.ret_reg = self.arch.regs.R0

    # Note: the methods below are abstract in the analysis interface and must
    #       be implemented here. Every size is expressed in bits.

    def sizeof_char(self):
        """Return the size of a char in bits"""
        return 8

    def sizeof_short(self):
        """Return the size of a short in bits"""
        return 16

    def sizeof_int(self):
        """Return the size of an int in bits"""
        return 32

    def sizeof_long(self):
        """Return the size of a long in bits"""
        return 32

    def sizeof_pointer(self):
        """Return the size of a void* in bits"""
        return 32


class ir_a_mepl(ir_mepl, ir_a_mepb):
    """IR analysis layer for MeP - Little Endian"""

    def __init__(self, loc_db=None):
        # NOTE(review): this goes through ir_a_mepb.__init__, which calls
        # ir_mepb.__init__ directly; ir_mepl.__init__ is never invoked here.
        # Confirm that the little-endian setup is not skipped.
        ir_a_mepb.__init__(self, loc_db)
class mep_CGen(CGen):
    """Translate a block of MeP instructions to C

    Note: the generated code is extended with the bookkeeping needed to
          emulate the *REPEAT instructions
    """

    def __init__(self, ir_arch):
        self.ir_arch = ir_arch
        self.PC = self.ir_arch.arch.regs.PC
        self.translator = TranslatorC(self.ir_arch.loc_db)
        self.init_arch_C()

    def gen_pre_code(self, attrib):
        """Generate the C code inserted before the current instruction

        The lines produced by the base class are kept, then two statements are
        appended: one that sets mycpu->PC to the instruction offset, and one
        that copies it into mycpu->last_addr.
        """

        # Call the base class method
        out = super(mep_CGen, self).gen_pre_code(attrib)

        # Set the PC register value explicitly
        out.append("mycpu->PC = 0x%X;" % attrib.instr.offset)
        out.append("mycpu->last_addr = mycpu->PC;")

        return out

    def gen_post_code(self, attrib, pc_value):
        """Generate the C code inserted after the current instruction

        The lines produced by the base class are kept, then the C snippet that
        implements the *REPEAT logic is appended, one list element per line.
        """

        # Call the base class method
        out = super(mep_CGen, self).gen_post_code(attrib, pc_value)

        # Implement the *REPEAT instructions logic. The snippet:
        #   - saves the previous is_repeat_end flag, then recomputes it by
        #     comparing last_addr with RPE (lowest bit cleared);
        #   - when the saved flag is set, no jump was taken, and either
        #     in_erepeat or RPC is non-zero: decrements RPC if non-zero and
        #     redirects the execution to RPB.
        tmp = r"""
        /* *REPEAT instructions logic */
        {
            uint32_t is_repeat_end = mycpu->is_repeat_end;
            mycpu->is_repeat_end = !!(mycpu->last_addr == (mycpu->RPE&~0x1));

            if (is_repeat_end && !mycpu->take_jmp &&
                (mycpu->in_erepeat || mycpu->RPC)) {
                if (mycpu->RPC)
                    mycpu->RPC --;

                //printf("Go repeat %X\n", mycpu->RPB);
                DST_value = mycpu->RPB;
                BlockDst->address = mycpu->RPB;
                return JIT_RET_NO_EXCEPTION;
            }
        }
        """

        # Split on real newlines only: the separator previously contained a
        # stray backtick, so the snippet was never split. The string is raw,
        # hence the "\n" inside the printf comment is not a line break.
        out += tmp.split('\n')
        return out
# Toshiba MeP-c4 - miasm registers definition
# Guillaume Valadon

from builtins import range
from miasm.expression.expression import ExprId
from miasm.core.cpu import reg_info, gen_reg, gen_regs

# Used by internal miasm exceptions
exception_flags = ExprId("exception_flags", 32)
exception_flags_init = ExprId("exception_flags_init", 32)

# Internal pseudo-registers (32 bits), each paired with its initial value
is_repeat_end = ExprId("is_repeat_end", 32)
is_repeat_end_init = ExprId("is_repeat_end_init", 32)
last_addr = ExprId("last_addr", 32)
last_addr_init = ExprId("last_addr_init", 32)
take_jmp = ExprId("take_jmp", 32)
take_jmp_init = ExprId("take_jmp_init", 32)
in_erepeat = ExprId("in_erepeat", 32)
# Each pseudo-register needs its own initial symbol: this one was previously
# named "take_jmp_init", which made in_erepeat and take_jmp share the same
# initial value in regs_init.
in_erepeat_init = ExprId("in_erepeat_init", 32)


# General-purpose registers (R0 to R15) names
gpr_names = ["R%d" % r for r in range(13)]  # register names
gpr_names += ["TP", "GP", "SP"]  # according to the manual GP does not exist
gpr_exprs, gpr_inits, gpr_infos = gen_regs(gpr_names, globals())  # sz=32 bits (default)

# Notes:
#     - gpr_exprs: register ExprIds on 32 bits. The size is important for
#       symbolic execution.
#     - gpr_inits: register initial values.
#     - gpr_infos: object that binds names & ExprIds

# Define aliases to general-purpose registers
TP = gpr_exprs[13]  # Tiny data area Pointer
GP = gpr_exprs[14]  # Global Pointer
SP = gpr_exprs[15]  # Stack Pointer


# Control/special registers names
csr_names = ["PC", "LP", "SAR", "S3", "RPB", "RPE", "RPC", "HI", "LO",
             "S9", "S10", "S11", "MB0", "ME0", "MB1", "ME1", "PSW",
             "ID", "TMP", "EPC", "EXC", "CFG", "S22", "NPC", "DBG",
             "DEPC", "OPT", "RCFG", "CCFG", "S29", "S30", "S31", "S32"]
csr_exprs, csr_inits, csr_infos = gen_regs(csr_names, globals())

# Define aliases to control/special registers
PC = csr_exprs[0]  # Program Counter. On MeP, it is the special register R0
LP = csr_exprs[1]  # Link Pointer. On MeP, it is the special register R1
SAR = csr_exprs[2]  # Shift Amount Register. On MeP, it is the special register R2
RPB = csr_exprs[4]  # Repeat Begin. On MeP, it is the special register R4
RPE = csr_exprs[5]  # Repeat End. On MeP, it is the special register R5
RPC = csr_exprs[6]  # Repeat Counter. On MeP, it is the special register R6


# Coprocessor general-purpose registers (C0 to C15) names
# Note: a processor extension allows up to 32 coprocessor general-purpose
#       registers, hence the 32 names generated here
copro_gpr_names = ["C%d" % r for r in range(32)]  # register names
copro_gpr_exprs, copro_gpr_inits, copro_gpr_infos = gen_regs(copro_gpr_names, globals())


# Set registers initial values
# Note: both lists must keep the same order, as they are paired by index below
all_regs_ids = gpr_exprs + csr_exprs + copro_gpr_exprs + [
    exception_flags, take_jmp, last_addr, is_repeat_end,
    in_erepeat
]

all_regs_ids_init = gpr_inits + csr_inits + copro_gpr_inits + [
    exception_flags_init, take_jmp_init, last_addr_init, is_repeat_end_init,
    in_erepeat_init
]

all_regs_ids_no_alias = all_regs_ids[:]  # GV: not understood yet !
all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids])


float_st0 = ExprId("float_st0", 64)
float_st1 = ExprId("float_st1", 64)
float_st2 = ExprId("float_st2", 64)
float_st3 = ExprId("float_st3", 64)
float_st4 = ExprId("float_st4", 64)
float_st5 = ExprId("float_st5", 64)
float_st6 = ExprId("float_st6", 64)
float_st7 = ExprId("float_st7", 64)

regs_flt_expr = [float_st0, float_st1, float_st2, float_st3,
                 float_st4, float_st5, float_st6, float_st7]


# Bind each register to its initial value
regs_init = dict()  # mandatory name
for i, r in enumerate(all_regs_ids):
    regs_init[r] = all_regs_ids_init[i]
compute_s_inf, + "compute_u_inf": compute_u_inf, "take_jmp": take_jmp, + "in_erepeat": in_erepeat, "EXCEPT_DIV_BY_ZERO": EXCEPT_DIV_BY_ZERO} +sbuild = SemBuilder(ctx) + + +# Functions used to get an instruction IR +manual_functions = dict() + + +@sbuild.parse +def mep_nop(): + """Dummy instruction""" + + +@sbuild.parse +def mep_nop_2_args(arg1, arg2): + """Dummy instruction with two arguments""" + + +### Load/Store instructions + +# Register indirect addressing mode + +@sbuild.parse +def sb(reg_src, deref_dst): + """SB - Store Byte into memory""" + + # MemByte(Rm31..0) <- Rn7..0 + # MemByte((ZeroExt(disp7)+TP)31..0)) <- Rn7..0 + # MemByte((SignExt(disp16)+Rm)31..0) <- Rn7..0 + mem8[deref_dst.ptr] = reg_src[:8] + + +@sbuild.parse +def sh(reg_src, deref_dst): + """SH - Store Halfword into memory""" + + # MemHword(Rm31..1||0) <- Rn15..0 + # MemHword((ZeroExt((disp7)6..1||0)+TP)31..1||0)) <- Rn15..0 + # MemHword((SignExt(disp16)+Rm)31..1||0) <- Rn15..0 + mem16[deref_dst.ptr & i32(0xFFFFFFFE)] = reg_src[:16] + + +@sbuild.parse +def sw(reg_src, deref_dst): + """SW - Store Word into memory""" + + # MemWord(Rm31..2||00) <- Rn31..0 + # MemWord((ZeroExt((disp7)6..2||00)+SP)31..2||00)) <- Rn31..0 + # MemWord((ZeroExt((disp7)6..2||00)+TP)31..2||00)) <- Rn31..0 + # MemWord((SignExt(disp16)+Rm)31..2||00) <- Rn31..0 + # MemWord(ZeroExt((abs24)23..2||00)) - Rn31..0 + + mem32[deref_dst.ptr & i32(0xFFFFFFFC)] = reg_src + +# Without the sembuilder +#def sw(ir, instr, reg_src, deref_reg_or_imm, deref_reg=None): +# """SW - store Word into memory. 
+# +# Note: there are three variants to get the memory address: +# - from a register +# - relatively to SP +# - relatively to TP""" +# +# if isinstance(deref_reg_or_imm, ExprMem): +# # MemWord(Rm31..2||00) <- Rn31..0 +# dst = deref_reg_or_imm +# +# elif isinstance(deref_reg_or_imm, ExprInt) and deref_reg: +# # MemWord((ZeroExt((disp7)6..2||00)+SP)31..2||00)) <- Rn31..0 +# # MemWord((ZeroExt((disp7)6..2||00)+TP)31..2||00)) <- Rn31..0 +# +# imm = deref_reg_or_imm.zeroExtend(32) +# dst = ExprMem(ExprOp("+", imm, deref_reg.arg)) +# +# return [ExprAssign(dst, reg_src)], [] + + +@sbuild.parse +def lb(reg_dst, deref_dst): + """LB - Load Byte from memory""" + + # Rn <- SignExt(MemByte(Rm31..0)) + # Rn <- SignExt(MemByte((ZeroExt(disp7)+TP)31..0)) + # Rn <- SignExt(MemByte((SignExt(disp16)+Rm)31..0) + reg_dst = mem8[deref_dst.ptr].signExtend(32) + + +@sbuild.parse +def lh(reg_dst, deref_dst): + """LH - Load Halfword from memory""" + + # Rn <- SignExt(MemHword(Rm31..1||0)) + # Rn <- SignExt(MemHword((ZeroExt((disp7)6..1||0)+TP)31..1||0) + # Rn <- SignExt(MemHword((SignExt(disp16)+Rm)31..1||0)) + reg_dst = mem16[deref_dst.ptr & i32(0xFFFFFFFE)].signExtend(32) + + +@sbuild.parse +def lw(reg_dst, deref_dst): + """LW - Load Word from memory""" + + # Rn <- MemWord(Rm31..2||00) + # Rn <- MemWord((ZeroExt((disp7)6..2||00)+TP)31..2||00) + # Rn <- MemWord((SignExt(disp16)+Rm)31..2||00) + # Rn <- MemWord(ZeroExt((abs24)23..2||00)) + reg_dst = mem32[deref_dst.ptr & i32(0xFFFFFFFC)] + + +@sbuild.parse +def lbu(reg_dst, deref_dst): + """LBU - Load an unsigned Byte from memory""" + + # Rn <- ZeroExt(MemByte(Rm31..0)) + # Rn <- ZeroExt(MemByte((ZeroExt(disp7)+TP)31..0)) + # Rn <- ZeroExt(MemByte((SignExt(disp16)+Rm)31..0)) + reg_dst = mem8[deref_dst.ptr].zeroExtend(32) + + +@sbuild.parse +def lhu(reg_dst, deref_dst): + """LHU - Load an unsigned Halfword from memory""" + + # Rn <- ZeroExt(MemHword(Rm31..1||0)) + # Rn <- ZeroExt(MemHword((SignExt(disp16)+Rm)31..1||0)) + # Rn <- 
ZeroExt(MemHword((ZeroExt((disp7)6..1||0)+TP)31..1||0)) + reg_dst = mem16[deref_dst.ptr & i32(0xFFFFFFFE)].zeroExtend(32) + + +### Byte/Halfword extension instructions + +@sbuild.parse +def extb(reg): + """EXTB - Sign extend a byte""" + + # Rn <- SignExt(Rn7..0) + reg = reg[:8].signExtend(32) + + +@sbuild.parse +def exth(reg): + """EXTH - Sign extend a word""" + + # Rn <- ZeroExt(Rn15..0) + reg = reg[:16].signExtend(32) + + +@sbuild.parse +def extub(reg): + """EXUTB - Zero extend a byte""" + + # Rn <- SignExt(Rn7..0) + reg = reg[:8].zeroExtend(32) + + +@sbuild.parse +def extuh(reg): + """EXTUH - Zero extend a word""" + + # Rn <- ZeroExt(Rn15..0) + reg = reg[:16].zeroExtend(32) + + +### Shift amount manipulation instructions + +#@sbuild.parse +#def ssarb(deref_reg): + + +### Move instructions + +@sbuild.parse +def mov(reg, value): + """MOV - Copy 'value' to a register. The three alternatives are handled.""" + + # Rn <- Rm + # Rn <- SignExt(imm8) + # Rn <- SignExt(imm16) + reg = value.signExtend(32) + + +@sbuild.parse +def movu(reg, value): + """MOV - Copy 'value' to a register. 
The two alternatives are handled.""" + + # Rn[0-7] <- ZeroExt(imm24) + # Rn <- ZeroExt(imm16) + reg = value.zeroExtend(32) + + +@sbuild.parse +def movh(reg, imm16): + """MOVH - Copy a shifted imm16 to a register.""" + + # Rn <- imm16 <<16 + reg = imm16.zeroExtend(32) << i32(16) + + +### Arithmetic instructions + +def add3(ir, instr, reg_dst, reg_src, reg_or_imm): + """ADD3 - Add two register and store the result to a register, or + add a register and an immediate and store the result to a register""" + + if isinstance(reg_or_imm, ExprId): + # Rl <- Rn + Rm + result = ExprOp("+", reg_src, reg_or_imm) + else: + # Rn <- Rm + SignExt(imm16) + value = int(reg_or_imm.arg) + result = ExprOp("+", reg_src, ExprInt(value, 32)) + + return [ExprAssign(reg_dst, result)], [] + +manual_functions["add3"] = add3 + + +@sbuild.parse +def add(arg1, arg2): + """ADD - Add a register and an immediate.""" + + # Rn <- Rn + SignExt(imm6) + arg1 = arg1 + arg2.signExtend(32) + + +@sbuild.parse +def advck3(r0, rn, rm): + """ADVCK3 - Check addition overflow.""" + + # if(Overflow(Rn+Rm)) R0<-1 else R0<-0 (Signed) + r0 = i32(1) if compute_u_inf(i64(0xFFFFFFFF), rn.zeroExtend(64) + rm.zeroExtend(64)) else i32(0) + + +@sbuild.parse +def sub(reg1, reg2): + """SUB - Subtract one register to another.""" + + # Rn <- Rn - Rm + reg1 = reg1 - reg2 + + +def sbvck3(ir, instr, r0, rn, rm): + """SBVCK3 - Check subtraction overflow""" + + # if(Overflow(Rn-Rm)) R0<-1 else R0<-0 (Signed) + + # Subtract registers + reg_sub = ExprOp("+", rn, rm) + + # Get the register storing the highest value + max_rn_rm = ExprCond(ExprOp(">", rn, rm), rn, rm) + + # Check for an overflow + overflow_test = ExprOp(">", reg_sub, max_rn_rm) + + # Return the result + condition = ExprCond(overflow_test, ExprInt(1, 32), ExprInt(0, 32)) + return [ExprAssign(r0, condition)], [] + +manual_functions["sbvck3"] = sbvck3 + + +@sbuild.parse +def neg(reg1, reg2): + """NEG - Negate one register.""" + + # Rn <- - Rm + reg1 = - reg2 + + 
+@sbuild.parse +def slt3(r0, rn, rm_or_imm5): + """SLT3 - Set on less than (signed).""" + + # if (Rn> i32(31) + + # rn is positive and rm negative, return 1 + r0_mixed = i32(1) if sign_rn else i32(0) + + # rn & rm are both positives, test and return 1 or 0 + r0_pos = (i32(1) if "<"(rn, rm_ext) else i32(0)) if are_both_pos else r0_mixed + + # rn & rm are both negatives, test and return 0 or 1 + r0 = (i32(0) if "<"(rn, rm_ext) else i32(1)) if are_both_neg else r0_pos + + +@sbuild.parse +def sltu3(r0, rn, rm_or_imm5): + """SLTU3 - Set on less than (unsigned).""" + + # if (Rn> Rm4..0 + # Rn <- (Signed) Rn >> imm5 + + # Unsigned result + shift_u = rn >> rm_or_imm5 + + # Signed result + shift_mask = i32(32) - rm_or_imm5 + mask = (i32(0xFFFFFFFF) >> shift_mask) << shift_mask + shift_s = shift_u | mask + + rn = shift_s if rn.msb() else shift_u + + +@sbuild.parse +def srl(rn, rm_or_imm5): + """SRL - Shift Right unsigned.""" + + # Rn <- (Unsigned) Rn >> Rm4..0 + # Rn <- (Unsigned) Rn >> imm5 + rn = rn >> rm_or_imm5 + + +@sbuild.parse +def sll(rn, rm_or_imm5): + """SLL - Shift Left unsigned.""" + + # Rn <- (Unsigned) Rn >> Rm4..0 + # Rn <- (Unsigned) Rn << imm5 + rn = rn << rm_or_imm5 + + +@sbuild.parse +def sll3(r0, rn, imm5): + """SLL3 - Shift Left unsigned, with 3 arguments.""" + + # R0 <- (Unsigned) Rn << imm5 + r0 = rn << imm5 + + +@sbuild.parse +def fsft(rn, rm): + "FSFT - Funnel shift.""" + + # Rn <- ((Rn||Rm)<> (i32(32) - sar) # Shift Rm in the reverse order + rn = tmp_rn | tmp_rm # Concatenate registers + + +## Branch/Jump instructions + +@sbuild.parse +def bra(disp12): + """BRA - Branch to an address.""" + + # PC <- PC + SignExt((disp12)11..1||0) + dst = disp12 + PC = dst + take_jmp = ExprInt(1, 32) + ir.IRDst = dst + + +@sbuild.parse +def beqz(reg_test, disp8): + """BEQZ - Branch if the register stores zero.""" + + # if(Rn==0) PC <- PC +SignExt((disp8)7..1||0) + dst = ExprLoc(ir.get_next_break_loc_key(instr), 32) if reg_test else disp8 + take_jmp = ExprInt(0, 32) 
if reg_test else ExprInt(1, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def bnez(reg_test, disp8): + """BNEZ - Branch if the register does not store zero.""" + + # if(Rn!=0) PC <- PC + SignExt((disp8)7..1||0) + dst = disp8 if reg_test else ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) if reg_test else ExprInt(0, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def beqi(reg_test, imm4, disp16): + """BEQI - Branch if the register stores imm4.""" + + # if(Rn==ZeroExt(imm4)) PC <- PC +SignExt((disp17)16..1||0) + dst = ExprLoc(ir.get_next_break_loc_key(instr), 32) if (reg_test - imm4) else disp16 + take_jmp = ExprInt(0, 32) if (reg_test - imm4) else ExprInt(1, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def bnei(reg_test, imm4, disp16): + """BNEI - Branch if the register does not store imm4.""" + + # if(Rn!=ZeroExt(imm4)) PC <- PC+SignExt((disp17)16..1||0) + dst = disp16 if (reg_test - imm4) else ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) if (reg_test - imm4) else ExprInt(0, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def blti(reg_test, imm4, disp16): + """BLTI - Branch if the register is lower than imm4.""" + + # if(Rn< ZeroExt(imm4)) PC <- PC +SignExt((disp17)16..1||0) - (Signed comparison) + dst = disp16 if compute_s_inf(reg_test, imm4) else ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) if compute_s_inf(reg_test, imm4) else ExprInt(0, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def bgei(reg_test, imm4, disp16): + """BGEI - Branch if the register is greater or equal to imm4.""" + + # if(Rn>=ZeroExt(imm4)) PC <- PC +SignExt((disp17)16..1||0) - (Signed comparison) + cond = i32(1) if ExprOp(TOK_EQUAL, reg_test, imm4) else compute_s_inf(imm4, reg_test).zeroExtend(32) + dst = disp16 if cond else ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) if cond else ExprInt(0, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def 
beq(rn, rm, disp16): + """BEQ - Branch if the two registers are equal.""" + + # if(Rn==Rm) PC <- PC +SignExt((disp17)16..1||0) + dst = ExprLoc(ir.get_next_break_loc_key(instr), 32) if (rn - rm) else disp16 + take_jmp = ExprInt(0, 32) if (rn - rm) else ExprInt(1, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def bne(rn, rm, disp16): + """BNE - Branch if the two registers are not equal.""" + + # if(Rn!=Rm) PC <- PC +SignExt((disp17)16..1||0) + dst = disp16 if (rn - rm) else ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) if (rn - rm) else ExprInt(0, 32) + PC = dst + ir.IRDst = dst + + +@sbuild.parse +def bsr(disp): + """BSR - Branch to an address, and store the return address.""" + + # 16-bit variant: LP <- PC + 2; PC <- PC +SignExt((disp12)11..1||0) + # 32-bit variant: LP <- PC + 4; PC <- PC +SignExt((disp24)23..1||0) + + # Set LP + LP = ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) + + # Set PC according to the immediate size + dst = disp + PC = dst + ir.IRDst = dst + + +def jmp(ir, instr, reg_or_imm): + """JMP - Change PC to a register content or an immediate. + Note: the behavior in VLIW mode is not implemented""" + + take_jmp = ExprInt(1, 32) + + if isinstance(reg_or_imm, ExprId): + # PC <- Rm31..1||0 + new_PC = ExprAssign(PC, reg_or_imm) + else: + # PC <- PC31..28||0000||(target24)23..1||0 + new_PC = ExprAssign(PC, ExprOp("+", ExprOp("&", PC, ExprInt(0xF0000000, 32)), reg_or_imm)) + + return [new_PC, ExprAssign(ir.IRDst, new_PC)], [] + +manual_functions["jmp"] = jmp + + +@sbuild.parse +def jsr(reg): + """JSR - Jump to the register, and store the return address.""" + + # LP <- PC + 2; PC <- Rm31..1||0 + LP = ExprLoc(ir.get_next_break_loc_key(instr), 32) + take_jmp = ExprInt(1, 32) + PC = reg + ir.IRDst = reg + + +@sbuild.parse +def ret(): + """RET - Return from a function call. 
+ Note: the behavior in VLIW mode is not implemented""" + + # PC <- LP31..1||0 + dst = LP + PC = dst + ir.IRDst = dst + + +# Repeat instructions + +@sbuild.parse +def repeat(rn, disp17): + """REPEAT - This instruction repeats an instruction block. It sets the RPB, + RPE and RPC control registers.""" + + # RPB <- pc+4 // Repeat Begin + RPB = PC + i32(4) + # RPE <- pc+SignExt((disp17)16..1||0)) // Repeat End + RPE = PC + i32(disp17.arg & 0xFFFFFFFE) + # RPC <- Rn + RPC = rn + in_erepeat = ExprInt(0, 32) + + +@sbuild.parse +def erepeat(disp17): + """EREPEAT - This instruction repeats an instruction block. It sets the RPB + and RPE control registers. To distinguish from the repeat instruction, + the least significant bit in the RPE register (ELR) is set to 1.""" + + # RPB <- pc+4 // Repeat Begin + RPB = PC + i32(4) + # RPE <- pc+SignExt((disp17)16..1||1)) (EREPEAT) + RPE = PC + i32(disp17.arg + 1) + # RPC <- undefined + in_erepeat = ExprInt(1, 32) + + +## Control Instructions + +@sbuild.parse +def stc(reg, control_reg): + """STC - Copy a general-purpose register into a control register.""" + + # ControlReg(imm5) <- Rn + control_reg = reg + + +@sbuild.parse +def ldc(reg, control_reg): + """LDC - Copy a control register into a general-purpose register.""" + + # Rn <- ControlReg(imm5) + reg = control_reg + + +@sbuild.parse +def di(): + """DI - Disable Interrupt""" + + # PSW.IEC<-0 + PSW = PSW & i32(0xFFFFFFFE) # PSW.IEC: bit 0 + + +@sbuild.parse +def ei(): + """EI - Enable Interrupt""" + + # PSW.IEC<-1 + PSW = PSW ^ i32(0b1) # PSW.IEC: bit 0 + + +@sbuild.parse +def reti(): + """RETI - Return from the exception/interrupt handler. 
+ Note: the behavior in VLIW mode is not implemented""" + + #if (PSW.NMI==1) { + # PC <- NPC31..1 || 0; PSW.NMI<-0; + #} else { + # PC <- EPC31..1 || 0; + # PSW.UMC <- PSW.UMP; PSW.IEC <- PSW.IEP + #} + + # PSW.NMI == bit 9 + NMI_mask = i32(1 << 9) + + # PSW.UMP == bit 3 + # PSW.IEP == bit 1 + UMP_IEP_mask = i32((1 << 3) ^ (1 << 1)) + + # PSW.UMC == bit 2 + # PSW.IEC == bit 0 + UMC_IEC_mask = (PSW & UMP_IEP_mask) >> i32(1) + + # Get PSW.NMI + PSW_NMI = (PSW & NMI_mask) >> i32(9) + + # Set PC + dst = NPC & i32(0xFFFFFFFE) if PSW_NMI else EPC & i32(0xFFFFFFFE) + PC = dst + + # Set flags + PSW = PSW ^ NMI_mask if PSW_NMI else PSW ^ UMC_IEC_mask + + ir.IRDst = dst + + +@sbuild.parse +def swi(imm2): + """SWI - Software Interrupt""" + + # if(imm2==0) EXC.SIP0 <- 1 + # else if (imm2==1) EXC.SIP1 <- 1 + # else if (imm2==2) EXC.SIP2 <- 1 + # else if (imm2==3) EXC.SIP3 <- 1 + + # EXC.SIP0 == bit 4 + # EXC.SIP1 == bit 5 + # EXC.SIP2 == bit 6 + # EXC.SIP3 == bit 7 + + EXC = EXC ^ (i32(1) << (i32(4) + imm2)) + + +# Note: the following instructions can't be implemented +manual_functions["halt"] = mep_nop +manual_functions["sleep"] = mep_nop +manual_functions["break"] = mep_nop +manual_functions["syncm"] = mep_nop +manual_functions["stcb"] = mep_nop_2_args +manual_functions["ldcb"] = mep_nop_2_args + + +### Bit manipulation instruction option + +@sbuild.parse +def bsetm(rm_deref, imm3): + """BSETM - Bit Set Memory""" + + # MemByte(Rm) <- MemByte(Rm) or (1< reversed_rm = 0b1110 + + # Test bits individually + b3 = (reversed_rm & i32(2**3)) >> i32(3) if reversed_rm else i32(0) + -> b3 = (0b1110 & 0b1000 >> 3) = 1 + + b2 = (reversed_rm & i32(2**2)) >> i32(2) if b3 else i32(0) + -> b2 = (0b1110 & 0b0100 >> 2) = 1 + + b1 = (reversed_rm & i32(2**1)) >> i32(1) if b2 else i32(0) + -> b1 = (0b1110 & 0b0010 >> 1) = 1 + + b0 = (reversed_rm & i32(2**0)) >> i32(0) if b1 else i32(0) + -> b0 = (0b1110 & 0b0001 >> 0) = 0 + + # Sum all partial results + rn = b3 + b2 + b1 + b0 + -> rn = 1 + 1 + 1 + 
0 = 3 + """ + + # Rn <- LeadingZeroDetect(Rm) + + # Invert the value + reversed_rm = ~rm + + # Test bits individually + b31 = (reversed_rm & i32(2**31)) >> i32(31) if reversed_rm else i32(0) + b30 = (reversed_rm & i32(2**30)) >> i32(30) if b31 else i32(0) + b29 = (reversed_rm & i32(2**29)) >> i32(29) if b30 else i32(0) + b28 = (reversed_rm & i32(2**28)) >> i32(28) if b29 else i32(0) + b27 = (reversed_rm & i32(2**27)) >> i32(27) if b28 else i32(0) + b26 = (reversed_rm & i32(2**26)) >> i32(26) if b27 else i32(0) + b25 = (reversed_rm & i32(2**25)) >> i32(25) if b26 else i32(0) + b24 = (reversed_rm & i32(2**24)) >> i32(24) if b25 else i32(0) + b23 = (reversed_rm & i32(2**23)) >> i32(23) if b24 else i32(0) + b22 = (reversed_rm & i32(2**22)) >> i32(22) if b23 else i32(0) + b21 = (reversed_rm & i32(2**21)) >> i32(21) if b22 else i32(0) + b20 = (reversed_rm & i32(2**20)) >> i32(20) if b21 else i32(0) + b19 = (reversed_rm & i32(2**19)) >> i32(19) if b20 else i32(0) + b18 = (reversed_rm & i32(2**18)) >> i32(18) if b19 else i32(0) + b17 = (reversed_rm & i32(2**17)) >> i32(17) if b18 else i32(0) + b16 = (reversed_rm & i32(2**16)) >> i32(16) if b17 else i32(0) + b15 = (reversed_rm & i32(2**15)) >> i32(15) if b16 else i32(0) + b14 = (reversed_rm & i32(2**14)) >> i32(14) if b15 else i32(0) + b13 = (reversed_rm & i32(2**13)) >> i32(13) if b14 else i32(0) + b12 = (reversed_rm & i32(2**12)) >> i32(12) if b13 else i32(0) + b11 = (reversed_rm & i32(2**11)) >> i32(11) if b12 else i32(0) + b10 = (reversed_rm & i32(2**10)) >> i32(10) if b11 else i32(0) + b09 = (reversed_rm & i32(2 ** 9)) >> i32(9) if b10 else i32(0) + b08 = (reversed_rm & i32(2 ** 8)) >> i32(8) if b09 else i32(0) + b07 = (reversed_rm & i32(2 ** 7)) >> i32(7) if b08 else i32(0) + b06 = (reversed_rm & i32(2 ** 6)) >> i32(6) if b07 else i32(0) + b05 = (reversed_rm & i32(2 ** 5)) >> i32(5) if b06 else i32(0) + b04 = (reversed_rm & i32(2 ** 4)) >> i32(4) if b05 else i32(0) + b03 = (reversed_rm & i32(2 ** 3)) >> i32(3) if b04 
else i32(0) + b02 = (reversed_rm & i32(2 ** 2)) >> i32(2) if b03 else i32(0) + b01 = (reversed_rm & i32(2 ** 1)) >> i32(1) if b02 else i32(0) + b00 = (reversed_rm & i32(2 ** 0)) >> i32(0) if b01 else i32(0) + + # Sum all partial results + rn = b31 + b30 + b29 + b28 + b27 + b26 + b25 + b24 + b23 + b22 + b21 + b20 \ + + b19 + b18 + b17 + b16 + b15 + b14 + b13 + b12 + b11 + b10 + b09 + b08 \ + + b07 + b06 + b05 + b04 + b03 + b02 + b01 + b00 + + +### Coprocessor option + +# Note: these instructions are implemented when needed + +# SWCP - Store Word to memory from a coprocessor register +# MemWord(Rm31..2||00) <- CRn 31..0 +manual_functions["swcp"] = sw + + +# LWCP - Load Word from memory to a coprocessor register +# CRn <- MemWord(Rm31..2||00) +manual_functions["lwcp"] = lw + + +@sbuild.parse +def smcp(reg_src, deref_dst): + """SMCP - Store Word to memory from a coprocessor register""" + + # MemDword(Rm31..3||000) <- CRn + mem32[deref_dst.ptr & i32(0xFFFFFFF8)] = reg_src + + +@sbuild.parse +def lmcp(reg_dst, deref_src): + """LMCP - Load Word from memory to a coprocessor register""" + + # CRn <- MemDword(Rm31..3||000) + reg_dst = mem32[deref_src.ptr & i32(0xFFFFFFF8)] + + +@sbuild.parse +def swcpi(reg_src, deref_dst): + """SWCPI - Store Word to memory, and increment the address""" + + # MemWord(Rm31..2||00) <- CRn 31..0; Rm<-Rm+4 + mem32[deref_dst.ptr & i32(0xFFFFFFFC)] = reg_src + deref_dst.ptr = deref_dst.ptr + i32(4) + + +@sbuild.parse +def lwcpi(reg_dst, deref_src): + """LWCPI - Load Word from memory, and increment the address""" + + # CRn <- MemWord(Rm31..2||00); Rm<-Rm+4 + reg_dst = mem32[deref_src.ptr & i32(0xFFFFFFFC)] + deref_src.ptr = deref_src.ptr + i32(4) + + +@sbuild.parse +def smcpi(reg_src, deref_dst): + """SMCPI - Store Word to memory, and increment the address""" + + # MemDword(Rm31..3||000) <- CRn; Rm<-Rm+8 + mem32[deref_dst.ptr & i32(0xFFFFFFF8)] = reg_src + deref_dst.ptr = deref_dst.ptr + i32(8) + + +@sbuild.parse +def lmcpi(reg_dst, deref_src): + 
"""LMCPI - Load Word from memory, and increment the address""" + + # CRn <- MemDword(Rm31..3||000); Rm<-Rm+8 + reg_dst = mem32[deref_src.ptr & i32(0xFFFFFFFC)] + deref_src.ptr = deref_src.ptr + i32(8) + + +### IR MeP definitions + +def get_mnemo_expr(ir, instr, *args): + """Simplify getting the IR from a miasm instruction.""" + + if instr.name.lower() in sbuild.functions: + mnemo_func = sbuild.functions[instr.name.lower()] + else: + mnemo_func = manual_functions[instr.name.lower()] + + ir, extra_ir = mnemo_func(ir, instr, *args) + return ir, extra_ir + + +class ir_mepb(IntermediateRepresentation): + """Toshiba MeP miasm IR - Big Endian + + It transforms an instructon into an IR. + """ + + addrsize = 32 + + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_mep, "b", loc_db) + self.pc = mn_mep.getpc() + self.sp = mn_mep.getsp() + self.IRDst = ExprId("IRDst", 32) + + def get_ir(self, instr): + """Get the IR from a miasm instruction.""" + + instr_ir, extra_ir = get_mnemo_expr(self, instr, *instr.args) + + return instr_ir, extra_ir + + def get_next_break_loc_key(self, instr): + """Returns a new label that identifies where the instruction is going. 
+ + Note: it eases linking IR blocs + """ + + l = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) + return l + + +class ir_mepl(ir_mepb): + """Toshiba MeP miasm IR - Little Endian""" + + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_mep, "l", loc_db) + self.pc = mn_mep.getpc() + self.sp = mn_mep.getsp() + self.IRDst = ExprId("IRDst", 32) diff --git a/miasm/arch/mips32/__init__.py b/miasm/arch/mips32/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/miasm/arch/mips32/arch.py b/miasm/arch/mips32/arch.py new file mode 100644 index 00000000..68841e1e --- /dev/null +++ b/miasm/arch/mips32/arch.py @@ -0,0 +1,755 @@ +#-*- coding:utf-8 -*- + +import logging +from collections import defaultdict + +from pyparsing import Literal, Optional + +from miasm.expression.expression import ExprMem, ExprInt, ExprId, ExprOp, ExprLoc +from miasm.core.bin_stream import bin_stream +import miasm.arch.mips32.regs as regs +import miasm.core.cpu as cpu + +from miasm.core.asm_ast import AstInt, AstId, AstMem, AstOp + +log = logging.getLogger("mips32dis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + + +gpregs = cpu.reg_info(regs.regs32_str, regs.regs32_expr) + + +LPARENTHESIS = Literal("(") +RPARENTHESIS = Literal(")") + +def cb_deref(tokens): + if len(tokens) != 4: + raise NotImplementedError("TODO") + return AstMem(tokens[2] + tokens[0], 32) + +def cb_deref_nooff(tokens): + if len(tokens) != 3: + raise NotImplementedError("TODO") + return AstMem(tokens[1], 32) + +base_expr = cpu.base_expr + +deref_off = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref) +deref_nooff = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_nooff) +deref = deref_off | deref_nooff + + +class additional_info(object): + def __init__(self): + 
self.except_on_instr = False + +br_0 = ['B', 'J', 'JR', 'BAL', 'JAL', 'JALR'] +br_1 = ['BGEZ', 'BLTZ', 'BGTZ', 'BLEZ', 'BC1T', 'BC1F'] +br_2 = ['BEQ', 'BEQL', 'BNE'] + + +class instruction_mips32(cpu.instruction): + __slots__ = [] + delayslot = 1 + + def __init__(self, *args, **kargs): + super(instruction_mips32, self).__init__(*args, **kargs) + + + @staticmethod + def arg2str(expr, index=None, loc_db=None): + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + arg = expr.ptr + if isinstance(arg, ExprId): + return "(%s)"%arg + assert(len(arg.args) == 2 and arg.op == '+') + return "%s(%s)"%(arg.args[1], arg.args[0]) + + def dstflow(self): + if self.name == 'BREAK': + return False + if self.name in br_0 + br_1 + br_2: + return True + return False + + def get_dst_num(self): + if self.name in br_0: + i = 0 + elif self.name in br_1: + i = 1 + elif self.name in br_2: + i = 2 + else: + raise NotImplementedError("TODO %s"%self) + return i + + def dstflow2label(self, loc_db): + if self.name in ["J", 'JAL']: + expr = self.args[0].arg + addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) + return + + ndx = self.get_dst_num() + expr = self.args[ndx] + + if not isinstance(expr, ExprInt): + return + addr = expr.arg + self.offset + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[ndx] = ExprLoc(loc_key, expr.size) + + def breakflow(self): + if self.name == 'BREAK': + return False + if self.name in br_0 + br_1 + br_2: + return True + return False + + def is_subcall(self): + if self.name in ['JAL', 'JALR', 'BAL']: + return True + return False + + def getdstflow(self, loc_db): + if self.name in br_0: + return [self.args[0]] + elif self.name in br_1: + return [self.args[1]] + elif self.name in br_2: + return 
[self.args[2]] + elif self.name in ['JAL', 'JALR', 'JR', 'J']: + return [self.args[0]] + else: + raise NotImplementedError("fix mnemo %s"%self.name) + + def splitflow(self): + if self.name in ["B", 'JR', 'J']: + return False + if self.name in br_0: + return True + if self.name in br_1: + return True + if self.name in br_2: + return True + if self.name in ['JAL', 'JALR']: + return True + return False + + def get_symbol_size(self, symbol, loc_db): + return 32 + + def fixDstOffset(self): + ndx = self.get_dst_num() + e = self.args[ndx] + if self.offset is None: + raise ValueError('symbol not resolved %s' % self.l) + if not isinstance(e, ExprInt): + return + off = e.arg - self.offset + if int(off % 4): + raise ValueError('strange offset! %r' % off) + self.args[ndx] = ExprInt(off, 32) + + def get_args_expr(self): + args = [a for a in self.args] + return args + + +class mn_mips32(cpu.cls_mn): + delayslot = 1 + name = "mips32" + regs = regs + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = {'l':regs.PC, 'b':regs.PC} + sp = {'l':regs.SP, 'b':regs.SP} + instruction = instruction_mips32 + max_instruction_len = 4 + + @classmethod + def getpc(cls, attrib = None): + return regs.PC + + @classmethod + def getsp(cls, attrib = None): + return regs.SP + + def additional_info(self): + info = additional_info() + return info + + @classmethod + def getbits(cls, bitstream, attrib, start, n): + if not n: + return 0 + o = 0 + while n: + offset = start // 8 + n_offset = cls.endian_offset(attrib, offset) + c = cls.getbytes(bitstream, n_offset, 1) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def endian_offset(cls, attrib, offset): + if attrib == "l": + return (offset & ~3) + 3 - offset % 4 + elif attrib == "b": + return offset + else: + raise 
NotImplementedError('bad attrib') + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 32, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_mips32, self).value(mode) + if mode == 'l': + return [x[::-1] for x in v] + elif mode == 'b': + return [x for x in v] + else: + raise NotImplementedError('bad attrib') + + + +def mips32op(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_mips32,), dct) + #type(name, (mn_mips32b,), dct) + +class mips32_arg(cpu.m_arg): + def asm_ast_to_expr(self, arg, loc_db): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + loc_key = loc_db.get_or_create_name_location(arg.name.encode()) + return ExprLoc(loc_key, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class mips32_reg(cpu.reg_noarg, mips32_arg): + pass + +class mips32_gpreg(mips32_reg): + reg_info = gpregs + parser = reg_info.parser + +class mips32_fltpreg(mips32_reg): + reg_info = regs.fltregs + parser = reg_info.parser + + +class mips32_fccreg(mips32_reg): + reg_info = regs.fccregs + parser = reg_info.parser + +class mips32_imm(cpu.imm_noarg): + parser = base_expr + + +class mips32_s16imm_noarg(mips32_imm): + def decode(self, v): + v = v & self.lmask + v = cpu.sign_ext(v, 16, 32) + self.expr = ExprInt(v, 32) + return True + + def 
encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & 0x80000000: + nv = v & ((1 << 16) - 1) + assert( v == cpu.sign_ext(nv, 16, 32)) + v = nv + self.value = v + return True + +class mips32_soff_noarg(mips32_imm): + def decode(self, v): + v = v & self.lmask + v <<= 2 + v = cpu.sign_ext(v, 16+2, 32) + # Add pipeline offset + self.expr = ExprInt(v + 4, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + # Remove pipeline offset + v = int(self.expr.arg - 4) + if v & 0x80000000: + nv = v & ((1 << 16+2) - 1) + assert( v == cpu.sign_ext(nv, 16+2, 32)) + v = nv + self.value = v>>2 + return True + + +class mips32_s16imm(mips32_s16imm_noarg, mips32_arg): + pass + +class mips32_soff(mips32_soff_noarg, mips32_arg): + pass + + +class mips32_instr_index(mips32_imm, mips32_arg): + def decode(self, v): + v = v & self.lmask + self.expr = ExprInt(v<<2, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & 3: + return False + v>>=2 + if v > (1<>=2 + self.parent.cpr0.value = index + return True + +rs = cpu.bs(l=5, cls=(mips32_gpreg,)) +rt = cpu.bs(l=5, cls=(mips32_gpreg,)) +rd = cpu.bs(l=5, cls=(mips32_gpreg,)) +ft = cpu.bs(l=5, cls=(mips32_fltpreg,)) +fs = cpu.bs(l=5, cls=(mips32_fltpreg,)) +fd = cpu.bs(l=5, cls=(mips32_fltpreg,)) + +s16imm = cpu.bs(l=16, cls=(mips32_s16imm,)) +u16imm = cpu.bs(l=16, cls=(mips32_u16imm,)) +sa = cpu.bs(l=5, cls=(mips32_u16imm,)) +base = cpu.bs(l=5, cls=(mips32_dreg_imm,)) +soff = cpu.bs(l=16, cls=(mips32_soff,)) + +cpr0 = cpu.bs(l=5, cls=(mips32_imm,), fname="cpr0") +cpr = cpu.bs(l=3, cls=(mips32_cpr,)) + + +s16imm_noarg = cpu.bs(l=16, cls=(mips32_s16imm_noarg,), fname="imm", + order=-1) + +hint = cpu.bs(l=5, default_val="00000") +fcc = cpu.bs(l=3, cls=(mips32_fccreg,)) + +sel = cpu.bs(l=3, cls=(mips32_u16imm,)) + +code = cpu.bs(l=20, cls=(mips32_u16imm,)) + +esize = cpu.bs(l=5, 
cls=(mips32_esize,)) +epos = cpu.bs(l=5, cls=(mips32_u16imm,), fname="epos", + order=-1) + +eposh = cpu.bs(l=5, cls=(mips32_eposh,)) + +instr_index = cpu.bs(l=26, cls=(mips32_instr_index,)) +bs_fmt = cpu.bs_mod_name(l=5, fname='fmt', mn_mod={0x10: '.S', 0x11: '.D', + 0x14: '.W', 0x15: '.L', + 0x16: '.PS'}) +class bs_cond(cpu.bs_mod_name): + mn_mod = ['.F', '.UN', '.EQ', '.UEQ', + '.OLT', '.ULT', '.OLE', '.ULE', + '.SF', '.NGLE', '.SEQ', '.NGL', + '.LT', '.NGE', '.LE', '.NGT' + ] + + def modname(self, name, f_i): + raise NotImplementedError("Not implemented") + + +class bs_cond_name(cpu.bs_divert): + prio = 2 + mn_mod = [['.F', '.UN', '.EQ', '.UEQ', + '.OLT', '.ULT', '.OLE', '.ULE'], + ['.SF', '.NGLE', '.SEQ', '.NGL', + '.LT', '.NGE', '.LE', '.NGT'] + ] + + def divert(self, index, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + cond1 = [f for f in fields if f.fname == "cond1"] + assert(len(cond1) == 1) + cond1 = cond1.pop() + mm = self.mn_mod[cond1.value] + for value, new_name in enumerate(mm): + nfields = fields[:] + s = cpu.int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f = cpu.bs(**args) + nfields[index] = f + ndct = dict(dct) + ndct['name'] = name + new_name + out.append((cls, new_name, bases, ndct, nfields)) + return out + + + +class bs_cond_mod(cpu.bs_mod_name): + prio = 1 + +bs_cond = bs_cond_mod(l=4, + mn_mod = ['.F', '.UN', '.EQ', '.UEQ', + '.OLT', '.ULT', '.OLE', '.ULE', + '.SF', '.NGLE', '.SEQ', '.NGL', + '.LT', '.NGE', '.LE', '.NGT']) + + + +bs_arith = cpu.bs_name(l=6, name={'ADDU':0b100001, + 'SUBU':0b100011, + 'OR':0b100101, + 'AND':0b100100, + 'SLTU':0b101011, + 'XOR':0b100110, + 'SLT':0b101010, + 'SUBU':0b100011, + 'NOR':0b100111, + 'MOVN':0b001011, + 'MOVZ':0b001010, + }) + +bs_shift = cpu.bs_name(l=6, name={'SLL':0b000000, + 'SRL':0b000010, + 'SRA':0b000011, + }) + +bs_shift1 = cpu.bs_name(l=6, name={'SLLV':0b000100, + 'SRLV':0b000110, + 'SRAV':0b000111, + 
}) + + +bs_arithfmt = cpu.bs_name(l=6, name={'ADD':0b000000, + 'SUB':0b000001, + 'MUL':0b000010, + 'DIV':0b000011, + }) + +bs_s_l = cpu.bs_name(l=6, name = {"SW": 0b101011, + "SH": 0b101001, + "SB": 0b101000, + "LW": 0b100011, + "LH": 0b100001, + "LB": 0b100000, + "LHU": 0b100101, + "LBU": 0b100100, + "LWL": 0b100010, + "LWR": 0b100110, + + "SWL": 0b101010, + "SWR": 0b101110, + }) + + +bs_oax = cpu.bs_name(l=6, name = {"ORI": 0b001101, + "ANDI": 0b001100, + "XORI": 0b001110, + }) + +bs_bcc = cpu.bs_name(l=5, name = {"BGEZ": 0b00001, + "BGEZL": 0b00011, + "BGEZAL": 0b10001, + "BGEZALL": 0b10011, + "BLTZ": 0b00000, + "BLTZL": 0b00010, + "BLTZAL": 0b10000, + "BLTZALL": 0b10010, + }) + + +bs_code = cpu.bs(l=10) + + +mips32op("addi", [cpu.bs('001000'), rs, rt, s16imm], [rt, rs, s16imm]) +mips32op("addiu", [cpu.bs('001001'), rs, rt, s16imm], [rt, rs, s16imm]) +mips32op("nop", [cpu.bs('0'*32)], alias = True) +mips32op("lui", [cpu.bs('001111'), cpu.bs('00000'), rt, u16imm]) +mips32op("oax", [bs_oax, rs, rt, u16imm], [rt, rs, u16imm]) + +mips32op("arith", [cpu.bs('000000'), rs, rt, rd, cpu.bs('00000'), bs_arith], + [rd, rs, rt]) +mips32op("shift1", [cpu.bs('000000'), rs, rt, rd, cpu.bs('00000'), bs_shift1], + [rd, rt, rs]) + +mips32op("shift", [cpu.bs('000000'), cpu.bs('00000'), rt, rd, sa, bs_shift], + [rd, rt, sa]) + +mips32op("rotr", [cpu.bs('000000'), cpu.bs('00001'), rt, rd, sa, + cpu.bs('000010')], [rd, rt, sa]) + +mips32op("mul", [cpu.bs('011100'), rs, rt, rd, cpu.bs('00000'), + cpu.bs('000010')], [rd, rs, rt]) +mips32op("div", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), + cpu.bs('011010')]) + +mips32op("s_l", [bs_s_l, base, rt, s16imm_noarg], [rt, base]) + +#mips32op("mfc0", [bs('010000'), bs('00000'), rt, rd, bs('00000000'), sel]) +mips32op("mfc0", [cpu.bs('010000'), cpu.bs('00000'), rt, cpr0, + cpu.bs('00000000'), cpr]) +mips32op("mfc1", [cpu.bs('010001'), cpu.bs('00000'), rt, fs, + cpu.bs('00000000000')]) + +mips32op("ldc1", [cpu.bs('110101'), base, ft, 
s16imm_noarg], [ft, base]) + +mips32op("mov", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, + cpu.bs('000110')], [fd, fs]) + +mips32op("add", [cpu.bs('010001'), bs_fmt, ft, fs, fd, bs_arithfmt], + [fd, fs, ft]) + +mips32op("divu", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), + cpu.bs('011011')]) +mips32op("mult", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), + cpu.bs('011000')]) +mips32op("multu", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), + cpu.bs('011001')]) +mips32op("mflo", [cpu.bs('000000'), cpu.bs('0000000000'), rd, + cpu.bs('00000'), cpu.bs('010010')]) +mips32op("mfhi", [cpu.bs('000000'), cpu.bs('0000000000'), rd, + cpu.bs('00000'), cpu.bs('010000')]) + + +mips32op("b", [cpu.bs('000100'), cpu.bs('00000'), cpu.bs('00000'), soff], + alias = True) +mips32op("bne", [cpu.bs('000101'), rs, rt, soff]) +mips32op("beq", [cpu.bs('000100'), rs, rt, soff]) + +mips32op("blez", [cpu.bs('000110'), rs, cpu.bs('00000'), soff]) + +mips32op("bcc", [cpu.bs('000001'), rs, bs_bcc, soff]) + +mips32op("bgtz", [cpu.bs('000111'), rs, cpu.bs('00000'), soff]) +mips32op("bal", [cpu.bs('000001'), cpu.bs('00000'), cpu.bs('10001'), soff], + alias = True) + + +mips32op("slti", [cpu.bs('001010'), rs, rt, s16imm], [rt, rs, s16imm]) +mips32op("sltiu", [cpu.bs('001011'), rs, rt, s16imm], [rt, rs, s16imm]) + + +mips32op("j", [cpu.bs('000010'), instr_index]) +mips32op("jal", [cpu.bs('000011'), instr_index]) +mips32op("jalr", [cpu.bs('000000'), rs, cpu.bs('00000'), rd, hint, + cpu.bs('001001')]) +mips32op("jr", [cpu.bs('000000'), rs, cpu.bs('0000000000'), hint, + cpu.bs('001000')]) + +mips32op("lwc1", [cpu.bs('110001'), base, ft, s16imm_noarg], [ft, base]) + +#mips32op("mtc0", [bs('010000'), bs('00100'), rt, rd, bs('00000000'), sel]) +mips32op("mtc0", [cpu.bs('010000'), cpu.bs('00100'), rt, cpr0, + cpu.bs('00000000'), cpr]) +mips32op("mtc1", [cpu.bs('010001'), cpu.bs('00100'), rt, fs, + cpu.bs('00000000000')]) + +# XXXX TODO CFC1 +mips32op("cfc1", [cpu.bs('010001'), 
cpu.bs('00010'), rt, fs, + cpu.bs('00000000000')]) +# XXXX TODO CTC1 +mips32op("ctc1", [cpu.bs('010001'), cpu.bs('00110'), rt, fs, + cpu.bs('00000000000')]) + +mips32op("break", [cpu.bs('000000'), code, cpu.bs('001101')]) +mips32op("syscall", [cpu.bs('000000'), code, cpu.bs('001100')]) + + +mips32op("c", [cpu.bs('010001'), bs_fmt, ft, fs, fcc, cpu.bs('0'), + cpu.bs('0'), cpu.bs('11'), bs_cond], [fcc, fs, ft]) + + +mips32op("bc1t", [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('0'), + cpu.bs('1'), soff]) +mips32op("bc1f", [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('0'), + cpu.bs('0'), soff]) + +mips32op("swc1", [cpu.bs('111001'), base, ft, s16imm_noarg], [ft, base]) + +mips32op("cvt.d", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, + cpu.bs('100001')], [fd, fs]) +mips32op("cvt.w", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, + cpu.bs('100100')], [fd, fs]) +mips32op("cvt.s", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, + cpu.bs('100000')], [fd, fs]) + +mips32op("ext", [cpu.bs('011111'), rs, rt, esize, epos, cpu.bs('000000')], + [rt, rs, epos, esize]) +mips32op("ins", [cpu.bs('011111'), rs, rt, eposh, epos, cpu.bs('000100')], + [rt, rs, epos, eposh]) + +mips32op("seb", [cpu.bs('011111'), cpu.bs('00000'), rt, rd, cpu.bs('10000'), + cpu.bs('100000')], [rd, rt]) +mips32op("seh", [cpu.bs('011111'), cpu.bs('00000'), rt, rd, cpu.bs('11000'), + cpu.bs('100000')], [rd, rt]) +mips32op("wsbh", [cpu.bs('011111'), cpu.bs('00000'), rt, rd, cpu.bs('00010'), + cpu.bs('100000')], [rd, rt]) + +mips32op("di", [cpu.bs('010000'), cpu.bs('01011'), rt, cpu.bs('01100'), + cpu.bs('00000'), cpu.bs('0'), cpu.bs('00'), cpu.bs('000')]) +mips32op("ei", [cpu.bs('010000'), cpu.bs('01011'), rt, cpu.bs('01100'), + cpu.bs('00000'), cpu.bs('1'), cpu.bs('00'), cpu.bs('000')]) + + +mips32op("tlbp", [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19), + cpu.bs('001000')]) +mips32op("tlbwi", [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19), + cpu.bs('000010')]) + + +mips32op("teq", 
[cpu.bs('000000'), rs, rt, bs_code, cpu.bs('110100')], + [rs, rt]) diff --git a/miasm/arch/mips32/disasm.py b/miasm/arch/mips32/disasm.py new file mode 100644 index 00000000..b6c05cb7 --- /dev/null +++ b/miasm/arch/mips32/disasm.py @@ -0,0 +1,16 @@ +from miasm.core.asmblock import disasmEngine +from miasm.arch.mips32.arch import mn_mips32 + + + +class dis_mips32b(disasmEngine): + attrib = 'b' + def __init__(self, bs=None, **kwargs): + super(dis_mips32b, self).__init__(mn_mips32, self.attrib, bs, **kwargs) + + +class dis_mips32l(disasmEngine): + attrib = "l" + def __init__(self, bs=None, **kwargs): + super(dis_mips32l, self).__init__(mn_mips32, self.attrib, bs, **kwargs) + diff --git a/miasm/arch/mips32/ira.py b/miasm/arch/mips32/ira.py new file mode 100644 index 00000000..04a51c6c --- /dev/null +++ b/miasm/arch/mips32/ira.py @@ -0,0 +1,104 @@ +#-*- coding:utf-8 -*- + +from miasm.expression.expression import ExprAssign, ExprOp +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.ir.analysis import ira +from miasm.arch.mips32.sem import ir_mips32l, ir_mips32b + +class ir_a_mips32l(ir_mips32l, ira): + def __init__(self, loc_db=None): + ir_mips32l.__init__(self, loc_db) + self.ret_reg = self.arch.regs.V0 + + def call_effects(self, ad, instr): + call_assignblk = AssignBlock( + [ + ExprAssign( + self.ret_reg, + ExprOp( + 'call_func_ret', + ad, + self.arch.regs.A0, + self.arch.regs.A1, + self.arch.regs.A2, + self.arch.regs.A3, + ) + ), + ], + instr + ) + + return [call_assignblk], [] + + + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): + """ + Add a native block to the current IR + @block: native assembly block + @ircfg: IRCFG instance + @gen_pc_updt: insert PC update effects between instructions + """ + loc_key = block.loc_key + ir_blocks_all = [] + + assignments = [] + for index, instr in enumerate(block.lines): + if loc_key is None: + assignments = [] + loc_key = self.get_loc_key_for_instr(instr) + if instr.is_subcall(): + assert index == 
len(block.lines) - 2 + + # Add last instruction first (before call) + split = self.add_instr_to_current_state( + block.lines[-1], block, assignments, + ir_blocks_all, gen_pc_updt + ) + assert not split + # Add call effects after the delay splot + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) + assert split + break + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) + if split: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + loc_key = None + assignments = [] + if loc_key is not None: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) + for irblock in new_ir_blocks_all: + ircfg.add_irblock(irblock) + return new_ir_blocks_all + + def get_out_regs(self, _): + return set([self.ret_reg, self.sp]) + + def sizeof_char(self): + return 8 + + def sizeof_short(self): + return 16 + + def sizeof_int(self): + return 32 + + def sizeof_long(self): + return 32 + + def sizeof_pointer(self): + return 32 + + + +class ir_a_mips32b(ir_mips32b, ir_a_mips32l): + def __init__(self, loc_db=None): + ir_mips32b.__init__(self, loc_db) + self.ret_reg = self.arch.regs.V0 diff --git a/miasm/arch/mips32/jit.py b/miasm/arch/mips32/jit.py new file mode 100644 index 00000000..5e8d13f6 --- /dev/null +++ b/miasm/arch/mips32/jit.py @@ -0,0 +1,151 @@ +from builtins import range +import logging + +from miasm.jitter.jitload import Jitter, named_arguments +from miasm.core.locationdb import LocationDB +from miasm.core.utils import pck32, upck32 +from miasm.arch.mips32.sem import ir_mips32l, ir_mips32b +from miasm.jitter.codegen import CGen +from miasm.ir.ir import AssignBlock, IRBlock +import miasm.expression.expression as m2_expr + +log = logging.getLogger('jit_mips32') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) 
log.setLevel(logging.CRITICAL)


class mipsCGen(CGen):
    """C code generator for MIPS32 which handles the branch delay slot.

    A branch does not redirect the flow immediately: its destination is saved
    in the C variable `branch_dst_irdst` and `branch_dst_set` is raised, then
    the execution goes on with the next instruction. The saved destination is
    applied when the block returns (see CODE_RETURN_NO_EXCEPTION).
    """

    CODE_INIT = CGen.CODE_INIT + r"""
    unsigned int branch_dst_pc;
    unsigned int branch_dst_irdst;
    unsigned int branch_dst_set=0;
    """

    CODE_RETURN_NO_EXCEPTION = r"""
    %s:
    if (branch_dst_set) {
        %s = %s;
        BlockDst->address = %s;
    } else {
        BlockDst->address = %s;
    }
    return JIT_RET_NO_EXCEPTION;
    """

    def __init__(self, ir_arch):
        super(mipsCGen, self).__init__(ir_arch)
        # IR-level handles on the C variables declared in CODE_INIT
        self.delay_slot_dst = m2_expr.ExprId("branch_dst_irdst", 32)
        self.delay_slot_set = m2_expr.ExprId("branch_dst_set", 32)

    def block2assignblks(self, block):
        """Return the IR blocks of @block with branches deferred.

        For each IR block holding a flow-breaking instruction, every
        assignblock which writes PC is rewritten so that:
        - the branch destination is stored in `branch_dst_irdst`
        - `branch_dst_set` is set to 1
        - IRDst points to the next instruction
        Assignblocks which do not write PC are kept unchanged.

        @block: native assembly block
        """
        irblocks_list = super(mipsCGen, self).block2assignblks(block)
        for irblocks in irblocks_list:
            for blk_idx, irblock in enumerate(irblocks):
                has_breakflow = any(assignblock.instr.breakflow() for assignblock in irblock)
                if not has_breakflow:
                    continue

                irs = []
                for assignblock in irblock:
                    if self.ir_arch.pc not in assignblock:
                        # No PC update here: keep this assignblock as is.
                        # (The previous code rebuilt it from `assignments`,
                        # which is unbound on the first iteration and
                        # otherwise belongs to a previous assignblock.)
                        irs.append(assignblock)
                        continue
                    assignments = dict(assignblock)
                    # Add internal branch destination
                    assignments[self.delay_slot_dst] = assignblock[
                        self.ir_arch.pc]
                    assignments[self.delay_slot_set] = m2_expr.ExprInt(1, 32)
                    # Replace IRDst with next instruction
                    dst_loc_key = self.ir_arch.get_next_instr(assignblock.instr)
                    assignments[self.ir_arch.IRDst] = m2_expr.ExprLoc(dst_loc_key, 32)
                    irs.append(AssignBlock(assignments, assignblock.instr))
                irblocks[blk_idx] = IRBlock(irblock.loc_key, irs)

        return irblocks_list

    def gen_finalize(self, block):
        """
        Generate the C code for the final block instruction
        """

        loc_key = self.get_block_post_label(block)
        offset = self.ir_arch.loc_db.get_location_offset(loc_key)
        out = (self.CODE_RETURN_NO_EXCEPTION % (loc_key,
                                                self.C_PC,
                                                m2_expr.ExprId('branch_dst_irdst', 32),
                                                m2_expr.ExprId('branch_dst_irdst', 32),
                                                self.id_to_c(m2_expr.ExprInt(offset, 32)))
              ).split('\n')
        return out


class jitter_mips32l(Jitter):
    """Jitter for little endian MIPS32, using mipsCGen as C generator"""

    C_Gen = mipsCGen

    def __init__(self, *args, **kwargs):
        sp = LocationDB()
        Jitter.__init__(self, ir_mips32l(sp), *args, **kwargs)
        self.vm.set_little_endian()

    def push_uint32_t(self, value):
        """Decrement SP by 4 and store the 32 bit @value at the new SP"""
        self.cpu.SP -= 4
        self.vm.set_mem(self.cpu.SP, pck32(value))

    def pop_uint32_t(self):
        """Read the 32 bit value at SP, increment SP by 4 and return it"""
        value = self.vm.get_u32(self.cpu.SP)
        self.cpu.SP += 4
        return value

    def get_stack_arg(self, index):
        """Return the 32 bit value stored at SP + 4 * @index"""
        return self.vm.get_u32(self.cpu.SP + 4 * index)

    def init_run(self, *args, **kwargs):
        Jitter.init_run(self, *args, **kwargs)
        self.cpu.PC = self.pc

    # calling conventions

    @named_arguments
    def func_args_stdcall(self, n_args):
        args = [self.get_arg_n_stdcall(i) for i in range(n_args)]
        ret_ad = self.cpu.RA
        return ret_ad, args

    def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None):
        self.pc = self.cpu.PC = ret_addr
        if ret_value1 is not None:
            self.cpu.V0 = ret_value1
        if ret_value2 is not None:
            self.cpu.V1 = ret_value2
        return True

    def func_prepare_stdcall(self, ret_addr, *args):
        # The first four arguments go to A0-A3, the others on the stack
        for index in range(min(len(args), 4)):
            setattr(self.cpu, 'A%d' % index, args[index])
        for index in range(4, len(args)):
            self.vm.set_mem(self.cpu.SP + 4 * (index - 4), pck32(args[index]))
        self.cpu.RA = ret_addr

    def get_arg_n_stdcall(self, index):
        # Arguments 0-3 are read from A0-A3, the others from the stack
        if index < 4:
            arg = getattr(self.cpu, 'A%d' % index)
        else:
            arg = self.get_stack_arg(index-4)
        return arg


    func_args_systemv = func_args_stdcall
    func_ret_systemv = func_ret_stdcall
    func_prepare_systemv = func_prepare_stdcall
    get_arg_n_systemv = get_arg_n_stdcall


class jitter_mips32b(jitter_mips32l):
    """Jitter for big endian MIPS32"""

    def __init__(self, *args, **kwargs):
        sp = LocationDB()
        Jitter.__init__(self, ir_mips32b(sp), *args, **kwargs)
        self.vm.set_big_endian()


# ---------------------------------------------------------------------------
# Patch metadata for the next file (kept from the original diff header):
#   diff --git a/miasm/arch/mips32/regs.py b/miasm/arch/mips32/regs.py
#   new file mode 100644
#   index 00000000..1513e989
#   --- /dev/null
#   +++ b/miasm/arch/mips32/regs.py
#   @@ -0,0 +1,73 @@
# ---------------------------------------------------------------------------
#-*- coding:utf-8 -*-

from builtins import range
from miasm.expression.expression import ExprId
from miasm.core.cpu import gen_reg, gen_regs


PC, _ = gen_reg('PC')
PC_FETCH, _ = gen_reg('PC_FETCH')

R_LO, _ = gen_reg('R_LO')
R_HI, _ = gen_reg('R_HI')

exception_flags = ExprId('exception_flags', 32)

PC_init = ExprId("PC_init", 32)
PC_FETCH_init = ExprId("PC_FETCH_init", 32)

# General purpose register names, in list order (32 entries)
regs32_str = ["ZERO", 'AT', 'V0', 'V1'] +\
    ['A%d'%i for i in range(4)] +\
    ['T%d'%i for i in range(8)] +\
    ['S%d'%i for i in range(8)] +\
    ['T%d'%i for i in range(8, 10)] +\
    ['K0', 'K1'] +\
    ['GP', 'SP', 'FP', 'RA']

regs32_expr = [ExprId(x, 32) for x in regs32_str]
ZERO = regs32_expr[0]

regs_flt_str = ['F%d'%i for i in range(0x20)]

regs_fcc_str = ['FCC%d'%i for i in range(8)]

# NOTE(review): R_LO / R_HI are already bound by gen_reg() above and are
# rebound here; kept as in the original.
R_LO = ExprId('R_LO', 32)
R_HI = ExprId('R_HI', 32)

R_LO_init = ExprId('R_LO_init', 32)
R_HI_init = ExprId('R_HI_init', 32)


# Coprocessor 0 register names: generic "CPR0_<n>" names, with some entries
# replaced by a specific name
cpr0_str = ["CPR0_%d"%x for x in range(0x100)]
cpr0_str[0] = "INDEX"
cpr0_str[16] = "ENTRYLO0"
cpr0_str[24] = "ENTRYLO1"
cpr0_str[40] = "PAGEMASK"
cpr0_str[72] = "COUNT"
cpr0_str[80] = "ENTRYHI"
cpr0_str[104] = "CAUSE"
cpr0_str[112] = "EPC"
cpr0_str[128] = "CONFIG"
cpr0_str[152] = "WATCHHI"

regs_cpr0_expr, regs_cpr0_init, regs_cpr0_info = gen_regs(cpr0_str, globals())

gpregs_expr, gpregs_init, gpregs = gen_regs(regs32_str, globals())
regs_flt_expr, regs_flt_init, fltregs = gen_regs(regs_flt_str, globals(), sz=64)
regs_fcc_expr, regs_fcc_init, fccregs = gen_regs(regs_fcc_str, globals())


all_regs_ids = [PC, PC_FETCH, R_LO, R_HI, exception_flags] + gpregs_expr + regs_flt_expr + \
    regs_fcc_expr + regs_cpr0_expr
all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids])
all_regs_ids_init = [ExprId("%s_init" % reg.name, reg.size) for reg in all_regs_ids]
all_regs_ids_no_alias = all_regs_ids[:]

# The same register list is used for both endianness attributes
attrib_to_regs = {
    'l': all_regs_ids_no_alias,
    'b': all_regs_ids_no_alias,
}

# Map each register to its "<name>_init" counterpart
regs_init = {}
for i, r in enumerate(all_regs_ids):
    regs_init[r] = all_regs_ids_init[i]
# ---------------------------------------------------------------------------
# Patch metadata for this file (kept from the original diff header):
#   diff --git a/miasm/arch/mips32/sem.py b/miasm/arch/mips32/sem.py
#   new file mode 100644
#   index 00000000..5fc491a7
#   --- /dev/null
#   +++ b/miasm/arch/mips32/sem.py
#   @@ -0,0 +1,520 @@
# ---------------------------------------------------------------------------
import miasm.expression.expression as m2_expr
from miasm.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock
from miasm.arch.mips32.arch import mn_mips32
from miasm.arch.mips32.regs import R_LO, R_HI, PC, RA, ZERO, exception_flags
from miasm.core.sembuilder import SemBuilder
from miasm.jitter.csts import EXCEPT_DIV_BY_ZERO


# SemBuilder context
ctx = {
    "R_LO": R_LO,
    "R_HI": R_HI,
    "PC": PC,
    "RA": RA,
    "m2_expr": m2_expr
}

sbuild = SemBuilder(ctx)

# NOTE: the bodies of the functions decorated with @sbuild.parse are parsed
# by SemBuilder from their source code. Names such as ir, instr, ExprLoc,
# ExprOp, ExprInt, ExprCond, ExprCompose, i16, i32, mem8 and mem16 are not
# defined in this module; presumably they are provided by SemBuilder.


@sbuild.parse
def addiu(arg1, arg2, arg3):
    """Adds @arg2 and @arg3 and stores the result in a register @arg1"""
    arg1 = arg2 + arg3

@sbuild.parse
def lw(arg1, arg2):
    "A word is loaded into a register @arg1 from the specified address @arg2."
    arg1 = arg2

@sbuild.parse
def sw(arg1, arg2):
    "The contents of @arg1 is stored at the specified address @arg2."
    arg2 = arg1

@sbuild.parse
def jal(arg1):
    "Jumps to the calculated address @arg1 and stores the return address in $RA"
    PC = arg1
    ir.IRDst = arg1
    RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size)

@sbuild.parse
def jalr(arg1, arg2):
    """Jump to an address stored in a register @arg1, and store the return
    address in another register @arg2"""
    PC = arg1
    ir.IRDst = arg1
    arg2 = ExprLoc(ir.get_next_break_loc_key(instr), arg2.size)

# Same effects as jal: jump to @arg1 and store the return address in $RA
@sbuild.parse
def bal(arg1):
    PC = arg1
    ir.IRDst = arg1
    RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size)

# Unconditional branch to @arg1
@sbuild.parse
def l_b(arg1):
    PC = arg1
    ir.IRDst = arg1

@sbuild.parse
def lbu(arg1, arg2):
    """A byte is loaded (unsigned extended) into a register @arg1 from the
    specified address @arg2."""
    arg1 = mem8[arg2.ptr].zeroExtend(32)

@sbuild.parse
def lhu(arg1, arg2):
    """A half-word (16 bits) is loaded (unsigned extended) into a register
    @arg1 from the specified address @arg2."""
    arg1 = mem16[arg2.ptr].zeroExtend(32)

@sbuild.parse
def lb(arg1, arg2):
    """A byte is loaded (sign extended) into a register @arg1 from the
    specified address @arg2."""
    arg1 = mem8[arg2.ptr].signExtend(32)

@sbuild.parse
def beq(arg1, arg2, arg3):
    "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq"
    dst = arg3 if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size)
    PC = dst
    ir.IRDst = dst

@sbuild.parse
def bgez(arg1, arg2):
    """Branches on @arg2 if the quantities of register @arg1 is greater than or
    equal to zero"""
    dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if ExprOp(m2_expr.TOK_INF_SIGNED, arg1, ExprInt(0, arg1.size)) else arg2
    PC = dst
    ir.IRDst = dst

@sbuild.parse
def bne(arg1, arg2, arg3):
    """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT
    equal"""
    dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else arg3
    PC = dst
    ir.IRDst = dst

@sbuild.parse
def lui(arg1, arg2):
    """The immediate value @arg2 is shifted left 16 bits and stored in the
    register @arg1. The lower 16 bits are zeroes."""
    arg1 = ExprCompose(i16(0), arg2[:16])

@sbuild.parse
def nop():
    """Do nothing"""

@sbuild.parse
def j(arg1):
    """Jump to an address @arg1"""
    PC = arg1
    ir.IRDst = arg1

@sbuild.parse
def l_or(arg1, arg2, arg3):
    """Bitwise logical ors two registers @arg2, @arg3 and stores the result in a
    register @arg1"""
    arg1 = arg2 | arg3

@sbuild.parse
def nor(arg1, arg2, arg3):
    """Bitwise logical Nors two registers @arg2, @arg3 and stores the result in
    a register @arg1"""
    arg1 = (arg2 | arg3) ^ i32(-1)

@sbuild.parse
def l_and(arg1, arg2, arg3):
    """Bitwise logical ands two registers @arg2, @arg3 and stores the result in
    a register @arg1"""
    arg1 = arg2 & arg3

# Extracts @arg4 bits of @arg2 starting at bit @arg3, zero extended to 32 bits
@sbuild.parse
def ext(arg1, arg2, arg3, arg4):
    pos = int(arg3)
    size = int(arg4)
    arg1 = arg2[pos:pos + size].zeroExtend(32)

@sbuild.parse
def mul(arg1, arg2, arg3):
    """Multiplies @arg2 by @arg3 and stores the result in @arg1."""
    arg1 = 'imul'(arg2, arg3)

@sbuild.parse
def sltu(arg1, arg2, arg3):
    """If @arg2 is less than @arg3 (unsigned), @arg1 is set to one. It gets zero
    otherwise."""
    arg1 = ExprCond(
        ExprOp(m2_expr.TOK_INF_UNSIGNED, arg2, arg3),
        ExprInt(1, arg1.size),
        ExprInt(0, arg1.size)
    )

@sbuild.parse
def slt(arg1, arg2, arg3):
    """If @arg2 is less than @arg3 (signed), @arg1 is set to one. It gets zero
    otherwise."""
    arg1 = ExprCond(
        ExprOp(m2_expr.TOK_INF_SIGNED, arg2, arg3),
        ExprInt(1, arg1.size),
        ExprInt(0, arg1.size)
    )


# Subtracts @arg3 from @arg2 and stores the result in @arg1
@sbuild.parse
def l_sub(arg1, arg2, arg3):
    arg1 = arg2 - arg3

@sbuild.parse
def sb(arg1, arg2):
    """The least significant byte of @arg1 is stored at the specified address
    @arg2."""
    mem8[arg2.ptr] = arg1[:8]

# The 16 low bits of @arg1 are stored at the specified address @arg2
@sbuild.parse
def sh(arg1, arg2):
    mem16[arg2.ptr] = arg1[:16]

# @arg1 receives @arg2 only if @arg3 is not zero
@sbuild.parse
def movn(arg1, arg2, arg3):
    if arg3:
        arg1 = arg2

# @arg1 receives @arg2 only if @arg3 is zero
@sbuild.parse
def movz(arg1, arg2, arg3):
    if not arg3:
        arg1 = arg2

@sbuild.parse
def srl(arg1, arg2, arg3):
    """Shifts a register value @arg2 right by the shift amount @arg3 and
    places the value in the destination register @arg1.
    Zeroes are shifted in."""
    arg1 = arg2 >> arg3

@sbuild.parse
def sra(arg1, arg2, arg3):
    """Shifts a register value @arg2 right by the shift amount @arg3 and
    places the value in the destination register @arg1. The sign bit is shifted
    in."""
    arg1 = 'a>>'(arg2, arg3)

# Same operation as sra, with the shift amount @arg3 masked with 0x1F
@sbuild.parse
def srav(arg1, arg2, arg3):
    arg1 = 'a>>'(arg2, arg3 & i32(0x1F))

# Shifts @arg2 left by the shift amount @arg3 and stores the result in @arg1
@sbuild.parse
def sll(arg1, arg2, arg3):
    arg1 = arg2 << arg3

@sbuild.parse
def srlv(arg1, arg2, arg3):
    """Shifts a register value @arg2 right by the amount specified in @arg3 and
    places the value in the destination register @arg1.
    Zeroes are shifted in."""
    arg1 = arg2 >> (arg3 & i32(0x1F))

@sbuild.parse
def sllv(arg1, arg2, arg3):
    """Shifts a register value @arg2 left by the amount specified in @arg3 and
    places the value in the destination register @arg1.
    Zeroes are shifted in."""
    arg1 = arg2 << (arg3 & i32(0x1F))

@sbuild.parse
def l_xor(arg1, arg2, arg3):
    """Exclusive ors two registers @arg2, @arg3 and stores the result in a
    register @arg1"""
    arg1 = arg2 ^ arg3

# The low byte of @arg2 is sign extended to 32 bits and stored in @arg1
@sbuild.parse
def seb(arg1, arg2):
    arg1 = arg2[:8].signExtend(32)

# The 16 low bits of @arg2 are sign extended to 32 bits and stored in @arg1
@sbuild.parse
def seh(arg1, arg2):
    arg1 = arg2[:16].signExtend(32)

@sbuild.parse
def bltz(arg1, arg2):
    """Branches on @arg2 if the register @arg1 is less than zero"""
    dst_o = arg2 if ExprOp(m2_expr.TOK_INF_SIGNED, arg1, ExprInt(0, arg1.size)) else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size)
    PC = dst_o
    ir.IRDst = dst_o

@sbuild.parse
def blez(arg1, arg2):
    """Branches on @arg2 if the register @arg1 is less than or equal to zero"""
    cond = ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size))
    dst_o = arg2 if cond else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size)
    PC = dst_o
    ir.IRDst = dst_o

@sbuild.parse
def bgtz(arg1, arg2):
    """Branches on @arg2 if the register @arg1 is greater than zero"""
    cond = ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size))
    dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if cond else arg2
    PC = dst_o
    ir.IRDst = dst_o

# The two bytes of each 16 bit half of @arg2 are swapped; result in @arg1
@sbuild.parse
def wsbh(arg1, arg2):
    arg1 = ExprCompose(arg2[8:16], arg2[0:8], arg2[24:32], arg2[16:24])

# @arg1 receives the '>>>' operation applied to @arg2 and @arg3
@sbuild.parse
def rotr(arg1, arg2, arg3):
    arg1 = '>>>'(arg2, arg3)

@sbuild.parse
def add_d(arg1, arg2, arg3):
    # XXX TODO check
    arg1 = 'fadd'(arg2, arg3)

@sbuild.parse
def sub_d(arg1, arg2, arg3):
    # XXX TODO check
    arg1 = 'fsub'(arg2, arg3)

@sbuild.parse
def div_d(arg1, arg2, arg3):
    # XXX TODO check
    arg1 = 'fdiv'(arg2, arg3)

@sbuild.parse
def mul_d(arg1, arg2, arg3):
    # XXX TODO check
    arg1 = 'fmul'(arg2, arg3)

@sbuild.parse
def mov_d(arg1, arg2):
    # XXX TODO check
    arg1 = arg2

# @arg2 is copied into @arg1
@sbuild.parse
def mfc0(arg1, arg2):
    arg1 = arg2

# @arg2 is copied into @arg1
@sbuild.parse
def mfc1(arg1, arg2):
    arg1 = arg2
+ +@sbuild.parse +def mtc0(arg1, arg2): + arg2 = arg1 + +@sbuild.parse +def mtc1(arg1, arg2): + arg2 = arg1 + +@sbuild.parse +def tlbwi(): + "TODO XXX" + +@sbuild.parse +def tlbp(): + "TODO XXX" + +def ins(ir, instr, a, b, c, d): + e = [] + pos = int(c) + l = int(d) + + my_slices = [] + if pos != 0: + my_slices.append((a[:pos], 0, pos)) + if l != 0: + my_slices.append((b[:l], pos, pos+l)) + if pos + l != 32: + my_slices.append((a[pos+l:], pos+l, 32)) + r = m2_expr.ExprCompose(my_slices) + e.append(m2_expr.ExprAssign(a, r)) + return e, [] + + +@sbuild.parse +def lwc1(arg1, arg2): + arg1 = ('mem_%.2d_to_single' % arg2.size)(arg2) + +@sbuild.parse +def swc1(arg1, arg2): + arg2 = ('single_to_mem_%.2d' % arg1.size)(arg1) + +@sbuild.parse +def c_lt_d(arg1, arg2, arg3): + arg1 = 'fcomp_lt'(arg2, arg3) + +@sbuild.parse +def c_eq_d(arg1, arg2, arg3): + arg1 = 'fcomp_eq'(arg2, arg3) + +@sbuild.parse +def c_le_d(arg1, arg2, arg3): + arg1 = 'fcomp_le'(arg2, arg3) + +@sbuild.parse +def bc1t(arg1, arg2): + dst_o = arg2 if arg1 else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) + PC = dst_o + ir.IRDst = dst_o + +@sbuild.parse +def bc1f(arg1, arg2): + dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 else arg2 + PC = dst_o + ir.IRDst = dst_o + +@sbuild.parse +def cvt_d_w(arg1, arg2): + # TODO XXX + arg1 = 'flt_d_w'(arg2) + +@sbuild.parse +def mult(arg1, arg2): + """Multiplies (signed) @arg1 by @arg2 and stores the result in $R_HI:$R_LO""" + size = arg1.size + result = arg1.signExtend(size * 2) * arg2.signExtend(size * 2) + R_LO = result[:32] + R_HI = result[32:] + +@sbuild.parse +def multu(arg1, arg2): + """Multiplies (unsigned) @arg1 by @arg2 and stores the result in $R_HI:$R_LO""" + size = arg1.size + result = arg1.zeroExtend(size * 2) * arg2.zeroExtend(size * 2) + R_LO = result[:32] + R_HI = result[32:] + +@sbuild.parse +def div(arg1, arg2): + """Divide (signed) @arg1 by @arg2 and stores the remaining/result in $R_HI/$R_LO""" + R_LO = 
ExprOp('sdiv' ,arg1, arg2) + R_HI = ExprOp('smod', arg1, arg2) + +@sbuild.parse +def divu(arg1, arg2): + """Divide (unsigned) @arg1 by @arg2 and stores the remaining/result in $R_HI/$R_LO""" + R_LO = ExprOp('udiv', arg1, arg2) + R_HI = ExprOp('umod', arg1, arg2) + +@sbuild.parse +def mfhi(arg1): + "The contents of register $R_HI are moved to the specified register @arg1." + arg1 = R_HI + +@sbuild.parse +def mflo(arg1): + "The contents of register R_LO are moved to the specified register @arg1." + arg1 = R_LO + +@sbuild.parse +def di(arg1): + "NOP" + +@sbuild.parse +def ei(arg1): + "NOP" + +@sbuild.parse +def ehb(arg1): + "NOP" + + +def teq(ir, instr, arg1, arg2): + e = [] + + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) + + do_except = [] + do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( + EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except.index, [AssignBlock(do_except, instr)]) + + cond = arg1 - arg2 + + + e = [] + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(cond, loc_next_expr, loc_except_expr))) + + return e, [blk_except] + + +mnemo_func = sbuild.functions +mnemo_func.update({ + 'add.d': add_d, + 'addu': addiu, + 'addi': addiu, + 'and': l_and, + 'andi': l_and, + 'b': l_b, + 'c.eq.d': c_eq_d, + 'c.le.d': c_le_d, + 'c.lt.d': c_lt_d, + 'cvt.d.w': cvt_d_w, + 'div.d': div_d, + 'ins': ins, + 'jr': j, + 'mov.d': mov_d, + 'mul.d': mul_d, + 'or': l_or, + 'ori': l_or, + 'slti': slt, + 'sltiu': sltu, + 'sub.d': sub_d, + 'subu': l_sub, + 'xor': l_xor, + 'xori': l_xor, + 'teq': teq, +}) + +def get_mnemo_expr(ir, instr, *args): + instr, extra_ir = mnemo_func[instr.name.lower()](ir, instr, *args) + return instr, extra_ir + +class ir_mips32l(IntermediateRepresentation): + + def __init__(self, loc_db=None): + 
IntermediateRepresentation.__init__(self, mn_mips32, 'l', loc_db) + self.pc = mn_mips32.getpc() + self.sp = mn_mips32.getsp() + self.IRDst = m2_expr.ExprId('IRDst', 32) + self.addrsize = 32 + + def get_ir(self, instr): + args = instr.args + instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) + + fixed_regs = { + self.pc: m2_expr.ExprInt(instr.offset + 4, 32), + ZERO: m2_expr.ExprInt(0, 32) + } + + instr_ir = [m2_expr.ExprAssign(expr.dst, expr.src.replace_expr(fixed_regs)) + for expr in instr_ir] + + new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fixed_regs)) + for irblock in extra_ir] + return instr_ir, new_extra_ir + + def get_next_instr(self, instr): + return self.loc_db.get_or_create_offset_location(instr.offset + 4) + + def get_next_break_loc_key(self, instr): + return self.loc_db.get_or_create_offset_location(instr.offset + 8) + +class ir_mips32b(ir_mips32l): + def __init__(self, loc_db=None): + self.addrsize = 32 + IntermediateRepresentation.__init__(self, mn_mips32, 'b', loc_db) + self.pc = mn_mips32.getpc() + self.sp = mn_mips32.getsp() + self.IRDst = m2_expr.ExprId('IRDst', 32) diff --git a/miasm/arch/msp430/__init__.py b/miasm/arch/msp430/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm/arch/msp430/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm/arch/msp430/arch.py b/miasm/arch/msp430/arch.py new file mode 100644 index 00000000..65dd435e --- /dev/null +++ b/miasm/arch/msp430/arch.py @@ -0,0 +1,587 @@ +#-*- coding:utf-8 -*- + +from builtins import range + +import logging +from pyparsing import * +from miasm.expression.expression import * +from miasm.core.cpu import * +from collections import defaultdict +from miasm.core.bin_stream import bin_stream +import miasm.arch.msp430.regs as regs_module +from miasm.arch.msp430.regs import * +from miasm.core.asm_ast import AstInt, AstId, AstMem, AstOp + +log = logging.getLogger("msp430dis") +console_handler = 
logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + +conditional_branch = ['jnz', 'jz', 'jnc', 'jc', + 'jn', 'jge', 'jl'] +unconditional_branch = ['jmp'] + +def cb_deref_nooff(tokens): + assert len(tokens) == 1 + result = AstMem(tokens[0], 16) + return result + + +def cb_deref_pinc(tokens): + assert len(tokens) == 1 + + result = AstOp('autoinc', *tokens) + return result + + +def cb_deref_off(tokens): + assert len(tokens) == 2 + result = AstMem(tokens[1] + tokens[0], 16) + return result + + +def cb_expr(tokens): + assert(len(tokens) == 1) + result = tokens[0] + return result + + +ARO = Suppress("@") +LPARENT = Suppress("(") +RPARENT = Suppress(")") + +PINC = Suppress("+") + +deref_nooff = (ARO + base_expr).setParseAction(cb_deref_nooff) +deref_pinc = (ARO + base_expr + PINC).setParseAction(cb_deref_pinc) +deref_off = (base_expr + LPARENT + gpregs.parser + RPARENT).setParseAction(cb_deref_off) +sreg_p = (deref_pinc | deref_nooff | deref_off | base_expr).setParseAction(cb_expr) + + + +class msp430_arg(m_arg): + def asm_ast_to_expr(self, value, loc_db): + if isinstance(value, AstId): + name = value.name + if isinstance(name, Expr): + return name + assert isinstance(name, str) + if name in gpregs.str: + index = gpregs.str.index(name) + reg = gpregs.expr[index] + return reg + loc_key = loc_db.get_or_create_name_location(value.name.encode()) + return ExprLoc(loc_key, 16) + if isinstance(value, AstOp): + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in value.args] + if None in args: + return None + return ExprOp(value.op, *args) + if isinstance(value, AstInt): + return ExprInt(value.value, 16) + if isinstance(value, AstMem): + ptr = self.asm_ast_to_expr(value.ptr, loc_db) + if ptr is None: + return None + return ExprMem(ptr, value.size) + return None + + +class additional_info(object): + + def __init__(self): + self.except_on_instr = False + + +class 
instruction_msp430(instruction): + __slots__ = [] + delayslot = 0 + + def dstflow(self): + if self.name.startswith('j'): + return True + return self.name in ['call'] + + @staticmethod + def arg2str(expr, index=None, loc_db=None): + if isinstance(expr, ExprId): + o = str(expr) + elif isinstance(expr, ExprInt): + o = str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + elif isinstance(expr, ExprOp) and expr.op == "autoinc": + o = "@%s+" % str(expr.args[0]) + elif isinstance(expr, ExprMem): + if isinstance(expr.ptr, ExprId): + if index == 0: + o = "@%s" % expr.ptr + else: + o = "0x0(%s)" % expr.ptr + elif isinstance(expr.ptr, ExprInt): + o = "@%s" % expr.ptr + elif isinstance(expr.ptr, ExprOp): + o = "%s(%s)" % (expr.ptr.args[1], expr.ptr.args[0]) + else: + raise NotImplementedError('unknown instance expr = %s' % type(expr)) + return o + + + def dstflow2label(self, loc_db): + expr = self.args[0] + if not isinstance(expr, ExprInt): + return + if self.name == "call": + addr = expr.arg + else: + addr = expr.arg + int(self.offset) + + loc_key = loc_db.get_or_create_offset_location(addr) + self.args[0] = ExprLoc(loc_key, expr.size) + + def breakflow(self): + if self.name in conditional_branch + unconditional_branch: + return True + if self.name.startswith('ret'): + return True + if self.name.startswith('int'): + return True + if self.name.startswith('mov') and self.args[1] == PC: + return True + return self.name in ['call'] + + def splitflow(self): + if self.name in conditional_branch: + return True + if self.name in unconditional_branch: + return False + return self.name in ['call'] + + def setdstflow(self, a): + return + + def is_subcall(self): + return self.name in ['call'] + + def getdstflow(self, loc_db): + return [self.args[0]] + + def get_symbol_size(self, symbol, loc_db): + return 16 + + def fixDstOffset(self): + e = self.args[0] + if self.offset is None: + raise ValueError('symbol not 
resolved %s' % l) + if not isinstance(e, ExprInt): + # raise ValueError('dst must be int or label') + log.warning('dynamic dst %r', e) + return + + # Call argument is an absolute offset + # Other offsets are relative to instruction offset + if self.name != "call": + self.args[0] = ExprInt(int(e) - self.offset, 16) + + def get_info(self, c): + pass + + def __str__(self): + o = super(instruction_msp430, self).__str__() + return o + + def get_args_expr(self): + args = [] + for a in self.args: + args.append(a) + return args + + +mode_msp430 = None + + +class mn_msp430(cls_mn): + name = "msp430" + regs = regs_module + all_mn = [] + bintree = {} + num = 0 + delayslot = 0 + pc = {None: PC} + sp = {None: SP} + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + instruction = instruction_msp430 + max_instruction_len = 8 + + @classmethod + def getpc(cls, attrib): + return PC + + @classmethod + def getsp(cls, attrib): + return SP + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l % 16 == 00, "len %r" % l + + @classmethod + def getbits(cls, bs, attrib, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start // 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = b"" + for _ in range(l): + n_offset = (offset & ~1) + 1 - offset % 2 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + def decoded2bytes(self, result): + tmp = super(mn_msp430, self).decoded2bytes(result) + out = [] + for x in tmp: + o = b"" + while x: + o += x[:2][::-1] + x = x[2:] + out.append(o) + return out + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return 
[(subcls, name, bases, dct, fields)] + + def additional_info(self): + info = additional_info() + return info + + @classmethod + def getmn(cls, name): + return name.upper() + + def reset_class(self): + super(mn_msp430, self).reset_class() + + def getnextflow(self, loc_db): + raise NotImplementedError('not fully functional') + + +def addop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_msp430,), dct) + + +class bw_mn(bs_mod_name): + prio = 5 + mn_mod = ['.w', '.b'] + + +class msp430_sreg_arg(reg_noarg, msp430_arg): + prio = default_prio + 1 + reg_info = gpregs + parser = sreg_p + + def decode(self, v): + size = 16 + if hasattr(self.parent, 'size'): + size = [16, 8][self.parent.size.value] + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.a_s.value == 0b00: + if e == R3: + self.expr = ExprInt(0, size) + else: + self.expr = e + elif self.parent.a_s.value == 0b01: + if e == SR: + self.expr = ExprMem(ExprInt(self.parent.off_s.value, 16), size) + elif e == R3: + self.expr = ExprInt(1, size) + else: + self.expr = ExprMem( + e + ExprInt(self.parent.off_s.value, 16), size) + elif self.parent.a_s.value == 0b10: + if e == SR: + self.expr = ExprInt(4, size) + elif e == R3: + self.expr = ExprInt(2, size) + else: + self.expr = ExprMem(e, size) + elif self.parent.a_s.value == 0b11: + if e == SR: + self.expr = ExprInt(8, size) + elif e == R3: + if self.parent.size.value == 0: + self.expr = ExprInt(0xffff, size) + else: + self.expr = ExprInt(0xff, size) + elif e == PC: + self.expr = ExprInt(self.parent.off_s.value, size) + else: + self.expr = ExprOp('autoinc', e) + else: + raise NotImplementedError( + "unknown value self.parent.a_s.value = " + + "%d" % self.parent.a_s.value) + return True + + def encode(self): + e = self.expr + if e in self.reg_info.expr: + self.parent.a_s.value = 0 + self.value = self.reg_info.expr.index(e) + elif isinstance(e, ExprInt): + v = 
int(e) + if v == 0xffff and self.parent.size.value == 0: + self.parent.a_s.value = 0b11 + self.value = 3 + elif v == 0xff and self.parent.size.value == 1: + self.parent.a_s.value = 0b11 + self.value = 3 + elif v == 2: + self.parent.a_s.value = 0b10 + self.value = 3 + elif v == 1: + self.parent.a_s.value = 0b01 + self.value = 3 + elif v == 8: + self.parent.a_s.value = 0b11 + self.value = 2 + elif v == 4: + self.parent.a_s.value = 0b10 + self.value = 2 + elif v == 0: + self.parent.a_s.value = 0b00 + self.value = 3 + else: + self.parent.a_s.value = 0b11 + self.value = 0 + self.parent.off_s.value = v + elif isinstance(e, ExprMem): + if isinstance(e.ptr, ExprId): + self.parent.a_s.value = 0b10 + self.value = self.reg_info.expr.index(e.ptr) + elif isinstance(e.ptr, ExprInt): + self.parent.a_s.value = 0b01 + self.value = self.reg_info.expr.index(SR) + self.parent.off_s.value = int(e.ptr) + elif isinstance(e.ptr, ExprOp): + self.parent.a_s.value = 0b01 + self.value = self.reg_info.expr.index(e.ptr.args[0]) + self.parent.off_s.value = int(e.ptr.args[1]) + else: + raise NotImplementedError( + 'unknown instance e.ptr = %s' % type(e.ptr)) + elif isinstance(e, ExprOp) and e.op == "autoinc": + self.parent.a_s.value = 0b11 + self.value = self.reg_info.expr.index(e.args[0]) + else: + raise NotImplementedError('unknown instance e = %s' % type(e)) + return True + + +class msp430_dreg_arg(msp430_sreg_arg): + prio = default_prio + 1 + reg_info = gpregs + parser = sreg_p + + def decode(self, v): + if hasattr(self.parent, 'size'): + size = [16, 8][self.parent.size.value] + else: + size = 16 + + v = v & self.lmask + e = self.reg_info.expr[v] + if self.parent.a_d.value == 0: + self.expr = e + elif self.parent.a_d.value == 1: + if e == SR: + x = ExprInt(self.parent.off_d.value, 16) + else: + x = e + ExprInt(self.parent.off_d.value, 16) + self.expr = ExprMem(x, size) + else: + raise NotImplementedError( + "unknown value self.parent.a_d.value = " + + "%d" % self.parent.a_d.value) + return 
True + + def encode(self): + e = self.expr + if e in self.reg_info.expr: + self.parent.a_d.value = 0 + self.value = self.reg_info.expr.index(e) + elif isinstance(e, ExprMem): + if isinstance(e.ptr, ExprId): + r, i = e.ptr, ExprInt(0, 16) + elif isinstance(e.ptr, ExprOp): + r, i = e.ptr.args[0], e.ptr.args[1] + elif isinstance(e.ptr, ExprInt): + r, i = SR, e.ptr + else: + raise NotImplementedError( + 'unknown instance e.arg = %s' % type(e.ptr)) + self.parent.a_d.value = 1 + self.value = self.reg_info.expr.index(r) + self.parent.off_d.value = int(i) + else: + raise NotImplementedError('unknown instance e = %s' % type(e)) + return True + +class bs_cond_off_s(bs_cond): + + @classmethod + def flen(cls, mode, v): + if v['a_s'] == 0b00: + return None + elif v['a_s'] == 0b01: + if v['sreg'] in [3]: + return None + else: + return 16 + elif v['a_s'] == 0b10: + return None + elif v['a_s'] == 0b11: + """ + if v['sreg'] in [2, 3]: + return None + else: + return 16 + """ + if v['sreg'] in [0]: + return 16 + else: + return None + else: + raise NotImplementedError("unknown value v[a_s] = %d" % v['a_s']) + + def encode(self): + return super(bs_cond_off_s, self).encode() + + def decode(self, v): + if self.l == 0: + self.value = None + self.value = v + return True + + +class bs_cond_off_d(bs_cond_off_s): + + @classmethod + def flen(cls, mode, v): + if v['a_d'] == 0: + return None + elif v['a_d'] == 1: + return 16 + else: + raise NotImplementedError("unknown value v[a_d] = %d" % v['a_d']) + + +class msp430_offs(imm_noarg, msp430_arg): + parser = base_expr + + def int2expr(self, v): + if v & ~self.intmask != 0: + return None + return ExprInt(v, 16) + + def decodeval(self, v): + v <<= 1 + v += self.parent.l + return v + + def encodeval(self, v): + plen = self.parent.l + self.l + assert(plen % 8 == 0) + v -= plen // 8 + if v % 2 != 0: + return False + return v >> 1 + + def decode(self, v): + v = v & self.lmask + if (1 << (self.l - 1)) & v: + v |= ~0 ^ self.lmask + v = self.decodeval(v) + 
self.expr = ExprInt(v, 16) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr) + if (1 << (self.l - 1)) & v: + v = -((0xffff ^ v) + 1) + v = self.encodeval(v) + self.value = (v & 0xffff) & self.lmask + return True + + +off_s = bs(l=16, order=-10, cls=(bs_cond_off_s,), fname = "off_s") +off_d = bs(l=16, order=-10, cls=(bs_cond_off_d,), fname = "off_d") + +a_s = bs(l=2, order=-4, fname='a_s') +a_d = bs(l=1, order=-6, fname='a_d') + +a_d2 = bs(l=2, order=-2, fname='a_d') + +sreg = bs(l=4, order=-3, cls=(msp430_sreg_arg,), fname='sreg') +dreg = bs(l=4, order=-5, cls=(msp430_dreg_arg,), fname='dreg') + +bw = bw_mn(l=1, order=-10, mn_mod=['.w', '.b'], fname='size') + +bs_f1 = bs_name( + l=4, name={ + 'mov': 4, 'add': 5, 'addc': 6, 'subc': 7, 'sub': 8, 'cmp': 9, + 'dadd': 10, 'bit': 11, 'bic': 12, 'bis': 13, 'xor': 14, 'and': 15}) +addop("f1", [bs_f1, sreg, a_d, bw, a_s, dreg, off_s, off_d]) + +bs_f2 = bs_name(l=3, name={'rrc': 0, 'rra': 2, + 'push': 4}) +addop("f2_1", [bs('000100'), bs_f2, bw, a_s, sreg, off_s]) + + +bs_f2_nobw = bs_name(l=3, name={'swpb': 1, 'sxt': 3, + 'call': 5}) +addop("f2_2", [bs('000100'), bs_f2_nobw, bs('0'), a_s, sreg, off_s]) + +# Offset must be decoded in last position to have final instruction len +offimm = bs(l=10, cls=(msp430_offs,), fname="offs", order=-1) + +bs_f2_jcc = bs_name(l=3, name={'jnz': 0, 'jz': 1, 'jnc': 2, 'jc': 3, 'jn': 4, + 'jge': 5, 'jl': 6, 'jmp': 7}) +addop("f2_3", [bs('001'), bs_f2_jcc, offimm]) + diff --git a/miasm/arch/msp430/ctype.py b/miasm/arch/msp430/ctype.py new file mode 100644 index 00000000..0e6562e8 --- /dev/null +++ b/miasm/arch/msp430/ctype.py @@ -0,0 +1,68 @@ +from miasm.core.objc import CLeafTypes, ObjCDecl, PADDING_TYPE_NAME +from miasm.core.ctypesmngr import CTypeId, CTypePtr + + +class CTypeMSP430_unk(CLeafTypes): + """Define C types sizes/alignment for msp430 architecture""" + + obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 
1/align 1 + + obj_char = ObjCDecl("char", 1, 1) + obj_short = ObjCDecl("short", 2, 2) + obj_int = ObjCDecl("int", 2, 2) + obj_long = ObjCDecl("long", 2, 2) + + obj_uchar = ObjCDecl("uchar", 1, 1) + obj_ushort = ObjCDecl("ushort", 2, 2) + obj_uint = ObjCDecl("uint", 2, 2) + obj_ulong = ObjCDecl("ulong", 2, 2) + + obj_void = ObjCDecl("void", 1, 1) + + obj_enum = ObjCDecl("enum", 2, 2) + + obj_float = ObjCDecl("float", 4, 4) + obj_double = ObjCDecl("double", 8, 8) + obj_ldouble = ObjCDecl("ldouble", 16, 16) + + def __init__(self): + self.types = { + CTypeId(PADDING_TYPE_NAME): self.obj_pad, + + CTypeId('char'): self.obj_char, + CTypeId('short'): self.obj_short, + CTypeId('int'): self.obj_int, + CTypeId('void'): self.obj_void, + CTypeId('long',): self.obj_long, + CTypeId('float'): self.obj_float, + CTypeId('double'): self.obj_double, + + CTypeId('signed', 'char'): self.obj_char, + CTypeId('unsigned', 'char'): self.obj_uchar, + + CTypeId('short', 'int'): self.obj_short, + CTypeId('signed', 'short'): self.obj_short, + CTypeId('signed', 'short', 'int'): self.obj_short, + CTypeId('unsigned', 'short'): self.obj_ushort, + CTypeId('unsigned', 'short', 'int'): self.obj_ushort, + + CTypeId('unsigned', ): self.obj_uint, + CTypeId('unsigned', 'int'): self.obj_uint, + CTypeId('signed', 'int'): self.obj_int, + + CTypeId('long', 'int'): self.obj_long, + CTypeId('long', 'long'): self.obj_long, + CTypeId('long', 'long', 'int'): self.obj_long, + CTypeId('signed', 'long', 'long'): self.obj_long, + CTypeId('unsigned', 'long', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, + + CTypeId('signed', 'long'): self.obj_long, + CTypeId('unsigned', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'int'): self.obj_ulong, + + CTypeId('long', 'double'): self.obj_ldouble, + CTypePtr(CTypeId('void')): self.obj_uint, + } diff --git 
a/miasm/arch/msp430/disasm.py b/miasm/arch/msp430/disasm.py new file mode 100644 index 00000000..eff77d2d --- /dev/null +++ b/miasm/arch/msp430/disasm.py @@ -0,0 +1,8 @@ +from miasm.core.asmblock import disasmEngine +from miasm.arch.msp430.arch import mn_msp430 + + +class dis_msp430(disasmEngine): + + def __init__(self, bs=None, **kwargs): + super(dis_msp430, self).__init__(mn_msp430, None, bs, **kwargs) diff --git a/miasm/arch/msp430/ira.py b/miasm/arch/msp430/ira.py new file mode 100644 index 00000000..72889149 --- /dev/null +++ b/miasm/arch/msp430/ira.py @@ -0,0 +1,31 @@ +#-*- coding:utf-8 -*- + +from miasm.ir.analysis import ira +from miasm.arch.msp430.sem import ir_msp430 +from miasm.ir.ir import AssignBlock +from miasm.expression.expression import * + +class ir_a_msp430_base(ir_msp430, ira): + + def __init__(self, loc_db=None): + ir_msp430.__init__(self, loc_db) + self.ret_reg = self.arch.regs.R15 + + def call_effects(self, addr, instr): + call_assignblk = AssignBlock( + [ + ExprAssign(self.ret_reg, ExprOp('call_func_ret', addr, self.sp, self.arch.regs.R15)), + ExprAssign(self.sp, ExprOp('call_func_stack', addr, self.sp)) + ], + instr + ) + return [call_assignblk], [] + +class ir_a_msp430(ir_a_msp430_base): + + def __init__(self, loc_db=None): + ir_a_msp430_base.__init__(self, loc_db) + + def get_out_regs(self, _): + return set([self.ret_reg, self.sp]) + diff --git a/miasm/arch/msp430/jit.py b/miasm/arch/msp430/jit.py new file mode 100644 index 00000000..ea30922c --- /dev/null +++ b/miasm/arch/msp430/jit.py @@ -0,0 +1,42 @@ +from miasm.jitter.jitload import Jitter +from miasm.core.locationdb import LocationDB +from miasm.core.utils import pck16, upck16 +from miasm.arch.msp430.sem import ir_msp430 + +import logging + +log = logging.getLogger('jit_msp430') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + +class jitter_msp430(Jitter): + + def __init__(self, 
*args, **kwargs): + sp = LocationDB() + Jitter.__init__(self, ir_msp430(sp), *args, **kwargs) + self.vm.set_little_endian() + + def push_uint16_t(self, value): + regs = self.cpu.get_gpreg() + regs['SP'] -= 2 + self.cpu.set_gpreg(regs) + self.vm.set_mem(regs['SP'], pck16(value)) + + def pop_uint16_t(self): + regs = self.cpu.get_gpreg() + value = self.vm.get_u16(regs['SP']) + regs['SP'] += 2 + self.cpu.set_gpreg(regs) + return value + + def get_stack_arg(self, index): + regs = self.cpu.get_gpreg() + value = self.vm.get_u16(regs['SP'] + 2 * index) + return value + + def init_run(self, *args, **kwargs): + Jitter.init_run(self, *args, **kwargs) + self.cpu.PC = self.pc + diff --git a/miasm/arch/msp430/regs.py b/miasm/arch/msp430/regs.py new file mode 100644 index 00000000..2bcaa055 --- /dev/null +++ b/miasm/arch/msp430/regs.py @@ -0,0 +1,116 @@ +from builtins import range +from miasm.expression.expression import * +from miasm.core.cpu import reg_info + + +# GP + +regs16_str = ["PC", "SP", "SR"] + ["R%d" % i for i in range(3, 16)] +regs16_expr = [ExprId(x, 16) for x in regs16_str] + +exception_flags = ExprId('exception_flags', 32) + +gpregs = reg_info(regs16_str, regs16_expr) + +PC = regs16_expr[0] +SP = regs16_expr[1] +SR = regs16_expr[2] +R3 = regs16_expr[3] +R4 = regs16_expr[4] +R5 = regs16_expr[5] +R6 = regs16_expr[6] +R7 = regs16_expr[7] +R8 = regs16_expr[8] +R9 = regs16_expr[9] +R10 = regs16_expr[10] +R11 = regs16_expr[11] +R12 = regs16_expr[12] +R13 = regs16_expr[13] +R14 = regs16_expr[14] +R15 = regs16_expr[15] + +PC_init = ExprId("PC_init", 16) +SP_init = ExprId("SP_init", 16) +SR_init = ExprId("SR_init", 16) +R3_init = ExprId("R3_init", 16) +R4_init = ExprId("R4_init", 16) +R5_init = ExprId("R5_init", 16) +R6_init = ExprId("R6_init", 16) +R7_init = ExprId("R7_init", 16) +R8_init = ExprId("R8_init", 16) +R9_init = ExprId("R9_init", 16) +R10_init = ExprId("R10_init", 16) +R11_init = ExprId("R11_init", 16) +R12_init = ExprId("R12_init", 16) +R13_init = 
ExprId("R13_init", 16) +R14_init = ExprId("R14_init", 16) +R15_init = ExprId("R15_init", 16) + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' +reg_cpuoff = 'cpuoff' +reg_gie = 'gie' +reg_osc = 'osc' +reg_scg0 = 'scg0' +reg_scg1 = 'scg1' +reg_res = 'res' + +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + +cpuoff = ExprId(reg_cpuoff, size=1) +gie = ExprId(reg_gie, size=1) +osc = ExprId(reg_osc, size=1) +scg0 = ExprId(reg_scg0, size=1) +scg1 = ExprId(reg_scg1, size=1) +res = ExprId(reg_res, size=7) + + +zf_init = ExprId("zf_init", size=1) +nf_init = ExprId("nf_init", size=1) +of_init = ExprId("of_init", size=1) +cf_init = ExprId("cf_init", size=1) + + +cpuoff_init = ExprId("cpuoff_init", size=1) +gie_init = ExprId("gie_init", size=1) +osc_init = ExprId("osc_init", size=1) +scg0_init = ExprId("scg0_init", size=1) +scg1_init = ExprId("scg1_init", size=1) +res_init = ExprId("res_init", size=7) + + +all_regs_ids = [ + PC, SP, SR, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, of, cf, + cpuoff, gie, osc, scg0, scg1, res, +] + +all_regs_ids_no_alias = all_regs_ids + +attrib_to_regs = { + 'l': all_regs_ids_no_alias, + 'b': all_regs_ids_no_alias, +} + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [PC_init, SP_init, SR_init, R3_init, + R4_init, R5_init, R6_init, R7_init, + R8_init, R9_init, R10_init, R11_init, + R12_init, R13_init, R14_init, R15_init, + zf_init, nf_init, of_init, cf_init, + cpuoff_init, gie_init, osc_init, + scg0_init, scg1_init, res_init, + ] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] + +regs_flt_expr = [] diff --git a/miasm/arch/msp430/sem.py b/miasm/arch/msp430/sem.py new file mode 100644 index 00000000..52056e5a --- /dev/null +++ b/miasm/arch/msp430/sem.py @@ -0,0 +1,509 @@ +#-*- coding:utf-8 -*- + +from miasm.expression.expression import * +from 
miasm.arch.msp430.regs import * +from miasm.arch.msp430.arch import mn_msp430 +from miasm.ir.ir import IntermediateRepresentation + + +# Utils +def hex2bcd(val): + "Return val as BCD" + try: + return int("%x" % val, 10) + except ValueError: + raise NotImplementedError("Not defined behaviour") + + +def bcd2hex(val): + "Return the hex value of a BCD" + try: + return int("0x%d" % val, 16) + except ValueError: + raise NotImplementedError("Not defined behaviour") + + +def reset_sr_res(): + return [ExprAssign(res, ExprInt(0, 7))] + + +def update_flag_cf_inv_zf(a): + return [ExprAssign(cf, ExprCond(a, ExprInt(1, 1), ExprInt(0, 1)))] + + +def update_flag_zf_eq(a, b): + return [ExprAssign(zf, ExprOp("FLAG_EQ_CMP", a, b))] + + +def update_flag_zf(a): + return [ExprAssign(zf, ExprOp("FLAG_EQ", a))] + + +def update_flag_nf(arg): + return [ + ExprAssign( + nf, + ExprOp("FLAG_SIGN_SUB", arg, ExprInt(0, arg.size)) + ) + ] + + +def update_flag_add_cf(op1, op2, res): + "Compute cf in @res = @op1 + @op2" + return [ExprAssign(cf, ExprOp("FLAG_ADD_CF", op1, op2))] + + +def update_flag_add_of(op1, op2, res): + "Compute of in @res = @op1 + @op2" + return [ExprAssign(of, ExprOp("FLAG_ADD_OF", op1, op2))] + + +# checked: ok for sbb add because b & c before +cf +def update_flag_sub_cf(op1, op2, res): + "Compote CF in @op1 - @op2" + return [ExprAssign(cf, ExprOp("FLAG_SUB_CF", op1, op2) ^ ExprInt(1, 1))] + + +def update_flag_sub_of(op1, op2, res): + "Compote OF in @res = @op1 - @op2" + return [ExprAssign(of, ExprOp("FLAG_SUB_OF", op1, op2))] + + +def update_flag_arith_sub_zn(arg1, arg2): + """ + Compute znp flags for (arg1 - arg2) + """ + e = [] + e += update_flag_zf_eq(arg1, arg2) + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, arg2))] + return e + + +def update_flag_arith_add_zn(arg1, arg2): + """ + Compute zf and nf flags for (arg1 + arg2) + """ + e = [] + e += update_flag_zf_eq(arg1, -arg2) + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] + return e + + + +def 
mng_autoinc(a, b, size): + e = [] + if not (isinstance(a, ExprOp) and a.op == "autoinc"): + return e, a, b + + a_r = a.args[0] + e.append(ExprAssign(a_r, a_r + ExprInt(size // 8, a_r.size))) + a = ExprMem(a_r, size) + if isinstance(b, ExprMem) and a_r in b.arg: + b = ExprMem(b.arg + ExprInt(size // 8, 16), b.size) + return e, a, b + +# Mnemonics + + +def mov_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + if isinstance(b, ExprMem): + b = ExprMem(b.arg, 8) + a = a[:8] + else: + a = a[:8].zeroExtend(16) + e.append(ExprAssign(b, a)) + return e, [] + + +def mov_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + e.append(ExprAssign(b, a)) + if b == ir.pc: + e.append(ExprAssign(ir.IRDst, a)) + return e, [] + + +def and_b(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 8) + arg1, arg2 = arg1[:8], arg2[:8] + res = arg1 & arg2 + e.append(ExprAssign(b, res.zeroExtend(16))) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", res, ExprInt(0, res.size)))] + e += reset_sr_res() + e += update_flag_cf_inv_zf(res) + e += [ExprAssign(of, ExprInt(0, 1))] + + return e, [] + + +def and_w(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 16) + res = arg1 & arg2 + e.append(ExprAssign(arg2, res)) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", res, ExprInt(0, res.size)))] + e += reset_sr_res() + e += update_flag_cf_inv_zf(res) + e += [ExprAssign(of, ExprInt(0, 1))] + + return e, [] + + +def bic_b(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 8) + c = (a[:8] ^ ExprInt(0xff, 8)) & b[:8] + c = c.zeroExtend(b.size) + e.append(ExprAssign(b, c)) + return e, [] + + +def bic_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + if b == SR: + # Special case + if a.is_int(1): + # cf + e.append(ExprAssign(cf, ExprInt(0, 1))) + return e, [] + c = (a ^ ExprInt(0xffff, 16)) & b + e.append(ExprAssign(b, c)) + return e, [] + + +def bis_w(ir, instr, a, b): + e, a, b 
= mng_autoinc(a, b, 16) + c = a | b + e.append(ExprAssign(b, c)) + return e, [] + + +def bit_w(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 16) + res = arg1 & arg2 + + e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] + e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", res, ExprInt(0, res.size)))] + e += reset_sr_res() + e += update_flag_cf_inv_zf(res) + e += [ExprAssign(of, ExprInt(0, 1))] + + return e, [] + + +def sub_w(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 16) + res = arg2 - arg1 + + e.append(ExprAssign(b, res)) + + e += update_flag_arith_sub_zn(arg2, arg1) + e += update_flag_sub_cf(arg2, arg1, res) + e += update_flag_sub_of(arg2, arg1, res) + e += reset_sr_res() + + # micrcorruption + # e += update_flag_sub_of(a, b, c) + # e += update_flag_sub_of(b, a, c) + return e, [] + + +def add_b(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 8) + if isinstance(arg2, ExprMem): + arg2 = ExprMem(arg2.arg, 8) + else: + arg2 = arg2[:8] + arg1 = arg1[:8] + res = arg2 + arg1 + e.append(ExprAssign(b, res)) + + e += update_flag_arith_add_zn(arg2, arg1) + e += update_flag_add_cf(arg2, arg1, res) + e += update_flag_add_of(arg2, arg1, res) + e += reset_sr_res() + + return e, [] + + +def add_w(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 16) + res = arg2 + arg1 + e.append(ExprAssign(b, res)) + + e += update_flag_arith_add_zn(arg2, arg1) + e += update_flag_add_cf(arg2, arg1, res) + e += update_flag_add_of(arg2, arg1, res) + e += reset_sr_res() + + return e, [] + + +def dadd_w(ir, instr, a, b): + e, a, b = mng_autoinc(a, b, 16) + # TODO: microcorruption no carryflag + c = ExprOp("bcdadd", b, a) # +zeroExtend(cf, 16)) + + e.append(ExprAssign(b, c)) + + # micrcorruption + e += update_flag_zf(a) + # e += update_flag_nf(a) + e += reset_sr_res() + + e.append(ExprAssign(cf, ExprOp("bcdadd_cf", b, a))) # +zeroExtend(cf, 16)))) + + # of : undefined + return e, [] + + +def xor_w(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 16) + res = arg2 ^ 
arg1 + e.append(ExprAssign(b, res)) + + e += [ExprAssign(zf, ExprOp('FLAG_EQ_CMP', arg2, arg1))] + e += update_flag_nf(res) + e += reset_sr_res() + e += update_flag_cf_inv_zf(c) + e.append(ExprAssign(of, arg2.msb() & arg1.msb())) + + return e, [] + + +def push_w(ir, instr, a): + e = [] + e.append(ExprAssign(ExprMem(SP - ExprInt(2, 16), 16), a)) + e.append(ExprAssign(SP, SP - ExprInt(2, 16))) + return e, [] + + +def call(ir, instr, a): + e, a, dummy = mng_autoinc(a, None, 16) + + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + + e.append(ExprAssign(ExprMem(SP - ExprInt(2, 16), 16), loc_next_expr)) + e.append(ExprAssign(SP, SP - ExprInt(2, 16))) + e.append(ExprAssign(PC, a)) + e.append(ExprAssign(ir.IRDst, a)) + return e, [] + + +def swpb(ir, instr, a): + e = [] + x, y = a[:8], a[8:16] + e.append(ExprAssign(a, ExprCompose(y, x))) + return e, [] + + +def cmp_w(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 16) + res = arg2 - arg1 + + e += update_flag_arith_sub_zn(arg2, arg1) + e += update_flag_sub_cf(arg2, arg1, res) + e += update_flag_sub_of(arg2, arg1, res) + e += reset_sr_res() + + return e, [] + + +def cmp_b(ir, instr, a, b): + e, arg1, arg2 = mng_autoinc(a, b, 8) + arg1, arg2 = arg1[:8], arg2[:8] + res = arg2 - arg1 + + e += update_flag_arith_sub_zn(arg2, arg1) + e += update_flag_sub_cf(arg2, arg1, res) + e += update_flag_sub_of(arg2, arg1, res) + e += reset_sr_res() + + return e, [] + + +def jz(ir, instr, a): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + e = [] + e.append(ExprAssign(PC, ExprCond(ExprOp("CC_EQ", zf), a, loc_next_expr))) + e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_EQ", zf), a, loc_next_expr))) + return e, [] + + +def jnz(ir, instr, a): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + e = [] + e.append(ExprAssign(PC, ExprCond(ExprOp("CC_EQ", zf), loc_next_expr, a))) + e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_EQ", zf), 
loc_next_expr, a))) + return e, [] + + +def jl(ir, instr, a): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + e = [] + e.append(ExprAssign(PC, ExprCond(ExprOp("CC_S<", nf, of), a, loc_next_expr))) + e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_S<", nf, of), a, loc_next_expr))) + return e, [] + + +def jc(ir, instr, a): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + e = [] + e.append(ExprAssign(PC, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), a, loc_next_expr))) + e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), a, loc_next_expr))) + return e, [] + + +def jnc(ir, instr, a): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + e = [] + e.append(ExprAssign(PC, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), loc_next_expr, a))) + e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), loc_next_expr, a))) + return e, [] + + +def jge(ir, instr, a): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + e = [] + e.append(ExprAssign(PC, ExprCond(ExprOp("CC_S>=", nf, of), a, loc_next_expr))) + e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_S>=", nf, of), a, loc_next_expr))) + return e, [] + + +def jmp(ir, instr, a): + e = [] + e.append(ExprAssign(PC, a)) + e.append(ExprAssign(ir.IRDst, a)) + return e, [] + + +def rrc_w(ir, instr, a): + e = [] + c = ExprCompose(a[1:16], cf) + e.append(ExprAssign(a, c)) + e.append(ExprAssign(cf, a[:1])) + + # micrcorruption + e += update_flag_zf(a) + # e += update_flag_nf(a) + e += reset_sr_res() + + e.append(ExprAssign(of, ExprInt(0, 1))) + return e, [] + + +def rra_w(ir, instr, a): + e = [] + c = ExprCompose(a[1:16], a[15:16]) + e.append(ExprAssign(a, c)) + # TODO: error in disasm microcorruption? 
+ # e.append(ExprAssign(cf, a[:1])) + + # micrcorruption + e += update_flag_zf(a) + # e += update_flag_nf(a) + e += reset_sr_res() + + e.append(ExprAssign(of, ExprInt(0, 1))) + return e, [] + + +def sxt(ir, instr, a): + e = [] + c = a[:8].signExtend(16) + e.append(ExprAssign(a, c)) + + e += update_flag_zf(a) + e += update_flag_nf(a) + e += reset_sr_res() + e += update_flag_cf_inv_zf(c) + e.append(ExprAssign(of, ExprInt(0, 1))) + + return e, [] + +mnemo_func = { + "mov.b": mov_b, + "mov.w": mov_w, + "and.b": and_b, + "and.w": and_w, + "bic.b": bic_b, + "bic.w": bic_w, + "bis.w": bis_w, + "bit.w": bit_w, + "sub.w": sub_w, + "add.b": add_b, + "add.w": add_w, + "push.w": push_w, + "dadd.w": dadd_w, + "xor.w": xor_w, + "call": call, + "swpb": swpb, + "cmp.w": cmp_w, + "cmp.b": cmp_b, + "jz": jz, + "jnz": jnz, + "jl": jl, + "jc": jc, + "jnc": jnc, + "jmp": jmp, + "jge": jge, + "rrc.w": rrc_w, + "rra.w": rra_w, + "sxt": sxt, +} + + +composed_sr = ExprCompose(cf, zf, nf, gie, cpuoff, osc, scg0, scg1, of, res) + + +def ComposeExprAssign(dst, src): + e = [] + for start, arg in dst.iter_args(): + e.append(ExprAssign(arg, src[start:start+arg.size])) + return e + + +class ir_msp430(IntermediateRepresentation): + + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_msp430, None, loc_db) + self.pc = PC + self.sp = SP + self.IRDst = ExprId('IRDst', 16) + self.addrsize = 16 + + def mod_pc(self, instr, instr_ir, extra_ir): + pass + + def get_ir(self, instr): + args = instr.args + instr_ir, extra_ir = mnemo_func[instr.name](self, instr, *args) + self.mod_sr(instr, instr_ir, extra_ir) + + return instr_ir, extra_ir + + def mod_sr(self, instr, instr_ir, extra_ir): + for i, x in enumerate(instr_ir): + x = ExprAssign(x.dst, x.src.replace_expr({SR: composed_sr})) + instr_ir[i] = x + if x.dst != SR: + continue + xx = ComposeExprAssign(composed_sr, x.src) + instr_ir[i:i+1] = xx + for i, x in enumerate(instr_ir): + x = ExprAssign(x.dst, x.src.replace_expr( + 
{self.pc: ExprInt(instr.offset + instr.l, 16)})) + instr_ir[i] = x + + if extra_ir: + raise NotImplementedError('not fully functional') diff --git a/miasm/arch/ppc/__init__.py b/miasm/arch/ppc/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm/arch/ppc/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm/arch/ppc/arch.py b/miasm/arch/ppc/arch.py new file mode 100644 index 00000000..8f700bff --- /dev/null +++ b/miasm/arch/ppc/arch.py @@ -0,0 +1,764 @@ +from builtins import range + +import logging +from pyparsing import * +from miasm.expression.expression import * +from miasm.core.cpu import * +from collections import defaultdict +from miasm.core.bin_stream import bin_stream +import miasm.arch.ppc.regs as regs_module +from miasm.arch.ppc.regs import * +from miasm.core.asm_ast import AstInt, AstId, AstMem, AstOp + +log = logging.getLogger("ppcdis") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.DEBUG) + +LPARENTHESIS = Suppress(Literal("(")) +RPARENTHESIS = Suppress(Literal(")")) + +def cb_deref_imm_reg(tokens): + if len(tokens) == 1: + return AstMem(tokens[0], 32) + elif len(tokens) == 2: + return AstMem(tokens[1] + tokens[0], 32) + else: + raise NotImplementedError('len(tokens) > 2') + + +deref_reg_disp = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) +deref_reg = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) + +deref = deref_reg | deref_reg_disp + + +class ppc_arg(m_arg): + def asm_ast_to_expr(self, arg, loc_db): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + loc_key = loc_db.get_or_create_name_location(arg.name.encode()) + return ExprLoc(loc_key, 32) + if isinstance(arg, AstOp): + args = 
[self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class additional_info(object): + + def __init__(self): + self.except_on_instr = False + self.bo_bi_are_defined = False + self.bi = 0 + self.bo = 0 + + +class instruction_ppc(instruction): + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_ppc, self).__init__(*args, **kargs) + + @staticmethod + def arg2str(e, pos = None, loc_db=None): + if isinstance(e, ExprId) or isinstance(e, ExprInt): + return str(e) + elif isinstance(e, ExprMem): + addr = e.ptr + if isinstance(addr, ExprInt) or isinstance(addr, ExprId): + out = '(%s)'%addr + elif isinstance(addr, ExprOp): + if len(addr.args) == 1: + out = '(%s)'%addr + elif len(addr.args) == 2: + out = '%s(%s)'%(addr.args[1], addr.args[0]) + else: + raise NotImplementedError('More than two args to ExprOp of address') + else: + raise NotImplementedError('Invalid memory expression') + return out + + return str(e) + + @staticmethod + def is_conditional_jump(s): + return (s[0] == 'B' and + s[1:3] in { 'DN', 'DZ', 'LT', 'GT', 'EQ', 'SO', + 'GE', 'LE', 'NE', 'NS' }) + + def dstflow(self): + name = self.name + if name[-1] == '+' or name[-1] == '-': + name = name[:-1] + return (name[0] == 'B' and + name[-2:] != 'LR' and + name[-3:] != 'LRL' and + name[-3:] != 'CTR' and + name[-4:] != 'CTRL') + + def dstflow2label(self, loc_db): + name = self.name + if name[-1] == '+' or name[-1] == '-': + name = name[:-1] + + if name[-1] == 'L': + name = name[:-1] + elif name[-2:] == 'LA': + name = name[:-2] + 'A' + + if name[-2:] != 'LR' and name[-3:] != 'CTR': + if len(self.args) == 2: + address_index = 1 + else: + address_index = 0 + e = self.args[address_index] + if not isinstance(e, 
ExprInt): + return + if name[-1] != 'A': + ad = e.arg + self.offset + else: + ad = e.arg + loc_key = loc_db.get_or_create_offset_location(ad) + s = ExprLoc(loc_key, e.size) + self.args[address_index] = s + + def breakflow(self): + return self.name[0] == 'B' + + def is_subcall(self): + name = self.name + if name[-1] == '+' or name[-1] == '-': + name = name[0:-1] + # Absolute calls (BLA, BCLA, ...) end in 'LA': compare the last two characters, as dstflow2label does + return name[0] == 'B' and (name[-1] == 'L' or name[-2:] == 'LA') + + def getdstflow(self, loc_db): + if 'LR' in self.name: + return [ LR ] + elif 'CTR' in self.name: + return [ CTR ] + elif len(self.args) == 2: + address_index = 1 + else: + address_index = 0 + return [ self.args[address_index] ] + + def splitflow(self): + ret = False + if self.is_conditional_jump(self.name): + if self.additional_info.bo & 0b10100 != 0b10100: + ret = True + ret = ret or self.is_subcall() + return ret + + def get_symbol_size(self, symbol, loc_db): + return 32 + + def fixDstOffset(self): + e = self.args[0] + if not isinstance(e, ExprInt): + log.debug('Dynamic destination offset %r' % e) + return + if self.name[-1] != 'A': + if self.offset is None: + raise ValueError('symbol not resolved %s' % self.l) + off = e.arg - (self.offset + self.l) + if int(off % 4): + raise ValueError('Offset %r must be a multiple of four' % off) + else: + off = e.arg + self.args[0] = ExprInt(off, 32) + + def get_args_expr(self): + args = [a for a in self.args] + return args + + def get_asm_offset(self, x): + return ExprInt_from(x, self.offset) + + +class mn_ppc(cls_mn): + delayslot = 0 + name = "ppc32" + regs = regs_module + bintree = {} + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + instruction = instruction_ppc + max_instruction_len = 4 + + @classmethod + def getpc(cls, attrib = None): + return PC + + @classmethod + def getsp(cls, attrib = None): + return R1 + + def additional_info(self): + info = additional_info() + info.bo_bi_are_defined = False + if hasattr(self,
"bo"): + info.bo_bi_are_defined = True + info.bi = int(self.bi.strbits, 2) + info.bo = int(self.bo.strbits, 2) + return info + + @classmethod + def getbits(cls, bs, attrib, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) + while n: + offset = start // 8 + n_offset = cls.endian_offset(attrib, offset) + c = cls.getbytes(bs, n_offset, 1) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def endian_offset(cls, attrib, offset): + if attrib == "b": + return offset + else: + raise NotImplementedError("bad attrib") + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 32, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + l = sum([x.l for x in fields]) + return fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def post_dis(self): + return self + + def value(self, mode): + v = super(mn_ppc, self).value(mode) + if mode == 'b': + return [x for x in v] + else: + raise NotImplementedError("bad attrib") + + def get_symbol_size(self, symbol, loc_db, mode): + return 32 + + +class ppc_reg(reg_noarg, ppc_arg): + pass + + +class ppc_gpreg_noarg(reg_noarg): + reg_info = gpregs + parser = reg_info.parser + +class ppc_gpreg_or_0_noarg(reg_noarg): + reg_info = gpregs + parser = reg_info.parser + + def decode(self, v): + ret = super(ppc_gpreg_or_0_noarg, self).decode(v) + if ret == False: + return False + reg = self.expr + if reg == R0: + self.expr = ExprInt(0, 32) + return ret + +class ppc_gpreg(ppc_reg): + reg_info = gpregs + parser = reg_info.parser + +class ppc_gpreg_or_0(ppc_reg): + reg_info = gpregs + parser = reg_info.parser + + def decode(self, v): + 
ret = super(ppc_gpreg_or_0, self).decode(v) + if ret == False: + return False + reg = self.expr + if reg == R0: + self.expr = ExprInt(0, 32) + return ret + +class ppc_crfreg_noarg(reg_noarg): + reg_info = crfregs + parser = reg_info.parser + +class ppc_crfreg(ppc_reg): + reg_info = crfregs + parser = reg_info.parser + +class ppc_imm(imm_noarg, ppc_arg): + parser = base_expr + +class ppc_s14imm_branch(ppc_imm): + + def decode(self, v): + v = sign_ext(v << 2, 16, 32) + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & 0x3: + return False + v = v >> 2 + if sign_ext(v & self.lmask, 14, 32) != v: + return False + self.value = v & self.lmask + return True + +class ppc_s24imm_branch(ppc_imm): + + def decode(self, v): + v = sign_ext(v << 2, 26, 32) + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & 0x3: + return False + v = v >> 2 + if sign_ext(v & self.lmask, 24, 32) != v: + return False + self.value = v & self.lmask + return True + +class ppc_s16imm(ppc_imm): + + def decode(self, v): + v = sign_ext(v, 16, 32) + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if sign_ext(v & self.lmask, 16, 32) != v: + return False + self.value = v & self.lmask + return True + +class ppc_u16imm(ppc_imm): + + def decode(self, v): + if v & self.lmask != v: + return False + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg + if v & self.lmask != v: + return False + self.value = v & self.lmask + return True + +def ppc_swap_10(v): + return ((v & 0b11111) << 5) | ((v & 0b1111100000) >> 5) + +class ppc_spr(ppc_imm): + + def decode(self, v): + self.expr = ExprInt(ppc_swap_10(v), 32) + return True + + def 
encode(self, e): + if not isinstance(e, ExprInt): + return False + self.value = ppc_swap_10(e.arg) + return True + +class ppc_tbr(ppc_imm): + + def decode(self, v): + self.expr = ExprInt(ppc_swap_10(v), 32) + return True + + def encode(self, e): + if not isinstance(e, ExprInt): + return False + self.value = ppc_swap_10(e.arg) + return True + +class ppc_u08imm(ppc_u16imm): + pass + +class ppc_u05imm(ppc_u16imm): + pass + +class ppc_u04imm(ppc_u16imm): + pass + +class ppc_u02imm_noarg(imm_noarg): + pass + + +def ppc_bo_bi_to_mnemo(bo, bi, prefer_taken=True, default_taken=True): + bo2mnemo = { 0: 'DNZF', 2: 'DZF', 4: 'F', 8: 'DNZT', + 10: 'DZT', 12: 'T', 16: 'DNZ', 18: 'DZ', + 20: '' } + bi2cond = { 0b00: 'LT', 0b01: 'GT', 0b10: 'EQ', 0b11: 'SO' } + bi2ncond = { 0b00: 'GE', 0b01: 'LE', 0b10: 'NE', 0b11: 'NS' } + n = bo & 0b11110 + if not n in bo2mnemo: + raise NotImplementedError("Unknown BO field") + mnem = 'B' + bo2mnemo[n] + if mnem[-1] == 'T': + mnem = mnem[:-1] + bi2cond[bi & 0b11] + if mnem[-1] == 'F': + mnem = mnem[:-1] + bi2ncond[bi & 0b11] + + if prefer_taken != default_taken: + if prefer_taken: + mnem += '+' + else: + mnem += '-' + + return mnem + +def ppc_all_bo_bi(): + for bo in [0, 2, 4, 8, 10, 12, 16, 18, 20]: + for bi in range(4): + yield bo, bi + +class ppc_divert_conditional_branch(bs_divert): + prio=3 + def divert(self, i, candidates): + out = [] + for cls, _, bases, dct, fields in candidates: + bi_i = getfieldindexby_name(fields, 'bi')[1] + bo_i = getfieldindexby_name(fields, 'bo')[1] + + for bo, bi in ppc_all_bo_bi(): + nfields = fields[:] + nfields[bi_i] = bs(int2bin(bi, 2), fname="bi") + nfields[bo_i] = bs(int2bin(bo, 5), fname="bo") + ndct = dict(dct) + ndct['name'] = ppc_bo_bi_to_mnemo(bo, bi) + out.append((cls, ndct['name'], bases, ndct, nfields)) + + nfields = fields[:] + nfields[bi_i] = bs(int2bin(bi, 2), fname="bi") + nfields[bo_i] = bs(int2bin(bo+1, 5), fname="bo") + ndct = dict(dct) + ndct['name'] = ppc_bo_bi_to_mnemo(bo, bi) + 
out.append((cls, ndct['name'], bases, ndct, nfields)) + + return out + +class ppc_deref32(ppc_arg): + parser = deref + + def decode(self, v): + v = sign_ext(v, 16, 32) + e = self.parent.ra.expr + ExprInt(v, 32) + self.expr = ExprMem(e, size=32) + return True + + def encode(self): + e = self.expr + if not isinstance(e, ExprMem): + return False + addr = e.ptr + if isinstance(addr, ExprId) or isinstance(addr, ExprInt): + addr = addr + ExprInt(0, 32) + elif not isinstance(addr, ExprOp): + return False + if addr.op != '+': + return False + if len(addr.args) != 2: + return False + reg, disp = addr.args[0], addr.args[1] + v = int(disp.arg) + if sign_ext(v & 0xFFFF, 16, 32) != v: + return False + v &= 0xFFFF + self.value = v + self.parent.ra.expr = reg + return True + + +def ppcop(name, fields, args=None, alias=False): + dct = {"fields": fields} + dct["alias"] = alias + if args is not None: + dct['args'] = args + type(name, (mn_ppc,), dct) + +rd = bs(l=5, cls=(ppc_gpreg,)) +ra = bs(l=5, cls=(ppc_gpreg,)) +ra_or_0 = bs(l=5, cls=(ppc_gpreg_or_0,)) +rb = bs(l=5, cls=(ppc_gpreg,)) +rs = bs(l=5, cls=(ppc_gpreg,)) +crfd = bs(l=3, cls=(ppc_crfreg,)) +crfs = bs(l=3, cls=(ppc_crfreg,)) +sh = bs(l=5, cls=(ppc_u05imm,)) +mb = bs(l=5, cls=(ppc_u05imm,)) +me = bs(l=5, cls=(ppc_u05imm,)) +nb = bs(l=5, cls=(ppc_u05imm,)) +crm = bs(l=8, cls=(ppc_u08imm,)) +sr = bs(l=4, cls=(ppc_u04imm,)) +spr = bs(l=10, cls=(ppc_spr,)) +tbr = bs(l=10, cls=(ppc_tbr,)) +u05imm = bs(l=5, cls=(ppc_u05imm,)) + +s24imm_branch = bs(l=24, cls=(ppc_s24imm_branch,), fname="imm") +s14imm_branch = bs(l=14, cls=(ppc_s14imm_branch,), fname="imm") +s16imm = bs(l=16, cls=(ppc_s16imm,), fname="imm") +u16imm = bs(l=16, cls=(ppc_u16imm,), fname="imm") +u08imm = bs(l=5, cls=(ppc_u08imm,), fname="imm") +u02imm_noarg = bs(l=2, cls=(ppc_u02imm_noarg,), fname="imm") + +ra_noarg = bs(l=5, cls=(ppc_gpreg_noarg,), fname="ra") +ra_or_0_noarg = bs(l=5, cls=(ppc_gpreg_or_0_noarg,), fname="ra") +dregimm = bs(l=16, cls=(ppc_deref32,)) + 
+rc_mod = bs_mod_name(l=1, mn_mod=['', '.'], fname='rc') + +arith1_name = {"MULLI": 0b000111, "SUBFIC": 0b001000, "ADDIC": 0b001100, + "ADDIC.": 0b001101 } + +logic2_name = {"ORI": 0b011000, "XORI": 0b011010, "ANDI.": 0b011100 } +slogic2_name = {"ORIS": 0b011001, "XORIS": 0b011011, "ANDIS.": 0b011101 } + +arith3_name = {"SUBFC": 0b0000001000, "ADDC": 0b0000001010, + "MULHWU": 0b0000001011, "SUBF": 0b0000101000, + "MULHW": 0b0001001011, "SUBFE": 0b0010001000, + "ADDE": 0b0010001010, "MULLW": 0b0011101011, + "ADD": 0b0100001010, "DIVWU": 0b0111001011, + "DIVW": 0b0111101011, "SUBFCO": 0b1000001000, + "ADDCO": 0b1000001010, "SUBFO": 0b1000101000, + "SUBFEO": 0b1010001000, "ADDEO": 0b1010001010, + "MULLWO": 0b1011101011, "ADDO": 0b1100001010, + "DIVWUO": 0b1111001011, "DIVWO": 0b1111101011 } + +xor_name = { "EQV": 0b0100011100, "XOR": 0b0100111100 } + +arith4_name = {"NEG": 0b0001101000, "SUBFZE": 0b0011001000, + "ADDZE": 0b0011001010, "SUBFME": 0b0011101000, + "ADDME": 0b0011101010, "NEGO": 0b1001101000, + "SUBFZEO": 0b1011001000, "ADDZEO": 0b1011001010, + "SUBFMEO": 0b1011101000, "ADDMEO": 0b1011101010 } + +arith5_name = {"CNTLZW": 0b00000, "EXTSH": 0b11100, "EXTSB": 0b11101 } + +crlogic_name = {"CRAND": 0b1000, "CRANDC": 0b0100, "CREQV": 0b1001, + "CRNAND": 0b0111, "CRNOR": 0b0001, "CROR": 0b1110, + "CRORC": 0b1101, "CRXOR": 0b0110 } + +rotins_name = {"RLWIMI": 0b010100, "RLWINM": 0b010101 } + +bs_arith1_name = bs_name(l=6, name=arith1_name) + +load1_name = {"LWARX": 0b0000010100, "LWZX": 0b0000010111, + "LBZX": 0b0001010111, "LHZX": 0b0100010111, + "ECIWX": 0b0100110110, "LHAX": 0b0101010111, + "LSWX": 0b1000010101, "LWBRX": 0b1000010110, + "LHBRX": 0b1100010110 } + +load1_name_u = {"LWZUX": 0b0000110111, "LBZUX": 0b0001110111, + "LHZUX": 0b0100110111, "LHAUX": 0b0101110111 } + +load2_name = {"LWZ": 0b0000, "LBZ": 0b0010, "LHZ": 0b1000, "LHA": 0b1010, + "LMW": 0b1110 } + +load2_name_u = {"LWZU": 0b0001, "LBZU": 0b0011, "LHZU": 0b1001, "LHAU": 0b1011} + +store1_name 
= { "STWCX.": 0b00100101101, "STWX": 0b00100101110, + "STBX": 0b00110101110, "STHX": 0b01100101110, + "ECOWX": 0b01101101100, "STSWX": 0b10100101010, + "STWBRX": 0b10100101100, "STHBRX": 0b11100101100 } +store1_name_u = { "STWUX": 0b00101101110, "STBUX": 0b00111101110, + "STHUX": 0b01101101110 } + +store2_name = { "STW": 0b0100, "STB": 0b0110, "STH": 0b1100, "STMW": 0b1111 } +store2_name_u = { "STWU": 0b0101, "STBU": 0b0111, "STHU": 0b1101 } + +logic1_name = {"SLW": 0b0000011000, "AND": 0b0000011100, + "ANDC": 0b0000111100, "NOR": 0b0001111100, + "ORC": 0b0110011100, "OR": 0b0110111100, + "NAND": 0b0111011100, "SRW": 0b1000011000, + "SRAW": 0b1100011000 } + +dcb_name = {"DCBST": 0b00001, "DCBF": 0b00010, + "DCBTST": 0b00111, "DCBT": 0b01000, + "DCBI": 0b01110, "DCBA": 0b10111, + "ICBI": 0b11110, "DCBZ": 0b11111 } + +class bs_mod_name_prio4(bs_mod_name): + prio = 4 + +class bs_mod_name_prio5(bs_mod_name): + prio = 5 + +class bs_mod_name_prio6(bs_mod_name): + prio = 6 + +branch_to_reg = bs_mod_name_prio4(l=1, mn_mod=['LR', 'CTR'], fname='btoreg') +branch_lk = bs_mod_name_prio5(l=1, mn_mod=['', 'L'], fname='lk') +branch_aa = bs_mod_name_prio6(l=1, mn_mod=['', 'A'], fname='aa') + +ppcop("arith1", [bs_arith1_name, rd, ra, s16imm]) +ppcop("ADDIS", [bs('001111'), rd, ra_or_0, u16imm]) +ppcop("ADDI", [bs('001110'), rd, ra_or_0, s16imm]) + +ppcop("logic2", [bs_name(l=6, name=logic2_name), rs, ra, u16imm], + [ra, rs, u16imm]) +ppcop("slogic2", [bs_name(l=6, name=slogic2_name), rs, ra, u16imm], + [ra, rs, u16imm]) + +ppcop("store1", [bs('011111'), rs, ra_or_0, rb, + bs_name(l=11, name=store1_name)]) +ppcop("store1u", [bs('011111'), rs, ra, rb, + bs_name(l=11, name=store1_name_u)]) + +ppcop("store2", [bs('10'), bs_name(l=4, name=store2_name), rs, + ra_noarg, dregimm]) +ppcop("store2u", [bs('10'), bs_name(l=4, name=store2_name_u), rs, + ra_or_0_noarg, dregimm]) + +ppcop("arith3", [bs('011111'), rd, ra, rb, bs_name(l=10, name=arith3_name), + rc_mod]) + +ppcop("xor", 
[bs('011111'), rs, ra, rb, bs_name(l=10, name=xor_name), + rc_mod], [ra, rs, rb]) + +ppcop("arith4", [bs('011111'), rd, ra, bs('00000'), + bs_name(l=10, name=arith4_name), rc_mod]) + +ppcop("arith5", [bs('011111'), rs, ra, bs('00000'), + bs_name(l=5, name=arith5_name), + bs('11010'), rc_mod], [ra, rs]) + +ppcop("load1", [bs('011111'), rd, ra_or_0, rb, + bs_name(l=10, name=load1_name), bs('0')]) +ppcop("load1u", [bs('011111'), rd, ra, rb, + bs_name(l=10, name=load1_name_u), bs('0')]) +ppcop("load2", [bs('10'), bs_name(l=4, name=load2_name), + rd, ra_or_0_noarg, dregimm]) +ppcop("load2u", [bs('10'), bs_name(l=4, name=load2_name_u), + rd, ra_noarg, dregimm]) + +ppcop("logic1", [bs('011111'), rs, ra, rb, bs_name(l=10, name=logic1_name), + rc_mod], + [ra, rs, rb]) + +ppcop("TWI", [bs('000011'), u05imm, ra, s16imm]) +ppcop("TW", [bs('011111'), u05imm, ra, rb, bs('00000001000')]) + +ppcop("CMPW", [bs('011111'), crfd, bs('00'), ra, rb, bs('00000000000')]) +ppcop("CMPLW", [bs('011111'), crfd, bs('00'), ra, rb, bs('00001000000')]) +ppcop("CMPLWI", [bs('001010'), crfd, bs('00'), ra, u16imm]) +ppcop("CMPWI", [bs('001011'), crfd, bs('00'), ra, s16imm]) + +ppcop("BC", [bs('010000'), bs(l=5, cls=(ppc_u05imm,), fname='bo'), + crfs, + ppc_divert_conditional_branch(l=2, fname='bi'), + s14imm_branch, branch_aa, branch_lk]) +ppcop("SC", [bs('01000100000000000000000000000010')]) +ppcop("B", [bs('010010'), s24imm_branch, branch_aa, branch_lk]) +ppcop("MCRF", [bs('010011'), crfd, bs('00'), crfs, bs('000000000000000000')]) + +ppcop("BCXXX", [bs('010011'), bs(l=5, cls=(ppc_u05imm,), fname='bo'), + crfs, + ppc_divert_conditional_branch(l=2, fname='bi'), + bs('00000'), branch_to_reg, + bs('000010000'), branch_lk]) + +ppcop("crlogic", [bs('010011'), + bs(l=5, cls=(ppc_u05imm,), fname='crbd'), + bs(l=5, cls=(ppc_u05imm,), fname='crba'), + bs(l=5, cls=(ppc_u05imm,), fname='crbb'), + bs('0'), + bs_name(l=4, name=crlogic_name), + bs('000010')]) + +ppcop("rotins", [bs_name(l=6, name=rotins_name), 
+ rs, ra, sh, mb, me, rc_mod], + [ ra, rs, sh, mb, me ]) +ppcop("RLWNM", [bs('010111'), rs, ra, rb, mb, me, rc_mod], + [ ra, rs, rb, mb, me ]) +ppcop("MFXXX", [bs('011111'), rd, bs('0000000000'), + bs('000'), + bs_name(l=1, name={'MFCR':0, 'MFMSR':1}), + bs('0100110')]) + +ppcop("dcb", [bs('01111100000'), ra, rb, bs_name(l=5, name=dcb_name), + bs('101100')]) + +ppcop("MTCRF", [bs('011111'), rs, bs('0'), crm, bs('000100100000')], [crm, rs]) +ppcop("MTMSR", [bs('011111'), rs, bs('0000000000'), bs('00100100100')]) +ppcop("MTSR", [bs('011111'), rs, bs('0'), sr, bs('0000000110100100')], [sr, rs]) +ppcop("MTSRIN", [bs('011111'), rs, bs('00000'), rb, bs('00111100100')]) + +ppcop("TLBIE", [bs('011111'), bs('0000000000'), rb, bs('01001100100')]) +ppcop("MFSPR", [bs('011111'), rd, spr, bs('01010100110')]) +ppcop("TLBIA", [bs('01111100000000000000001011100100')]) +ppcop("MFTB", [bs('011111'), rd, tbr, bs('01011100110')]) +ppcop("RFI", [bs('01001100000000000000000001100100')]) +ppcop("ISYNC", [bs('01001100000000000000000100101100')]) +ppcop("MTSPR", [bs('011111'), rs, spr, bs('01110100110')], [spr, rs]) +ppcop("MCRXR", [bs('011111'), crfd, bs('000000000000'), + bs('10000000000')]) +ppcop("TLBSYNC", [bs('01111100000000000000010001101100')]) +ppcop("MFSR", [bs('011111'), rd, bs('0'), sr, bs('00000'), bs('10010100110')]) +ppcop("LSWI", [bs('011111'), rd, ra, nb, bs('10010101010')]) +ppcop("STSWI", [bs('011111'), rs, ra, nb, bs('10110101010')]) +ppcop("SYNC", [bs('011111'), bs('000000000000000'), bs('10010101100')]) +ppcop("MFSRIN", [bs('011111'), rd, bs('00000'), rb, bs('10100100110')]) + +ppcop("SRAWI", [bs('011111'), rs, ra, sh, bs('1100111000'), rc_mod], + [ra, rs, sh]) + +ppcop("EIEIO", [bs('011111'), bs('000000000000000'), bs('11010101100')]) diff --git a/miasm/arch/ppc/disasm.py b/miasm/arch/ppc/disasm.py new file mode 100644 index 00000000..b91d96bf --- /dev/null +++ b/miasm/arch/ppc/disasm.py @@ -0,0 +1,7 @@ +from miasm.arch.ppc.arch import mn_ppc +from 
miasm.core.asmblock import disasmEngine + +class dis_ppc32b(disasmEngine): + def __init__(self, bs=None, **kwargs): + super(dis_ppc32b, self).__init__(mn_ppc, None, bs, **kwargs) + self.attrib = 'b' diff --git a/miasm/arch/ppc/ira.py b/miasm/arch/ppc/ira.py new file mode 100644 index 00000000..953c5a86 --- /dev/null +++ b/miasm/arch/ppc/ira.py @@ -0,0 +1,87 @@ +from miasm.expression.expression import ExprAssign, ExprOp +from miasm.ir.ir import AssignBlock +from miasm.ir.analysis import ira +from miasm.arch.ppc.sem import ir_ppc32b + + +class ir_a_ppc32b(ir_ppc32b, ira): + + def __init__(self, *args): + super(ir_a_ppc32b, self).__init__(*args) + self.ret_reg = self.arch.regs.R3 + + # for test XXX TODO + def set_dead_regs(self, irblock): + pass + + def get_out_regs(self, _): + return set([self.ret_reg, self.sp]) + + def add_unused_regs(self): + leaves = [self.blocks[label] for label in self.g.leafs()] + for irblock in leaves: + self.set_dead_regs(irblock) + + def call_effects(self, ad, instr): + call_assignblks = AssignBlock( + [ + ExprAssign( + self.ret_reg, + ExprOp( + 'call_func_ret', + ad, + self.sp, + self.arch.regs.R3, + self.arch.regs.R4, + self.arch.regs.R5, + ) + ), + ExprAssign(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ], + instr + ) + return [call_assignblks], [] + + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. 
+ + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_assignblks, extra_irblocks = self.call_effects( + instr.getdstflow(None)[0], + instr + ) + assignments += call_assignblks + ir_blocks_all += extra_irblocks + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False + + def sizeof_char(self): + return 8 + + def sizeof_short(self): + return 16 + + def sizeof_int(self): + return 32 + + def sizeof_long(self): + return 32 + + def sizeof_pointer(self): + return 32 diff --git a/miasm/arch/ppc/jit.py b/miasm/arch/ppc/jit.py new file mode 100644 index 00000000..1d7ae70c --- /dev/null +++ b/miasm/arch/ppc/jit.py @@ -0,0 +1,71 @@ +from builtins import range +from miasm.jitter.jitload import Jitter, named_arguments +from miasm.core.locationdb import LocationDB +from miasm.arch.ppc.sem import ir_ppc32b +import struct + +import logging + +log = logging.getLogger('jit_ppc') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + +class jitter_ppc32b(Jitter): + max_reg_arg = 8 + + def __init__(self, *args, **kwargs): + super(jitter_ppc32b, self).__init__(ir_ppc32b(LocationDB()), + *args, **kwargs) + self.vm.set_big_endian() + + def push_uint32_t(self, v): + self.cpu.R1 -= 4 + self.vm.set_mem(self.cpu.R1, struct.pack(">I", v)) + + def pop_uint32_t(self): + x = struct.unpack(">I", self.vm.get_mem(self.cpu.R1, 4))[0] + self.cpu.R1 += 4 + return x + + def get_stack_arg(self, n): + x = struct.unpack(">I", self.vm.get_mem(self.cpu.R1 + 8 + 4 * n, 4))[0] + return x + + @named_arguments + def 
func_args_systemv(self, n_args): + args = [self.get_arg_n_systemv(i) for i in range(n_args)] + ret_ad = self.cpu.LR + return ret_ad, args + + def func_ret_systemv(self, ret_addr, ret_value1=None, ret_value2=None): + self.pc = self.cpu.PC = ret_addr + if ret_value1 is not None: + self.cpu.R3 = ret_value1 + if ret_value2 is not None: + self.cpu.R4 = ret_value2 + return True + + def func_prepare_systemv(self, ret_addr, *args): + for index in range(min(len(args), self.max_reg_arg)): + setattr(self.cpu, 'R%d' % (index + 3), args[index]) + for index in range(len(args) - 1, self.max_reg_arg - 1, -1): + self.push_uint32_t(args[index]) + + # reserve room for LR save word and backchain + self.cpu.R1 -= 8 + + self.cpu.LR = ret_addr + + def get_arg_n_systemv(self, index): + if index < self.max_reg_arg: + arg = getattr(self.cpu, 'R%d' % (index + 3)) + else: + arg = self.get_stack_arg(index - self.max_reg_arg) + return arg + + + def init_run(self, *args, **kwargs): + Jitter.init_run(self, *args, **kwargs) + self.cpu.PC = self.pc diff --git a/miasm/arch/ppc/regs.py b/miasm/arch/ppc/regs.py new file mode 100644 index 00000000..97556931 --- /dev/null +++ b/miasm/arch/ppc/regs.py @@ -0,0 +1,60 @@ + +from builtins import range +from miasm.expression.expression import * +from miasm.core.cpu import gen_reg, gen_regs + +exception_flags = ExprId('exception_flags', 32) +spr_access = ExprId('spr_access', 32) + +reserve = ExprId('reserve', 1) +reserve_address = ExprId('reserve_address', 32) + +SPR_ACCESS_IS_WRITE = 0x80000000 +SPR_ACCESS_SPR_MASK = 0x000003FF +SPR_ACCESS_SPR_OFF = 0 +SPR_ACCESS_GPR_MASK = 0x0001F000 +SPR_ACCESS_GPR_OFF = 12 + +gpregs_str = ["R%d" % i for i in range(32)] +gpregs_expr, gpregs_init, gpregs = gen_regs(gpregs_str, globals(), 32) + +crfregs_str = ["CR%d" % i for i in range(8)] +crfregs_expr, crfregs_init, crfregs = gen_regs(crfregs_str, globals(), 4) + +crfbitregs_str = ["CR%d_%s" % (i, flag) for i in range(8) + for flag in ['LT', 'GT', 'EQ', 'SO'] ] 
+crfbitregs_expr, crfbitregs_init, crfbitregs = gen_regs(crfbitregs_str, + globals(), 1) + +xerbitregs_str = ["XER_%s" % field for field in ['SO', 'OV', 'CA'] ] +xerbitregs_expr, xerbitregs_init, xerbitregs = gen_regs(xerbitregs_str, + globals(), 1) + +xerbcreg_str = ["XER_BC"] +xerbcreg_expr, xerbcreg_init, xerbcreg = gen_regs(xerbcreg_str, + globals(), 7) + + +otherregs_str = ["PC", "CTR", "LR" ] +otherregs_expr, otherregs_init, otherregs = gen_regs(otherregs_str, + globals(), 32) + +superregs_str = (["SPRG%d" % i for i in range(4)] + + ["SRR%d" % i for i in range(2)] + + ["DAR", "DSISR", "MSR", "PIR", "PVR", + "DEC", "TBL", "TBU"]) +superregs_expr, superregs_init, superregs = gen_regs(superregs_str, + globals(), 32) + +regs_flt_expr = [] + +all_regs_ids = (gpregs_expr + crfbitregs_expr + xerbitregs_expr + + xerbcreg_expr + otherregs_expr + superregs_expr + + [ exception_flags, spr_access, reserve, reserve_address ]) +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] +all_regs_ids_no_alias = all_regs_ids[:] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] diff --git a/miasm/arch/ppc/sem.py b/miasm/arch/ppc/sem.py new file mode 100644 index 00000000..5e8e394c --- /dev/null +++ b/miasm/arch/ppc/sem.py @@ -0,0 +1,924 @@ +from __future__ import print_function +from builtins import range + +import miasm.expression.expression as expr +from miasm.ir.ir import AssignBlock, IntermediateRepresentation, IRBlock +from miasm.arch.ppc.arch import mn_ppc +from miasm.arch.ppc.regs import * +from miasm.core.sembuilder import SemBuilder +from miasm.jitter.csts import * + +spr_dict = { + 8: LR, 9: CTR, 18: DSISR, 19: DAR, + 22: DEC, 26: SRR0, 27: SRR1, + # SPRG0-SPRG3 are SPRs 272-275; the 276 entry is kept as in the original table (TODO confirm) + 272: SPRG0, 273: SPRG1, 274: SPRG2, 275: SPRG3, 276: SPRG3, + 284: TBL, 285: TBU, 287: PVR, 1023: PIR +} + +crf_dict = dict((ExprId("CR%d" % i, 4), + dict( (bit, ExprId("CR%d_%s" % (i, bit), 1))
def mn_do_cntlzw(ir, instr, ra, rs):
    """Build the IR for CNTLZW / CNTLZW.

    @ir: IR builder (unused here, kept for the common handler signature)
    @instr: instruction being lifted; a trailing '.' in its name requests
            the CR0 update
    @ra: destination expression
    @rs: source expression
    Return (list of ExprAssign, list of extra IR blocks).
    """
    # Name the result so the record form can feed it to mn_compute_flags;
    # the previous code referenced an undefined 'rvalue' there.
    rvalue = ExprOp('cntleadzeros', rs)
    ret = [ExprAssign(ra, rvalue)]

    if instr.name[-1] == '.':
        ret += mn_compute_flags(rvalue)

    return ret, []
def byte_swap(expr):
    """Return @expr with its 8-bit slices in reversed order.

    @expr: expression whose size is a multiple of 8 bits
    The slices are taken from the highest byte index down to 0 and handed to
    ExprCompose in that order.
    """
    nbytes = expr.size // 8
    # Do not name this 'bytes': that would shadow the builtin.
    swapped = [expr[i * 8:i * 8 + 8] for i in range(nbytes - 1, -1, -1)]
    # ExprCompose takes its operands as separate positional arguments (see
    # mn_do_mfcr); passing the list itself is not a valid call.
    return ExprCompose(*swapped)
def mn_mtcrf(ir, instr, crm, rs):
    """Build the IR for MTCRF: copy selected 4-bit groups of @rs into CR fields.

    @crm: immediate whose 8-bit value selects the fields; the most
          significant of those 8 bits selects CR0, the least significant CR7
    @rs: 32-bit source expression
    Return (list of ExprAssign, list of extra IR blocks).
    """
    field_mask = crm.arg.arg
    assignments = []

    for field in range(8):
        if not field_mask & (0x80 >> field):
            continue
        # Field N sits in bits [28 - 4N, 31 - 4N] of rs; LT is the top bit.
        top_bit = 31 - 4 * field
        for offset, flag in enumerate(('LT', 'GT', 'EQ', 'SO')):
            pos = top_bit - offset
            target = all_regs_ids_byname["CR%d_%s" % (field, flag)]
            assignments.append(ExprAssign(target, rs[pos:pos + 1]))

    return assignments, []
def mn_do_mul(ir, instr, rd, ra, arg2):
    """Build the IR for the MUL* family.

    The part of the mnemonic after 'MUL' selects the variant:
    'HW' (signed high word), 'HWU' (unsigned high word), anything else is a
    plain 32-bit product.  A trailing 'O' updates XER_OV/XER_SO and a
    trailing '.' updates CR0.

    @rd: destination expression
    @ra, @arg2: operands
    Return (list of ExprAssign, list of extra IR blocks).
    """
    variant = instr.name[3:]
    # Drop only the record-form dot.  Stripping two characters (as before)
    # turned 'HW.' into 'H', 'HWU.' into 'HW' and 'LWO.' into 'LW', so the
    # dotted forms silently used the wrong semantics.
    if variant.endswith('.'):
        variant = variant[:-1]

    if variant == 'HW':
        v1 = ra.signExtend(64)
        v2 = arg2.signExtend(64)
        shift = 32
    elif variant == 'HWU':
        v1 = ra.zeroExtend(64)
        v2 = arg2.zeroExtend(64)
        shift = 32
    else:
        v1 = ra
        v2 = arg2
        shift = 0

    rvalue = ExprOp('*', v1, v2)
    if shift != 0:
        # Keep the upper 32 bits of the 64-bit product
        rvalue = rvalue[shift:shift + 32]

    ret = [ExprAssign(rd, rvalue)]

    over_expr = None
    if variant.endswith('O'):
        # Overflow when the sign-extended result differs from the product
        # of the sign-extended operands
        over_expr = ExprCond((rvalue.signExtend(64) ^
                              ExprOp('*', v1.signExtend(64),
                                     v2.signExtend(64))),
                             ExprInt(1, 1), ExprInt(0, 1))
        ret.append(ExprAssign(XER_OV, over_expr))
        ret.append(ExprAssign(XER_SO, XER_SO | over_expr))

    if instr.name[-1] == '.':
        ret += mn_compute_flags(rvalue, over_expr)

    return ret, []
def mn_do_slw(ir, instr, ra, rs, rb):
    """Build the IR for SLW / SLW. (shift left word).

    @ra: destination expression
    @rs: value to shift
    @rb: shift amount; its low 5 bits give the distance and bit 5 forces
         the result to zero
    Return (list of ExprAssign, list of extra IR blocks).
    """
    distance = rb & ExprInt(0b11111, 32)
    shifted = ExprOp('<<', rs, distance)
    rvalue = ExprCond(rb[5:6], ExprInt(0, 32), shifted)

    assignments = [ExprAssign(ra, rvalue)]
    record_form = instr.name[-1] == '.'
    if record_form:
        assignments += mn_compute_flags(rvalue)

    return assignments, []
def mn_do_srw(ir, instr, ra, rs, rb):
    """Build the IR for SRW / SRW. (shift right word, logical).

    @ra: destination expression
    @rs: value to shift
    @rb: shift amount; its low 5 bits give the distance and bit 5 forces
         the result to zero
    Return (list of ExprAssign, list of extra IR blocks).
    """
    # A shift amount with bit 5 set (i.e. >= 32) must produce 0, exactly as
    # mn_do_slw does for the left shift; masking to 5 bits alone would turn
    # a shift by 32 into a shift by 0.
    rvalue = ExprCond(rb[5:6], ExprInt(0, 32),
                      rs >> (rb & ExprInt(0b11111, 32)))
    ret = [ExprAssign(ra, rvalue)]

    if instr.name[-1] == '.':
        ret += mn_compute_flags(rvalue)

    return ret, []
def mn_bl(ir, instr, arg1, arg2=None):
    """Build the IR for BL / BLA: branch and save the return location in LR.

    @arg1: branch target, used when @arg2 is not given
    @arg2: optional second operand; when present it is the branch target
    Return (list of ExprAssign, list of extra IR blocks).
    """
    target = arg1 if arg2 is None else arg2
    return_loc = ExprLoc(ir.get_next_instr(instr), 32)

    assignments = [
        ExprAssign(LR, return_loc),
        ExprAssign(PC, target),
        ExprAssign(ir.IRDst, target),
    ]
    return assignments, []
def mn_do_cond_branch(ir, instr, dest):
    """Build the IR for a conditional branch to @dest.

    The BO field (instr.additional_info.bo) drives the semantics:
    - bit 0b00100 clear: CTR is decremented and takes part in the condition
      (bit 0b00010 inverts that CTR test);
    - bit 0b10000 clear: the CR bit returned by mn_get_condition takes part
      in the condition (bit 0b01000 clear inverts it).
    Link forms (mnemonic ending in 'L' or 'LA', ignoring a '+'/'-'
    prediction hint) also store the next instruction location in LR.

    Return (list of ExprAssign, list of extra IR blocks).
    """
    bo = instr.additional_info.bo
    ret = []

    # None means "this part does not constrain the branch"
    ctr_cond = None
    if not bo & 0b00100:
        ret.append(ExprAssign(CTR, CTR - ExprInt(1, 32)))
        # Tested on the pre-decrement value: CTR != 1 <=> CTR - 1 != 0
        ctr_cond = ExprCond(CTR ^ ExprInt(1, 32), ExprInt(1, 1), ExprInt(0, 1))
        if bo & 0b00010:
            ctr_cond = ~ctr_cond

    cond_cond = None
    if not bo & 0b10000:
        cond_cond = mn_get_condition(instr)
        if not bo & 0b01000:
            cond_cond = ~cond_cond

    if ctr_cond is not None and cond_cond is not None:
        condition = ctr_cond & cond_cond
    elif ctr_cond is not None:
        condition = ctr_cond
    else:
        condition = cond_cond

    if condition is None:
        # Branch always
        dest_expr = dest
    else:
        dst = ir.get_next_instr(instr)
        dest_expr = ExprCond(condition, dest, ExprLoc(dst, 32))

    # Strip the branch prediction hint the same way get_ir does, then look
    # for the link suffix.  The former test compared a one-character slice
    # (name[-2:-1]) with 'LA', which can never match, so the absolute link
    # forms never updated LR.
    name = instr.name
    if name[-1] in ('+', '-'):
        name = name[:-1]
    if name.endswith(('L', 'LA')):
        dst = ir.get_next_instr(instr)
        ret.append(ExprAssign(LR, ExprLoc(dst, 32)))

    ret.append(ExprAssign(PC, dest_expr))
    ret.append(ExprAssign(ir.IRDst, dest_expr))

    return ret, []
arg2): + dest = ExprMem(arg2.arg, 8) + return [ExprAssign(dest, ExprSlice(arg1, 0, 8))], [] + +@sbuild.parse +def mn_stwu(arg1, arg2): + arg2 = arg1 + arg1 = arg2.arg + +sem_dir = { + 'B': mn_b, + 'BA': mn_b, + 'BL': mn_bl, + 'BLA': mn_bl, + 'CMPLW': mn_cmp_unsigned, + 'CMPLWI': mn_cmp_unsigned, + 'CMPW': mn_cmp_signed, + 'CMPWI': mn_cmp_signed, + 'CNTLZW': mn_do_cntlzw, + 'CNTLZW.': mn_do_cntlzw, + 'ECIWX': mn_do_nop_warn, + 'ECOWX': mn_do_nop_warn, + 'EIEIO': mn_do_nop_warn, + 'EQV': mn_do_eqv, + 'EQV.': mn_do_eqv, + 'ICBI': mn_do_nop_warn, + 'ISYNC': mn_do_nop_warn, + 'MCRF': mn_do_mcrf, + 'MCRXR': mn_do_mcrxr, + 'MFCR': mn_do_mfcr, + 'MFMSR': mn_mfmsr, + 'MFSPR': mn_mfspr, + 'MFSR': mn_do_nop_warn, + 'MFSRIN': mn_do_nop_warn, + 'MFTB': mn_mfmsr, + 'MTCRF': mn_mtcrf, + 'MTMSR': mn_mtmsr, + 'MTSPR': mn_mtspr, + 'MTSR': mn_do_nop_warn, + 'MTSRIN': mn_do_nop_warn, + 'NAND': mn_do_nand, + 'NAND.': mn_do_nand, + 'NOR': mn_do_nor, + 'NOR.': mn_do_nor, + 'RFI': mn_do_rfi, + 'SC': mn_do_nop_warn, + 'SLW': mn_do_slw, + 'SLW.': mn_do_slw, + 'SRAW': mn_do_sraw, + 'SRAW.': mn_do_sraw, + 'SRAWI': mn_do_srawi, + 'SRAWI.': mn_do_srawi, + 'SRW': mn_do_srw, + 'SRW.': mn_do_srw, + 'SYNC': mn_do_nop_warn, + 'TLBIA': mn_do_nop_warn, + 'TLBIE': mn_do_nop_warn, + 'TLBSYNC': mn_do_nop_warn, + 'TW': mn_do_nop_warn, + 'TWI': mn_do_nop_warn, +} + + +class ir_ppc32b(IntermediateRepresentation): + + def __init__(self, loc_db=None): + super(ir_ppc32b, self).__init__(mn_ppc, 'b', loc_db) + self.pc = mn_ppc.getpc() + self.sp = mn_ppc.getsp() + self.IRDst = expr.ExprId('IRDst', 32) + self.addrsize = 32 + + def get_ir(self, instr): + args = instr.args[:] + if instr.name[0:5] in [ 'ADDIS', 'ORIS', 'XORIS', 'ANDIS' ]: + args[2] = ExprInt(args[2].arg << 16, 32) + if instr.name[0:3] == 'ADD': + if instr.name[0:4] == 'ADDZ': + last_arg = ExprInt(0, 32) + elif instr.name[0:4] == 'ADDM': + last_arg = ExprInt(0xFFFFFFFF, 32) + else: + last_arg = args[2] + instr_ir, extra_ir = mn_do_add(self, instr, 
args[0], args[1], + last_arg) + elif instr.name[0:3] == 'AND': + instr_ir, extra_ir = mn_do_and(self, instr, *args) + elif instr.additional_info.bo_bi_are_defined: + name = instr.name + if name[-1] == '+' or name[-1] == '-': + name = name[0:-1] + if name[-3:] == 'CTR' or name[-4:] == 'CTRL': + arg1 = ExprCompose(ExprInt(0, 2), CTR[2:32]) + elif name[-2:] == 'LR' or name[-3:] == 'LRL': + arg1 = ExprCompose(ExprInt(0, 2), LR[2:32]) + else: + arg1 = args[1] + instr_ir, extra_ir = mn_do_cond_branch(self, instr, arg1) + elif instr.name[0:2] == 'CR': + instr_ir, extra_ir = mn_do_cr(self, instr, *args) + elif instr.name[0:3] == 'DCB': + instr_ir, extra_ir = mn_do_nop_warn(self, instr, *args) + elif instr.name[0:3] == 'DIV': + instr_ir, extra_ir = mn_do_div(self, instr, *args) + elif instr.name[0:4] == 'EXTS': + instr_ir, extra_ir = mn_do_exts(self, instr, *args) + elif instr.name[0] == 'L': + instr_ir, extra_ir = mn_do_load(self, instr, *args) + elif instr.name[0:3] == 'MUL': + instr_ir, extra_ir = mn_do_mul(self, instr, *args) + elif instr.name[0:3] == 'NEG': + instr_ir, extra_ir = mn_do_neg(self, instr, *args) + elif instr.name[0:2] == 'OR': + instr_ir, extra_ir = mn_do_or(self, instr, *args) + elif instr.name[0:2] == 'RL': + instr_ir, extra_ir = mn_do_rotate(self, instr, args[0], args[1], + args[2], args[3].arg.arg, + args[4].arg.arg) + elif instr.name == 'STMW': + instr_ir, extra_ir = mn_do_stmw(self, instr, *args) + elif instr.name[0:2] == 'ST': + instr_ir, extra_ir = mn_do_store(self, instr, *args) + elif instr.name[0:4] == 'SUBF': + if instr.name[0:5] == 'SUBFZ': + last_arg = ExprInt(0) + elif instr.name[0:5] == 'SUBFM': + last_arg = ExprInt(0xFFFFFFFF) + else: + last_arg = args[2] + instr_ir, extra_ir = mn_do_sub(self, instr, args[0], args[1], + last_arg) + elif instr.name[0:3] == 'XOR': + instr_ir, extra_ir = mn_do_xor(self, instr, *args) + else: + instr_ir, extra_ir = sem_dir[instr.name](self, instr, *args) + + return instr_ir, extra_ir + + def 
def cb_deref_pcimm(tokens):
    """Parse action: combine the first two parsed tokens with '+'.

    @tokens: indexable parse result; only items 0 and 1 are used
    """
    base, displacement = tokens[0], tokens[1]
    return base + displacement
class sh4_arg(m_arg):
    """Base class of SH4 operands: turns parsed AST nodes into expressions."""

    def asm_ast_to_expr(self, arg, loc_db):
        """Convert the AST node @arg into an expression.

        @arg: AstId, AstOp, AstInt or AstMem node
        @loc_db: location database used to resolve symbol names
        Return the expression, or None when the node (or one of its
        children) cannot be converted.
        """
        if isinstance(arg, AstId):
            name = arg.name
            if isinstance(name, ExprId):
                return name
            if name in gpregs.str:
                return None
            # Any other identifier is a symbol: bind it to a location
            loc_key = loc_db.get_or_create_name_location(name.encode())
            return ExprLoc(loc_key, 32)

        if isinstance(arg, AstOp):
            operands = [self.asm_ast_to_expr(sub, loc_db) for sub in arg.args]
            return None if None in operands else ExprOp(arg.op, *operands)

        if isinstance(arg, AstInt):
            return ExprInt(arg.value, 32)

        if isinstance(arg, AstMem):
            address = self.asm_ast_to_expr(arg.ptr, loc_db)
            return None if address is None else ExprMem(address, arg.size)

        return None
class sh4_dgpreg(sh4_arg):
    """Memory operand addressed by a general purpose register (@Rn)."""
    parser = dgpregs_base

    def fromstring(self, text, loc_db, parser_result=None):
        """Parse @text, then force the access size to this field's size."""
        start, stop = super(sh4_dgpreg, self).fromstring(text, loc_db, parser_result)
        if start is None or self.expr == [None]:
            return start, stop
        # The parser always builds a 32-bit access; use self.sz instead
        self.expr = ExprMem(self.expr.ptr, self.sz)
        return start, stop

    def decode(self, v):
        """Set the expression to a self.sz-bit access through register @v."""
        r = gpregs.expr[v]
        self.expr = ExprMem(r, self.sz)
        return True

    def encode(self):
        """Set self.value to the register index; return False if not encodable."""
        e = self.expr
        if not isinstance(e, ExprMem):
            return False
        if not isinstance(e.ptr, ExprId):
            return False
        # Reject identifiers that are not general purpose registers instead
        # of letting list.index raise ValueError, as the other encoders of
        # this file do.
        if e.ptr not in gpregs.expr:
            return False
        self.value = gpregs.expr.index(e.ptr)
        return True
class sh4_dgpreg_imm(sh4_dgpreg):
    """Memory operand addressed by register plus displacement (@(disp, Rn)).

    The displacement is stored in the parent's 'disp' field, in units of the
    access size.
    """
    parser = dgpregs_ir

    def decode(self, v):
        """Build the access from register index @v and the parent's disp."""
        p = self.parent
        r = gpregs.expr[v]
        s = self.sz
        # disp counts accesses of s bits; convert it to a byte offset
        d = ExprInt((p.disp.value * s) // 8, 32)
        e = ExprMem(r + d, s)
        self.expr = e
        return True

    def encode(self):
        """Set self.value and the parent's disp; return False if not encodable."""
        e = self.expr
        p = self.parent
        s = self.sz
        if not isinstance(e, ExprMem):
            return False
        if isinstance(e.ptr, ExprId):
            # Bare register: reject non general purpose registers rather
            # than letting list.index raise ValueError (the displacement
            # branch below already performs this check).
            if e.ptr not in gpregs.expr:
                return False
            v = gpregs.expr.index(e.ptr)
            p.disp.value = 0
        elif isinstance(e.ptr, ExprOp):
            res = match_expr(e, ExprMem(jra + jrb, self.sz), [jra, jrb])
            if not res:
                return False
            if not isinstance(res[jra], ExprId):
                return False
            if not isinstance(res[jrb], ExprInt):
                return False
            d = int(res[jrb])
            # Byte offset -> number of s-bit accesses
            p.disp.value = d // (s // 8)
            if not res[jra] in gpregs.expr:
                return False
            v = gpregs.expr.index(res[jra])
        else:
            return False
        self.value = v
        return True
class sh4_dgbrimm8(sh4_dgpreg):
    """Memory operand addressed by GBR plus an 8-bit scaled displacement."""
    parser = dgbr_imm

    def decode(self, v):
        """Build the self.sz-bit access at GBR + @v scaled by the access size."""
        s = self.sz
        self.expr = ExprMem(GBR + ExprInt((v * s) // 8, 32), s)
        return True

    def encode(self):
        """Set self.value to the scaled displacement; False if not encodable."""
        e = self.expr
        s = self.sz
        # Zero displacement: compare against an access of this field's own
        # size.  A hard-coded 32 made the shortcut unusable for the 8 and
        # 16-bit variants and let a 32-bit access match them.
        if e == ExprMem(GBR, s):
            self.value = 0
            return True
        res = match_expr(self.expr, ExprMem(GBR + jra, s), [jra])
        if not res:
            return False
        if not isinstance(res[jra], ExprInt):
            return False
        # Byte offset -> number of s-bit accesses
        self.value = int(res[jra]) // (s // 8)
        return True
return False + self.value = v + return True + +class additional_info(object): + + def __init__(self): + self.except_on_instr = False + + +class instruction_sh4(instruction): + __slots__ = [] + delayslot = 0 + + def __init__(self, *args, **kargs): + super(instruction_sh4, self).__init__(*args, **kargs) + + def dstflow(self): + return self.name.startswith('J') + + @staticmethod + def arg2str(expr, index=None, loc_db=None): + if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + return str(expr) + elif expr.is_loc(): + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + ptr = expr.ptr + + if isinstance(ptr, ExprOp): + if ptr.op == "predec": + s = '-%s' % ptr.args[0] + elif ptr.op == "postinc": + s = '%s+' % ptr.args[0] + else: + s = ','.join( + str(x).replace('(', '').replace(')', '') + for x in ptr.args + ) + s = "(%s)"%s + s = "@%s" % s + elif isinstance(ptr, ExprId): + s = "@%s" % ptr + else: + raise NotImplementedError('zarb arg2str') + return s + + + """ + def dstflow2label(self, loc_db): + e = self.args[0] + if not isinstance(e, ExprInt): + return + if self.name == 'BLX': + ad = e.arg+8+self.offset + else: + ad = e.arg+8+self.offset + l = loc_db.get_or_create_offset_location(ad) + s = ExprId(l, e.size) + self.args[0] = s + """ + + def breakflow(self): + if self.name.startswith('J'): + return True + return False + + def is_subcall(self): + return self.name == 'JSR' + + def getdstflow(self, loc_db): + return [self.args[0]] + + def splitflow(self): + return self.name == 'JSR' + + def get_symbol_size(self, symbol, loc_db): + return 32 + + def fixDstOffset(self): + e = self.args[0] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(e, ExprInt): + log.debug('dyn dst %r', e) + return + off = e.arg - (self.offset + 4 + self.l) + print(hex(off)) + if int(off % 4): + raise ValueError('strange offset! 
%r' % off) + self.args[0] = ExprInt(off, 32) + print('final', self.args[0]) + + def get_args_expr(self): + args = [a for a in self.args] + return args + + +class mn_sh4(cls_mn): + bintree = {} + regs = regs_module + num = 0 + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + pc = PC + # delayslot: + # http://resource.renesas.com/lib/eng/e_learnig/sh4/13/index.html + delayslot = 0 # unit is instruction instruction + instruction = instruction_sh4 + + def additional_info(self): + info = additional_info() + return info + + @classmethod + def getbits(cls, bs, attrib, start, n): + if not n: + return 0 + o = 0 + if n > bs.getlen() * 8: + raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) + while n: + i = start // 8 + c = cls.getbytes(bs, i) + if not c: + raise IOError + c = ord(c) + r = 8 - start % 8 + c &= (1 << r) - 1 + l = min(r, n) + c >>= (r - l) + o <<= l + o |= c + n -= l + start += l + return o + + @classmethod + def getbytes(cls, bs, offset, l=1): + out = b"" + for _ in range(l): + n_offset = (offset & ~1) + 1 - offset % 2 + out += bs.getbytes(n_offset, 1) + offset += 1 + return out + + @classmethod + def check_mnemo(cls, fields): + l = sum([x.l for x in fields]) + assert l == 16, "len %r" % l + + @classmethod + def getmn(cls, name): + return name.upper().replace('_', '.') + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + def value(self, mode): + v = super(mn_sh4, self).value(mode) + return [x[::-1] for x in v] + + +class bs_dr0gbr(sh4_dgpreg): + parser = dgbr_reg + + def decode(self, v): + self.expr = ExprMem(GBR + R0, 8) + return True + + def encode(self): + return self.expr == ExprMem(GBR + R0, 8) + + +class bs_dr0gp(sh4_dgpreg): + parser = d_gpreg_gpreg + + def decode(self, v): + self.expr = ExprMem(gpregs.expr[v] + R0, self.sz) + return True + + def encode(self): + res = 
class bs_dgpreg(sh4_dgpreg):
    """sh4_dgpreg variant that only swaps in the dgpregs_base parser.

    No decode/encode logic is overridden here; everything else is
    inherited from sh4_dgpreg. Used by the d08gpreg / d32gpreg bit-field
    definitions (sz=8 and sz=32).
    """
    parser = dgpregs_base
def addop(name, fields, args=None, alias=False):
    """Declare one SH4 mnemonic as a new subclass of mn_sh4.

    name:   class name given to the generated subclass.
    fields: bit-field list stored under the "fields" class attribute.
    args:   optional operand list; stored under "args" only when it is
            not None.
    alias:  stored under the "alias" class attribute.

    The class is created only for its side effects: the resulting class
    object is discarded and nothing is returned.
    """
    attributes = {"fields": fields, "alias": alias}
    if args is not None:
        attributes['args'] = args
    type(name, (mn_sh4,), attributes)
+addop("mov_l", [bs('0110', fname="opc"), rn, d32_rm, bs('0010')], [d32_rm, rn]) +addop("mov_b", + [bs('0010', fname="opc"), d08rnpdec, rm, bs('0100')], [rm, d08rnpdec]) +addop("mov_w", + [bs('0010', fname="opc"), d16rnpdec, rm, bs('0101')], [rm, d16rnpdec]) +addop("mov_l", + [bs('0010', fname="opc"), d32rnpdec, rm, bs('0110')], [rm, d32rnpdec]) +addop("mov_b", + [bs('0110', fname="opc"), rn, d08rmpinc, bs('0100')], [rm, d08rnpinc]) +addop("mov_w", + [bs('0110', fname="opc"), rn, d16rmpinc, bs('0101')], [d16rmpinc, rn]) +addop("mov_l", + [bs('0110', fname="opc"), rn, d32rmpinc, bs('0110')], [d32rmpinc, rn]) +addop("mov_b", [bs('10000000', fname='opc'), bs_r0, d08rnimm, dimm4]) +addop("mov_w", [bs('10000001', fname='opc'), bs_r0, d16rnimm, dimm4]) +addop("mov_l", [bs('0001', fname='opc'), d32rnimm, rm, dimm4], [rm, d32rnimm]) +addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bs_r0]) +addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bs_r0]) +addop("mov_l", [bs('0101', fname='opc'), rn, d32rmimm, dimm4], [d32rmimm, rn]) +addop("mov_b", + [bs('0000', fname='opc'), bd08r0gp, rm, bs('0100')], [rm, bd08r0gp]) +addop("mov_w", + [bs('0000', fname='opc'), bd16r0gp, rm, bs('0101')], [rm, bd16r0gp]) +addop("mov_l", + [bs('0000', fname='opc'), bd32r0gp, rm, bs('0110')], [rm, bd32r0gp]) +addop("mov_b", + [bs('0000', fname='opc'), rn, bd08r0gp, bs('1100')], [bd08r0gp, rn]) +addop("mov_w", + [bs('0000', fname='opc'), rn, bd16r0gp, bs('1101')], [bd16r0gp, rn]) +addop("mov_l", + [bs('0000', fname='opc'), rn, bd32r0gp, bs('1110')], [bd32r0gp, rn]) + +addop("mov_b", [bs('11000000'), bs_r0, d08gbrimm8]) +addop("mov_w", [bs('11000001'), bs_r0, d16gbrimm8]) +addop("mov_l", [bs('11000010'), bs_r0, d32gbrimm8]) + +addop("mov_b", [bs('11000100'), d08gbrimm8, bs_r0]) +addop("mov_w", [bs('11000101'), d16gbrimm8, bs_r0]) +addop("mov_l", [bs('11000110'), d32gbrimm8, bs_r0]) + +addop("mov", [bs('11000111'), pc32imm, bs_r0]) + +addop("swapb", [bs('0110'), rn, rm, 
bs('1000')], [rm, rn]) +addop("swapw", [bs('0110'), rn, rm, bs('1001')], [rm, rn]) +addop("xtrct", [bs('0010'), rn, rm, bs('1101')], [rm, rn]) + + +addop("add", [bs('0011'), rn, rm, bs('1100')], [rm, rn]) +addop("add", [bs('0111'), rn, s08imm], [s08imm, rn]) +addop("addc", [bs('0011'), rn, rm, bs('1110')], [rm, rn]) +addop("addv", [bs('0011'), rn, rm, bs('1111')], [rm, rn]) + + +addop("cmpeq", [bs('10001000'), s08imm, bs_r0]) + + +addop("cmpeq", [bs('0011'), rn, rm, bs('0000')], [rm, rn]) +addop("cmphs", [bs('0011'), rn, rm, bs('0010')], [rm, rn]) +addop("cmpge", [bs('0011'), rn, rm, bs('0011')], [rm, rn]) +addop("cmphi", [bs('0011'), rn, rm, bs('0110')], [rm, rn]) +addop("cmpgt", [bs('0011'), rn, rm, bs('0111')], [rm, rn]) + + +addop("cmppz", [bs('0100'), rn, bs('00010001')]) +addop("cmppl", [bs('0100'), rn, bs('00010101')]) +addop("cmpstr", [bs('0010'), rn, rm, bs('1100')], [rm, rn]) + + +addop("div1", [bs('0011'), rn, rm, bs('0100')], [rm, rn]) + +addop("div0s", [bs('0010'), rn, rm, bs('0111')], [rm, rn]) +addop("div0u", [bs('0000000000011001')]) + +addop("dmuls", [bs('0011'), rn, rm, bs('1101')], [rm, rn]) +addop("dmulu", [bs('0011'), rn, rm, bs('0101')], [rm, rn]) + +addop("dt", [bs('0100'), rn, bs('00010000')]) + + +addop("extsb", [bs('0110'), rn, rm, bs('1110')], [rm, rn]) +addop("extsw", [bs('0110'), rn, rm, bs('1111')], [rm, rn]) +addop("extub", [bs('0110'), rn, rm, bs('1100')], [rm, rn]) +addop("extuw", [bs('0110'), rn, rm, bs('1101')], [rm, rn]) + +addop("mac_l", [bs('0000', fname='opc'), d32rnpinc, + d32rmpinc, bs('1111')], [d32rmpinc, d32rnpinc]) +addop("mac_w", [bs('0100', fname='opc'), d16rnpinc, + d16rmpinc, bs('1111')], [d16rmpinc, d16rnpinc]) + +addop("mull", [bs('0000'), rn, rm, bs('0111')], [rm, rn]) +addop("mulsw", [bs('0010'), rn, rm, bs('1111')], [rm, rn]) +addop("muluw", [bs('0010'), rn, rm, bs('1110')], [rm, rn]) + +addop("neg", [bs('0110'), rn, rm, bs('1011')], [rm, rn]) +addop("negc", [bs('0110'), rn, rm, bs('1010')], [rm, rn]) + 
+addop("sub", [bs('0011'), rn, rm, bs('1000')], [rm, rn]) +addop("subc", [bs('0011'), rn, rm, bs('1010')], [rm, rn]) +addop("subv", [bs('0011'), rn, rm, bs('1011')], [rm, rn]) + +addop("and", [bs('0010'), rn, rm, bs('1001')], [rm, rn]) +addop("and", [bs('11001001'), u08imm, bs_r0]) +addop("and_b", [bs('11001101'), u08imm, dr0gbr]) + +addop("not", [bs('0110'), rn, rm, bs('0111')], [rm, rn]) + +addop("or", [bs('0010'), rn, rm, bs('1011')], [rm, rn]) + +addop("or", [bs('11001011'), u08imm, bs_r0]) +addop("or_b", [bs('11001111'), u08imm, dr0gbr]) + +addop("tas_b", [bs('0100'), d08gpreg, bs('00011011')]) +addop("tst", [bs('0010'), rn, rm, bs('1000')], [rm, rn]) +addop("tst", [bs('11001000'), u08imm, bs_r0]) +addop("tst_b", [bs('11001100'), u08imm, dr0gbr]) + + +addop("xor", [bs('0010'), rn, rm, bs('1010')], [rm, rn]) +addop("xor", [bs('11001010'), u08imm, bs_r0]) +addop("xor_b", [bs('11001110'), u08imm, dr0gbr]) + +addop("rotl", [bs('0100'), rn, bs('00000100')]) +addop("rotr", [bs('0100'), rn, bs('00000101')]) +addop("rotcl", [bs('0100'), rn, bs('00100100')]) +addop("rotcr", [bs('0100'), rn, bs('00100101')]) + +addop("shad", [bs('0100'), rn, rm, bs('1100')], [rm, rn]) +addop("shal", [bs('0100'), rn, bs('00100000')]) +addop("shar", [bs('0100'), rn, bs('00100001')]) +addop("shld", [bs('0100'), rn, rm, bs('1101')], [rm, rn]) + +addop("shll", [bs('0100'), rn, bs('00000000')]) +addop("shlr", [bs('0100'), rn, bs('00000001')]) +addop("shll2", [bs('0100'), rn, bs('00001000')]) +addop("shlr2", [bs('0100'), rn, bs('00001001')]) +addop("shll8", [bs('0100'), rn, bs('00011000')]) +addop("shlr8", [bs('0100'), rn, bs('00011001')]) +addop("shll16", [bs('0100'), rn, bs('00101000')]) +addop("shlr16", [bs('0100'), rn, bs('00101001')]) + + +addop("bf", [bs('10001011'), s08imm]) +""" + def splitflow(self): + return True + def breakflow(self): + return True + def dstflow(self): + return True + def dstflow2label(self, loc_db): + e = self.args[0].expr + ad = e.arg*2+4+self.offset + l = 
loc_db.get_or_create_offset_location(ad) + s = ExprId(l, e.size) + self.args[0].expr = s +""" + +addop("bfs", [bs('10001111'), s08imm]) +""" + delayslot = 1 +""" +addop("bt", [bs('10001001'), s08imm]) + +addop("bts", [bs('10001101'), s08imm]) + +addop("bra", [bs('1010'), s12imm]) +""" + delayslot = 1 + def breakflow(self): + return True + def dstflow(self): + return True + def dstflow2label(self, loc_db): + e = self.args[0].expr + ad = e.arg*2+4+self.offset + l = loc_db.get_or_create_offset_location(ad) + s = ExprId(l, e.size) + self.args[0].expr = s +""" + +addop("braf", [bs('0000'), rn, bs('00100011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def dstflow(self): + return True +""" +addop("bsr", [bs('1011'), s12imm]) + +addop("bsrf", [bs('0000'), rn, bs('00000011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def is_subcall(self): + return True + def splitflow(self): + return True +""" + +addop("jmp_l", [bs('0100'), d32gpreg, bs('00101011')]) +""" + delayslot = 1 + def breakflow(self): + return True +""" + +addop("jsr_l", [bs('0100'), d32gpreg, bs('00001011')]) +""" + delayslot = 1 + def breakflow(self): + return True + def is_subcall(self): + return True + def splitflow(self): + return True +""" + +addop("rts", [bs('0000000000001011')]) +""" + delayslot = 1 + def breakflow(self): + return True +""" +addop("clrmac", [bs('0000000000101000')]) +addop("clrs", [bs('0000000001001000')]) +addop("clrt", [bs('0000000000001000')]) + + +addop("ldc", [bs('0100'), rm, bs_sr, bs('00001110')]) +addop("ldc", [bs('0100'), rm, bs_gbr, bs('00011110')]) +addop("ldc", [bs('0100'), rm, bs_vbr, bs('00101110')]) +addop("ldc", [bs('0100'), rm, bs_ssr, bs('00111110')]) +addop("ldc", [bs('0100'), rm, bs_spc, bs('01001110')]) +addop("ldc", [bs('0100'), rm, bs_dbr, bs('11111010')]) +addop("ldc", [bs('0100'), rm, bs('1'), brn, bs('1110')], [rm, brn]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_sr, bs('00000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, 
bs_gbr, bs('00010111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_vbr, bs('00100111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_ssr, bs('00110111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_spc, bs('01000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_dbr, bs('11110110')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs('1'), brn, bs('0111')]) +addop("lds", [bs('0100'), rm, bs_mach, bs('00001010')]) +addop("lds", [bs('0100'), rm, bs_macl, bs('00011010')]) +addop("lds", [bs('0100'), rm, bs_pr, bs('00101010')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_mach, bs('00000110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_macl, bs('00010110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_pr, bs('00100110')]) +addop("ldtlb", [bs('0000000000111000')]) + +addop("movca_l", [bs('0000'), bs_r0, d32gpreg, bs('11000011')]) +addop("nop", [bs('0000000000001001')]) +addop("ocbi_l", [bs('0000'), d32gpreg, bs('10010011')]) +addop("ocbp_l", [bs('0000'), d32gpreg, bs('10100011')]) +addop("ocbwb_l", [bs('0000'), d32gpreg, bs('10110011')]) +addop("pref_l", [bs('0000'), d32gpreg, bs('10000011')]) + + +addop("rte", [bs('0000000000101011')]) +addop("sets", [bs('0000000001011000')]) +addop("sett", [bs('0000000000011000')]) +addop("sleep", [bs('0000000000011011')]) +addop("stc", [bs('0000'), bs_sr, rn, bs('00000010')]) +addop("stc", [bs('0000'), bs_gbr, rn, bs('00010010')]) +addop("stc", [bs('0000'), bs_vbr, rn, bs('00100010')]) +addop("stc", [bs('0000'), bs_ssr, rn, bs('00110010')]) +addop("stc", [bs('0000'), bs_spc, rn, bs('01000010')]) +addop("stc", [bs('0000'), bs_sgr, rn, bs('00111010')]) +addop("stc", [bs('0000'), bs_dbr, rn, bs('11111010')]) +addop("stc", [bs('0000'), rn, bs('1'), brm, bs('0010')], [brm, rn]) + +addop("stc_l", [bs('0100'), bs_sr, d32rmpdec, bs('00000011')]) +addop("stc_l", [bs('0100'), bs_gbr, d32rmpdec, bs('00010011')]) +addop("stc_l", [bs('0100'), bs_vbr, d32rmpdec, bs('00100011')]) +addop("stc_l", [bs('0100'), bs_ssr, d32rmpdec, bs('00110011')]) +addop("stc_l", 
[bs('0100'), bs_spc, d32rmpdec, bs('01000011')]) +addop("stc_l", [bs('0100'), bs_sgr, d32rmpdec, bs('00110010')]) +addop("stc_l", [bs('0100'), bs_dbr, d32rmpdec, bs('11110010')]) +addop("stc_l", + [bs('0100'), d32rnpdec, bs('1'), brm, bs('0011')], [brm, d32rnpdec]) + +# float +addop("sts", [bs('0000'), bs_mach, rm, bs('00001010')]) +addop("sts", [bs('0000'), bs_macl, rm, bs('00011010')]) +addop("sts", [bs('0000'), bs_pr, rm, bs('00101010')]) +addop("sts_l", [bs('0100'), bs_mach, d32rmpdec, bs('00000010')]) +addop("sts_l", [bs('0100'), bs_macl, d32rmpdec, bs('00010010')]) +addop("sts_l", + [bs('0100'), d32rnpdec, bs_pr, bs('00100010')], [bs_pr, d32rnpdec]) +addop("trapa", [bs('11000011'), u08imm]) + +addop("fldi0", [bs('1111'), frn, bs('10001101')]) +addop("fldi1", [bs('1111'), frn, bs('10011101')]) +addop("fmov", [bs('1111'), frn, frm, bs('1100')], [frm, frn]) +addop("fmov_s", [bs('1111'), frn, d32gpreg, bs('1000')], [d32gpreg, frn]) +addop("fmov_s", [bs('1111'), frn, bd32r0gp, bs('0110')], [bd32r0gp, frn]) +addop("fmov_s", [bs('1111'), frn, d32rmpinc, bs('1001')], [d32rmpinc, frn]) +addop("fmov_s", [bs('1111'), d32gpreg, frm, bs('1010')], [frm, d32gpreg]) +addop("fmov_s", [bs('1111'), d32rnpdec, frm, bs('1011')], [frm, d32rnpdec]) +addop("fmov_s", [bs('1111'), bd32r0gp, frm, bs('0111')], [frm, bd32r0gp]) + +addop("flds", [bs('1111'), frm, bs_fpul, bs('00011101')]) +addop("fsts", [bs('1111'), bs_fpul, frm, bs('00001101')]) +addop("fabs", [bs('1111'), frn, bs('01011101')]) +addop("fadd", [bs('1111'), frn, frm, bs('0000')], [frm, frn]) +addop("fcmpeq", [bs('1111'), frn, frm, bs('0100')], [frm, frn]) +addop("fcmpgt", [bs('1111'), frn, frm, bs('0101')], [frm, frn]) +addop("fdiv", [bs('1111'), frn, frm, bs('0011')], [frm, frn]) + +addop("float", [bs('1111'), bs_fpul, frn, bs('00101101')]) +addop("fmac", [bs('1111'), bs_fr0, frn, frm, bs('1110')], [bs_fr0, frm, frn]) +addop("fmul", [bs('1111'), frn, frm, bs('0010')], [frm, frn]) +addop("fneg", [bs('1111'), frn, 
bs('01001101')]) +addop("fsqrt", [bs('1111'), frn, bs('01101101')]) +addop("fsub", [bs('1111'), frn, frm, bs('0001')], [frm, frn]) +addop("ftrc", [bs('1111'), frm, bs_fpul, bs('00111101')]) diff --git a/miasm/arch/sh4/regs.py b/miasm/arch/sh4/regs.py new file mode 100644 index 00000000..8a7e1881 --- /dev/null +++ b/miasm/arch/sh4/regs.py @@ -0,0 +1,84 @@ +from builtins import range +from miasm.expression.expression import * +from miasm.core.cpu import reg_info, gen_reg + +# GP +gpregs_str = ['R%d' % r for r in range(0x10)] +gpregs_expr = [ExprId(x, 32) for x in gpregs_str] +gpregs = reg_info(gpregs_str, gpregs_expr) + +bgpregs_str = ['R%d_BANK' % r for r in range(0x8)] +bgpregs_expr = [ExprId(x, 32) for x in bgpregs_str] +bgpregs = reg_info(bgpregs_str, bgpregs_expr) + +fregs_str = ['FR%d' % r for r in range(0x10)] +fregs_expr = [ExprId(x, 32) for x in fregs_str] +fregs = reg_info(fregs_str, fregs_expr) + +dregs_str = ['DR%d' % r for r in range(0x8)] +dregs_expr = [ExprId(x, 32) for x in dregs_str] +dregs = reg_info(dregs_str, dregs_expr) + + +PC, reg_info_pc = gen_reg('PC') +PR, reg_info_pr = gen_reg('PR') +R0, reg_info_r0 = gen_reg('R0') +GBR, reg_info_gbr = gen_reg('GBR') +SR, reg_info_sr = gen_reg('SR') +VBR, reg_info_vbr = gen_reg('VBR') +SSR, reg_info_ssr = gen_reg('SSR') +SPC, reg_info_spc = gen_reg('SPC') +SGR, reg_info_sgr = gen_reg('SGR') +DBR, reg_info_dbr = gen_reg('DBR') +MACH, reg_info_mach = gen_reg('MACH') +MACL, reg_info_macl = gen_reg('MACL') +FPUL, reg_info_fpul = gen_reg('FPUL') +FR0, reg_info_fr0 = gen_reg('FR0') + +R0 = gpregs_expr[0] +R1 = gpregs_expr[1] +R2 = gpregs_expr[2] +R3 = gpregs_expr[3] +R4 = gpregs_expr[4] +R5 = gpregs_expr[5] +R6 = gpregs_expr[6] +R7 = gpregs_expr[7] +R8 = gpregs_expr[8] +R9 = gpregs_expr[9] +R10 = gpregs_expr[10] +R11 = gpregs_expr[11] +R12 = gpregs_expr[12] +R13 = gpregs_expr[13] +R14 = gpregs_expr[14] +R15 = gpregs_expr[15] + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_of = 'of' +reg_cf = 'cf' + +zf = ExprId(reg_zf, 
size=1) +nf = ExprId(reg_nf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) + + +all_regs_ids = [ + R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, of, cf, + + PC, PR, R0, GBR, SR, VBR, SSR, SPC, + SGR, DBR, MACH, MACL, FPUL, FR0] + +all_regs_ids_no_alias = all_regs_ids + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] + +regs_flt_expr = [] diff --git a/miasm/arch/x86/__init__.py b/miasm/arch/x86/__init__.py new file mode 100644 index 00000000..bbad893b --- /dev/null +++ b/miasm/arch/x86/__init__.py @@ -0,0 +1 @@ +__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm/arch/x86/arch.py b/miasm/arch/x86/arch.py new file mode 100644 index 00000000..a82fac02 --- /dev/null +++ b/miasm/arch/x86/arch.py @@ -0,0 +1,4637 @@ +#-*- coding:utf-8 -*- + +from __future__ import print_function +from builtins import range +import re + +from future.utils import viewitems + +from miasm.core.utils import int_to_byte +from miasm.expression.expression import * +from pyparsing import * +from miasm.core.cpu import * +from collections import defaultdict +import miasm.arch.x86.regs as regs_module +from miasm.arch.x86.regs import * +from miasm.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp + + +log = logging.getLogger("x86_arch") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +conditional_branch = ["JO", "JNO", "JB", "JAE", + "JZ", "JNZ", "JBE", "JA", + "JS", "JNS", "JPE", "JNP", + #"L", "NL", "NG", "G"] + "JL", "JGE", "JLE", "JG", + "JCXZ", "JECXZ", "JRCXZ"] + +unconditional_branch = ['JMP', 'JMPF'] + +f_isad = "AD" +f_s08 = "S08" +f_u08 = "U08" +f_s16 = "S16" +f_u16 = "U16" +f_s32 = "S32" 
+f_u32 = "U32" +f_s64 = "S64" +f_u64 = "U64" +f_imm = 'IMM' + +f_imm2size = {f_s08: 8, f_s16: 16, f_s32: 32, f_s64: 64, + f_u08: 8, f_u16: 16, f_u32: 32, f_u64: 64} + + +size2gpregs = {8: gpregs08, 16: gpregs16, + 32: gpregs32, 64: gpregs64} + + +replace_regs64 = { + AL: RAX[:8], CL: RCX[:8], DL: RDX[:8], BL: RBX[:8], + AH: RAX[8:16], CH: RCX[8:16], DH: RDX[8:16], BH: RBX[8:16], + SPL: RSP[0:8], BPL: RBP[0:8], SIL: RSI[0:8], DIL: RDI[0:8], + R8B: R8[0:8], R9B: R9[0:8], R10B: R10[0:8], R11B: R11[0:8], + R12B: R12[0:8], R13B: R13[0:8], R14B: R14[0:8], R15B: R15[0:8], + + AX: RAX[:16], CX: RCX[:16], DX: RDX[:16], BX: RBX[:16], + SP: RSP[:16], BP: RBP[:16], SI: RSI[:16], DI: RDI[:16], + R8W: R8[:16], R9W: R9[:16], R10W: R10[:16], R11W: R11[:16], + R12W: R12[:16], R13W: R13[:16], R14W: R14[:16], R15W: R15[:16], + + EAX: RAX[:32], ECX: RCX[:32], EDX: RDX[:32], EBX: RBX[:32], + ESP: RSP[:32], EBP: RBP[:32], ESI: RSI[:32], EDI: RDI[:32], + R8D: R8[:32], R9D: R9[:32], R10D: R10[:32], R11D: R11[:32], + R12D: R12[:32], R13D: R13[:32], R14D: R14[:32], R15D: R15[:32], + + IP: RIP[:16], EIP: RIP[:32], + + ExprId("ST", 64): float_st0, + ExprId("ST(0)", 64): float_st0, + ExprId("ST(1)", 64): float_st1, + ExprId("ST(2)", 64): float_st2, + ExprId("ST(3)", 64): float_st3, + ExprId("ST(4)", 64): float_st4, + ExprId("ST(5)", 64): float_st5, + ExprId("ST(6)", 64): float_st6, + ExprId("ST(7)", 64): float_st7, + +} + +replace_regs32 = { + AL: EAX[:8], CL: ECX[:8], DL: EDX[:8], BL: EBX[:8], + AH: EAX[8:16], CH: ECX[8:16], DH: EDX[8:16], BH: EBX[8:16], + + AX: EAX[:16], CX: ECX[:16], DX: EDX[:16], BX: EBX[:16], + SP: ESP[:16], BP: EBP[:16], SI: ESI[:16], DI: EDI[:16], + + IP: EIP[:16], + + + ExprId("ST", 64): float_st0, + ExprId("ST(0)", 64): float_st0, + ExprId("ST(1)", 64): float_st1, + ExprId("ST(2)", 64): float_st2, + ExprId("ST(3)", 64): float_st3, + ExprId("ST(4)", 64): float_st4, + ExprId("ST(5)", 64): float_st5, + ExprId("ST(6)", 64): float_st6, + ExprId("ST(7)", 64): float_st7, + 
def cb_deref_segmoff(tokens):
    """Parse action: turn a (segment, offset) token pair into a 'segm' AstOp."""
    assert len(tokens) == 2
    segment, offset = tokens[0], tokens[1]
    return AstOp('segm', segment, offset)


def cb_deref_base_expr(tokens):
    """Parse action: return the first token, which must be an AstNode."""
    node = tokens[0]
    assert isinstance(node, AstNode)
    return node
def cb_deref_mem(tokens):
    """Parse action: build an AstMem from a sized memory dereference.

    Accepted token shapes:
      (size_keyword, ptr)           -> AstMem(ptr, size)
      (size_keyword, segment, ptr)  -> AstMem(AstOp('segm', segment, ptr), size)

    The size in bits is looked up from the keyword in MEMPREFIX2SIZE.

    Raises ValueError for any other number of tokens.
    """
    if len(tokens) == 2:
        s, ptr = tokens
        assert isinstance(ptr, AstNode)
        return AstMem(ptr, MEMPREFIX2SIZE[s])
    elif len(tokens) == 3:
        s, segm, ptr = tokens
        return AstMem(AstOp('segm', segm, ptr), MEMPREFIX2SIZE[s])
    # The original message claimed 'len(tokens) > 3' although this line is
    # also reached with fewer than two tokens; report the real count.
    raise ValueError(
        'unexpected number of tokens: %d (expected 2 or 3)' % len(tokens)
    )
# Maps a 16/32-bit mode to the other one.
invmode = {16: 32, 32: 16}


def opmode_prefix(mode):
    """Return the operand size for a (size, opmode, admode) triple.

    In 16/32-bit modes a set opmode flips the size via invmode; in 64-bit
    mode it selects 16, otherwise 32. Any other size raises
    NotImplementedError.
    """
    size, opmode, _admode = mode
    if size == 64:
        return 16 if opmode else 32
    if size in (16, 32):
        return invmode[size] if opmode else size
    raise NotImplementedError('not fully functional')


def admode_prefix(mode):
    """Return the address size for a (size, opmode, admode) triple.

    In 16/32-bit modes a set admode flips the size via invmode; 64-bit
    mode always yields 64. Any other size raises NotImplementedError.
    """
    size, _opmode, admode = mode
    if size == 64:
        return 64
    if size in (16, 32):
        return invmode[size] if admode else size
    raise NotImplementedError('not fully functional')


def v_opmode_info(size, opmode, rex_w, stk):
    """Return the effective operand size, or None for an unknown size.

    In 64-bit mode the priority is: REX.W first, then the opmode flag,
    then the stack marker `stk`, and finally the 32-bit default.
    """
    if size in (16, 32):
        return invmode[size] if opmode else size
    if size != 64:
        return None
    if rex_w == 1:
        return 64
    if opmode == 1:
        return 16
    return 64 if stk else 32
def get_prefix(s):
    """Split the first whitespace-terminated word off an assembly line.

    Returns (prefix, rest), where `prefix` is the first run of
    non-whitespace characters that is followed by whitespace and `rest`
    is the text after that whitespace. Returns (None, s) when `s` holds
    no such word (for example a lone mnemonic, or an empty string).
    """
    # Raw string: '\S' and '\s' are invalid escape sequences in a normal
    # string literal and trigger a warning on recent Python versions.
    g = re.search(r'(\S+)(\s+)', s)
    if not g:
        return None, s
    prefix = g.group(1)
    # Slice from the end of the match instead of len(prefix) + len(blank):
    # the latter assumes the match starts at index 0 and returns garbage
    # when `s` has leading whitespace.
    return prefix, s[g.end():]
True + if self.name.startswith('RET'): + return True + if self.name.startswith('INT'): + return True + if self.name.startswith('SYS'): + return True + return self.name in ['CALL', 'HLT', 'IRET', 'IRETD', 'IRETQ', 'ICEBP'] + + def splitflow(self): + if self.name in conditional_branch: + return True + if self.name in unconditional_branch: + return False + if self.name.startswith('LOOP'): + return True + if self.name.startswith('INT'): + return True + if self.name.startswith('SYS'): + return True + return self.name in ['CALL'] + + def setdstflow(self, a): + return + + def is_subcall(self): + return self.name in ['CALL'] + + def getdstflow(self, loc_db): + if self.additional_info.g1.value & 14 and self.name in repeat_mn: + addr = int(self.offset) + loc_key = loc_db.get_or_create_offset_location(addr) + return [ExprLoc(loc_key, self.v_opmode())] + return [self.args[0]] + + def get_symbol_size(self, symbol, loc_db): + return self.mode + + def fixDstOffset(self): + expr = self.args[0] + if self.offset is None: + raise ValueError('symbol not resolved %s' % l) + if not isinstance(expr, ExprInt): + log.warning('dynamic dst %r', expr) + return + self.args[0] = ExprInt(int(expr) - self.offset, self.mode) + + def get_info(self, c): + self.additional_info.g1.value = c.g1.value + self.additional_info.g2.value = c.g2.value + self.additional_info.stk = hasattr(c, 'stk') + self.additional_info.v_opmode = c.v_opmode() + self.additional_info.v_admode = c.v_admode() + self.additional_info.prefix = c.prefix + self.additional_info.prefixed = getattr(c, "prefixed", b"") + + def __str__(self): + return self.to_string() + + def to_string(self, loc_db=None): + o = super(instruction_x86, self).to_string(loc_db) + if self.additional_info.g1.value & 1: + o = "LOCK %s" % o + if self.additional_info.g1.value & 2: + if getattr(self.additional_info.prefixed, 'default', b"") != b"\xF2": + o = "REPNE %s" % o + if self.additional_info.g1.value & 8: + if getattr(self.additional_info.prefixed, 
'default', b"") != b"\xF3": + o = "REP %s" % o + elif self.additional_info.g1.value & 4: + if getattr(self.additional_info.prefixed, 'default', b"") != b"\xF3": + o = "REPE %s" % o + return o + + def get_args_expr(self): + args = [] + for a in self.args: + a = a.replace_expr(replace_regs[self.mode]) + args.append(a) + return args + + @staticmethod + def arg2str(expr, index=None, loc_db=None): + if expr.is_id() or expr.is_int(): + o = str(expr) + elif expr.is_loc(): + if loc_db is not None: + o = loc_db.pretty_str(expr.loc_key) + else: + o = str(expr) + elif ((isinstance(expr, ExprOp) and expr.op == 'far' and + isinstance(expr.args[0], ExprMem)) or + isinstance(expr, ExprMem)): + if isinstance(expr, ExprOp): + prefix, expr = "FAR ", expr.args[0] + else: + prefix = "" + sz = SIZE2MEMPREFIX[expr.size] + segm = "" + if expr.is_mem_segm(): + segm = "%s:" % expr.ptr.args[0] + expr = expr.ptr.args[1] + else: + expr = expr.ptr + if isinstance(expr, ExprOp): + s = str(expr).replace('(', '').replace(')', '') + else: + s = str(expr) + o = prefix + sz + ' PTR %s[%s]' % (segm, s) + elif isinstance(expr, ExprOp) and expr.op == 'segm': + o = "%s:%s" % (expr.args[0], expr.args[1]) + else: + raise ValueError('check this %r' % expr) + return "%s" % o + + + +class mn_x86(cls_mn): + name = "x86" + prefix_op_size = False + prefix_ad_size = False + regs = regs_module + all_mn = [] + all_mn_mode = defaultdict(list) + all_mn_name = defaultdict(list) + all_mn_inst = defaultdict(list) + bintree = {} + num = 0 + delayslot = 0 + pc = {16: IP, 32: EIP, 64: RIP} + sp = {16: SP, 32: ESP, 64: RSP} + instruction = instruction_x86 + max_instruction_len = 15 + + @classmethod + def getpc(cls, attrib): + return cls.pc[attrib] + + @classmethod + def getsp(cls, attrib): + return cls.sp[attrib] + + def v_opmode(self): + if hasattr(self, 'stk'): + stk = 1 + else: + stk = 0 + return v_opmode_info(self.mode, self.opmode, self.rex_w.value, stk) + + def v_admode(self): + size, opmode, admode = self.mode, 
self.opmode, self.admode + if size in [16, 32]: + if admode: + return invmode[size] + else: + return size + elif size == 64: + if admode == 1: + return 32 + return 64 + + def additional_info(self): + info = additional_info() + info.g1.value = self.g1.value + info.g2.value = self.g2.value + info.stk = hasattr(self, 'stk') + info.v_opmode = self.v_opmode() + info.prefixed = b"" + if hasattr(self, 'prefixed'): + info.prefixed = self.prefixed.default + return info + + @classmethod + def check_mnemo(cls, fields): + pass + + @classmethod + def getmn(cls, name): + return name.upper() + + @classmethod + def mod_fields(cls, fields): + prefix = [d_g1, d_g2, d_rex_p, d_rex_w, d_rex_r, d_rex_x, d_rex_b] + return prefix + fields + + @classmethod + def gen_modes(cls, subcls, name, bases, dct, fields): + dct['mode'] = None + return [(subcls, name, bases, dct, fields)] + + @classmethod + def fromstring(cls, text, loc_db, mode): + pref = 0 + prefix, new_s = get_prefix(text) + if prefix == "LOCK": + pref |= 1 + text = new_s + elif prefix == "REPNE" or prefix == "REPNZ": + pref |= 2 + text = new_s + elif prefix == "REPE" or prefix == "REPZ": + pref |= 4 + text = new_s + elif prefix == "REP": + pref |= 8 + text = new_s + c = super(mn_x86, cls).fromstring(text, loc_db, mode) + c.additional_info.g1.value = pref + return c + + @classmethod + def pre_dis(cls, v, mode, offset): + offset_o = offset + pre_dis_info = {'opmode': 0, + 'admode': 0, + 'g1': 0, + 'g2': 0, + 'rex_p': 0, + 'rex_w': 0, + 'rex_r': 0, + 'rex_x': 0, + 'rex_b': 0, + 'prefix': b"", + 'prefixed': b"", + } + while True: + c = v.getbytes(offset) + if c == b'\x66': + pre_dis_info['opmode'] = 1 + elif c == b'\x67': + pre_dis_info['admode'] = 1 + elif c == b'\xf0': + pre_dis_info['g1'] = 1 + elif c == b'\xf2': + pre_dis_info['g1'] = 2 + elif c == b'\xf3': + pre_dis_info['g1'] = 12 + + elif c == b'\x2e': + pre_dis_info['g2'] = 1 + elif c == b'\x36': + pre_dis_info['g2'] = 2 + elif c == b'\x3e': + pre_dis_info['g2'] = 3 + elif c 
== b'\x26': + pre_dis_info['g2'] = 4 + elif c == b'\x64': + pre_dis_info['g2'] = 5 + elif c == b'\x65': + pre_dis_info['g2'] = 6 + + else: + break + pre_dis_info['prefix'] += c + offset += 1 + if mode == 64 and c in b'@ABCDEFGHIJKLMNO': + x = ord(c) + pre_dis_info['rex_p'] = 1 + pre_dis_info['rex_w'] = (x >> 3) & 1 + pre_dis_info['rex_r'] = (x >> 2) & 1 + pre_dis_info['rex_x'] = (x >> 1) & 1 + pre_dis_info['rex_b'] = (x >> 0) & 1 + offset += 1 + elif pre_dis_info.get('g1', None) == 12 and c in [b'\xa6', b'\xa7', b'\xae', b'\xaf']: + pre_dis_info['g1'] = 4 + return pre_dis_info, v, mode, offset, offset - offset_o + + @classmethod + def get_cls_instance(cls, cc, mode, infos=None): + for opmode in [0, 1]: + for admode in [0, 1]: + c = cc() + c.init_class() + + c.reset_class() + c.add_pre_dis_info() + c.dup_info(infos) + c.mode = mode + c.opmode = opmode + c.admode = admode + + if not hasattr(c, 'stk') and hasattr(c, "fopmode") and c.fopmode.mode == 64: + c.rex_w.value = 1 + yield c + + def post_dis(self): + if self.g2.value: + for a in self.args: + if not isinstance(a.expr, ExprMem): + continue + m = a.expr + a.expr = ExprMem( + ExprOp('segm', enc2segm[self.g2.value], m.ptr), m.size) + return self + + def dup_info(self, infos): + if infos is not None: + self.g1.value = infos.g1.value + self.g2.value = infos.g2.value + + def reset_class(self): + super(mn_x86, self).reset_class() + if hasattr(self, "opmode"): + del(self.opmode) + if hasattr(self, "admode"): + del(self.admode) + + def add_pre_dis_info(self, pre_dis_info=None): + if pre_dis_info is None: + return True + if hasattr(self, "prefixed") and self.prefixed.default == b"\x66": + pre_dis_info['opmode'] = 0 + self.opmode = pre_dis_info['opmode'] + self.admode = pre_dis_info['admode'] + + if hasattr(self, 'no_xmm_pref') and\ + pre_dis_info['prefix'] and\ + pre_dis_info['prefix'][-1] in b'\x66\xf2\xf3': + return False + if (hasattr(self, "prefixed") and + not pre_dis_info['prefix'].endswith(self.prefixed.default)): + 
return False + if (self.rex_w.value is not None and + self.rex_w.value != pre_dis_info['rex_w']): + return False + else: + self.rex_w.value = pre_dis_info['rex_w'] + self.rex_r.value = pre_dis_info['rex_r'] + self.rex_b.value = pre_dis_info['rex_b'] + self.rex_x.value = pre_dis_info['rex_x'] + self.rex_p.value = pre_dis_info['rex_p'] + + if hasattr(self, 'no_rex') and\ + (self.rex_r.value or self.rex_b.value or + self.rex_x.value or self.rex_p.value): + return False + + + self.g1.value = pre_dis_info['g1'] + self.g2.value = pre_dis_info['g2'] + self.prefix = pre_dis_info['prefix'] + return True + + def post_asm(self, v): + return v + + + def gen_prefix(self): + v = b"" + rex = 0x40 + if self.g1.value is None: + self.g1.value = 0 + if self.g2.value is None: + self.g2.value = 0 + + if self.rex_w.value: + rex |= 0x8 + if self.rex_r.value: + rex |= 0x4 + if self.rex_x.value: + rex |= 0x2 + if self.rex_b.value: + rex |= 0x1 + if rex != 0x40 or self.rex_p.value == 1: + v = int_to_byte(rex) + v + if hasattr(self, 'no_rex'): + return None + + if hasattr(self, 'prefixed'): + v = self.prefixed.default + v + + if self.g1.value & 1: + v = b"\xf0" + v + if self.g1.value & 2: + if hasattr(self, 'no_xmm_pref'): + return None + v = b"\xf2" + v + if self.g1.value & 12: + if hasattr(self, 'no_xmm_pref'): + return None + v = b"\xf3" + v + if self.g2.value: + v = { + 1: b'\x2e', + 2: b'\x36', + 3: b'\x3e', + 4: b'\x26', + 5: b'\x64', + 6: b'\x65' + }[self.g2.value] + v + # mode prefix + if hasattr(self, "admode") and self.admode: + v = b"\x67" + v + + if hasattr(self, "opmode") and self.opmode: + if hasattr(self, 'no_xmm_pref'): + return None + v = b"\x66" + v + return v + + def encodefields(self, decoded): + v = super(mn_x86, self).encodefields(decoded) + prefix = self.gen_prefix() + if prefix is None: + return None + return prefix + v + + def getnextflow(self, loc_db): + raise NotImplementedError('not fully functional') + + def ir_pre_instruction(self): + return 
[ExprAssign(mRIP[self.mode], + ExprInt(self.offset + self.l, mRIP[self.mode].size))] + + @classmethod + def filter_asm_candidates(cls, instr, candidates): + + cand_same_mode = [] + cand_diff_mode = [] + out = [] + for c, v in candidates: + if (hasattr(c, 'no_xmm_pref') and + (c.g1.value & 2 or c.g1.value & 4 or c.g1.value & 8 or c.opmode)): + continue + if hasattr(c, "fopmode") and v_opmode(c) != c.fopmode.mode: + continue + if hasattr(c, "fadmode") and v_admode(c) != c.fadmode.mode: + continue + # relative dstflow must not have opmode set + # (assign IP instead of EIP for instance) + if (instr.dstflow() and + instr.name not in ["JCXZ", "JECXZ", "JRCXZ"] and + len(instr.args) == 1 and + isinstance(instr.args[0], ExprInt) and c.opmode): + continue + + out.append((c, v)) + candidates = out + for c, v in candidates: + if v_opmode(c) == instr.mode: + cand_same_mode += v + for c, v in candidates: + if v_opmode(c) != instr.mode: + cand_diff_mode += v + cand_same_mode.sort(key=len) + cand_diff_mode.sort(key=len) + return cand_same_mode + cand_diff_mode + + +class bs_modname_size(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + mode = dct['mode'] + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + # no mode64 exinstance in name means no 64bit version of mnemo + if mode == 64: + if mode in self.args['name']: + nfields = fields[:] + f, i = getfieldindexby_name(nfields, 'rex_w') + f = bs("1", l=0, cls=(bs_fbit,), fname="rex_w") + osize = v_opmode_info(size, opmode, 1, 0) + nfields[i] = f + nfields = nfields[:-1] + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + + nfields = fields[:] + nfields = nfields[:-1] + f, i = getfieldindexby_name(nfields, 'rex_w') + f = bs("0", l=0, cls=(bs_fbit,), 
fname="rex_w") + osize = v_opmode_info(size, opmode, 0, 0) + nfields[i] = f + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + else: + l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) + osize = v_opmode_info(size, opmode, None, 0) + nfields = fields[:-1] + ndct = dict(dct) + if osize in self.args['name']: + ndct['name'] = self.args['name'][osize] + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_modname_jecx(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + mode = dct['mode'] + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + + nfields = fields[:] + nfields = nfields[:-1] + args = dict(self.args) + ndct = dict(dct) + if mode == 64: + if admode: + ndct['name'] = "JECXZ" + else: + ndct['name'] = "JRCXZ" + elif mode == 32: + if admode: + ndct['name'] = "JCXZ" + else: + ndct['name'] = "JECXZ" + elif mode == 16: + if admode: + ndct['name'] = "JECXZ" + else: + ndct['name'] = "JCXZ" + else: + raise ValueError('unhandled mode') + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_modname_mode(bs_divert): + prio = 1 + + def divert(self, i, candidates): + out = [] + for candidate in candidates: + cls, name, bases, dct, fields = candidate + fopmode = opmode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) + size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] + + mode = dct['mode'] + l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) + osize = v_opmode_info(size, opmode, None, 0) + nfields = fields[:-1] + args = dict(self.args) + ndct = dict(dct) + if mode == 64 or osize == 32: + ndct['name'] = self.args['name'][mode] + else: + ndct['name'] = self.args['name'][16] + out.append((cls, 
ndct['name'], bases, ndct, nfields)) + return out + + +class x86_imm(imm_noarg): + parser = base_expr + + def decodeval(self, v): + return swap_uint(self.l, v) + + def encodeval(self, v): + return swap_uint(self.l, v) + + +class x86_imm_fix_08(imm_noarg): + parser = base_expr + intsize = 8 + intmask = (1 << intsize) - 1 + + def decodeval(self, v): + return self.ival + + def encode(self): + v = self.expr2int(self.expr) + if v != self.ival: + return False + self.value = 0 + return True + + +class x86_08(x86_imm): + intsize = 8 + intmask = (1 << intsize) - 1 + + +class x86_16(x86_imm): + intsize = 16 + intmask = (1 << intsize) - 1 + + +class x86_32(x86_imm): + intsize = 32 + intmask = (1 << intsize) - 1 + + +class x86_64(x86_imm): + intsize = 64 + intmask = (1 << intsize) - 1 + + +class x86_08_ne(x86_imm): + intsize = 8 + intmask = (1 << intsize) - 1 + + def encode(self): + return True + + def decode(self, v): + v = swap_uint(self.l, v) + p = self.parent + admode = p.v_admode() + value = sign_ext(v, self.intsize, admode) + self.expr = ExprInt(value, admode) + return True + + +class x86_16_ne(x86_08_ne): + intsize = 16 + intmask = (1 << intsize) - 1 + + +class x86_32_ne(x86_08_ne): + intsize = 32 + intmask = (1 << intsize) - 1 + + +class x86_64_ne(x86_08_ne): + intsize = 64 + intmask = (1 << intsize) - 1 + + +class x86_s08to16(x86_imm): + in_size = 8 + out_size = 16 + + def myexpr(self, x): + return ExprInt(x, 16) + + def int2expr(self, v): + return self.myexpr(v) + + def expr2int(self, e): + if not isinstance(e, ExprInt): + return None + v = int(e) + if v & ~((1 << self.l) - 1) != 0: + return None + return v + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.v_opmode() == 64: + self.expr = ExprInt(sign_ext(v, self.in_size, 64), 64) + else: + if (1 << (self.l - 1)) & v: + v = sign_ext(v, self.l, self.out_size) + self.expr = self.myexpr(v) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v 
= int(self.expr) + opmode = self.parent.v_opmode() + + out_size = self.out_size + if opmode != self.out_size: + if opmode == 32 and self.out_size == 64: + out_size = opmode + if v == sign_ext( + int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): + pass + else: + # test with rex_w + self.parent.rex_w.value = 1 + opmode = self.parent.v_opmode() + out_size = opmode + if (v != sign_ext( + int(v & ((1 << self.in_size) - 1)), + self.in_size, out_size)): + return False + if v != sign_ext( + int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): + return False + v = self.encodeval(v) + self.value = (v & 0xffffffff) & self.lmask + return True + + def decodeval(self, v): + return swap_uint(self.l, v) + + def encodeval(self, v): + return swap_sint(self.l, v) + + +class x86_s08to32(x86_s08to16): + in_size = 8 + out_size = 32 + + def myexpr(self, x): + return ExprInt(x, 32) + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + if self.parent.rex_w.value == 1: + v = ExprInt(sign_ext(v, self.in_size, 64), 64) + else: + v = ExprInt(sign_ext(v, self.in_size, 32), 32) + + self.expr = v + return True + + +class x86_s08to64(x86_s08to32): + in_size = 8 + out_size = 64 + + def myexpr(self, x): + return ExprInt(x, 64) + + +class x86_s32to64(x86_s08to32): + in_size = 32 + out_size = 64 + + def myexpr(self, x): + return ExprInt(x, 64) + + +class bs_eax(x86_arg): + reg_info = r_eax_all + rindex = 0 + parser = reg_info.parser + + def decode(self, v): + p = self.parent + expr = None + if hasattr(p, 'w8') and p.w8.value == 0: + expr = regs08_expr[self.rindex] + else: + expr = size2gpregs[p.v_opmode()].expr[self.rindex] + self.expr = expr + return True + + def encode(self): + self.value = 0 + p = self.parent + expr = self.expr + osize = p.v_opmode() + if hasattr(p, 'w8'): + if p.w8.value is None: + # XXX TODO: priority in w8 erase? 
+ if expr.size == 8: + p.w8.value = 0 + else: + p.w8.value = 1 + if hasattr(p, 'w8') and p.w8.value == 0: + return expr == regs08_expr[self.rindex] + elif p.mode in [16, 32]: + return expr == size2gpregs[osize].expr[self.rindex] + elif p.mode == 64: + if expr == size2gpregs[64].expr[self.rindex]: + p.rex_w.value = 1 + return True + elif expr == size2gpregs[osize].expr[self.rindex]: + return True + return False + return False + +class bs_seg(x86_arg): + reg_info = r_eax_all + rindex = 0 + parser = reg_info.parser + + def decode(self, v): + self.expr = self.reg_info.expr[0] + return True + + def encode(self): + self.value = 0 + return self.expr == self.reg_info.expr[0] + + +class bs_edx(bs_eax): + reg_info = r_edx_all + rindex = 2 + parser = reg_info.parser + + +class bs_st(bs_eax): + reg_info = r_st_all + rindex = 0 + parser = reg_info.parser + + +class bs_cs(bs_seg): + reg_info = r_cs_all + rindex = 0 + parser = reg_info.parser + + +class bs_ds(bs_seg): + reg_info = r_ds_all + rindex = 0 + parser = reg_info.parser + + +class bs_es(bs_seg): + reg_info = r_es_all + rindex = 0 + parser = reg_info.parser + + +class bs_ss(bs_seg): + reg_info = r_ss_all + rindex = 0 + parser = reg_info.parser + + +class bs_fs(bs_seg): + reg_info = r_fs_all + rindex = 0 + parser = reg_info.parser + + +class bs_gs(bs_seg): + reg_info = r_gs_all + rindex = 0 + parser = reg_info.parser + + +class x86_reg_st(reg_noarg, x86_arg): + reg_info = r_st_all + parser = reg_info.parser + + +class bs_sib_scale(bs_divert): + bsname = "sib_scale" + + def divert(self, i, candidates): + out = [] + done = False + for cls, name, bases, dct, fields in candidates: + if (not (admode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) != 16 and + 'rm' in dct and dct['rm'] == 0b100 and + 'mod' in dct and dct['mod'] != 0b11)): + ndct = dict(dct) + nfields = fields[:] + nfields[i] = None + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + + nfields = fields[:] + 
args = dict(self.args) + ndct = dict(dct) + f = bs(**args) + nfields[i] = f + ndct[self.args['fname']] = None + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +class bs_sib_index(bs_sib_scale): + pass + + +class bs_sib_base(bs_sib_scale): + pass + + +class bs_disp(bs_divert): + + def divert(self, i, candidates): + out = [] + done = False + for cls, name, bases, dct, fields in candidates: + ndct = dict(dct) + nfields = fields[:] + if (admode_prefix( + (dct['mode'], dct['opmode'], dct['admode'])) == 16): + if 'mod' in dct and dct['mod'] == 0b00 and \ + 'rm' in dct and dct['rm'] == 0b110: + nfields[i] = bs( + l=16, cls=(x86_16_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b01: + nfields[i] = bs( + l=8, cls=(x86_08_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b10: + nfields[i] = bs( + l=16, cls=(x86_16_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + else: + if 'mod' in dct and dct['mod'] == 0b00 and \ + 'rm' in dct and dct['rm'] == 0b101: + nfields[i] = bs( + l=32, cls=(x86_32_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b01: + nfields[i] = bs( + l=8, cls=(x86_08_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + elif 'mod' in dct and dct['mod'] == 0b10: + nfields[i] = bs( + l=32, cls=(x86_32_ne,), fname=self.args['fname']) + ndct[self.args['fname']] = True + out.append((cls, ndct['name'], bases, ndct, nfields)) + continue + + nfields[i] = None + ndct[self.args['fname']] = None + out.append((cls, 
ndct['name'], bases, ndct, nfields)) + return out + + +def getmodrm(c): + return (c >> 6) & 3, (c >> 3) & 7, c & 7 + + +def setmodrm(mod, re, rm): + return ((mod & 3) << 6) | ((re & 7) << 3) | (rm & 7) + + +def sib(c): + return modrm(c) + +db_afs_64 = [] +sib_64_s08_ebp = [] + + +def gen_modrm_form(): + global db_afs_64, sib_64_s08_ebp + ebp = 5 + + sib_s08_ebp = [{f_isad: True} for i in range(0x100)] + sib_u32_ebp = [{f_isad: True} for i in range(0x100)] + sib_u32 = [{f_isad: True} for i in range(0x100)] + + sib_u64 = [] + for rex_x in range(2): + o = [] + for rex_b in range(2): + x = [{f_isad: True} for i in range(0x100)] + o.append(x) + sib_u64.append(o) + + sib_u64_ebp = [] + for rex_x in range(2): + o = [] + for rex_b in range(2): + x = [{f_isad: True} for i in range(0x100)] + o.append(x) + sib_u64_ebp.append(o) + + sib_64_s08_ebp = [] + for rex_x in range(2): + o = [] + for rex_b in range(2): + x = [{f_isad: True} for i in range(0x100)] + o.append(x) + sib_64_s08_ebp.append(o) + + for sib_rez in [sib_s08_ebp, + sib_u32_ebp, + sib_u32, + sib_64_s08_ebp, + sib_u64_ebp, + sib_u64, + ]: + for index in range(0x100): + ss, i, b = getmodrm(index) + + if b == 0b101: + if sib_rez == sib_s08_ebp: + sib_rez[index][f_imm] = f_s08 + sib_rez[index][ebp] = 1 + elif sib_rez == sib_u32_ebp: + sib_rez[index][f_imm] = f_u32 + sib_rez[index][ebp] = 1 + elif sib_rez == sib_u32: + sib_rez[index][f_imm] = f_u32 + elif sib_rez == sib_u64_ebp: + for rex_b in range(2): + for rex_x in range(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + sib_rez[rex_x][rex_b][index][ebp + 8 * rex_b] = 1 + elif sib_rez == sib_u64: + for rex_b in range(2): + for rex_x in range(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + elif sib_rez == sib_64_s08_ebp: + for rex_b in range(2): + for rex_x in range(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_s08 + sib_rez[rex_x][rex_b][index][ebp + 8 * rex_b] = 1 + + else: + if sib_rez == sib_s08_ebp: + sib_rez[index][b] = 1 + sib_rez[index][f_imm] = f_s08 + 
elif sib_rez == sib_u32_ebp: + sib_rez[index][b] = 1 + sib_rez[index][f_imm] = f_u32 + elif sib_rez == sib_u32: + sib_rez[index][b] = 1 + elif sib_rez == sib_u64_ebp: + for rex_b in range(2): + for rex_x in range(2): + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + sib_rez[rex_x][rex_b][index][f_imm] = f_u32 + elif sib_rez == sib_u64: + for rex_b in range(2): + for rex_x in range(2): + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + elif sib_rez == sib_64_s08_ebp: + for rex_b in range(2): + for rex_x in range(2): + sib_rez[rex_x][rex_b][index][f_imm] = f_s08 + sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 + + if i == 0b100 and sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: + continue + + if sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: + tmp = i + if not tmp in sib_rez[index]: + sib_rez[index][tmp] = 0 # 1 << ss + sib_rez[index][tmp] += 1 << ss + else: + for rex_b in range(2): + for rex_x in range(2): + tmp = i + 8 * rex_x + if i == 0b100 and rex_x == 0: + continue + if not tmp in sib_rez[rex_x][rex_b][index]: + sib_rez[rex_x][rex_b][index][tmp] = 0 # 1 << ss + sib_rez[rex_x][rex_b][index][tmp] += 1 << ss + + # 32bit + db_afs_32 = [None for i in range(0x100)] + for i in range(0x100): + index = i + mod, re, rm = getmodrm(i) + + if mod == 0b00: + if rm == 0b100: + db_afs_32[index] = sib_u32 + elif rm == 0b101: + db_afs_32[index] = {f_isad: True, f_imm: f_u32} + else: + db_afs_32[index] = {f_isad: True, rm: 1} + elif mod == 0b01: + if rm == 0b100: + db_afs_32[index] = sib_s08_ebp + continue + tmp = {f_isad: True, rm: 1, f_imm: f_s08} + db_afs_32[index] = tmp + + elif mod == 0b10: + if rm == 0b100: + db_afs_32[index] = sib_u32_ebp + else: + db_afs_32[index] = {f_isad: True, rm: 1, f_imm: f_u32} + elif mod == 0b11: + db_afs_32[index] = {f_isad: False, rm: 1} + + # 64bit + db_afs_64 = [None for i in range(0x400)] + for i in range(0x400): + index = i + rex_x = (index >> 9) & 1 + rex_b = (index >> 8) & 1 + mod, re, rm = getmodrm(i & 0xff) + + if mod == 0b00: + if 
rm == 0b100: + db_afs_64[i] = sib_u64[rex_x][rex_b] + elif rm == 0b101: + db_afs_64[i] = {f_isad: True, f_imm: f_u32, 16: 1} + else: + db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1} + elif mod == 0b01: + if rm == 0b100: + db_afs_64[i] = sib_64_s08_ebp[rex_x][rex_b] + continue + tmp = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_s08} + db_afs_64[i] = tmp + + elif mod == 0b10: + if rm == 0b100: + db_afs_64[i] = sib_u64_ebp[rex_x][rex_b] + else: + db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_u32} + elif mod == 0b11: + db_afs_64[i] = {f_isad: False, rm + 8 * rex_b: 1} + + # 16bit + db_afs_16 = [None for i in range(0x100)] + _si = 6 + _di = 7 + _bx = 3 + _bp = 5 + for i in range(0x100): + index = i + mod, re, rm = getmodrm(i) + + if mod == 0b00: + if rm == 0b100: + db_afs_16[index] = {f_isad: True, _si: 1} + elif rm == 0b101: + db_afs_16[index] = {f_isad: True, _di: 1} + elif rm == 0b110: + db_afs_16[index] = { + f_isad: True, f_imm: f_u16} # {f_isad:True,_bp:1} + elif rm == 0b111: + db_afs_16[index] = {f_isad: True, _bx: 1} + else: + db_afs_16[index] = {f_isad: True, + [_si, _di][rm % 2]: 1, + [_bx, _bp][(rm >> 1) % 2]: 1} + elif mod in [0b01, 0b10]: + if mod == 0b01: + my_imm = f_s08 + else: + my_imm = f_u16 + + if rm == 0b100: + db_afs_16[index] = {f_isad: True, _si: 1, f_imm: my_imm} + elif rm == 0b101: + db_afs_16[index] = {f_isad: True, _di: 1, f_imm: my_imm} + elif rm == 0b110: + db_afs_16[index] = {f_isad: True, _bp: 1, f_imm: my_imm} + elif rm == 0b111: + db_afs_16[index] = {f_isad: True, _bx: 1, f_imm: my_imm} + else: + db_afs_16[index] = {f_isad: True, + [_si, _di][rm % 2]: 1, + [_bx, _bp][(rm >> 1) % 2]: 1, + f_imm: my_imm} + + elif mod == 0b11: + db_afs_16[index] = {f_isad: False, rm: 1} + + byte2modrm = {} + byte2modrm[16] = db_afs_16 + byte2modrm[32] = db_afs_32 + byte2modrm[64] = db_afs_64 + + modrm2byte = {16: defaultdict(list), + 32: defaultdict(list), + 64: defaultdict(list), + } + for size, db_afs in viewitems(byte2modrm): + for i, modrm 
in enumerate(db_afs): + if not isinstance(modrm, list): + # We only need sort for determinism + modrm = tuple(sorted(viewitems(modrm), key=str)) + modrm2byte[size][modrm].append(i) + continue + for j, modrm_f in enumerate(modrm): + # We only need sort for determinism + modrm_f = tuple(sorted(viewitems(modrm_f), key=str)) + modrm2byte[size][modrm_f].append((i, j)) + + return byte2modrm, modrm2byte + +byte2modrm, modrm2byte = gen_modrm_form() + + +# ret is modr; ret is displacement +def exprfindmod(e, o=None): + if o is None: + o = {} + if isinstance(e, ExprInt): + return e + if isinstance(e, ExprId): + i = size2gpregs[e.size].expr.index(e) + o[i] = 1 + return None + elif isinstance(e, ExprOp): + out = None + if e.op == '+': + for a in e.args: + r = exprfindmod(a, o) + if out and r1: + raise ValueError('multiple displacement!') + out = r + return out + elif e.op == "*": + mul = int(e.args[1]) + a = e.args[0] + i = size2gpregs[a.size].expr.index(a) + o[i] = mul + else: + raise ValueError('bad op') + return None + +def test_addr_size(ptr, size): + if isinstance(ptr, ExprInt): + return ptr.arg < (1 << size) + else: + return ptr.size == size + +SIZE2XMMREG = {64:gpregs_mm, + 128:gpregs_xmm} +SIZE2BNDREG = {64:gpregs_mm, + 128:gpregs_bnd} + +def parse_mem(expr, parent, w8, sx=0, xmm=0, mm=0, bnd=0): + dct_expr = {} + opmode = parent.v_opmode() + if expr.is_mem_segm() and expr.ptr.args[0].is_int(): + return None, None, False + + if expr.is_mem_segm(): + segm = expr.ptr.args[0] + ptr = expr.ptr.args[1] + else: + segm = None + ptr = expr.ptr + + dct_expr[f_isad] = True + ad_size = ptr.size + admode = parent.v_admode() + if not test_addr_size(ptr, admode): + return None, None, False + + if (w8 == 1 and expr.size != opmode and not sx and + not (hasattr(parent, 'sd') or hasattr(parent, 'wd'))): + return None, None, False + + if hasattr(parent, 'wd'): + if expr.size == 16: + parent.wd.value = 1 + elif expr.size == 32: + pass + else: + return None, None, False + + if (not 
isinstance(ptr, ExprInt) and + parent.mode == 64 and + ptr.size == 32 and + parent.admode != 1): + return None, None, False + dct_expr = {f_isad: True} + disp = exprfindmod(ptr, dct_expr) + out = [] + if disp is None: + # add 0 disp + disp = ExprInt(0, 32) + if disp is not None: + for signed, encoding, cast_size in [(True, f_s08, 8), + (True, f_s16, 16), + (True, f_s32, 32), + (False, f_u08, 8), + (False, f_u16, 16), + (False, f_u32, 32)]: + value = ExprInt(int(disp), cast_size) + if admode < value.size: + if signed: + if int(disp.arg) != sign_ext(int(value), admode, disp.size): + continue + else: + if int(disp.arg) != int(value): + continue + else: + if int(disp.arg) != sign_ext(int(value), value.size, admode): + continue + x1 = dict(dct_expr) + x1[f_imm] = (encoding, value) + out.append(x1) + else: + out = [dct_expr] + return out, segm, True + +def expr2modrm(expr, parent, w8, sx=0, xmm=0, mm=0, bnd=0): + dct_expr = {f_isad : False} + + if mm or xmm or bnd: + if mm and expr.size != 64: + return None, None, False + elif xmm and expr.size != 128: + return None, None, False + elif bnd and expr.size != 128: + return None, None, False + + if isinstance(expr, ExprId): + if bnd: + size2reg = SIZE2BNDREG + else: + size2reg = SIZE2XMMREG + selreg = size2reg[expr.size] + if not expr in selreg.expr: + return None, None, False + i = selreg.expr.index(expr) + dct_expr[i] = 1 + return [dct_expr], None, True + else: + return parse_mem(expr, parent, w8, sx, xmm, mm) + + elif expr.size == 64 and expr not in gpregs_mm.expr: + if hasattr(parent, 'sd'): + parent.sd.value = 1 + elif hasattr(parent, 'wd'): + pass + elif hasattr(parent, 'stk'): + pass + else: + parent.rex_w.value = 1 + opmode = parent.v_opmode() + if sx == 1: + opmode = 16 + if sx == 2: + opmode = 32 + if expr.size == 8 and w8 != 0: + return None, None, False + + if w8 == 0 and expr.size != 8: + return None, None, False + + if not isinstance(expr, ExprMem): + dct_expr[f_isad] = False + if xmm: + if expr in 
gpregs_xmm.expr: + i = gpregs_xmm.expr.index(expr) + dct_expr[i] = 1 + return [dct_expr], None, True + else: + return None, None, False + if bnd: + if expr in gpregs_bnd.expr: + i = gpregs_bnd.expr.index(expr) + dct_expr[i] = 1 + return [dct_expr], None, True + else: + return None, None, False + if mm: + if expr in gpregs_mm.expr: + i = gpregs_mm.expr.index(expr) + dct_expr[i] = 1 + return [dct_expr], None, True + else: + return None, None, False + if w8 == 0: + if parent.mode == 64 and expr in gpregs08_64.expr: + r = gpregs08_64 + parent.rex_p.value = 1 + else: + parent.rex_p.value = 0 + parent.rex_x.value = 0 + r = size2gpregs[8] + if not expr in r.expr: + return None, None, False + i = r.expr.index(expr) + dct_expr[i] = 1 + return [dct_expr], None, True + if opmode != expr.size: + return None, None, False + if not expr in size2gpregs[opmode].expr: + return None, None, False + i = size2gpregs[opmode].expr.index(expr) + if i > 7: + if parent.mode != 64: + return None, None, False + dct_expr[i] = 1 + return [dct_expr], None, True + return parse_mem(expr, parent, w8, sx, xmm, mm, bnd) + +def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): + o = [] + if not modrm[f_isad]: + modrm_k = [key for key, value in viewitems(modrm) if value == 1] + if len(modrm_k) != 1: + raise ValueError('strange reg encoding %r' % modrm) + modrm_k = modrm_k[0] + if w8 == 0: + opmode = 8 + elif sx == 1: + opmode = 16 + elif sx == 2: + opmode = 32 + else: + opmode = parent.v_opmode() + if xmm: + expr = gpregs_xmm.expr[modrm_k] + elif mm: + expr = gpregs_mm.expr[modrm_k] + elif bnd: + expr = gpregs_bnd.expr[modrm_k] + elif opmode == 8 and (parent.v_opmode() == 64 or parent.rex_p.value == 1): + expr = gpregs08_64.expr[modrm_k] + else: + expr = size2gpregs[opmode].expr[modrm_k] + return expr + admode = parent.v_admode() + opmode = parent.v_opmode() + for modrm_k, scale in viewitems(modrm): + if isinstance(modrm_k, int): + expr = size2gpregs[admode].expr[modrm_k] + if scale != 1: + expr 
= ExprInt(scale, admode) * expr + o.append(expr) + if f_imm in modrm: + if parent.disp.value is None: + return None + o.append(ExprInt(int(parent.disp.expr), admode)) + expr = ExprOp('+', *o) + if w8 == 0: + opmode = 8 + elif sx == 1: + opmode = 16 + elif sx == 2: + opmode = 32 + if xmm: + opmode = 128 + elif mm: + opmode = 64 + elif bnd: + opmode = 128 + + expr = ExprMem(expr, size=opmode) + return expr + + +class x86_rm_arg(x86_arg): + parser = rmarg + + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(x86_rm_arg, self).fromstring(text, loc_db, parser_result) + p = self.parent + if start is None: + return None, None + return start, stop + + def get_modrm(self): + p = self.parent + admode = p.v_admode() + + if not admode in [16, 32, 64]: + raise ValueError('strange admode %r', admode) + v = setmodrm(p.mod.value, 0, p.rm.value) + v |= p.rex_b.value << 8 + v |= p.rex_x.value << 9 + if p.mode == 64: + # XXXx to check + admode = 64 + + xx = byte2modrm[admode][v] + if isinstance(xx, list): + if not p.sib_scale: + return False + v = setmodrm(p.sib_scale.value, + p.sib_index.value, + p.sib_base.value) + xx = xx[v] + return xx + + def decode(self, v): + p = self.parent + xx = self.get_modrm() + self.expr = modrm2expr(xx, p, 1) + return self.expr is not None + + def gen_cand(self, v_cand, admode): + if not admode in modrm2byte: + # XXX TODO: 64bit + return + if not v_cand: + return + + p = self.parent + o_rex_x = p.rex_x.value + o_rex_b = p.rex_b.value + # add candidate without 0 imm + new_v_cand = [] + moddd = False + for v in v_cand: + new_v_cand.append(v) + if f_imm in v and int(v[f_imm][1]) == 0: + v = dict(v) + del(v[f_imm]) + new_v_cand.append(v) + moddd = True + + v_cand = new_v_cand + + out_c = [] + for v in v_cand: + disp = None + # patch value in modrm + if f_imm in v: + size, disp = v[f_imm] + disp = int(disp) + + v[f_imm] = size + vo = v + # We only need sort for determinism + v = tuple(sorted(viewitems(v), key=str)) + admode = 64 
if p.mode == 64 else admode + if not v in modrm2byte[admode]: + continue + xx = modrm2byte[admode][v] + + # default case + for x in xx: + if type(x) == tuple: + modrm, sib = x + else: + modrm = x + sib = None + + # 16 bit cannot have sib + if sib is not None and admode == 16: + continue + rex = modrm >> 8 # 0# XXX HACK REM temporary REX modrm>>8 + if rex and admode != 64: + continue + + p.rex_x.value = (rex >> 1) & 1 + p.rex_b.value = rex & 1 + + if o_rex_x is not None and p.rex_x.value != o_rex_x: + continue + if o_rex_b is not None and p.rex_b.value != o_rex_b: + continue + + mod, re, rm = getmodrm(modrm) + # check re on parent + if re != p.reg.value: + continue + + if sib is not None: + s_scale, s_index, s_base = getmodrm(sib) + else: + s_scale, s_index, s_base = None, None, None + + p.mod.value = mod + p.rm.value = rm + p.sib_scale.value = s_scale + p.sib_index.value = s_index + p.sib_base.value = s_base + p.disp.value = disp + if disp is not None: + p.disp.l = f_imm2size[vo[f_imm]] + + yield True + + return + + def encode(self): + if isinstance(self.expr, ExprInt): + return + p = self.parent + admode = p.v_admode() + mode = self.expr.size + v_cand, segm, ok = expr2modrm(self.expr, p, 1) + if segm: + p.g2.value = segm2enc[segm] + for x in self.gen_cand(v_cand, admode): + yield x + +class x86_rm_mem(x86_rm_arg): + def fromstring(self, text, loc_db, parser_result=None): + self.expr = None + start, stop = super(x86_rm_mem, self).fromstring(text, loc_db, parser_result) + if not isinstance(self.expr, ExprMem): + return None, None + return start, stop + + +class x86_rm_mem_far(x86_rm_arg): + parser = mem_far + def fromstring(self, text, loc_db, parser_result=None): + self.expr = None + start, stop = super(x86_rm_mem_far, self).fromstring(text, loc_db, parser_result) + if not isinstance(self.expr, ExprMem): + return None, None + self.expr = ExprOp('far', self.expr) + return start, stop + + def decode(self, v): + ret = super(x86_rm_mem_far, self).decode(v) + if not 
class x86_rm_w8(x86_rm_arg):
    """mod/rm operand whose width is driven by the parent's ``w8`` field."""

    def decode(self, v):
        """Set ``self.expr`` from the parent's fields, passing its w8 value on."""
        parent = self.parent
        modrm = self.get_modrm()
        self.expr = modrm2expr(modrm, parent, parent.w8.value)
        return self.expr is not None

    def encode(self):
        """Yield True for each encoding of ``self.expr``; none for an ExprInt.

        When the parent's w8 is still unset it is chosen from the expression
        size: 0 for an 8 bit expression, 1 otherwise.
        """
        if isinstance(self.expr, ExprInt):
            return
        parent = self.parent
        if parent.w8.value is None:
            parent.w8.value = 0 if self.expr.size == 8 else 1

        candidates, segment, _ = expr2modrm(self.expr, parent, parent.w8.value)
        if segment:
            parent.g2.value = segm2enc[segment]
        for candidate in self.gen_cand(candidates, parent.v_admode()):
            yield candidate
class x86_rm_08(x86_rm_arg):
    """mod/rm operand whose memory form is always ``msize`` (8) bits wide."""
    msize = 8

    def decode(self, v):
        """Set ``self.expr`` from the parent's fields.

        A memory result is re-wrapped with a size of ``msize``; any other
        result is kept as is. Returns False when modrm2expr produced no
        expression, so a decode with ``self.expr`` set to None is no longer
        reported as a success (this matches x86_rm_arg.decode).
        """
        p = self.parent
        xx = self.get_modrm()
        expr = modrm2expr(xx, p, 0)
        if isinstance(expr, ExprMem):
            expr = ExprMem(expr.ptr, self.msize)
        self.expr = expr
        return self.expr is not None

    def encode(self):
        """Yield True for each encoding of ``self.expr``; none for an ExprInt."""
        if isinstance(self.expr, ExprInt):
            return
        p = self.parent
        v_cand, _segm, _ok = expr2modrm(self.expr, p, 0, 0, 0, 0)
        for x in self.gen_cand(v_cand, p.v_admode()):
            yield x
class x86_rm_m64(x86_rm_arg):
    """mod/rm operand that must be a memory access of ``msize`` (64) bits."""
    msize = 64

    def decode(self, v):
        """Set ``self.expr`` to an ``msize`` wide memory access.

        Returns False when the decoded mod/rm is not a memory expression.
        """
        parent = self.parent
        decoded = modrm2expr(self.get_modrm(), parent, 1)
        if isinstance(decoded, ExprMem):
            self.expr = ExprMem(decoded.ptr, self.msize)
            return self.expr is not None
        return False

    def encode(self):
        """Yield True for each encoding of ``self.expr``; none for an ExprInt."""
        if isinstance(self.expr, ExprInt):
            return
        parent = self.parent
        candidates, _segment, _ok = expr2modrm(self.expr, parent, 0, 0, 0, 1)
        for candidate in self.gen_cand(candidates, parent.v_admode()):
            yield candidate
class x86_rm_reg_noarg(object):
    """General purpose register stored in a 3 bit field plus one REX bit.

    Subclasses override getrexsize/setrexsize to pick which REX bit extends
    the register number.
    """
    prio = default_prio + 1

    parser = gpreg

    def _use_byte_operand(self):
        """Switch the parent to its 8 bit form (w8 = 0).

        Returns False, leaving the parent untouched, when the parent has an
        'sx' field or has no 'w8' field: an 8 bit register is rejected then.
        """
        parent = self.parent
        if hasattr(parent, 'sx') or not hasattr(parent, 'w8'):
            return False
        parent.w8.value = 0
        return True

    def fromstring(self, text, loc_db, parser_result=None):
        """Parse a register; return (start, stop) or (None, None) on failure.

        w8 defaults to 1 (when the parent has w8 and no sx) and is cleared
        when the parsed register is 8 bits wide.
        """
        if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"):
            self.parent.w8.value = 1
        if parser_result:
            result, start, stop = parser_result[self.parser]
            if result == [None]:
                return None, None
            self.expr = result
            if self.expr.size == 8 and not self._use_byte_operand():
                return None, None
            return start, stop
        try:
            result, start, stop = next(self.parser.scanString(text))
        except StopIteration:
            return None, None
        expr = self.asm_ast_to_expr(result[0], loc_db)
        if expr is None:
            return None, None

        self.expr = expr
        # This path used to test ``size == 0``, so its 8 bit handling never
        # ran; it now applies the same rule as the parser_result path above.
        if self.expr.size == 8 and not self._use_byte_operand():
            return None, None

        return start, stop

    def getrexsize(self):
        """Return the REX bit extending this register (rex_r here)."""
        return self.parent.rex_r.value

    def setrexsize(self, v):
        """Set the REX bit extending this register (rex_r here)."""
        self.parent.rex_r.value = v

    def decode(self, v):
        """Set ``self.expr`` to the register numbered by *v* and the REX bits."""
        v = v & self.lmask
        p = self.parent
        opmode = p.v_opmode()
        if not hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0):
            opmode = 8
        r = size2gpregs[opmode]
        if p.mode == 64 and self.getrexsize():
            v |= 0x8
        if p.v_opmode() == 64 or p.rex_p.value == 1:
            if not hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0):
                r = gpregs08_64
            elif p.rex_r.value == 1:
                v |= 8
        self.expr = r.expr[v]
        return True

    def encode(self):
        """Store the register number in ``self.value``; False if not encodable.

        Sets rex_w for a 64 bit register (unless the parent has 'stk'),
        w8 for 8 bit registers, rex_p for registers only found in
        gpregs08_64, and the REX extension bit for register numbers above 7.
        """
        if not isinstance(self.expr, ExprId):
            return False
        if self.expr in gpregs64.expr and not hasattr(self.parent, 'stk'):
            self.parent.rex_w.value = 1
        opmode = self.parent.v_opmode()
        if not hasattr(self.parent, 'sx') and hasattr(self.parent, 'w8'):
            self.parent.w8.value = 1
        if self.expr.size == 8:
            if not self._use_byte_operand():
                return False
            opmode = 8
        r = size2gpregs[opmode]
        if self.expr in r.expr:
            i = r.expr.index(self.expr)
        elif (opmode == 8 and self.parent.mode == 64 and
              self.expr in gpregs08_64.expr):
            i = gpregs08_64.expr.index(self.expr)
            self.parent.rex_p.value = 1
        else:
            log.debug("cannot encode reg %r", self.expr)
            return False
        if self.parent.v_opmode() == 64:
            if i > 7:
                self.setrexsize(1)
                i -= 8
        elif self.parent.mode == 64 and i > 7:
            i -= 8
            self.setrexsize(1)
        self.value = i
        if self.value > self.lmask:
            log.debug("cannot encode field value %x %x",
                      self.value, self.lmask)
            return False
        return True
decode(self, v): + if self.parent.mode == 64 and self.getrexsize(): + v |= 0x8 + self.expr = self.selreg.expr[v] + return True + + def encode(self): + if not isinstance(self.expr, ExprId): + return False + if self.expr not in self.selreg.expr: + return False + i = self.selreg.expr.index(self.expr) + if self.parent.mode == 64 and i > 7: + i -= 8 + self.setrexsize(1) + self.value = i + if self.value > self.lmask: + log.debug("cannot encode field value %x %x", + self.value, self.lmask) + return False + return True + +class x86_rm_reg_xmm(x86_rm_reg_mm): + selreg = gpregs_xmm + +class x86_rm_reg_bnd(x86_rm_reg_mm): + selreg = gpregs_bnd + +class x86_rm_reg(x86_rm_reg_noarg, x86_arg): + pass + + +class x86_reg(x86_rm_reg): + + def getrexsize(self): + return self.parent.rex_b.value + + def setrexsize(self, v): + self.parent.rex_b.value = v + + +class x86_reg_modrm(x86_rm_reg): + + def getrexsize(self): + return self.parent.rex_r.value + + def setrexsize(self, v): + self.parent.rex_r.value = v + + + +class x86_reg_noarg(x86_rm_reg_noarg): + + def getrexsize(self): + return self.parent.rex_b.value + + def setrexsize(self, v): + self.parent.rex_b.value = v + + +class x86_rm_segm(reg_noarg, x86_arg): + prio = default_prio + 1 + reg_info = segmreg + parser = reg_info.parser + + +class x86_rm_cr(reg_noarg, x86_arg): + prio = default_prio + 1 + reg_info = crregs + parser = reg_info.parser + + +class x86_rm_dr(reg_noarg, x86_arg): + prio = default_prio + 1 + reg_info = drregs + parser = reg_info.parser + + +class x86_rm_flt(reg_noarg, x86_arg): + prio = default_prio + 1 + reg_info = fltregs + parser = reg_info.parser + + +class bs_fbit(bsi): + + def decode(self, v): + # value already decoded in pre_dis_info + return True + + +class bs_cl1(bsi, x86_arg): + parser = cl_or_imm + + def decode(self, v): + if v == 1: + self.expr = regs08_expr[1] + else: + self.expr = ExprInt(1, 8) + return True + + def encode(self): + if self.expr == regs08_expr[1]: + self.value = 1 + elif 
def sib_cond(cls, mode, v):
    """Return the bit length of a sib field, or None when there is no sib.

    cls: field class; its ``ll`` attribute is the length returned.
    mode: assembler mode, forwarded to admode_prefix.
    v: dict of already decoded fields ('opmode', 'admode', 'mod', 'rm').

    A sib field exists only when the address mode is not 16 bits, mod is
    not 0b11 and rm is 0b100.
    """
    if admode_prefix((mode, v["opmode"], v["admode"])) == 16:
        return None
    if v['mod'] != 0b11 and v['rm'] == 0b100:
        return cls.ll
    return None
+ if parser_result: + expr, start, stop = parser_result[self.parser] + else: + try: + expr, start, stop = next(self.parser.scanString(text)) + except StopIteration: + expr = None + self.expr = expr + + if len(self.parent.args) > 1: + l = self.parent.args[0].expr.size + else: + l = self.parent.v_opmode() + if isinstance(self.expr, ExprInt): + v = int(self.expr) + mask = ((1 << l) - 1) + self.expr = ExprInt(v & mask, l) + + if self.expr is None: + log.debug('cannot fromstring int %r', text) + return None, None + return start, stop + + @classmethod + def flen(cls, mode, v): + if 'w8' not in v or v['w8'] == 1: + if 'se' in v and v['se'] == 1: + return 8 + else: + osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) + osize = min(osize, cls.max_size) + return osize + return 8 + + def getmaxlen(self): + return 32 + + def encode(self): + if not isinstance(self.expr, ExprInt): + return + arg0_expr = self.parent.args[0].expr + self.parent.rex_w.value = 0 + # special case for push + if len(self.parent.args) == 1: + v = int(self.expr) + l = self.parent.v_opmode() + l = min(l, self.max_size) + + self.l = l + mask = ((1 << self.l) - 1) + if v != sign_ext(v & mask, self.l, l): + return + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + yield True + return + + # assume 2 args; use first arg to guess op size + if arg0_expr.size == 64: + self.parent.rex_w.value = 1 + + l = self.parent.v_opmode() + v = int(self.expr) + if arg0_expr.size == 8: + if not hasattr(self.parent, 'w8'): + return + self.parent.w8.value = 0 + l = 8 + if hasattr(self.parent, 'se'): + self.parent.se.value = 0 + elif hasattr(self.parent, 'se'): + if hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + # try to generate signed extended version + if v == sign_ext(v & 0xFF, 8, arg0_expr.size): + self.parent.se.value = 1 + self.l = 8 + self.value = v & 0xFF + yield True + self.parent.se.value = 0 + else: + if hasattr(self.parent, 'w8'): + self.parent.w8.value = 1 + if l == 64: + self.l = 
class bs_cond_imm64(bs_cond_imm):
    """Immediate field that may be up to 64 bits wide."""
    max_size = 64

    def getmaxlen(self):
        """Return the largest immediate length, in bits."""
        return 64

    @classmethod
    def flen(cls, mode, v):
        """Return the immediate length in bits for the decoded fields *v*.

        The length is 8 when w8 is present and not 1, or when se is 1;
        otherwise it is the operand size reported by v_opmode_info.
        """
        if 'w8' in v and v['w8'] != 1:
            return 8
        if 'se' in v and v['se'] == 1:
            return 8
        return v_opmode_info(mode, v['opmode'], v['rex_w'], 0)
class bs_movoff(x86_arg):
    """Memory operand given as an absolute offset stored in the instruction."""
    parser = deref_mem

    def fromstring(self, text, loc_db, parser_result=None):
        """Parse a memory operand; return (start, stop) or (None, None).

        Only ExprMem results are accepted. ``self.expr`` is left untouched
        when parsing fails.
        """
        if parser_result:
            e, start, stop = parser_result[self.parser]
            if not isinstance(e, ExprMem):
                return None, None
            self.expr = e
            return start, stop
        try:
            v, start, stop = next(self.parser.scanString(text))
        except StopIteration:
            return None, None
        # The scanned expression is v[0]; this path used to test an
        # undefined name and raised NameError on every successful scan.
        expr = v[0]
        if not isinstance(expr, ExprMem):
            log.debug('cannot fromstring int %r', text)
            return None, None
        self.expr = expr
        return start, stop

    @classmethod
    def flen(cls, mode, v):
        """Return the offset length in bits for the decoded fields *v*.

        In 64 bit mode this is 32 when v['admode'] is truthy and 64
        otherwise; in other modes it comes from v_admode_info.
        """
        if mode == 64:
            if v['admode']:
                return 32
            return 64
        return v_admode_info(mode, v['admode'])

    def encode(self):
        """Yield True once if ``self.expr`` is a memory access at a constant
        address that fits in the parent's address size; otherwise yield nothing.
        """
        p = self.parent
        if not isinstance(self.expr, ExprMem) or not isinstance(self.expr.ptr, ExprInt):
            return
        self.l = p.v_admode()
        v = int(self.expr.ptr)
        mask = (1 << self.l) - 1
        if v != mask & v:
            return
        # v == v & mask holds here, so no further masking is needed
        self.value = swap_uint(self.l, v)
        yield True

    def decode(self, v):
        """Set ``self.expr`` to the memory access at the decoded offset.

        The access size is the parent's operand size, or 8 when w8 is 0.
        """
        if self.parent.mode == 64:
            if self.parent.admode == 1:
                l = 32
            else:
                l = 64
        else:
            l = self.parent.v_admode()
        v = swap_uint(self.l, v)
        self.value = v
        v = sign_ext(v, self.l, l)
        v = ExprInt(v, l)
        size = self.parent.v_opmode()
        if self.parent.w8.value == 0:
            size = 8
        self.expr = ExprMem(v, size)
        return True
return + l = self.parent.v_opmode() + v = int(self.expr.args[0]) + mask = ((1 << self.l) - 1) + if v != sign_ext(v & mask, self.l, l): + return + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + yield True + + def decode(self, v): + opmode = self.parent.v_opmode() + v = swap_uint(self.l, v) + self.value = v + v = ExprInt(v, 16) + self.expr = ExprOp('segm', v, self.parent.off.expr) + return True + + +d_rex_p = bs(l=0, cls=(bs_fbit,), fname="rex_p") +d_rex_w = bs(l=0, cls=(bs_fbit,), fname="rex_w") +d_rex_r = bs(l=0, cls=(bs_fbit,), fname="rex_r") +d_rex_x = bs(l=0, cls=(bs_fbit,), fname="rex_x") +d_rex_b = bs(l=0, cls=(bs_fbit,), fname="rex_b") + +d_g1 = bs(l=0, cls=(bs_fbit,), fname="g1") +d_g2 = bs(l=0, cls=(bs_fbit,), fname="g2") + + +d_cl1 = bs(l=1, cls=(bs_cl1,), fname="cl1") + + +w8 = bs(l=1, fname="w8") +se = bs(l=1, fname="se") + +sx = bs(l=0, fname="sx") +sxd = bs(l=0, fname="sx") + + +xmmreg = bs(l=0, fname="xmmreg") +mmreg = bs(l=0, fname="mmreg") + +pref_f2 = bs(l=0, fname="prefixed", default=b"\xf2") +pref_f3 = bs(l=0, fname="prefixed", default=b"\xf3") +pref_66 = bs(l=0, fname="prefixed", default=b"\x66") +no_xmm_pref = bs(l=0, fname="no_xmm_pref") + +no_rex = bs(l=0, fname="no_rex") + +sib_scale = bs(l=2, cls=(bs_cond_scale,), fname = "sib_scale") +sib_index = bs(l=3, cls=(bs_cond_index,), fname = "sib_index") +sib_base = bs(l=3, cls=(bs_cond_index,), fname = "sib_base") + +disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") + +s08 = bs(l=8, cls=(bs_s08, )) + +u08 = bs(l=8, cls=(x86_08, x86_arg)) +u07 = bs(l=7, cls=(x86_08, x86_arg)) +u16 = bs(l=16, cls=(x86_16, x86_arg)) +u32 = bs(l=32, cls=(x86_32, x86_arg)) +s3264 = bs(l=32, cls=(x86_s32to64, x86_arg)) + +u08_3 = bs(l=0, cls=(x86_imm_fix_08, x86_arg), ival = 3) + +d0 = bs("000", fname='reg') +d1 = bs("001", fname='reg') +d2 = bs("010", fname='reg') +d3 = bs("011", fname='reg') +d4 = bs("100", fname='reg') +d5 = bs("101", fname='reg') +d6 = bs("110", fname='reg') +d7 = bs("111", 
class bs_mem(object):
    """Field mixin accepting every value except 0b11."""

    def encode(self):
        """Return True unless the current value is 0b11."""
        rejected = 0b11
        return rejected != self.value

    def decode(self, v):
        """Store *v* as the field value; return True unless it is 0b11."""
        rejected = 0b11
        self.value = v
        return rejected != v
fname = "reg") + + +mm_reg = bs(l=3, cls=(x86_rm_reg_mm, ), order =1, fname = "reg") +xmm_reg = bs(l=3, cls=(x86_rm_reg_xmm, ), order =1, fname = "reg") +bnd_reg = bs(l=3, cls=(x86_rm_reg_bnd, ), order =1, fname = "reg") + + +fltreg = bs(l=3, cls=(x86_rm_flt, ), order =1, fname = "reg") + +rm = bs(l=3, fname="rm") + +rm_arg = bs(l=0, cls=(x86_rm_arg,), fname='rmarg') +rm_arg_w8 = bs(l=0, cls=(x86_rm_w8,), fname='rmarg') +rm_arg_sx = bs(l=0, cls=(x86_rm_sx,), fname='rmarg') +rm_arg_sxd = bs(l=0, cls=(x86_rm_sxd,), fname='rmarg') +rm_arg_sd = bs(l=0, cls=(x86_rm_sd,), fname='rmarg') +rm_arg_wd = bs(l=0, cls=(x86_rm_wd,), fname='rmarg') +rm_arg_08 = bs(l=0, cls=(x86_rm_08,), fname='rmarg') +rm_arg_reg_m08 = bs(l=0, cls=(x86_rm_reg_m08,), fname='rmarg') +rm_arg_reg_m16 = bs(l=0, cls=(x86_rm_reg_m16,), fname='rmarg') +rm_arg_m08 = bs(l=0, cls=(x86_rm_m08,), fname='rmarg') +rm_arg_m64 = bs(l=0, cls=(x86_rm_m64,), fname='rmarg') +rm_arg_m80 = bs(l=0, cls=(x86_rm_m80,), fname='rmarg') +rm_arg_m16 = bs(l=0, cls=(x86_rm_m16,), fname='rmarg') + +rm_mem = bs(l=0, cls=(x86_rm_mem,), fname='rmarg') +rm_mem_far = bs(l=0, cls=(x86_rm_mem_far,), fname='rmarg') + +rm_arg_mm = bs(l=0, cls=(x86_rm_mm,), fname='rmarg') +rm_arg_mm_m64 = bs(l=0, cls=(x86_rm_mm_m64,), fname='rmarg') +rm_arg_mm_reg = bs(l=0, cls=(x86_rm_mm_reg,), fname='rmarg') + +rm_arg_xmm = bs(l=0, cls=(x86_rm_xmm,), fname='rmarg') +rm_arg_xmm_m32 = bs(l=0, cls=(x86_rm_xmm_m32,), fname='rmarg') +rm_arg_xmm_m64 = bs(l=0, cls=(x86_rm_xmm_m64,), fname='rmarg') +rm_arg_xmm_m128 = bs(l=0, cls=(x86_rm_xmm_m128,), fname='rmarg') +rm_arg_xmm_reg = bs(l=0, cls=(x86_rm_xmm_reg,), fname='rmarg') + +rm_arg_bnd = bs(l=0, cls=(x86_rm_bnd,), fname='rmarg') +rm_arg_bnd_m64 = bs(l=0, cls=(x86_rm_bnd_m64,), fname='rmarg') +rm_arg_bnd_m128 = bs(l=0, cls=(x86_rm_bnd_m128,), fname='rmarg') +rm_arg_bnd_reg = bs(l=0, cls=(x86_rm_bnd_reg,), fname='rmarg') + + +swapargs = bs_swapargs(l=1, fname="swap", mn_mod=list(range(1 << 1))) + + +class 
class bs_op_mode64(bsi):
    """Field that only matches when the parent is in 64 bit mode."""

    def encode(self):
        """Defer to the base encoder in 64 bit mode; otherwise return False."""
        if self.parent.mode == 64:
            return super(bs_op_mode64, self).encode()
        return False

    def decode(self, v):
        """Return True only when the parent is in 64 bit mode."""
        return self.parent.mode == 64
def addop(name, fields, args=None, alias=False):
    """Declare an instruction by creating an mn_x86 subclass named *name*.

    name: name given to the new class.
    fields: list stored as the class attribute ``fields``.
    args: optional value stored as ``args``; the attribute is only set when
        this is not None.
    alias: stored as the class attribute ``alias``.

    The new class is neither returned nor stored here.
    NOTE(review): presumably creating the subclass registers it through
    mn_x86's class machinery - confirm against mn_x86.
    """
    namespace = {"fields": fields, "alias": alias}
    if args is not None:
        namespace['args'] = args
    type(name, (mn_x86,), namespace)
+addop("btc", [bs8(0x0f), bs8(0xba)] + rmmod(d7) + [u08]) + + +addop("btr", [bs8(0x0f), bs8(0xb3)] + rmmod(rmreg), [rm_arg, rmreg]) +addop("btr", [bs8(0x0f), bs8(0xba)] + rmmod(d6) + [u08]) +addop("bts", [bs8(0x0f), bs8(0xab)] + rmmod(rmreg), [rm_arg, rmreg]) +addop("bts", [bs8(0x0f), bs8(0xba)] + rmmod(d5) + [u08]) + +addop("call", [bs8(0xe8), rel_off]) +addop("call", [bs8(0xff), stk] + rmmod(d2)) +addop("call", [bs8(0xff), stk] + rmmod(d3, rm_arg_x=rm_mem_far, modrm=mod_mem)) +addop("call", [bs8(0x9a), bs_modeno64, moff, msegoff]) + + +addop("cbw", [bs8(0x98), bs_opmode16]) +addop("cwde", [bs8(0x98), bs_opmode32]) +addop("cdqe", [bs8(0x98), bs_opmode64]) + +addop("clc", [bs8(0xf8)]) +addop("cld", [bs8(0xfc)]) +addop("cli", [bs8(0xfa)]) +addop("clts", [bs8(0x0f), bs8(0x06)]) +addop("cmc", [bs8(0xf5)]) + +addop("cmov", [bs8(0x0f), bs('0100'), cond] + rmmod(rmreg)) + +addop("cmp", [bs("0011110"), w8, d_eax, d_imm]) +addop("cmp", [bs("100000"), se, w8] + rmmod(d7, rm_arg_w8) + [d_imm]) +addop("cmp", [bs("001110"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + + +addop("cmpsb", [bs8(0xa6)]) +addop("cmpsw", [bs8(0xa7), bs_opmode16]) +addop("cmpsd", [bs8(0xa7), bs_opmode32]) +addop("cmpsq", [bs8(0xa7), bs_opmode64]) + +addop("cmpxchg", [bs8(0x0f), bs('1011000'), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("cmpxchg8b", [bs8(0x0f), bs8(0xc7), bs_opmode16] + rmmod(d1, rm_arg_m64)) +addop("cmpxchg8b", [bs8(0x0f), bs8(0xc7), bs_opmode32] + rmmod(d1, rm_arg_m64)) +addop("cmpxchg16b", [bs8(0x0f), bs8(0xc7), bs_opmode64] + rmmod(d1, rm_arg_xmm_m128)) + +# XXX TODO CMPXCHG8/16 + +addop("comiss", [bs8(0x0f), bs8(0x2f), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm_m32), [xmm_reg, rm_arg_xmm_m32]) +addop("comisd", [bs8(0x0f), bs8(0x2f), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64]) + +addop("cpuid", [bs8(0x0f), bs8(0xa2)]) + +addop("cwd", [bs8(0x99), bs_opmode16]) +addop("cdq", [bs8(0x99), bs_opmode32]) +addop("cqo", 
[bs8(0x99), bs_opmode64]) + + +addop("daa", [bs8(0x27)]) +addop("das", [bs8(0x2f)]) +addop("dec", [bs('1111111'), w8] + rmmod(d1, rm_arg_w8)) +addop("dec", [bs('01001'), reg, bs_modeno64]) +addop("div", [bs('1111011'), w8] + rmmod(d6, rm_arg_w8)) +addop("enter", [bs8(0xc8), u16, u08]) + +# float ##### +addop("fwait", [bs8(0x9b)]) + +addop("f2xm1", [bs8(0xd9), bs8(0xf0)]) +addop("fabs", [bs8(0xd9), bs8(0xe1)]) + +addop("fadd", [bs("11011"), sd, bs("00")] + rmmod(d0, rm_arg_sd)) +addop("fadd", [bs("11011"), swapargs, bs("00"), + bs("11000"), d_st, fltreg], [d_st, fltreg]) +addop("faddp", [bs8(0xde), bs("11000"), fltreg, d_st]) +addop("fiadd", [bs("11011"), wd, bs("10")] + rmmod(d0, rm_arg_wd)) + +addop("fbld", [bs8(0xdf)] + rmmod(d4, rm_arg_m80)) +addop("fbldp", [bs8(0xdf)] + rmmod(d6, rm_arg_m80)) +addop("fchs", [bs8(0xd9), bs8(0xe0)]) +# addop("fclex", [bs8(0x9b), bs8(0xdb), bs8(0xe2)]) +addop("fnclex", [bs8(0xdb), bs8(0xe2)]) + +addop("fcmovb", [bs8(0xda), bs("11000"), d_st, fltreg]) +addop("fcmove", [bs8(0xda), bs("11001"), d_st, fltreg]) +addop("fcmovbe", [bs8(0xda), bs("11010"), d_st, fltreg]) +addop("fcmovu", [bs8(0xda), bs("11011"), d_st, fltreg]) +addop("fcmovnb", [bs8(0xdb), bs("11000"), d_st, fltreg]) +addop("fcmovne", [bs8(0xdb), bs("11001"), d_st, fltreg]) +addop("fcmovnbe", [bs8(0xdb), bs("11010"), d_st, fltreg]) +addop("fcmovnu", [bs8(0xdb), bs("11011"), d_st, fltreg]) + +addop("fcom", [bs("11011"), sd, bs("00")] + rmmod(d2, rm_arg_sd)) +addop("fcom", [bs("11011"), swapargs, bs("00"), + bs("11010"), d_st, fltreg], [d_st, fltreg]) +addop("fcomp", [bs("11011"), sd, bs("00")] + rmmod(d3, rm_arg_sd)) +addop("fcomp", + [bs("11011"), swapargs, bs("00"), bs("11011"), + d_st, fltreg], [d_st, fltreg]) +addop("fcompp", [bs8(0xde), bs8(0xd9)]) + +addop("fcomi", [bs8(0xdb), bs("11110"), d_st, fltreg]) +addop("fcomip", [bs8(0xdf), bs("11110"), d_st, fltreg]) +addop("fucomi", [bs8(0xdb), bs("11101"), d_st, fltreg]) +addop("fucomip", [bs8(0xdf), bs("11101"), d_st, 
fltreg]) + +addop("fcos", [bs8(0xd9), bs8(0xff)]) +addop("fdecstp", [bs8(0xd9), bs8(0xf6)]) + + +addop("fdiv", [bs("11011"), sd, bs("00")] + rmmod(d6, rm_arg_sd)) +addop("fdiv", [bs8(0xd8), bs("11110"), d_st, fltreg]) +addop("fdiv", [bs8(0xdc), bs("11111"), fltreg, d_st]) +addop("fdivp", [bs8(0xde), bs("11111"), fltreg, d_st]) +addop("fidiv", [bs("11011"), wd, bs("10")] + rmmod(d6, rm_arg_wd)) + +addop("fdivr", [bs("11011"), sd, bs("00")] + rmmod(d7, rm_arg_sd)) +addop("fdivr", [bs8(0xd8), bs("11111"), d_st, fltreg]) +addop("fdivr", [bs8(0xdc), bs("11110"), fltreg, d_st]) +addop("fdivrp", [bs8(0xde), bs("11110"), fltreg, d_st]) +addop("fidivr", [bs("11011"), wd, bs("10")] + rmmod(d7, rm_arg_wd)) + +addop("ffree", [bs8(0xdd), bs("11000"), fltreg]) +addop("ficom", [bs("11011"), wd, bs("10")] + rmmod(d2, rm_arg_wd)) +addop("ficomp", [bs("11011"), wd, bs("10")] + rmmod(d3, rm_arg_wd)) +addop("fild", [bs("11011"), wd, bs("11")] + rmmod(d0, rm_arg_wd)) +addop("fild", [bs8(0xdf)] + rmmod(d5, rm_arg_m64)) + +addop("fincstp", [bs8(0xd9), bs8(0xf7)]) + +# addop("finit", [bs8(0x9b), bs8(0xdb), bs8(0xe3)]) +addop("fninit", [bs8(0xdb), bs8(0xe3)]) + +addop("fist", [bs("11011"), wd, bs("11")] + rmmod(d2, rm_arg_wd)) +addop("fistp", [bs("11011"), wd, bs("11")] + rmmod(d3, rm_arg_wd)) +addop("fistp", [bs8(0xdf)] + rmmod(d7, rm_arg_m64)) + +addop("fisttp", [bs("11011"), wd, bs("11")] + rmmod(d1, rm_arg_wd)) +addop("fisttp", [bs8(0xdd)] + rmmod(d1, rm_arg_m64)) + +addop("fld", [bs("11011"), sd, bs("01")] + rmmod(d0, rm_arg_sd)) +addop("fld", [bs8(0xdb)] + rmmod(d5, rm_arg_m80)) +addop("fld", [bs8(0xd9), bs("11000"), fltreg]) + +addop("fld1", [bs8(0xd9), bs8(0xe8)]) +addop("fldl2t", [bs8(0xd9), bs8(0xe9)]) +addop("fldl2e", [bs8(0xd9), bs8(0xea)]) +addop("fldpi", [bs8(0xd9), bs8(0xeb)]) +addop("fldlg2", [bs8(0xd9), bs8(0xec)]) +addop("fldln2", [bs8(0xd9), bs8(0xed)]) +addop("fldz", [bs8(0xd9), bs8(0xee)]) + +addop("fldcw", [bs8(0xd9)] + rmmod(d5, rm_arg_m16)) +addop("fldenv", 
[bs8(0xd9)] + rmmod(d4, rm_arg_m80)) # XXX TODO: m14? + +addop("fmul", [bs("11011"), sd, bs("00")] + rmmod(d1, rm_arg_sd)) +addop("fmul", [bs("11011"), swapargs, bs("00"), + bs("11001"), d_st, fltreg], [d_st, fltreg]) +addop("fmulp", [bs8(0xde), bs("11001"), fltreg, d_st]) +addop("fimul", [bs("11011"), wd, bs("10")] + rmmod(d1, rm_arg_wd)) + +addop("fnop", [bs8(0xd9), bs8(0xd0)]) +addop("fpatan", [bs8(0xd9), bs8(0xf3)]) +addop("fprem", [bs8(0xd9), bs8(0xf8)]) +addop("fprem1", [bs8(0xd9), bs8(0xf5)]) +addop("fptan", [bs8(0xd9), bs8(0xf2)]) +addop("frndint", [bs8(0xd9), bs8(0xfc)]) +addop("frstor", [bs8(0xdd)] + rmmod(d4, rm_arg_m80)) # XXX TODO: m94 ? +# addop("fsave", [bs8(0x9b), bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX +# TODO: m94 ? +addop("fnsave", [bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX TODO: m94 ? + +addop("fscale", [bs8(0xd9), bs8(0xfd)]) +addop("fsin", [bs8(0xd9), bs8(0xfe)]) +addop("fsincos", [bs8(0xd9), bs8(0xfb)]) +addop("fsqrt", [bs8(0xd9), bs8(0xfa)]) + +addop("fst", [bs("11011"), sd, bs("01")] + rmmod(d2, rm_arg_sd)) +addop("fst", [bs8(0xdd), bs("11010"), fltreg]) +addop("fstp", [bs("11011"), sd, bs("01")] + rmmod(d3, rm_arg_sd)) +addop("fstp", [bs8(0xdb)] + rmmod(d7, rm_arg_m80)) +addop("fstp", [bs8(0xdd), bs("11011"), fltreg]) + +# addop("fstcw", [bs8(0x9b), bs8(0xd9)] + rmmod(d7, rm_arg_m16)) +addop("fnstcw", [bs8(0xd9)] + rmmod(d7, rm_arg_m16)) +# addop("fstenv", [bs8(0x9b), bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX +# TODO: m14? +addop("fnstenv", [bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX TODO: m14? 
+# addop("fstsw", [bs8(0x9b), bs8(0xdd)] + rmmod(d7, rm_arg_m16)) +addop("fnstsw", [bs8(0xdd)] + rmmod(d7, rm_arg_m16)) +# addop("fstsw", [bs8(0x9b), bs8(0xdf), bs8(0xe0), d_ax]) +addop("fnstsw", [bs8(0xdf), bs8(0xe0), d_ax]) + +addop("fsub", [bs("11011"), sd, bs("00")] + rmmod(d4, rm_arg_sd)) +addop("fsub", [bs8(0xd8), bs("11100"), d_st, fltreg]) +addop("fsub", [bs8(0xdc), bs("11101"), fltreg, d_st]) +addop("fsubp", [bs8(0xde), bs("11101"), fltreg, d_st]) +addop("fisub", [bs("11011"), wd, bs("10")] + rmmod(d4, rm_arg_wd)) + +addop("fsubr", [bs("11011"), sd, bs("00")] + rmmod(d5, rm_arg_sd)) +addop("fsubr", [bs8(0xd8), bs("11101"), d_st, fltreg]) +addop("fsubr", [bs8(0xdc), bs("11100"), fltreg, d_st]) +addop("fsubrp", [bs8(0xde), bs("11100"), fltreg, d_st]) +addop("fisubr", [bs("11011"), wd, bs("10")] + rmmod(d5, rm_arg_wd)) +addop("ftst", [bs8(0xd9), bs8(0xe4)]) + + +addop("fucom", [bs8(0xdd), bs("11100"), fltreg]) +addop("fucomp", [bs8(0xdd), bs("11101"), fltreg]) +addop("fucompp", [bs8(0xda), bs8(0xe9)]) + +addop("fxam", [bs8(0xd9), bs8(0xe5)]) +addop("fxch", [bs8(0xd9), bs("11001"), fltreg]) +addop("fxrstor", [bs8(0x0f), bs8(0xae)] + + rmmod(d1, rm_arg_m80)) # XXX TODO m512 +addop("fxsave", [bs8(0x0f), bs8(0xae)] + + rmmod(d0, rm_arg_m80)) # XXX TODO m512 +addop("stmxcsr", [bs8(0x0f), bs8(0xae)] + rmmod(d3)) +addop("ldmxcsr", [bs8(0x0f), bs8(0xae)] + rmmod(d2)) + +addop("fxtract", [bs8(0xd9), bs8(0xf4)]) +addop("fyl2x", [bs8(0xd9), bs8(0xf1)]) +addop("fyl2xp1", [bs8(0xd9), bs8(0xf9)]) + +addop("hlt", [bs8(0xf4)]) +addop("icebp", [bs8(0xf1)]) + +addop("idiv", [bs('1111011'), w8] + rmmod(d7, rm_arg_w8)) + +addop("imul", [bs('1111011'), w8] + rmmod(d5, rm_arg_w8)) +addop("imul", [bs8(0x0f), bs8(0xaf)] + rmmod(rmreg)) + +addop("imul", [bs("011010"), se, bs('1')] + rmmod(rmreg) + [d_imm]) + +addop("in", [bs("1110010"), w8, d_eax, u08]) +addop("in", [bs("1110110"), w8, d_eax, d_edx]) + +addop("inc", [bs('1111111'), w8] + rmmod(d0, rm_arg_w8)) +addop("inc", 
[bs('01000'), reg, bs_modeno64]) + +addop("insb", [bs8(0x6c)]) +addop("insw", [bs8(0x6d), bs_opmode16]) +addop("insd", [bs8(0x6d), bs_opmode32]) +addop("insd", [bs8(0x6d), bs_opmode64]) + +addop("int", [bs8(0xcc), u08_3]) +addop("int", [bs8(0xcd), u08]) +addop("into", [bs8(0xce)]) +addop("invd", [bs8(0x0f), bs8(0x08)]) +addop("invlpg", [bs8(0x0f), bs8(0x01)] + rmmod(d7)) + +addop("iret", [bs8(0xcf), bs_opmode16]) +addop("iretd", [bs8(0xcf), bs_opmode32]) +addop("iretq", [bs8(0xcf), bs_opmode64]) + +addop("j", [bs('0111'), cond, rel_off08]) + +addop("jcxz", [bs8(0xe3), rel_off08, bs_admode16]) +addop("jecxz", [bs8(0xe3), rel_off08, bs_admode32]) +addop("jrcxz", [bs8(0xe3), rel_off08, bs_admode64]) + +addop("j", [bs8(0x0f), bs('1000'), cond, rel_off]) +addop("jmp", [bs8(0xeb), rel_off08]) +addop("jmp", [bs8(0xe9), rel_off]) +# TODO XXX replace stk force64? +addop("jmp", [bs8(0xff), stk] + rmmod(d4)) +addop("jmp", [bs8(0xea), bs_modeno64, moff, msegoff]) + +addop("jmp", [bs8(0xff)] + rmmod(d5, rm_arg_x=rm_mem_far, modrm=mod_mem)) + +addop("lahf", [bs8(0x9f)]) +addop("lar", [bs8(0x0f), bs8(0x02)] + rmmod(rmreg)) + +addop("lea", [bs8(0x8d)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) +addop("les", [bs8(0xc4)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) +addop("lds", [bs8(0xc5)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) +addop("lss", [bs8(0x0f), bs8(0xb2)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) +addop("lfs", [bs8(0x0f), bs8(0xb4)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) +addop("lgs", [bs8(0x0f), bs8(0xb5)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) + +addop("lgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d2, modrm=mod_mem)) +addop("lidt", [bs8(0x0f), bs8(0x01)] + rmmod(d3, modrm=mod_mem)) + +addop("lfence", [bs8(0x0f), bs8(0xae), bs8(0xe8)]) +addop("mfence", [bs8(0x0f), bs8(0xae), bs8(0xf0)]) +addop("sfence", [bs8(0x0f), bs8(0xae), bs8(0xf8)]) + +addop("leave", [bs8(0xc9), stk]) + +addop("lodsb", [bs8(0xac)]) +addop("lodsw", [bs8(0xad), 
bs_opmode16]) +addop("lodsd", [bs8(0xad), bs_opmode32]) +addop("lodsq", [bs8(0xad), bs_opmode64]) + +addop("loop", [bs8(0xe2), rel_off08]) +addop("loope", [bs8(0xe1), rel_off08]) +addop("loopne", [bs8(0xe0), rel_off08]) +addop("lsl", [bs8(0x0f), bs8(0x03)] + rmmod(rmreg)) +addop("monitor", [bs8(0x0f), bs8(0x01), bs8(0xc8)]) + +addop("mov", [bs("100010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("mov", [bs("100011"), swapargs, bs('0')] + rmmod(segm), [rm_arg, segm]) +addop("mov", [bs("101000"), swapargs, w8, d_eax, movoff], [d_eax, movoff]) +addop("mov", [bs("1011"), w8, reg, d_imm64]) +addop("mov", [bs("1100011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) +addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('0')] + + rmmod(crreg), [rm_arg, crreg]) +addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('1')] + + rmmod(drreg), [rm_arg, drreg]) +addop("movsb", [bs8(0xa4)]) +addop("movsw", [bs8(0xa5), bs_opmode16]) +addop("movsd", [bs8(0xa5), bs_opmode32]) +addop("movsq", [bs8(0xa5), bs_opmode64]) + +addop("movsx", [bs8(0x0f), bs("1011111"), w8, sx] + rmmod(rmreg, rm_arg_sx)) +addop("movsxd", [bs8(0x63), sxd, bs_mode64] + rmmod(rmreg, rm_arg_sxd)) + +addop("movups", [bs8(0x0f), bs("0001000"), swapargs, no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) +addop("movsd", [bs8(0x0f), bs("0001000"), swapargs, pref_f2] + + rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64]) +addop("movss", [bs8(0x0f), bs("0001000"), swapargs, pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m32), [xmm_reg, rm_arg_xmm_m32]) +addop("movupd", [bs8(0x0f), bs8(0x10), pref_66] + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) +addop("movupd", [bs8(0x0f), bs8(0x11), pref_66] + rmmod(xmm_reg, rm_arg_xmm), [rm_arg_xmm, xmm_reg]) + + +addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), no_xmm_pref] + + rmmod(mm_reg, rm_arg), [mm_reg, rm_arg]) +addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66, bs_opmode32] + + rmmod(xmm_reg, rm_arg), 
[xmm_reg, rm_arg]) +addop("movq", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66, bs_opmode64] + + rmmod(xmm_reg, rm_arg), [xmm_reg, rm_arg]) + +addop("movq", [bs8(0x0f), bs('011'), swapargs, bs('1111'), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) + +addop("movq", [bs8(0x0f), bs8(0x7e), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64]) +addop("movq", [bs8(0x0f), bs8(0xd6), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m64), [rm_arg_xmm_m64, xmm_reg]) + +addop("movmskps", [bs8(0x0f), bs8(0x50), no_xmm_pref] + + rmmod(reg_modrm, rm_arg_xmm_reg)) +addop("movmskpd", [bs8(0x0f), bs8(0x50), pref_66] + + rmmod(reg_modrm, rm_arg_xmm_reg)) + +addop("addss", [bs8(0x0f), bs8(0x58), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) +addop("addsd", [bs8(0x0f), bs8(0x58), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) + +addop("subss", [bs8(0x0f), bs8(0x5c), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) +addop("subsd", [bs8(0x0f), bs8(0x5c), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) + +addop("mulss", [bs8(0x0f), bs8(0x59), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) +addop("mulsd", [bs8(0x0f), bs8(0x59), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) + +addop("divss", [bs8(0x0f), bs8(0x5e), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) +addop("divsd", [bs8(0x0f), bs8(0x5e), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) + + +addop("pminsw", [bs8(0x0f), bs8(0xea), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) +addop("pminsw", [bs8(0x0f), bs8(0xea), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("ucomiss", [bs8(0x0f), bs8(0x2e), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm_m32)) +addop("ucomisd", [bs8(0x0f), bs8(0x2e), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m64)) + + +addop("movzx", [bs8(0x0f), bs("1011011"), w8, sx] + rmmod(rmreg, rm_arg_sx)) +addop("mul", [bs('1111011'), w8] + rmmod(d4, rm_arg_w8)) + +addop("neg", [bs('1111011'), w8] + rmmod(d3, rm_arg_w8)) +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d0, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), 
bs8(0x1f)] + rmmod(d1, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d2, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d3, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d4, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d5, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d6, rm_arg)) # XXX TODO m512 +addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d7, rm_arg)) # XXX TODO m512 +addop("not", [bs('1111011'), w8] + rmmod(d2, rm_arg_w8)) +addop("or", [bs("0000110"), w8, d_eax, d_imm]) +addop("or", [bs("100000"), se, w8] + rmmod(d1, rm_arg_w8) + [d_imm]) +addop("or", [bs("000010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("out", [bs("1110011"), w8, u08, d_eax]) +addop("out", [bs("1110111"), w8, d_edx, d_eax]) + +addop("outsb", [bs8(0x6e)]) +addop("outsw", [bs8(0x6f), bs_opmode16]) +addop("outsd", [bs8(0x6f), bs_opmode32]) +addop("outsd", [bs8(0x6f), bs_opmode64]) + +addop("setalc", [bs8(0xD6)]) + +# addop("pause", [bs8(0xf3), bs8(0x90)]) + +addop("popw", [bs8(0x8f), stk, bs_opmode16] + rmmod(d0)) +addop("popw", [bs("01011"), stk, reg, bs_opmode16]) +addop("popw", [bs8(0x1f), stk, d_ds, bs_opmode16]) +addop("popw", [bs8(0x07), stk, d_es, bs_opmode16]) +addop("popw", [bs8(0x17), stk, d_ss, bs_opmode16]) +addop("popw", [bs8(0x0f), stk, bs8(0xa1), d_fs, bs_opmode16]) +addop("popw", [bs8(0x0f), stk, bs8(0xa9), d_gs, bs_opmode16]) + +addop("pop", [bs8(0x8f), stk, bs_opmode32] + rmmod(d0)) +addop("pop", [bs("01011"), stk, reg, bs_opmode32]) +addop("pop", [bs8(0x1f), stk, d_ds, bs_opmode32]) +addop("pop", [bs8(0x07), stk, d_es, bs_opmode32]) +addop("pop", [bs8(0x17), stk, d_ss, bs_opmode32]) +addop("pop", [bs8(0x0f), stk, bs8(0xa1), d_fs, bs_opmode32]) +addop("pop", [bs8(0x0f), stk, bs8(0xa9), d_gs, bs_opmode32]) + +addop("pop", [bs8(0x8f), stk, bs_opmode64] + rmmod(d0)) +addop("pop", [bs("01011"), stk, reg, bs_opmode64]) +addop("pop", 
[bs8(0x1f), stk, d_ds, bs_opmode64]) +addop("pop", [bs8(0x07), stk, d_es, bs_opmode64]) +addop("pop", [bs8(0x17), stk, d_ss, bs_opmode64]) +addop("pop", [bs8(0x0f), stk, bs8(0xa1), d_fs, bs_opmode64]) +addop("pop", [bs8(0x0f), stk, bs8(0xa9), d_gs, bs_opmode64]) + + +addop("popa", [bs8(0x61), stk, bs_opmode16]) +addop("popad", [bs8(0x61), stk, bs_opmode32]) + +addop("popfw", [bs8(0x9d), stk, bs_opmode16]) +addop("popfd", [bs8(0x9d), stk, bs_opmode32]) +addop("popfq", [bs8(0x9d), stk, bs_opmode64]) + +addop("prefetch0", [bs8(0x0f), bs8(0x18)] + rmmod(d1, rm_arg_m08)) +addop("prefetch1", [bs8(0x0f), bs8(0x18)] + rmmod(d2, rm_arg_m08)) +addop("prefetch2", [bs8(0x0f), bs8(0x18)] + rmmod(d3, rm_arg_m08)) +addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08)) +addop("prefetchw", [bs8(0x0f), bs8(0x0d)] + rmmod(d1, rm_arg_m08)) + +addop("pushw", [bs8(0xff), stk, bs_opmode16] + rmmod(d6)) +addop("pushw", [bs("01010"), stk, reg, bs_opmode16]) +addop("pushw", [bs8(0x6a), s08, stk, bs_opmode16]) +addop("pushw", [bs8(0x68), d_imm, stk, bs_opmode16]) +addop("pushw", [bs8(0x0e), stk, d_cs, bs_opmode16]) +addop("pushw", [bs8(0x16), stk, d_ss, bs_opmode16]) +addop("pushw", [bs8(0x1e), stk, d_ds, bs_opmode16]) +addop("pushw", [bs8(0x06), stk, d_es, bs_opmode16]) +addop("pushw", [bs8(0x0f), stk, bs8(0xa0), d_fs, bs_opmode16]) +addop("pushw", [bs8(0x0f), stk, bs8(0xa8), d_gs, bs_opmode16]) + +addop("push", [bs8(0xff), stk, bs_opmode32] + rmmod(d6)) +addop("push", [bs("01010"), stk, reg, bs_opmode32]) +addop("push", [bs8(0x6a), s08, stk, bs_opmode32]) +addop("push", [bs8(0x68), d_imm, stk, bs_opmode32]) +addop("push", [bs8(0x0e), stk, d_cs, bs_opmode32]) +addop("push", [bs8(0x16), stk, d_ss, bs_opmode32]) +addop("push", [bs8(0x1e), stk, d_ds, bs_opmode32]) +addop("push", [bs8(0x06), stk, d_es, bs_opmode32]) +addop("push", [bs8(0x0f), stk, bs8(0xa0), d_fs, bs_opmode32]) +addop("push", [bs8(0x0f), stk, bs8(0xa8), d_gs, bs_opmode32]) + +addop("push", [bs8(0xff), stk, 
bs_opmode64] + rmmod(d6)) +addop("push", [bs("01010"), stk, reg, bs_opmode64]) +addop("push", [bs8(0x6a), s08, stk, bs_opmode64]) +addop("push", [bs8(0x68), d_imm, stk, bs_opmode64]) +addop("push", [bs8(0x0e), stk, d_cs, bs_opmode64]) +addop("push", [bs8(0x16), stk, d_ss, bs_opmode64]) +addop("push", [bs8(0x1e), stk, d_ds, bs_opmode64]) +addop("push", [bs8(0x06), stk, d_es, bs_opmode64]) +addop("push", [bs8(0x0f), stk, bs8(0xa0), d_fs, bs_opmode64]) +addop("push", [bs8(0x0f), stk, bs8(0xa8), d_gs, bs_opmode64]) + +addop("pusha", [bs8(0x60), stk, bs_opmode16_no64]) +addop("pushad", [bs8(0x60), stk, bs_opmode32_no64]) + + +addop("pushfw", [bs8(0x9c), stk, bs_opmode16]) +addop("pushfd", [bs8(0x9c), stk, bs_opmode32]) +addop("pushfq", [bs8(0x9c), stk, bs_opmode64]) + +addop("rcl", [bs('110100'), d_cl1, w8] + + rmmod(d2, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rcl", [bs('1100000'), w8] + rmmod(d2, rm_arg_w8) + [u08]) +addop("rcr", [bs('110100'), d_cl1, w8] + + rmmod(d3, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rcr", [bs('1100000'), w8] + rmmod(d3, rm_arg_w8) + [u08]) +addop("rol", [bs('110100'), d_cl1, w8] + + rmmod(d0, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("rol", [bs('1100000'), w8] + rmmod(d0, rm_arg_w8) + [u08]) +addop("ror", [bs('110100'), d_cl1, w8] + + rmmod(d1, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("ror", [bs('1100000'), w8] + rmmod(d1, rm_arg_w8) + [u08]) + +addop("rdmsr", [bs8(0x0f), bs8(0x32)]) +addop("rdpmc", [bs8(0x0f), bs8(0x33)]) +addop("rdtsc", [bs8(0x0f), bs8(0x31)]) +addop("ret", [bs8(0xc3), stk]) +addop("ret", [bs8(0xc2), stk, u16]) +addop("retf", [bs8(0xcb), stk]) +addop("retf", [bs8(0xca), stk, u16]) + +addop("rsm", [bs8(0x0f), bs8(0xaa)]) +addop("sahf", [bs8(0x9e)]) + +# XXX tipo in doc: /4 instead of /6 +addop("sal", [bs('110100'), d_cl1, w8] + + rmmod(d6, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("sal", [bs('1100000'), w8] + rmmod(d6, rm_arg_w8) + [u08]) +addop("sar", [bs('110100'), d_cl1, w8] + + rmmod(d7, rm_arg_w8), [rm_arg_w8, d_cl1]) 
+addop("sar", [bs('1100000'), w8] + rmmod(d7, rm_arg_w8) + [u08]) + +addop("scasb", [bs8(0xae)]) +addop("scasw", [bs8(0xaf), bs_opmode16]) +addop("scasd", [bs8(0xaf), bs_opmode32]) +addop("scasq", [bs8(0xaf), bs_opmode64]) + +addop("shl", [bs('110100'), d_cl1, w8] + + rmmod(d4, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("shl", [bs('1100000'), w8] + rmmod(d4, rm_arg_w8) + [u08]) +addop("shr", [bs('110100'), d_cl1, w8] + + rmmod(d5, rm_arg_w8), [rm_arg_w8, d_cl1]) +addop("shr", [bs('1100000'), w8] + rmmod(d5, rm_arg_w8) + [u08]) + +addop("sbb", [bs("0001110"), w8, d_eax, d_imm]) +addop("sbb", [bs("100000"), se, w8] + rmmod(d3, rm_arg_w8) + [d_imm]) +addop("sbb", [bs("000110"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_08)) +addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0, modrm=mod_mem)) +addop("shld", [bs8(0x0f), bs8(0xa4)] + + rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) +addop("shld", [bs8(0x0f), bs8(0xa5)] + + rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) +addop("shrd", [bs8(0x0f), bs8(0xac)] + + rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) +addop("shrd", [bs8(0x0f), bs8(0xad)] + + rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) +addop("sidt", [bs8(0x0f), bs8(0x01)] + rmmod(d1, modrm=mod_mem)) +addop("sldt", [bs8(0x0f), bs8(0x00)] + rmmod(d0, rm_arg_x=rm_arg_reg_m16)) +addop("smsw", [bs8(0x0f), bs8(0x01)] + rmmod(d4)) +addop("stc", [bs8(0xf9)]) +addop("std", [bs8(0xfd)]) +addop("sti", [bs8(0xfb)]) +addop("stosb", [bs8(0xaa)]) +addop("stosw", [bs8(0xab), bs_opmode16]) +addop("stosd", [bs8(0xab), bs_opmode32]) +addop("stosq", [bs8(0xab), bs_opmode64]) + +addop("str", [bs8(0x0f), bs8(0x00)] + rmmod(d1)) + +addop("sub", [bs("0010110"), w8, d_eax, d_imm]) +addop("sub", [bs("100000"), se, w8] + rmmod(d5, rm_arg_w8) + [d_imm]) +addop("sub", [bs("001010"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("syscall", [bs8(0x0f), bs8(0x05)]) +addop("sysenter", 
[bs8(0x0f), bs8(0x34)]) +addop("sysexit", [bs8(0x0f), bs8(0x35)]) +addop("sysret", [bs8(0x0f), bs8(0x07)]) +addop("test", [bs("1010100"), w8, d_eax, d_imm]) +addop("test", [bs("1111011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) +addop("test", [bs("1000010"), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("ud2", [bs8(0x0f), bs8(0x0b)]) +addop("verr", [bs8(0x0f), bs8(0x00)] + rmmod(d4)) +addop("verw", [bs8(0x0f), bs8(0x00)] + rmmod(d5)) +addop("wbinvd", [bs8(0x0f), bs8(0x09)]) +addop("wrmsr", [bs8(0x0f), bs8(0x30)]) +addop("xadd", [bs8(0x0f), bs("1100000"), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + +addop("nop", [bs8(0x90), no_rex], alias=True) + +addop("xchg", [bs('10010'), d_eax, reg]) +addop("xchg", [bs('1000011'), w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) +addop("xlat", [bs8(0xd7)]) + + +addop("xor", [bs("0011010"), w8, d_eax, d_imm]) +addop("xor", [bs("100000"), se, w8] + rmmod(d6, rm_arg_w8) + [d_imm]) +addop("xor", [bs("001100"), swapargs, w8] + + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) + + +addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)]) + + + +#### MMX/SSE/AVX operations +#### Categories are the same than here: https://software.intel.com/sites/landingpage/IntrinsicsGuide/ +#### + +### Arithmetic (integers) +### + +## Move +# SSE +addop("movapd", [bs8(0x0f), bs("0010100"), swapargs] + + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode16], [xmm_reg, rm_arg_xmm]) +addop("movaps", [bs8(0x0f), bs("0010100"), swapargs] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [bs_opmode32], [xmm_reg, rm_arg_xmm_m128]) +addop("movaps", [bs8(0x0f), bs("0010100"), swapargs] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [bs_opmode64], [xmm_reg, rm_arg_xmm_m128]) +addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) +addop("movdqa", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + +addop("movhpd", [bs8(0x0f), bs("0001011"), 
swapargs, pref_66] + + rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) +addop("movhps", [bs8(0x0f), bs("0001011"), swapargs, no_xmm_pref] + + rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) +addop("movlpd", [bs8(0x0f), bs("0001001"), swapargs, pref_66] + + rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) +addop("movlps", [bs8(0x0f), bs("0001001"), swapargs, no_xmm_pref] + + rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) + +addop("movhlps", [bs8(0x0f), bs8(0x12), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm_reg), [xmm_reg, rm_arg_xmm_reg]) +addop("movlhps", [bs8(0x0f), bs8(0x16), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm_reg), [xmm_reg, rm_arg_xmm_reg]) + +addop("movdq2q", [bs8(0x0f), bs8(0xd6), pref_f2] + + rmmod(mm_reg, rm_arg_xmm_reg), [mm_reg, rm_arg_xmm_reg]) +addop("movq2dq", [bs8(0x0f), bs8(0xd6), pref_f3] + + rmmod(xmm_reg, rm_arg_mm)) + +## Additions +# SSE +addop("paddb", [bs8(0x0f), bs8(0xfc), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("paddw", [bs8(0x0f), bs8(0xfd), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("paddd", [bs8(0x0f), bs8(0xfe), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("paddq", [bs8(0x0f), bs8(0xd4), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("paddb", [bs8(0x0f), bs8(0xfc), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) +addop("paddw", [bs8(0x0f), bs8(0xfd), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) +addop("paddd", [bs8(0x0f), bs8(0xfe), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) +addop("paddq", [bs8(0x0f), bs8(0xd4), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) + +## Substractions +# SSE +addop("psubb", [bs8(0x0f), bs8(0xf8), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("psubw", [bs8(0x0f), bs8(0xf9), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("psubd", [bs8(0x0f), bs8(0xfa), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("psubq", [bs8(0x0f), bs8(0xfb), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("psubb", [bs8(0x0f), bs8(0xf8), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) +addop("psubw", [bs8(0x0f), bs8(0xf9), no_xmm_pref] + 
rmmod(mm_reg, rm_arg_mm)) +addop("psubd", [bs8(0x0f), bs8(0xfa), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) +addop("psubq", [bs8(0x0f), bs8(0xfb), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) + +### Arithmetic (floating-point) +### + +## Additions +# SSE +addop("addps", [bs8(0x0f), bs8(0x58), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("addpd", [bs8(0x0f), bs8(0x58), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +## Substractions +# SSE +addop("subps", [bs8(0x0f), bs8(0x5c), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("subpd", [bs8(0x0f), bs8(0x5c), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +## Multiplications +# SSE +addop("mulps", [bs8(0x0f), bs8(0x59), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("mulpd", [bs8(0x0f), bs8(0x59), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +## Divisions +# SSE +addop("divps", [bs8(0x0f), bs8(0x5e), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("divpd", [bs8(0x0f), bs8(0x5e), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +### Logical (floating-point) +### + +## XOR +addop("xorps", [bs8(0x0f), bs8(0x57), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("xorpd", [bs8(0x0f), bs8(0x57), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +## AND +addop("andps", [bs8(0x0f), bs8(0x54), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("andpd", [bs8(0x0f), bs8(0x54), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("andnps", [bs8(0x0f), bs8(0x55), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("andnpd", [bs8(0x0f), bs8(0x55), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +## OR +addop("orps", [bs8(0x0f), bs8(0x56), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) +addop("orpd", [bs8(0x0f), bs8(0x56), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +## AND +# MMX +addop("pand", [bs8(0x0f), bs8(0xdb), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +# SSE +addop("pand", [bs8(0x0f), bs8(0xdb), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + +## ANDN +# MMX +addop("pandn", [bs8(0x0f), bs8(0xdf), no_xmm_pref] + + rmmod(mm_reg, 
rm_arg_mm), [mm_reg, rm_arg_mm]) +# SSE +addop("pandn", [bs8(0x0f), bs8(0xdf), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + +## OR +# MMX +addop("por", [bs8(0x0f), bs8(0xeb), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +# SSE +addop("por", [bs8(0x0f), bs8(0xeb), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + +## XOR +# MMX +addop("pxor", [bs8(0x0f), bs8(0xef), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +# MMX +addop("pxor", [bs8(0x0f), bs8(0xef), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +### Comparisons (floating-point) +### +addop("minps", [bs8(0x0f), bs8(0x5d), no_xmm_pref] + rmmod(xmm_reg, + rm_arg_xmm_m128)) +addop("minss", [bs8(0x0f), bs8(0x5d), pref_f3] + rmmod(xmm_reg, + rm_arg_xmm_m32)) +addop("minpd", [bs8(0x0f), bs8(0x5d), pref_66] + rmmod(xmm_reg, + rm_arg_xmm_m128)) +addop("minsd", [bs8(0x0f), bs8(0x5d), pref_f2] + rmmod(xmm_reg, + rm_arg_xmm_m64)) +addop("maxps", [bs8(0x0f), bs8(0x5f), no_xmm_pref] + rmmod(xmm_reg, + rm_arg_xmm_m128)) +addop("maxpd", [bs8(0x0f), bs8(0x5f), pref_66] + rmmod(xmm_reg, + rm_arg_xmm_m128)) +addop("maxsd", [bs8(0x0f), bs8(0x5f), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) +addop("maxss", [bs8(0x0f), bs8(0x5f), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) + +for cond_name, value in [ + ("eq", 0x00), + ("lt", 0x01), + ("le", 0x02), + ("unord", 0x03), + ("neq", 0x04), + ("nlt", 0x05), + ("nle", 0x06), + ("ord", 0x07), +]: + addop("cmp%sps" % cond_name, [bs8(0x0f), bs8(0xc2), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm_m64) + [bs8(value)]) + addop("cmp%spd" % cond_name, [bs8(0x0f), bs8(0xc2), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m64) + [bs8(value)]) + addop("cmp%sss" % cond_name, [bs8(0x0f), bs8(0xc2), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m32) + [bs8(value)]) + addop("cmp%ssd" % cond_name, [bs8(0x0f), bs8(0xc2), pref_f2] + + rmmod(xmm_reg, rm_arg_xmm_m32) + [bs8(value)]) + + + +addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), no_xmm_pref] + + rmmod(mm_reg, 
rm_arg_mm_m64)) +addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pshufd", [bs8(0x0f), bs8(0x70), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [u08]) +addop("pshuflw", [bs8(0x0f), bs8(0x70), pref_f2] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [u08]) +addop("pshufhw", [bs8(0x0f), bs8(0x70), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [u08]) + + +### Convert +### SS = single precision +### SD = double precision +### + +## SS -> SD +## + +addop("cvtdq2pd", [bs8(0x0f), bs8(0xe6), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m64)) +addop("cvtdq2ps", [bs8(0x0f), bs8(0x5b), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("cvtpd2dq", [bs8(0x0f), bs8(0xe6), pref_f2] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("cvtpd2pi", [bs8(0x0f), bs8(0x2d), pref_66] + + rmmod(mm_reg, rm_arg_xmm)) +addop("cvtpd2ps", [bs8(0x0f), bs8(0x5a), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("cvtpi2pd", [bs8(0x0f), bs8(0x2a), pref_66] + + rmmod(xmm_reg, rm_arg_mm_m64)) +addop("cvtpi2ps", [bs8(0x0f), bs8(0x2a), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_mm_m64)) +addop("cvtps2dq", [bs8(0x0f), bs8(0x5b), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("cvtps2pd", [bs8(0x0f), bs8(0x5a), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm_m64)) +addop("cvtps2pi", [bs8(0x0f), bs8(0x2d), no_xmm_pref] + + rmmod(mm_reg, rm_arg_xmm_m64)) +addop("cvtsd2si", [bs8(0x0f), bs8(0x2d), pref_f2] + + rmmod(reg, rm_arg_xmm_m64)) +addop("cvtsd2ss", [bs8(0x0f), bs8(0x5a), pref_f2] + + rmmod(xmm_reg, rm_arg_xmm_m64)) +addop("cvtsi2sd", [bs8(0x0f), bs8(0x2a), pref_f2] + + rmmod(xmm_reg, rm_arg)) +addop("cvtsi2ss", [bs8(0x0f), bs8(0x2a), xmmreg, pref_f3] + + rmmod(xmm_reg, rm_arg)) +addop("cvtss2sd", [bs8(0x0f), bs8(0x5a), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m32)) +addop("cvtss2si", [bs8(0x0f), bs8(0x2d), pref_f3] + + rmmod(rmreg, rm_arg_xmm_m32)) +addop("cvttpd2pi",[bs8(0x0f), bs8(0x2c), pref_66] + + rmmod(mm_reg, rm_arg_xmm)) +addop("cvttpd2dq",[bs8(0x0f), bs8(0xe6), pref_66] + 
+ rmmod(xmm_reg, rm_arg_xmm)) +addop("cvttps2dq",[bs8(0x0f), bs8(0x5b), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("cvttps2pi",[bs8(0x0f), bs8(0x2c), no_xmm_pref] + + rmmod(mm_reg, rm_arg_xmm_m64)) +addop("cvttsd2si",[bs8(0x0f), bs8(0x2c), pref_f2] + + rmmod(reg, rm_arg_xmm_m64)) +addop("cvttss2si",[bs8(0x0f), bs8(0x2c), pref_f3] + + rmmod(reg, rm_arg_xmm_m32)) + +addop("palignr", [bs8(0x0f), bs8(0x73), bs8(0x0f), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64) + [u08], [mm_reg, rm_arg_mm_m64, u08]) +addop("palignr", [bs8(0x0f), bs8(0x3a), bs8(0x0f), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128) + [u08], [xmm_reg, rm_arg_xmm_m128, u08]) + +addop("psrlq", [bs8(0x0f), bs8(0x73), no_xmm_pref] + + rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) +addop("psrlq", [bs8(0x0f), bs8(0x73), pref_66] + + rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("psrlq", [bs8(0x0f), bs8(0xd3), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +addop("psrlq", [bs8(0x0f), bs8(0xd3), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + + +addop("psrld", [bs8(0x0f), bs8(0x72), no_xmm_pref] + + rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) +addop("psrld", [bs8(0x0f), bs8(0x72), pref_66] + + rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("psrld", [bs8(0x0f), bs8(0xd2), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +addop("psrld", [bs8(0x0f), bs8(0xd2), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + +addop("psrldq", [bs8(0x0f), bs8(0x73), pref_66] + + rmmod(d3, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("psrlw", [bs8(0x0f), bs8(0x71), no_xmm_pref] + + rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) +addop("psrlw", [bs8(0x0f), bs8(0x71), pref_66] + + rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("psrlw", [bs8(0x0f), bs8(0xd1), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) +addop("psrlw", [bs8(0x0f), bs8(0xd1), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128), 
[xmm_reg, rm_arg_xmm_m128]) + +addop("psraw", [bs8(0x0f), bs8(0xe1), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) +addop("psraw", [bs8(0x0f), bs8(0xe1), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128]) + +addop("psraw", [bs8(0x0f), bs8(0x71), no_xmm_pref] + + rmmod(d4, rm_arg_mm_m64) + [u08], [rm_arg_mm_m64, u08]) +addop("psraw", [bs8(0x0f), bs8(0x71), pref_66] + + rmmod(d4, rm_arg_xmm_m128) + [u08], [rm_arg_xmm_m128, u08]) + +addop("psrad", [bs8(0x0f), bs8(0xe2), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) +addop("psrad", [bs8(0x0f), bs8(0xe2), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128]) + +addop("psrad", [bs8(0x0f), bs8(0x72), no_xmm_pref] + + rmmod(d4, rm_arg_mm_m64) + [u08], [rm_arg_mm_m64, u08]) +addop("psrad", [bs8(0x0f), bs8(0x72), pref_66] + + rmmod(d4, rm_arg_xmm_m128) + [u08], [rm_arg_xmm_m128, u08]) + + +addop("psllq", [bs8(0x0f), bs8(0x73), no_xmm_pref] + + rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08]) +addop("psllq", [bs8(0x0f), bs8(0x73), pref_66] + + rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("psllq", [bs8(0x0f), bs8(0xf3), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +addop("psllq", [bs8(0x0f), bs8(0xf3), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + + +addop("pslld", [bs8(0x0f), bs8(0x72), no_xmm_pref] + + rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08]) +addop("pslld", [bs8(0x0f), bs8(0x72), pref_66] + + rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("pslld", [bs8(0x0f), bs8(0xf2), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +addop("pslld", [bs8(0x0f), bs8(0xf2), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + + +addop("psllw", [bs8(0x0f), bs8(0x71), no_xmm_pref] + + rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08]) +addop("psllw", [bs8(0x0f), bs8(0x71), pref_66] + + rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + +addop("psllw", 
[bs8(0x0f), bs8(0xf1), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) +addop("psllw", [bs8(0x0f), bs8(0xf1), pref_66] + + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) + +addop("pslldq", [bs8(0x0f), bs8(0x73), pref_66] + + rmmod(d7, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) + + +addop("pmaxub", [bs8(0x0f), bs8(0xde), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pmaxub", [bs8(0x0f), bs8(0xde), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pmaxuw", [bs8(0x0f), bs8(0x38), bs8(0x3e), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pmaxud", [bs8(0x0f), bs8(0x38), bs8(0x3f), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pmaxsw", [bs8(0x0f), bs8(0xee), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmaxsw", [bs8(0x0f), bs8(0xee), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("pminub", [bs8(0x0f), bs8(0xda), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pminub", [bs8(0x0f), bs8(0xda), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pminuw", [bs8(0x0f), bs8(0x38), bs8(0x3a), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pminud", [bs8(0x0f), bs8(0x38), bs8(0x3b), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + + +addop("pcmpeqb", [bs8(0x0f), bs8(0x74), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpeqb", [bs8(0x0f), bs8(0x74), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pcmpeqw", [bs8(0x0f), bs8(0x75), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpeqw", [bs8(0x0f), bs8(0x75), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pcmpeqd", [bs8(0x0f), bs8(0x76), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpeqd", [bs8(0x0f), bs8(0x76), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pcmpgtb", [bs8(0x0f), bs8(0x64), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpgtb", [bs8(0x0f), bs8(0x64), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pcmpgtw", [bs8(0x0f), bs8(0x65), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpgtw", [bs8(0x0f), bs8(0x65), pref_66] + + 
rmmod(xmm_reg, rm_arg_xmm)) + +addop("pcmpgtd", [bs8(0x0f), bs8(0x66), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("pcmpgtd", [bs8(0x0f), bs8(0x66), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("pcmpeqq", [bs8(0x0f), bs8(0x38), bs8(0x29), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("pcmpgtq", [bs8(0x0f), bs8(0x38), bs8(0x37), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpckhbw", [bs8(0x0f), bs8(0x68), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("punpckhbw", [bs8(0x0f), bs8(0x68), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpckhwd", [bs8(0x0f), bs8(0x69), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("punpckhwd", [bs8(0x0f), bs8(0x69), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpckhdq", [bs8(0x0f), bs8(0x6a), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("punpckhdq", [bs8(0x0f), bs8(0x6a), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpckhqdq", [bs8(0x0f), bs8(0x6d), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + + + +addop("punpcklbw", [bs8(0x0f), bs8(0x60), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("punpcklbw", [bs8(0x0f), bs8(0x60), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpcklwd", [bs8(0x0f), bs8(0x61), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("punpcklwd", [bs8(0x0f), bs8(0x61), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpckldq", [bs8(0x0f), bs8(0x62), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm)) +addop("punpckldq", [bs8(0x0f), bs8(0x62), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + +addop("punpcklqdq", [bs8(0x0f), bs8(0x6c), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + + +addop("unpckhps", [bs8(0x0f), bs8(0x15), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("unpckhpd", [bs8(0x0f), bs8(0x15), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + + +addop("unpcklps", [bs8(0x0f), bs8(0x14), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("unpcklpd", [bs8(0x0f), bs8(0x14), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) + + + +addop("pinsrb", [bs8(0x0f), bs8(0x3a), bs8(0x20), 
pref_66] + + rmmod(xmm_reg, rm_arg_reg_m08) + [u08]) +addop("pinsrd", [bs8(0x0f), bs8(0x3a), bs8(0x22), pref_66, bs_opmode32] + + rmmod(xmm_reg, rm_arg) + [u08]) +addop("pinsrq", [bs8(0x0f), bs8(0x3a), bs8(0x22), pref_66] + + rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08]) + +addop("pinsrw", [bs8(0x0f), bs8(0xc4), no_xmm_pref] + + rmmod(mm_reg, rm_arg_reg_m16) + [u08]) +addop("pinsrw", [bs8(0x0f), bs8(0xc4), pref_66] + + rmmod(xmm_reg, rm_arg_reg_m16) + [u08]) + + +addop("pextrb", [bs8(0x0f), bs8(0x3a), bs8(0x14), pref_66] + + rmmod(xmm_reg, rm_arg_reg_m08) + [u08], [rm_arg_reg_m08, xmm_reg, u08]) +addop("pextrd", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66, bs_opmode32] + + rmmod(xmm_reg, rm_arg) + [u08], [rm_arg, xmm_reg, u08]) +addop("pextrq", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66] + + rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08], [rm_arg_m64, xmm_reg, u08]) + + +addop("pextrw", [bs8(0x0f), bs8(0x3a), bs8(0x15), pref_66] + + rmmod(xmm_reg, rm_arg_reg_m16) + [u08], [rm_arg_reg_m16, xmm_reg, u08]) +addop("pextrw", [bs8(0x0f), bs8(0xc5), no_xmm_pref] + + rmmod(rmreg, rm_arg_mm) + [u08], [rmreg, rm_arg_mm, u08]) +addop("pextrw", [bs8(0x0f), bs8(0xc5), pref_66] + + rmmod(rmreg, rm_arg_xmm) + [u08], [rmreg, rm_arg_xmm, u08]) + + +addop("sqrtpd", [bs8(0x0f), bs8(0x51), pref_66] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("sqrtps", [bs8(0x0f), bs8(0x51), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm)) +addop("sqrtsd", [bs8(0x0f), bs8(0x51), pref_f2] + + rmmod(xmm_reg, rm_arg_xmm_m64)) +addop("sqrtss", [bs8(0x0f), bs8(0x51), pref_f3] + + rmmod(xmm_reg, rm_arg_xmm_m32)) + +addop("pmovmskb", [bs8(0x0f), bs8(0xd7), no_xmm_pref] + + rmmod(reg_modrm, rm_arg_mm_reg)) +addop("pmovmskb", [bs8(0x0f), bs8(0xd7), pref_66] + + rmmod(reg_modrm, rm_arg_xmm_reg)) + +addop("shufps", [bs8(0x0f), bs8(0xc6), no_xmm_pref] + + rmmod(xmm_reg, rm_arg_xmm) + [u08]) +addop("shufpd", [bs8(0x0f), bs8(0xc6), pref_66] + + rmmod(xmm_reg, rm_arg_xmm) + [u08]) + +addop("aesenc", [bs8(0x0f), 
bs8(0x38), bs8(0xdc), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("aesdec", [bs8(0x0f), bs8(0x38), bs8(0xde), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("aesenclast", [bs8(0x0f), bs8(0x38), bs8(0xdd), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) +addop("aesdeclast", [bs8(0x0f), bs8(0x38), bs8(0xdf), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) + +addop("packsswb", [bs8(0x0f), bs8(0x63), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("packsswb", [bs8(0x0f), bs8(0x63), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("packssdw", [bs8(0x0f), bs8(0x6b), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("packssdw", [bs8(0x0f), bs8(0x6b), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmulhuw", [bs8(0x0f), bs8(0xe4), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmulhuw", [bs8(0x0f), bs8(0xe4), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmulhw", [bs8(0x0f), bs8(0xe5), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pmuludq", [bs8(0x0f), bs8(0xf4), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmuludq", [bs8(0x0f), bs8(0xf4), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + + +addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubusb", [bs8(0x0f), bs8(0xd8), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubsb", [bs8(0x0f), bs8(0xe8), no_xmm_pref] + + rmmod(mm_reg, 
rm_arg_mm_m64)) +addop("psubsb", [bs8(0x0f), bs8(0xe8), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("psubsw", [bs8(0x0f), bs8(0xe9), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psubsw", [bs8(0x0f), bs8(0xe9), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + + +addop("paddusb", [bs8(0x0f), bs8(0xdc), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddusb", [bs8(0x0f), bs8(0xdc), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddusw", [bs8(0x0f), bs8(0xdd), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddusw", [bs8(0x0f), bs8(0xdd), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddsb", [bs8(0x0f), bs8(0xec), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddsb", [bs8(0x0f), bs8(0xec), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("paddsw", [bs8(0x0f), bs8(0xed), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("paddsw", [bs8(0x0f), bs8(0xed), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("pmaddwd", [bs8(0x0f), bs8(0xf5), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pmaddwd", [bs8(0x0f), bs8(0xf5), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("psadbw", [bs8(0x0f), bs8(0xf6), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("psadbw", [bs8(0x0f), bs8(0xf6), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("pavgb", [bs8(0x0f), bs8(0xe0), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pavgb", [bs8(0x0f), bs8(0xe0), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) +addop("pavgw", [bs8(0x0f), bs8(0xe3), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_m64)) +addop("pavgw", [bs8(0x0f), bs8(0xe3), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_m128)) + +addop("maskmovq", [bs8(0x0f), bs8(0xf7), no_xmm_pref] + + rmmod(mm_reg, rm_arg_mm_reg)) +addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] + + rmmod(xmm_reg, rm_arg_xmm_reg)) + +addop("emms", [bs8(0x0f), bs8(0x77)]) + +addop("endbr64", [pref_f3, bs8(0x0f), bs8(0x1e), bs8(0xfa)]) +addop("endbr32", [pref_f3, bs8(0x0f), 
bs8(0x1e), bs8(0xfb)]) + +mn_x86.bintree = factor_one_bit(mn_x86.bintree) +# mn_x86.bintree = factor_fields_all(mn_x86.bintree) +""" +mod reg r/m + XX XXX XXX + +""" + + +def print_size(e): + print(e, e.size) + return e diff --git a/miasm/arch/x86/ctype.py b/miasm/arch/x86/ctype.py new file mode 100644 index 00000000..2a61689a --- /dev/null +++ b/miasm/arch/x86/ctype.py @@ -0,0 +1,137 @@ +from miasm.core.objc import CLeafTypes, ObjCDecl, PADDING_TYPE_NAME +from miasm.core.ctypesmngr import CTypeId, CTypePtr + + +class CTypeAMD64_unk(CLeafTypes): + """Define C types sizes/alignment for x86_64 architecture""" + + obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 1/align 1 + + obj_char = ObjCDecl("char", 1, 1) + obj_short = ObjCDecl("short", 2, 2) + obj_int = ObjCDecl("int", 4, 4) + obj_long = ObjCDecl("long", 8, 8) + + obj_uchar = ObjCDecl("uchar", 1, 1) + obj_ushort = ObjCDecl("ushort", 2, 2) + obj_uint = ObjCDecl("uint", 4, 4) + obj_ulong = ObjCDecl("ulong", 8, 8) + + obj_void = ObjCDecl("void", 1, 1) + + obj_enum = ObjCDecl("enum", 4, 4) + + obj_float = ObjCDecl("float", 4, 4) + obj_double = ObjCDecl("double", 8, 8) + obj_ldouble = ObjCDecl("ldouble", 16, 16) + + def __init__(self): + self.types = { + CTypeId(PADDING_TYPE_NAME): self.obj_pad, + + CTypeId('char'): self.obj_char, + CTypeId('short'): self.obj_short, + CTypeId('int'): self.obj_int, + CTypeId('void'): self.obj_void, + CTypeId('long',): self.obj_long, + CTypeId('float'): self.obj_float, + CTypeId('double'): self.obj_double, + + CTypeId('signed', 'char'): self.obj_char, + CTypeId('unsigned', 'char'): self.obj_uchar, + + CTypeId('short', 'int'): self.obj_short, + CTypeId('signed', 'short'): self.obj_short, + CTypeId('signed', 'short', 'int'): self.obj_short, + CTypeId('unsigned', 'short'): self.obj_ushort, + CTypeId('unsigned', 'short', 'int'): self.obj_ushort, + + CTypeId('unsigned', ): self.obj_uint, + CTypeId('unsigned', 'int'): self.obj_uint, + CTypeId('signed', 'int'): self.obj_int, + 
+ CTypeId('long', 'int'): self.obj_long, + CTypeId('long', 'long'): self.obj_long, + CTypeId('long', 'long', 'int'): self.obj_long, + CTypeId('signed', 'long', 'long'): self.obj_long, + CTypeId('unsigned', 'long', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, + + CTypeId('signed', 'long'): self.obj_long, + CTypeId('unsigned', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'int'): self.obj_ulong, + + CTypeId('long', 'double'): self.obj_ldouble, + CTypePtr(CTypeId('void')): self.obj_ulong, + } + + + + + +class CTypeX86_unk(CLeafTypes): + """Define C types sizes/alignment for x86_32 architecture""" + + obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 1/align 1 + + obj_char = ObjCDecl("char", 1, 1) + obj_short = ObjCDecl("short", 2, 2) + obj_int = ObjCDecl("int", 4, 4) + obj_long = ObjCDecl("long", 4, 4) + + obj_uchar = ObjCDecl("uchar", 1, 1) + obj_ushort = ObjCDecl("ushort", 2, 2) + obj_uint = ObjCDecl("uint", 4, 4) + obj_ulong = ObjCDecl("ulong", 4, 4) + + obj_void = ObjCDecl("void", 1, 1) + + obj_enum = ObjCDecl("enum", 4, 4) + + obj_float = ObjCDecl("float", 4, 4) + obj_double = ObjCDecl("double", 8, 8) + obj_ldouble = ObjCDecl("ldouble", 16, 16) + + def __init__(self): + self.types = { + CTypeId(PADDING_TYPE_NAME): self.obj_pad, + + CTypeId('char'): self.obj_char, + CTypeId('short'): self.obj_short, + CTypeId('int'): self.obj_int, + CTypeId('void'): self.obj_void, + CTypeId('long',): self.obj_long, + CTypeId('float'): self.obj_float, + CTypeId('double'): self.obj_double, + + CTypeId('signed', 'char'): self.obj_char, + CTypeId('unsigned', 'char'): self.obj_uchar, + + CTypeId('short', 'int'): self.obj_short, + CTypeId('signed', 'short'): self.obj_short, + CTypeId('signed', 'short', 'int'): self.obj_short, + CTypeId('unsigned', 'short'): self.obj_ushort, + CTypeId('unsigned', 'short', 'int'): 
self.obj_ushort, + + CTypeId('unsigned', ): self.obj_uint, + CTypeId('unsigned', 'int'): self.obj_uint, + CTypeId('signed', 'int'): self.obj_int, + + CTypeId('long', 'int'): self.obj_long, + CTypeId('long', 'long'): self.obj_long, + CTypeId('long', 'long', 'int'): self.obj_long, + CTypeId('signed', 'long', 'long'): self.obj_long, + CTypeId('unsigned', 'long', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, + + CTypeId('signed', 'long'): self.obj_long, + CTypeId('unsigned', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'int'): self.obj_ulong, + + CTypeId('long', 'double'): self.obj_ldouble, + CTypePtr(CTypeId('void')): self.obj_uint, + } diff --git a/miasm/arch/x86/disasm.py b/miasm/arch/x86/disasm.py new file mode 100644 index 00000000..01147970 --- /dev/null +++ b/miasm/arch/x86/disasm.py @@ -0,0 +1,30 @@ +from miasm.core.asmblock import disasmEngine +from miasm.arch.x86.arch import mn_x86 + + +cb_x86_funcs = [] + + +def cb_x86_disasm(*args, **kwargs): + for func in cb_x86_funcs: + func(*args, **kwargs) + + +class dis_x86(disasmEngine): + attrib = None + + def __init__(self, bs=None, **kwargs): + super(dis_x86, self).__init__(mn_x86, self.attrib, bs, **kwargs) + self.dis_block_callback = cb_x86_disasm + + +class dis_x86_16(dis_x86): + attrib = 16 + + +class dis_x86_32(dis_x86): + attrib = 32 + + +class dis_x86_64(dis_x86): + attrib = 64 diff --git a/miasm/arch/x86/ira.py b/miasm/arch/x86/ira.py new file mode 100644 index 00000000..dc6db273 --- /dev/null +++ b/miasm/arch/x86/ira.py @@ -0,0 +1,80 @@ +#-*- coding:utf-8 -*- + +from miasm.expression.expression import ExprAssign, ExprOp +from miasm.ir.ir import AssignBlock +from miasm.ir.analysis import ira +from miasm.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 + + +class ir_a_x86_16(ir_x86_16, ira): + + def __init__(self, loc_db=None): + 
ir_x86_16.__init__(self, loc_db) + self.ret_reg = self.arch.regs.AX + + def get_out_regs(self, _): + return set([self.ret_reg, self.sp]) + +class ir_a_x86_32(ir_x86_32, ir_a_x86_16): + + def __init__(self, loc_db=None): + ir_x86_32.__init__(self, loc_db) + self.ret_reg = self.arch.regs.EAX + + def sizeof_char(self): + return 8 + + def sizeof_short(self): + return 16 + + def sizeof_int(self): + return 32 + + def sizeof_long(self): + return 32 + + def sizeof_pointer(self): + return 32 + + +class ir_a_x86_64(ir_x86_64, ir_a_x86_16): + + def __init__(self, loc_db=None): + ir_x86_64.__init__(self, loc_db) + self.ret_reg = self.arch.regs.RAX + + def call_effects(self, ad, instr): + call_assignblk = AssignBlock( + [ + ExprAssign( + self.ret_reg, + ExprOp( + 'call_func_ret', + ad, + self.sp, + self.arch.regs.RCX, + self.arch.regs.RDX, + self.arch.regs.R8, + self.arch.regs.R9, + ) + ), + ExprAssign(self.sp, ExprOp('call_func_stack', ad, self.sp)), + ], + instr + ) + return [call_assignblk], [] + + def sizeof_char(self): + return 8 + + def sizeof_short(self): + return 16 + + def sizeof_int(self): + return 32 + + def sizeof_long(self): + return 64 + + def sizeof_pointer(self): + return 64 diff --git a/miasm/arch/x86/jit.py b/miasm/arch/x86/jit.py new file mode 100644 index 00000000..2d1e45c3 --- /dev/null +++ b/miasm/arch/x86/jit.py @@ -0,0 +1,286 @@ +from builtins import range +import logging + +from miasm.jitter.jitload import Jitter, named_arguments +from miasm.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 +from miasm.jitter.codegen import CGen +from miasm.core.locationdb import LocationDB +from miasm.ir.translators.C import TranslatorC + +log = logging.getLogger('jit_x86') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) + + +class x86_32_CGen(CGen): + def __init__(self, ir_arch): + self.ir_arch = ir_arch + self.PC = self.ir_arch.arch.regs.RIP + self.translator = 
TranslatorC(self.ir_arch.loc_db) + self.init_arch_C() + + def gen_post_code(self, attrib, pc_value): + out = [] + if attrib.log_regs: + # Update PC for dump_gpregs + out.append("%s = %s;" % (self.C_PC, pc_value)) + out.append('dump_gpregs_32(jitcpu->cpu);') + return out + +class x86_64_CGen(x86_32_CGen): + def gen_post_code(self, attrib, pc_value): + out = [] + if attrib.log_regs: + # Update PC for dump_gpregs + out.append("%s = %s;" % (self.C_PC, pc_value)) + out.append('dump_gpregs_64(jitcpu->cpu);') + return out + +class jitter_x86_16(Jitter): + + C_Gen = x86_32_CGen + + def __init__(self, *args, **kwargs): + sp = LocationDB() + Jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) + self.vm.set_little_endian() + self.ir_arch.do_stk_segm = False + self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode + self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode + + def ir_archbloc_fix_regs_for_mode(self, irblock, attrib=64): + return self.orig_irbloc_fix_regs_for_mode(irblock, 64) + + def push_uint16_t(self, value): + self.cpu.SP -= self.ir_arch.sp.size // 8 + self.vm.set_u16(self.cpu.SP, value) + + def pop_uint16_t(self): + value = self.vm.get_u16(self.cpu.SP) + self.cpu.SP += self.ir_arch.sp.size // 8 + return value + + def get_stack_arg(self, index): + return self.vm.get_u16(self.cpu.SP + 4 * index) + + def init_run(self, *args, **kwargs): + Jitter.init_run(self, *args, **kwargs) + self.cpu.IP = self.pc + + +class jitter_x86_32(Jitter): + + C_Gen = x86_32_CGen + + def __init__(self, *args, **kwargs): + sp = LocationDB() + Jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) + self.vm.set_little_endian() + self.ir_arch.do_stk_segm = False + + self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode + self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode + + def ir_archbloc_fix_regs_for_mode(self, irblock, attrib=64): + return self.orig_irbloc_fix_regs_for_mode(irblock, 64) + + def 
push_uint16_t(self, value): + self.cpu.ESP -= self.ir_arch.sp.size // 8 + self.vm.set_u16(self.cpu.ESP, value) + + def pop_uint16_t(self): + value = self.vm.get_u16(self.cpu.ESP) + self.cpu.ESP += self.ir_arch.sp.size // 8 + return value + + def push_uint32_t(self, value): + self.cpu.ESP -= self.ir_arch.sp.size // 8 + self.vm.set_u32(self.cpu.ESP, value) + + def pop_uint32_t(self): + value = self.vm.get_u32(self.cpu.ESP) + self.cpu.ESP += self.ir_arch.sp.size // 8 + return value + + def get_stack_arg(self, index): + return self.vm.get_u32(self.cpu.ESP + 4 * index) + + def init_run(self, *args, **kwargs): + Jitter.init_run(self, *args, **kwargs) + self.cpu.EIP = self.pc + + # calling conventions + + # stdcall + @named_arguments + def func_args_stdcall(self, n_args): + ret_ad = self.pop_uint32_t() + args = [self.pop_uint32_t() for _ in range(n_args)] + return ret_ad, args + + def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None): + self.pc = self.cpu.EIP = ret_addr + if ret_value1 is not None: + self.cpu.EAX = ret_value1 + if ret_value2 is not None: + self.cpu.EDX = ret_value2 + + def func_prepare_stdcall(self, ret_addr, *args): + for arg in reversed(args): + self.push_uint32_t(arg) + self.push_uint32_t(ret_addr) + + get_arg_n_stdcall = get_stack_arg + + # cdecl + @named_arguments + def func_args_cdecl(self, n_args): + ret_ad = self.pop_uint32_t() + args = [self.get_stack_arg(i) for i in range(n_args)] + return ret_ad, args + + def func_ret_cdecl(self, ret_addr, ret_value1=None, ret_value2=None): + self.pc = self.cpu.EIP = ret_addr + if ret_value1 is not None: + self.cpu.EAX = ret_value1 + if ret_value2 is not None: + self.cpu.EDX = ret_value2 + + get_arg_n_cdecl = get_stack_arg + + # System V + func_args_systemv = func_args_cdecl + func_ret_systemv = func_ret_cdecl + func_prepare_systemv = func_prepare_stdcall + get_arg_n_systemv = get_stack_arg + + + # fastcall + @named_arguments + def func_args_fastcall(self, n_args): + args_regs = ['ECX', 'EDX'] 
+ ret_ad = self.pop_uint32_t() + args = [] + for i in range(n_args): + args.append(self.get_arg_n_fastcall(i)) + return ret_ad, args + + def func_prepare_fastcall(self, ret_addr, *args): + args_regs = ['ECX', 'EDX'] + for i in range(min(len(args), len(args_regs))): + setattr(self.cpu, args_regs[i], args[i]) + remaining_args = args[len(args_regs):] + for arg in reversed(remaining_args): + self.push_uint32_t(arg) + self.push_uint32_t(ret_addr) + + def get_arg_n_fastcall(self, index): + args_regs = ['ECX', 'EDX'] + if index < len(args_regs): + return getattr(self.cpu, args_regs[index]) + return self.get_stack_arg(index - len(args_regs)) + + + +class jitter_x86_64(Jitter): + + C_Gen = x86_64_CGen + args_regs_systemv = ['RDI', 'RSI', 'RDX', 'RCX', 'R8', 'R9'] + args_regs_stdcall = ['RCX', 'RDX', 'R8', 'R9'] + + def __init__(self, *args, **kwargs): + sp = LocationDB() + Jitter.__init__(self, ir_x86_64(sp), *args, **kwargs) + self.vm.set_little_endian() + self.ir_arch.do_stk_segm = False + + self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode + self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode + + def ir_archbloc_fix_regs_for_mode(self, irblock, attrib=64): + return self.orig_irbloc_fix_regs_for_mode(irblock, 64) + + def push_uint64_t(self, value): + self.cpu.RSP -= self.ir_arch.sp.size // 8 + self.vm.set_u64(self.cpu.RSP, value) + + def pop_uint64_t(self): + value = self.vm.get_u64(self.cpu.RSP) + self.cpu.RSP += self.ir_arch.sp.size // 8 + return value + + def get_stack_arg(self, index): + return self.vm.get_u64(self.cpu.RSP + 8 * index) + + def init_run(self, *args, **kwargs): + Jitter.init_run(self, *args, **kwargs) + self.cpu.RIP = self.pc + + # calling conventions + + # stdcall + @named_arguments + def func_args_stdcall(self, n_args): + args_regs = self.args_regs_stdcall + ret_ad = self.pop_uint64_t() + args = [] + for i in range(min(n_args, 4)): + args.append(self.cpu.get_gpreg()[args_regs[i]]) + for i in range(max(0, 
n_args - 4)): + args.append(self.get_stack_arg(i)) + return ret_ad, args + + def func_prepare_stdcall(self, ret_addr, *args): + args_regs = self.args_regs_stdcall + for i in range(min(len(args), len(args_regs))): + setattr(self.cpu, args_regs[i], args[i]) + remaining_args = args[len(args_regs):] + for arg in reversed(remaining_args): + self.push_uint64_t(arg) + self.push_uint64_t(ret_addr) + + def func_ret_stdcall(self, ret_addr, ret_value=None): + self.pc = self.cpu.RIP = ret_addr + if ret_value is not None: + self.cpu.RAX = ret_value + return True + + # cdecl + func_args_cdecl = func_args_stdcall + func_ret_cdecl = func_ret_stdcall + func_prepare_cdecl = func_prepare_stdcall + + # System V + + def get_arg_n_systemv(self, index): + args_regs = self.args_regs_systemv + if index < len(args_regs): + return getattr(self.cpu, args_regs[index]) + return self.get_stack_arg(index - len(args_regs)) + + @named_arguments + def func_args_systemv(self, n_args): + ret_ad = self.pop_uint64_t() + args = [self.get_arg_n_systemv(index) for index in range(n_args)] + return ret_ad, args + + func_ret_systemv = func_ret_cdecl + + def func_prepare_systemv(self, ret_addr, *args): + args_regs = self.args_regs_systemv + self.push_uint64_t(ret_addr) + for i in range(min(len(args), len(args_regs))): + setattr(self.cpu, args_regs[i], args[i]) + remaining_args = args[len(args_regs):] + for arg in reversed(remaining_args): + self.push_uint64_t(arg) + + def syscall_args_systemv(self, n_args): + args = [self.cpu.RDI, self.cpu.RSI, self.cpu.RDX, self.cpu.R10, + self.cpu.R8, self.cpu.R9][:n_args] + return args + + def syscall_ret_systemv(self, value): + self.cpu.RAX = value diff --git a/miasm/arch/x86/regs.py b/miasm/arch/x86/regs.py new file mode 100644 index 00000000..1fc2009c --- /dev/null +++ b/miasm/arch/x86/regs.py @@ -0,0 +1,454 @@ +from builtins import range +from miasm.expression.expression import ExprId +from miasm.core.cpu import reg_info + + +IP = ExprId('IP', 16) +EIP = ExprId('EIP', 
32) +RIP = ExprId('RIP', 64) +exception_flags = ExprId('exception_flags', 32) +interrupt_num = ExprId('interrupt_num', 8) + +# GP + + +regs08_str = ["AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"] + \ + ["R%dB" % (i + 8) for i in range(8)] +regs08_expr = [ExprId(x, 8) for x in regs08_str] + +regs08_64_str = ["AL", "CL", "DL", "BL", "SPL", "BPL", "SIL", "DIL"] + \ + ["R%dB" % (i + 8) for i in range(8)] +regs08_64_expr = [ExprId(x, 8) for x in regs08_64_str] + + +regs16_str = ["AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI"] + \ + ["R%dW" % (i + 8) for i in range(8)] +regs16_expr = [ExprId(x, 16) for x in regs16_str] + +regs32_str = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"] + \ + ["R%dD" % (i + 8) for i in range(8)] +regs32_expr = [ExprId(x, 32) for x in regs32_str] + +regs64_str = ["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI", + "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", + "RIP"] +regs64_expr = [ExprId(x, 64) for x in regs64_str] + + +regs_xmm_str = ["XMM%d" % i for i in range(16)] +regs_xmm_expr = [ExprId(x, 128) for x in regs_xmm_str] + +regs_mm_str = ["MM%d" % i for i in range(16)] +regs_mm_expr = [ExprId(x, 64) for x in regs_mm_str] + +regs_bnd_str = ["BND%d" % i for i in range(4)] +regs_bnd_expr = [ExprId(x, 128) for x in regs_bnd_str] + +gpregs08 = reg_info(regs08_str, regs08_expr) +gpregs08_64 = reg_info(regs08_64_str, regs08_64_expr) +gpregs16 = reg_info(regs16_str, regs16_expr) +gpregs32 = reg_info(regs32_str, regs32_expr) +gpregs64 = reg_info(regs64_str, regs64_expr) + +gpregs_xmm = reg_info(regs_xmm_str, regs_xmm_expr) +gpregs_mm = reg_info(regs_mm_str, regs_mm_expr) +gpregs_bnd = reg_info(regs_bnd_str, regs_bnd_expr) + +r08_eax = reg_info([regs08_str[0]], [regs08_expr[0]]) +r16_eax = reg_info([regs16_str[0]], [regs16_expr[0]]) +r32_eax = reg_info([regs32_str[0]], [regs32_expr[0]]) +r64_eax = reg_info([regs64_str[0]], [regs64_expr[0]]) + +r08_ecx = reg_info([regs08_str[1]], [regs08_expr[1]]) + +r_eax_all = reg_info( + 
[regs08_str[0], regs16_str[0], regs32_str[0], regs64_str[0]], + [regs08_expr[0], regs16_expr[0], regs32_expr[0], regs64_expr[0]]) +r_edx_all = reg_info( + [regs08_str[2], regs16_str[2], regs32_str[2], regs64_str[2]], + [regs08_expr[2], regs16_expr[2], regs32_expr[2], regs64_expr[2]]) + +r16_edx = reg_info([regs16_str[2]], [regs16_expr[2]]) + + +selectr_str = ["ES", "CS", "SS", "DS", "FS", "GS"] +selectr_expr = [ExprId(x, 16) for x in selectr_str] +segmreg = reg_info(selectr_str, selectr_expr) + +crregs32_str = ["CR%d" % i for i in range(8)] +crregs32_expr = [ExprId(x, 32) for x in crregs32_str] +crregs = reg_info(crregs32_str, crregs32_expr) + + +drregs32_str = ["DR%d" % i for i in range(8)] +drregs32_expr = [ExprId(x, 32) for x in drregs32_str] +drregs = reg_info(drregs32_str, drregs32_expr) + + +fltregs32_str = ["ST(%d)" % i for i in range(8)] +fltregs32_expr = [ExprId(x, 64) for x in fltregs32_str] +fltregs = reg_info(fltregs32_str, fltregs32_expr) + +r_st_all = reg_info(['ST'], + [ExprId('ST', 64)]) + +r_cs_all = reg_info(['CS'], + [ExprId('CS', 16)]) +r_ds_all = reg_info(['DS'], + [ExprId('DS', 16)]) +r_es_all = reg_info(['ES'], + [ExprId('ES', 16)]) +r_ss_all = reg_info(['SS'], + [ExprId('SS', 16)]) +r_fs_all = reg_info(['FS'], + [ExprId('FS', 16)]) +r_gs_all = reg_info(['GS'], + [ExprId('GS', 16)]) + + +AL = regs08_expr[0] +CL = regs08_expr[1] +DL = regs08_expr[2] +BL = regs08_expr[3] +AH = regs08_expr[4] +CH = regs08_expr[5] +DH = regs08_expr[6] +BH = regs08_expr[7] +R8B = regs08_expr[8] +R9B = regs08_expr[9] +R10B = regs08_expr[10] +R11B = regs08_expr[11] +R12B = regs08_expr[12] +R13B = regs08_expr[13] +R14B = regs08_expr[14] +R15B = regs08_expr[15] + +SPL = regs08_64_expr[4] +BPL = regs08_64_expr[5] +SIL = regs08_64_expr[6] +DIL = regs08_64_expr[7] + + +AX = regs16_expr[0] +CX = regs16_expr[1] +DX = regs16_expr[2] +BX = regs16_expr[3] +SP = regs16_expr[4] +BP = regs16_expr[5] +SI = regs16_expr[6] +DI = regs16_expr[7] +R8W = regs16_expr[8] +R9W = 
regs16_expr[9] +R10W = regs16_expr[10] +R11W = regs16_expr[11] +R12W = regs16_expr[12] +R13W = regs16_expr[13] +R14W = regs16_expr[14] +R15W = regs16_expr[15] + + +EAX = regs32_expr[0] +ECX = regs32_expr[1] +EDX = regs32_expr[2] +EBX = regs32_expr[3] +ESP = regs32_expr[4] +EBP = regs32_expr[5] +ESI = regs32_expr[6] +EDI = regs32_expr[7] +R8D = regs32_expr[8] +R9D = regs32_expr[9] +R10D = regs32_expr[10] +R11D = regs32_expr[11] +R12D = regs32_expr[12] +R13D = regs32_expr[13] +R14D = regs32_expr[14] +R15D = regs32_expr[15] + + +RAX = regs64_expr[0] +RCX = regs64_expr[1] +RDX = regs64_expr[2] +RBX = regs64_expr[3] +RSP = regs64_expr[4] +RBP = regs64_expr[5] +RSI = regs64_expr[6] +RDI = regs64_expr[7] +R8 = regs64_expr[8] +R9 = regs64_expr[9] +R10 = regs64_expr[10] +R11 = regs64_expr[11] +R12 = regs64_expr[12] +R13 = regs64_expr[13] +R14 = regs64_expr[14] +R15 = regs64_expr[15] + + +reg_zf = 'zf' +reg_nf = 'nf' +reg_pf = 'pf' +reg_of = 'of' +reg_cf = 'cf' +reg_tf = 'tf' +reg_if = 'i_f' +reg_df = 'df' +reg_af = 'af' +reg_iopl = 'iopl_f' +reg_nt = 'nt' +reg_rf = 'rf' +reg_vm = 'vm' +reg_ac = 'ac' +reg_vif = 'vif' +reg_vip = 'vip' +reg_id = 'i_d' + + +reg_es = "ES" +reg_cs = "CS" +reg_ss = "SS" +reg_ds = "DS" +reg_fs = "FS" +reg_gs = "GS" + +reg_dr0 = 'DR0' +reg_dr1 = 'DR1' +reg_dr2 = 'DR2' +reg_dr3 = 'DR3' +reg_dr4 = 'DR4' +reg_dr5 = 'DR5' +reg_dr6 = 'DR6' +reg_dr7 = 'DR7' + +reg_cr0 = 'CR0' +reg_cr1 = 'CR1' +reg_cr2 = 'CR2' +reg_cr3 = 'CR3' +reg_cr4 = 'CR4' +reg_cr5 = 'CR5' +reg_cr6 = 'CR6' +reg_cr7 = 'CR7' + +reg_mm0 = 'MM0' +reg_mm1 = 'MM1' +reg_mm2 = 'MM2' +reg_mm3 = 'MM3' +reg_mm4 = 'MM4' +reg_mm5 = 'MM5' +reg_mm6 = 'MM6' +reg_mm7 = 'MM7' + +reg_tsc = "tsc" + +reg_float_c0 = 'float_c0' +reg_float_c1 = 'float_c1' +reg_float_c2 = 'float_c2' +reg_float_c3 = 'float_c3' +reg_float_stack_ptr = "float_stack_ptr" +reg_float_control = 'reg_float_control' +reg_float_eip = 'reg_float_eip' +reg_float_cs = 'reg_float_cs' +reg_float_address = 'reg_float_address' +reg_float_ds = 
'reg_float_ds' + + +dr0 = ExprId(reg_dr0, 32) +dr1 = ExprId(reg_dr1, 32) +dr2 = ExprId(reg_dr2, 32) +dr3 = ExprId(reg_dr3, 32) +dr4 = ExprId(reg_dr4, 32) +dr5 = ExprId(reg_dr5, 32) +dr6 = ExprId(reg_dr6, 32) +dr7 = ExprId(reg_dr7, 32) + +cr0 = ExprId(reg_cr0, 32) +cr1 = ExprId(reg_cr1, 32) +cr2 = ExprId(reg_cr2, 32) +cr3 = ExprId(reg_cr3, 32) +cr4 = ExprId(reg_cr4, 32) +cr5 = ExprId(reg_cr5, 32) +cr6 = ExprId(reg_cr6, 32) +cr7 = ExprId(reg_cr7, 32) + +mm0 = ExprId(reg_mm0, 64) +mm1 = ExprId(reg_mm1, 64) +mm2 = ExprId(reg_mm2, 64) +mm3 = ExprId(reg_mm3, 64) +mm4 = ExprId(reg_mm4, 64) +mm5 = ExprId(reg_mm5, 64) +mm6 = ExprId(reg_mm6, 64) +mm7 = ExprId(reg_mm7, 64) + +XMM0 = regs_xmm_expr[0] +XMM1 = regs_xmm_expr[1] +XMM2 = regs_xmm_expr[2] +XMM3 = regs_xmm_expr[3] +XMM4 = regs_xmm_expr[4] +XMM5 = regs_xmm_expr[5] +XMM6 = regs_xmm_expr[6] +XMM7 = regs_xmm_expr[7] +XMM8 = regs_xmm_expr[8] +XMM9 = regs_xmm_expr[9] +XMM10 = regs_xmm_expr[10] +XMM11 = regs_xmm_expr[11] +XMM12 = regs_xmm_expr[12] +XMM13 = regs_xmm_expr[13] +XMM14 = regs_xmm_expr[14] +XMM15 = regs_xmm_expr[15] + +# tmp1= ExprId(reg_tmp1) +zf = ExprId(reg_zf, size=1) +nf = ExprId(reg_nf, size=1) +pf = ExprId(reg_pf, size=1) +of = ExprId(reg_of, size=1) +cf = ExprId(reg_cf, size=1) +tf = ExprId(reg_tf, size=1) +i_f = ExprId(reg_if, size=1) +df = ExprId(reg_df, size=1) +af = ExprId(reg_af, size=1) +iopl = ExprId(reg_iopl, size=2) +nt = ExprId(reg_nt, size=1) +rf = ExprId(reg_rf, size=1) +vm = ExprId(reg_vm, size=1) +ac = ExprId(reg_ac, size=1) +vif = ExprId(reg_vif, size=1) +vip = ExprId(reg_vip, size=1) +i_d = ExprId(reg_id, size=1) + +ES = ExprId(reg_es, size=16) +CS = ExprId(reg_cs, size=16) +SS = ExprId(reg_ss, size=16) +DS = ExprId(reg_ds, size=16) +FS = ExprId(reg_fs, size=16) +GS = ExprId(reg_gs, size=16) + +tsc = ExprId(reg_tsc, size=64) + +float_c0 = ExprId(reg_float_c0, size=1) +float_c1 = ExprId(reg_float_c1, size=1) +float_c2 = ExprId(reg_float_c2, size=1) +float_c3 = ExprId(reg_float_c3, size=1) 
+float_stack_ptr = ExprId(reg_float_stack_ptr, size=3) +float_control = ExprId(reg_float_control, 16) +float_eip = ExprId(reg_float_eip, 32) +float_cs = ExprId(reg_float_cs, size=16) +float_address = ExprId(reg_float_address, 32) +float_ds = ExprId(reg_float_ds, size=16) + +float_st0 = ExprId("float_st0", 64) +float_st1 = ExprId("float_st1", 64) +float_st2 = ExprId("float_st2", 64) +float_st3 = ExprId("float_st3", 64) +float_st4 = ExprId("float_st4", 64) +float_st5 = ExprId("float_st5", 64) +float_st6 = ExprId("float_st6", 64) +float_st7 = ExprId("float_st7", 64) + + +float_list = [float_st0, float_st1, float_st2, float_st3, + float_st4, float_st5, float_st6, float_st7] + +float_replace = {fltregs32_expr[i]: float_list[i] for i in range(8)} +float_replace[r_st_all.expr[0]] = float_st0 + + +EAX_init = ExprId('EAX_init', 32) +EBX_init = ExprId('EBX_init', 32) +ECX_init = ExprId('ECX_init', 32) +EDX_init = ExprId('EDX_init', 32) +ESI_init = ExprId('ESI_init', 32) +EDI_init = ExprId('EDI_init', 32) +ESP_init = ExprId('ESP_init', 32) +EBP_init = ExprId('EBP_init', 32) + + +RAX_init = ExprId('RAX_init', 64) +RBX_init = ExprId('RBX_init', 64) +RCX_init = ExprId('RCX_init', 64) +RDX_init = ExprId('RDX_init', 64) +RSI_init = ExprId('RSI_init', 64) +RDI_init = ExprId('RDI_init', 64) +RSP_init = ExprId('RSP_init', 64) +RBP_init = ExprId('RBP_init', 64) + + +all_regs_ids = [ + AL, CL, DL, BL, AH, CH, DH, BH, + R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B, + SPL, BPL, SIL, DIL, + AX, CX, DX, BX, SP, BP, SI, DI, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, + IP, + EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, + EIP, + + RAX, RBX, RCX, RDX, RSP, RBP, RIP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, pf, of, cf, af, df, + tf, i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, + float_control, float_eip, float_cs, float_address, float_ds, + tsc, + ES, CS, SS, DS, FS, GS, + float_st0, float_st1, float_st2, float_st3, + float_st4, 
float_st5, float_st6, float_st7, + float_c0, float_c1, float_c2, float_c3, + cr0, cr3, + dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, + float_stack_ptr, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + + + exception_flags, interrupt_num, +] + fltregs32_expr + +all_regs_ids_no_alias = [ + RAX, RBX, RCX, RDX, RSP, RBP, RIP, RSI, RDI, + R8, R9, R10, R11, R12, R13, R14, R15, + zf, nf, pf, of, cf, af, df, + tf, i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, + float_control, float_eip, float_cs, float_address, float_ds, + tsc, + ES, CS, SS, DS, FS, GS, + float_st0, float_st1, float_st2, float_st3, + float_st4, float_st5, float_st6, float_st7, + float_c0, float_c1, float_c2, float_c3, + cr0, cr3, + dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, + float_stack_ptr, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + + + exception_flags, interrupt_num, +] + fltregs32_expr + +attrib_to_regs = { + 16: regs16_expr + all_regs_ids_no_alias[all_regs_ids_no_alias.index(zf):] + [IP], + 32: regs32_expr + all_regs_ids_no_alias[all_regs_ids_no_alias.index(zf):] + [EIP], + 64: all_regs_ids_no_alias, +} + +all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) + +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] + +regs_init = {} +for i, r in enumerate(all_regs_ids): + regs_init[r] = all_regs_ids_init[i] + +regs_flt_expr = [float_st0, float_st1, float_st2, float_st3, + float_st4, float_st5, float_st6, float_st7, + ] + +mRAX = {16: AX, 32: EAX, 64: RAX} +mRBX = {16: BX, 32: EBX, 64: RBX} +mRCX = {16: CX, 32: ECX, 64: RCX} +mRDX = {16: DX, 32: EDX, 64: RDX} +mRSI = {16: SI, 32: ESI, 64: RSI} +mRDI = {16: DI, 32: EDI, 64: RDI} +mRBP = {16: BP, 32: EBP, 64: RBP} +mRSP = {16: SP, 32: ESP, 64: RSP} +mRIP = {16: IP, 32: EIP, 64: RIP} diff --git a/miasm/arch/x86/sem.py 
b/miasm/arch/x86/sem.py new file mode 100644 index 00000000..52858ad2 --- /dev/null +++ b/miasm/arch/x86/sem.py @@ -0,0 +1,5822 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# + +from builtins import range + +from future.utils import viewitems + +import logging +import miasm.expression.expression as m2_expr +from miasm.expression.simplifications import expr_simp +from miasm.arch.x86.regs import * +from miasm.arch.x86.arch import mn_x86, repeat_mn, replace_regs +from miasm.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock +from miasm.core.sembuilder import SemBuilder +from miasm.jitter.csts import EXCEPT_DIV_BY_ZERO, EXCEPT_ILLEGAL_INSN, \ + EXCEPT_PRIV_INSN, EXCEPT_SOFT_BP, EXCEPT_INT_XX +import math +import struct + + +LOG_X86_SEM = logging.getLogger("x86_sem") +CONSOLE_HANDLER = logging.StreamHandler() +CONSOLE_HANDLER.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +LOG_X86_SEM.addHandler(CONSOLE_HANDLER) +LOG_X86_SEM.setLevel(logging.WARNING) + + +# SemBuilder context +ctx = {'mRAX': mRAX, + 'mRBX': mRBX, + 'mRCX': mRCX, + 'mRDX': mRDX, + 'zf': zf, + } +sbuild = SemBuilder(ctx) + + + +""" +http://www.emulators.com/docs/nx11_flags.htm + +CF(A+B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND NOT (A XOR B)) < 0) 
+CF(A-B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND (A XOR B)) < 0) + +OF(A+B) = ((A XOR D) AND NOT (A XOR B)) < 0 +OF(A-B) = ((A XOR D) AND (A XOR B)) < 0 +""" + + +# XXX TODO make default check against 0 or not 0 (same eq as in C) +def update_flag_zf_eq(a, b): + return [m2_expr.ExprAssign(zf, m2_expr.ExprOp("FLAG_EQ_CMP", a, b))] + + +def update_flag_zf(a): + return [ + m2_expr.ExprAssign( + zf, + m2_expr.ExprCond( + a, + m2_expr.ExprInt(0, zf.size), + m2_expr.ExprInt(1, zf.size) + ) + ) + ] + + +def update_flag_nf(arg): + return [ + m2_expr.ExprAssign( + nf, + m2_expr.ExprOp("FLAG_SIGN_SUB", arg, m2_expr.ExprInt(0, arg.size)) + ) + ] + + +def update_flag_pf(a): + return [m2_expr.ExprAssign(pf, + m2_expr.ExprOp('parity', + a & m2_expr.ExprInt(0xFF, a.size)))] + + +def update_flag_af(op1, op2, res): + return [m2_expr.ExprAssign(af, (op1 ^ op2 ^ res)[4:5])] + + +def update_flag_znp(a): + e = [] + e += update_flag_zf(a) + e += update_flag_nf(a) + e += update_flag_pf(a) + return e + + +def update_flag_np(result): + e = [] + e += update_flag_nf(result) + e += update_flag_pf(result) + return e + + +def null_flag_co(): + e = [] + e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, of.size))) + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprInt(0, cf.size))) + return e + + +def update_flag_arith(a): + e = [] + e += update_flag_znp(a) + return e + + +def update_flag_zfaddwc_eq(arg1, arg2, arg3): + return [m2_expr.ExprAssign(zf, m2_expr.ExprOp("FLAG_EQ_ADDWC", arg1, arg2, arg3))] + +def update_flag_zfsubwc_eq(arg1, arg2, arg3): + return [m2_expr.ExprAssign(zf, m2_expr.ExprOp("FLAG_EQ_SUBWC", arg1, arg2, arg3))] + + +def update_flag_arith_add_znp(arg1, arg2): + """ + Compute znp flags for (arg1 + arg2) + """ + e = [] + e += update_flag_zf_eq(arg1, -arg2) + e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] + e += update_flag_pf(arg1+arg2) + return e + + +def update_flag_arith_addwc_znp(arg1, arg2, arg3): + """ + Compute znp flags for (arg1 + arg2 + 
cf) + """ + e = [] + e += update_flag_zfaddwc_eq(arg1, arg2, arg3) + e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_ADDWC", arg1, arg2, arg3))] + e += update_flag_pf(arg1+arg2+arg3.zeroExtend(arg2.size)) + return e + + + + +def update_flag_arith_sub_znp(arg1, arg2): + """ + Compute znp flags for (arg1 - arg2) + """ + e = [] + e += update_flag_zf_eq(arg1, arg2) + e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUB", arg1, arg2))] + e += update_flag_pf(arg1 - arg2) + return e + + +def update_flag_arith_subwc_znp(arg1, arg2, arg3): + """ + Compute znp flags for (arg1 - (arg2 + cf)) + """ + e = [] + e += update_flag_zfsubwc_eq(arg1, arg2, arg3) + e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUBWC", arg1, arg2, arg3))] + e += update_flag_pf(arg1 - (arg2+arg3.zeroExtend(arg2.size))) + return e + + +def check_ops_msb(a, b, c): + if not a or not b or not c or a != b or a != c: + raise ValueError('bad ops size %s %s %s' % (a, b, c)) + + +def arith_flag(a, b, c): + a_s, b_s, c_s = a.size, b.size, c.size + check_ops_msb(a_s, b_s, c_s) + a_s, b_s, c_s = a.msb(), b.msb(), c.msb() + return a_s, b_s, c_s + +# checked: ok for adc add because b & c before +cf + + +def update_flag_add_cf(op1, op2, res): + "Compute cf in @res = @op1 + @op2" + #return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_SUB_CF", op1, -op2))] + return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_ADD_CF", op1, op2))] + + +def update_flag_add_of(op1, op2, res): + "Compute of in @res = @op1 + @op2" + return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_ADD_OF", op1, op2))] + + +# checked: ok for sbb add because b & c before +cf +def update_flag_sub_cf(op1, op2, res): + "Compute CF in @res = @op1 - @op2" + return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_SUB_CF", op1, op2))] + + +def update_flag_sub_of(op1, op2, res): + "Compute OF in @res = @op1 - @op2" + return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_SUB_OF", op1, op2))] + + +def update_flag_addwc_cf(op1, op2, op3): + "Compute cf 
in @res = @op1 + @op2 + @op3" + return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_ADDWC_CF", op1, op2, op3))] + + +def update_flag_addwc_of(op1, op2, op3): + "Compute of in @res = @op1 + @op2 + @op3" + return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_ADDWC_OF", op1, op2, op3))] + + + +def update_flag_subwc_cf(op1, op2, op3): + "Compute cf in @res = @op1 - (@op2 + @op3)" + return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_SUBWC_CF", op1, op2, op3))] + + +def update_flag_subwc_of(op1, op2, op3): + "Compute of in @res = @op1 - (@op2 + @op3)" + return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_SUBWC_OF", op1, op2, op3))] + + + + +def update_flag_arith_add_co(x, y, z): + e = [] + e += update_flag_add_cf(x, y, z) + e += update_flag_add_of(x, y, z) + return e + + +def update_flag_arith_sub_co(x, y, z): + e = [] + e += update_flag_sub_cf(x, y, z) + e += update_flag_sub_of(x, y, z) + return e + + + + +def update_flag_arith_addwc_co(arg1, arg2, arg3): + e = [] + e += update_flag_addwc_cf(arg1, arg2, arg3) + e += update_flag_addwc_of(arg1, arg2, arg3) + return e + + +def update_flag_arith_subwc_co(arg1, arg2, arg3): + e = [] + e += update_flag_subwc_cf(arg1, arg2, arg3) + e += update_flag_subwc_of(arg1, arg2, arg3) + return e + + + +def set_float_cs_eip(instr): + e = [] + # XXX TODO check float updt + e.append(m2_expr.ExprAssign(float_eip, + m2_expr.ExprInt(instr.offset, float_eip.size))) + e.append(m2_expr.ExprAssign(float_cs, CS)) + return e + + +def mode2addrsize(mode): + """Returns the address size for a given @mode""" + + mode2size = {16:32, 32:32, 64:64} + if mode not in mode2size: + raise RuntimeError("Unknown size %s" % mode) + return mode2size[mode] + + +def instr2addrsize(instr): + """Returns the address size for a given @instr""" + + return mode2addrsize(instr.mode) + + +def expraddr(mode, ptr): + """Returns memory address pointer with size according to current @mode""" + return ptr.zeroExtend(mode2addrsize(mode)) + + +def fix_mem_args_size(instr, *args): + out 
= [] + for arg in args: + if not arg.is_mem(): + out.append(arg) + continue + ptr = arg.ptr + size = arg.size + if ptr.is_op('segm'): + ptr = m2_expr.ExprOp( + 'segm', ptr.args[0], expraddr(instr.mode, ptr.args[1])) + else: + ptr = expraddr(instr.mode, ptr) + out.append(m2_expr.ExprMem(ptr, size)) + return out + + +def mem2double(instr, arg): + """ + Add float conversion if argument is an ExprMem + @arg: argument to transform + """ + if isinstance(arg, m2_expr.ExprMem): + if arg.size > 64: + # TODO: move to 80 bits + arg = m2_expr.ExprMem(expraddr(instr.mode, arg.ptr), size=64) + return m2_expr.ExprOp('sint_to_fp', arg.signExtend(64)) + else: + return arg + + +def float_implicit_st0(arg1, arg2): + """ + Generate full float operators if one argument is implicit (float_st0) + """ + if arg2 is None: + arg2 = arg1 + arg1 = float_st0 + return arg1, arg2 + + +def gen_jcc(ir, instr, cond, dst, jmp_if): + """ + Macro to generate jcc semantic + @ir: ir instance + @instr: instruction + @cond: condition of the jcc + @dst: the destination if jcc is taken + @jmp_if: jump if/notif cond + """ + + e = [] + meip = mRIP[ir.IRDst.size] + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, dst.size) + + if jmp_if: + dstA, dstB = dst, loc_next_expr + else: + dstA, dstB = loc_next_expr, dst + mn_dst = m2_expr.ExprCond(cond, + dstA.zeroExtend(ir.IRDst.size), + dstB.zeroExtend(ir.IRDst.size)) + e.append(m2_expr.ExprAssign(meip, mn_dst)) + e.append(m2_expr.ExprAssign(ir.IRDst, mn_dst)) + return e, [] + + +def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if): + """Generate fcmov + @ir: ir instance + @instr: instruction instance + @cond: condition + @mov_if: invert condition if False""" + + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + if mov_if: + dstA, dstB = loc_do_expr, loc_skip_expr + else: + dstA, dstB = loc_skip_expr, loc_do_expr + e = [] + 
e_do, extra_irs = [m2_expr.ExprAssign(arg1, arg2)], [] + e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] + + +def gen_cmov(ir, instr, cond, dst, src, mov_if): + """Generate cmov + @ir: ir instance + @instr: instruction instance + @cond: condition + @mov_if: invert condition if False""" + + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + if mov_if: + dstA, dstB = loc_do_expr, loc_skip_expr + else: + dstA, dstB = loc_skip_expr, loc_do_expr + e = [m2_expr.ExprAssign(dst, dst)] + e_do, extra_irs = mov(ir, instr, dst, src) + e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] + + +def mov(_, instr, dst, src): + if dst in [ES, CS, SS, DS, FS, GS]: + src = src[:dst.size] + if src in [ES, CS, SS, DS, FS, GS]: + src = src.zeroExtend(dst.size) + e = [m2_expr.ExprAssign(dst, src)] + return e, [] + + +def movq(_, instr, dst, src): + src_final = (src.zeroExtend(dst.size) + if dst.size >= src.size else + src[:dst.size]) + return [m2_expr.ExprAssign(dst, src_final)], [] + + +@sbuild.parse +def xchg(arg1, arg2): + arg1 = arg2 + arg2 = arg1 + + + +def movzx(_, instr, dst, src): + e = [m2_expr.ExprAssign(dst, src.zeroExtend(dst.size))] + return e, [] + + +def movsx(_, instr, dst, src): + e = [m2_expr.ExprAssign(dst, src.signExtend(dst.size))] + return e, [] + + +def lea(_, instr, dst, src): + ptr = src.ptr + if src.is_mem_segm(): + # Do not use segmentation here + ptr = ptr.args[1] + + if ptr.size > dst.size: + ptr = ptr[:dst.size] + e = [m2_expr.ExprAssign(dst, ptr.zeroExtend(dst.size))] + return e, [] + + +def add(_, instr, dst, src): + e = [] + + result = dst + src + + e += 
update_flag_arith_add_znp(dst, src) + e += update_flag_arith_add_co(dst, src, result) + e += update_flag_af(dst, src, result) + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def xadd(_, instr, dst, src): + e = [] + + result = dst + src + e += update_flag_arith_add_znp(dst, src) + e += update_flag_arith_add_co(src, dst, result) + e += update_flag_af(dst, src, result) + if dst != src: + e.append(m2_expr.ExprAssign(src, dst)) + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def adc(_, instr, dst, src): + e = [] + + arg1 = dst + arg2 = src + result = arg1 + (arg2 + cf.zeroExtend(src.size)) + + e += update_flag_arith_addwc_znp(arg1, arg2, cf) + e += update_flag_arith_addwc_co(arg1, arg2, cf) + e += update_flag_af(arg1, arg2, result) + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def sub(_, instr, dst, src): + e = [] + arg1, arg2 = dst, src + result = dst - src + + e += update_flag_arith_sub_znp(arg1, arg2) + e += update_flag_arith_sub_co(arg1, arg2, result) + e += update_flag_af(dst, src, result) + + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + +# a-(b+cf) + + +def sbb(_, instr, dst, src): + e = [] + arg1 = dst + arg2 = src + result = arg1 - (arg2 + cf.zeroExtend(src.size)) + + e += update_flag_arith_subwc_znp(arg1, arg2, cf) + e += update_flag_af(arg1, arg2, result) + e += update_flag_arith_subwc_co(arg1, arg2, cf) + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def neg(_, instr, src): + e = [] + dst = m2_expr.ExprInt(0, src.size) + arg1, arg2 = dst, src + result = arg1 - arg2 + + e += update_flag_arith_sub_znp(arg1, arg2) + e += update_flag_arith_sub_co(arg1, arg2, result) + e += update_flag_af(arg1, arg2, result) + e.append(m2_expr.ExprAssign(src, result)) + return (e, []) + + +def l_not(_, instr, dst): + e = [] + result = (~dst) + e.append(m2_expr.ExprAssign(dst, result)) + return (e, []) + + +def l_cmp(_, instr, dst, src): + e = [] + arg1, arg2 = dst, src + result = dst - src + + e += 
update_flag_arith_sub_znp(arg1, arg2) + e += update_flag_arith_sub_co(arg1, arg2, result) + e += update_flag_af(dst, src, result) + return (e, []) + + +def xor(_, instr, dst, src): + e = [] + result = dst ^ src + e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ_CMP', dst, src))] + e += update_flag_np(result) + e += null_flag_co() + e.append(m2_expr.ExprAssign(dst, result)) + return (e, []) + + +def pxor(_, instr, dst, src): + e = [] + result = dst ^ src + e.append(m2_expr.ExprAssign(dst, result)) + return (e, []) + + +def l_or(_, instr, dst, src): + e = [] + result = dst | src + e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ', dst | src))] + e += update_flag_np(result) + e += null_flag_co() + e.append(m2_expr.ExprAssign(dst, result)) + return (e, []) + + +def l_and(_, instr, dst, src): + e = [] + result = dst & src + e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ_AND', dst, src))] + e += update_flag_np(result) + e += null_flag_co() + + e.append(m2_expr.ExprAssign(dst, result)) + return (e, []) + + +def l_test(_, instr, dst, src): + e = [] + result = dst & src + + e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ_CMP', result, m2_expr.ExprInt(0, result.size)))] + e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUB", result, m2_expr.ExprInt(0, result.size)))] + e += update_flag_pf(result) + e += null_flag_co() + + return (e, []) + + +def get_shift(dst, src): + if isinstance(src, m2_expr.ExprInt): + src = m2_expr.ExprInt(int(src), dst.size) + else: + src = src.zeroExtend(dst.size) + if dst.size == 64: + shift = src & m2_expr.ExprInt(63, src.size) + else: + shift = src & m2_expr.ExprInt(31, src.size) + shift = expr_simp(shift) + return shift + + +def _rotate_tpl(ir, instr, dst, src, op, left=False): + '''Template to generate a rotater with operation @op + A temporary basic block is generated to handle 0-rotate + @op: operation to execute + @left (optional): indicates a left rotate if set, default is False + ''' + # Compute results + shifter = 
get_shift(dst, src) + res = m2_expr.ExprOp(op, dst, shifter) + + # CF is computed with 1-less round than `res` + new_cf = m2_expr.ExprOp( + op, dst, shifter - m2_expr.ExprInt(1, size=shifter.size)) + new_cf = new_cf.msb() if left else new_cf[:1] + + # OF is defined only for @b == 1 + new_of = m2_expr.ExprCond(src - m2_expr.ExprInt(1, size=src.size), + m2_expr.ExprInt(0, size=of.size), + res.msb() ^ new_cf if left else (dst ^ res).msb()) + + # Build basic blocks + e_do = [m2_expr.ExprAssign(cf, new_cf), + m2_expr.ExprAssign(of, new_of), + m2_expr.ExprAssign(dst, res) + ] + e = [m2_expr.ExprAssign(dst, dst)] + # Don't generate conditional shifter on constant + if isinstance(shifter, m2_expr.ExprInt): + if int(shifter) != 0: + return (e_do, []) + else: + return (e, []) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAssign( + ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) + return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) + + +def l_rol(ir, instr, dst, src): + return _rotate_tpl(ir, instr, dst, src, '<<<', left=True) + + +def l_ror(ir, instr, dst, src): + return _rotate_tpl(ir, instr, dst, src, '>>>') + + +def rotate_with_carry_tpl(ir, instr, op, dst, src): + # Compute results + shifter = get_shift(dst, src).zeroExtend(dst.size + 1) + result = m2_expr.ExprOp(op, m2_expr.ExprCompose(dst, cf), shifter) + + new_cf = result[dst.size:dst.size +1] + new_dst = result[:dst.size] + + result_trunc = result[:dst.size] + if op == '<<<': + of_value = result_trunc.msb() ^ new_cf + else: + of_value = (dst ^ result_trunc).msb() + # OF is defined only for @b == 1 + new_of = m2_expr.ExprCond(src - m2_expr.ExprInt(1, size=src.size), + m2_expr.ExprInt(0, size=of.size), + of_value) + + + # Build basic blocks + e_do = [m2_expr.ExprAssign(cf, new_cf), + 
m2_expr.ExprAssign(of, new_of), + m2_expr.ExprAssign(dst, new_dst) + ] + e = [m2_expr.ExprAssign(dst, dst)] + # Don't generate conditional shifter on constant + if isinstance(shifter, m2_expr.ExprInt): + if int(shifter) != 0: + return (e_do, []) + else: + return (e, []) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAssign( + ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) + return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) + +def rcl(ir, instr, dst, src): + return rotate_with_carry_tpl(ir, instr, '<<<', dst, src) + +def rcr(ir, instr, dst, src): + return rotate_with_carry_tpl(ir, instr, '>>>', dst, src) + + +def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False, + custom_of=None): + """Template to generate a shifter with operation @op + A temporary basic block is generated to handle 0-shift + @op: operation to execute + @c (optional): if set, instruction has a bit provider + @op_inv (optional): opposite operation of @op. 
Must be provided if @c + @left (optional): indicates a left shift if set, default is False + @custom_of (optional): if set, override the computed value of OF + """ + if c is not None: + shifter = get_shift(a, c) + else: + shifter = get_shift(a, b) + + res = m2_expr.ExprOp(op, a, shifter) + cf_from_dst = m2_expr.ExprOp(op, a, + (shifter - m2_expr.ExprInt(1, a.size))) + cf_from_dst = cf_from_dst.msb() if left else cf_from_dst[:1] + + new_cf = cf_from_dst + i1 = m2_expr.ExprInt(1, size=a.size) + if c is not None: + # There is a source for new bits + isize = m2_expr.ExprInt(a.size, size=a.size) + mask = m2_expr.ExprOp(op_inv, i1, (isize - shifter)) - i1 + + # An overflow can occur, emulate the 'undefined behavior' + # Overflow behavior if (shift / size % 2) + base_cond_overflow = shifter if left else ( + shifter - m2_expr.ExprInt(1, size=shifter.size)) + cond_overflow = base_cond_overflow & m2_expr.ExprInt(a.size, shifter.size) + if left: + # Overflow occurs one round before right + mask = m2_expr.ExprCond(cond_overflow, mask, ~mask) + else: + mask = m2_expr.ExprCond(cond_overflow, ~mask, mask) + + # Build res with dst and src + res = ((m2_expr.ExprOp(op, a, shifter) & mask) | + (m2_expr.ExprOp(op_inv, b, (isize - shifter)) & ~mask)) + + # Overflow case: cf comes from src (bit number shifter % size) + cf_from_src = m2_expr.ExprOp(op, b, + (shifter.zeroExtend(b.size) & + m2_expr.ExprInt(a.size - 1, b.size)) - i1) + cf_from_src = cf_from_src.msb() if left else cf_from_src[:1] + new_cf = m2_expr.ExprCond(cond_overflow, cf_from_src, cf_from_dst) + + # Overflow flag: value_of is only used when shifter is equal to 1, else OF is cleared + if custom_of is None: + value_of = a.msb() ^ a[-2:-1] if left else b[:1] ^ a.msb() + else: + value_of = custom_of + + # Build basic blocks + e_do = [ + m2_expr.ExprAssign(cf, new_cf), + m2_expr.ExprAssign(of, m2_expr.ExprCond(shifter - i1, + m2_expr.ExprInt(0, of.size), + value_of)), + m2_expr.ExprAssign(a, res), + ] + e_do += update_flag_znp(res) + e = 
[m2_expr.ExprAssign(a, a)] + # Don't generate conditional shifter on constant + if isinstance(shifter, m2_expr.ExprInt): + if int(shifter) != 0: + return (e_do, []) + else: + return (e, []) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, + loc_skip_expr))) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] + + +def sar(ir, instr, dst, src): + # Fixup OF, always cleared if src != 0 + i0 = m2_expr.ExprInt(0, size=of.size) + return _shift_tpl("a>>", ir, instr, dst, src, custom_of=i0) + + +def shr(ir, instr, dst, src): + return _shift_tpl(">>", ir, instr, dst, src, custom_of=dst.msb()) + + +def shrd(ir, instr, dst, src1, src2): + return _shift_tpl(">>>", ir, instr, dst, src1, src2, "<<<") + + +def shl(ir, instr, dst, src): + return _shift_tpl("<<", ir, instr, dst, src, left=True) + + +def shld(ir, instr, dst, src1, src2): + return _shift_tpl("<<<", ir, instr, dst, src1, src2, ">>>", left=True) + + +# XXX todo ### +def cmc(_, instr): + e = [m2_expr.ExprAssign(cf, m2_expr.ExprCond(cf, m2_expr.ExprInt(0, cf.size), + m2_expr.ExprInt(1, cf.size)))] + return e, [] + + +def clc(_, instr): + e = [m2_expr.ExprAssign(cf, m2_expr.ExprInt(0, cf.size))] + return e, [] + + +def stc(_, instr): + e = [m2_expr.ExprAssign(cf, m2_expr.ExprInt(1, cf.size))] + return e, [] + + +def cld(_, instr): + e = [m2_expr.ExprAssign(df, m2_expr.ExprInt(0, df.size))] + return e, [] + + +def std(_, instr): + e = [m2_expr.ExprAssign(df, m2_expr.ExprInt(1, df.size))] + return e, [] + + +def cli(_, instr): + e = [m2_expr.ExprAssign(i_f, m2_expr.ExprInt(0, i_f.size))] + return e, [] + + +def sti(_, instr): + e = [m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))] + return e, [] + + +def inc(_, instr, dst): + e = [] + src = 
m2_expr.ExprInt(1, dst.size) + arg1, arg2 = dst, src + result = dst + src + + e += update_flag_arith_add_znp(arg1, arg2) + e += update_flag_af(arg1, arg2, result) + e += update_flag_add_of(arg1, arg2, result) + + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def dec(_, instr, dst): + e = [] + src = m2_expr.ExprInt(1, dst.size) + arg1, arg2 = dst, src + result = dst - src + + e += update_flag_arith_sub_znp(arg1, arg2) + e += update_flag_af(arg1, arg2, result) + e += update_flag_sub_of(arg1, arg2, result) + + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def push_gen(ir, instr, src, size): + e = [] + if not size in [16, 32, 64]: + raise ValueError('bad size stacker!') + if src.size < size: + src = src.zeroExtend(size) + off_size = src.size + + sp = mRSP[instr.mode] + new_sp = sp - m2_expr.ExprInt(off_size // 8, sp.size) + e.append(m2_expr.ExprAssign(sp, new_sp)) + if ir.do_stk_segm: + new_sp = ir.gen_segm_expr(SS, new_sp) + e.append(m2_expr.ExprAssign(ir.ExprMem(new_sp, off_size), + src)) + return e, [] + + +def push(ir, instr, src): + return push_gen(ir, instr, src, instr.mode) + + +def pushw(ir, instr, src): + return push_gen(ir, instr, src, 16) + + +def pop_gen(ir, instr, src, size): + e = [] + if not size in [16, 32, 64]: + raise ValueError('bad size stacker!') + + sp = mRSP[instr.mode] + new_sp = sp + m2_expr.ExprInt(src.size // 8, sp.size) + # don't generate ESP incrementation on POP ESP + if src != ir.sp: + e.append(m2_expr.ExprAssign(sp, new_sp)) + # XXX FIX XXX for pop [esp] + if isinstance(src, m2_expr.ExprMem): + src = expr_simp(src.replace_expr({sp: new_sp})) + result = sp + if ir.do_stk_segm: + result = ir.gen_segm_expr(SS, result) + + e.append(m2_expr.ExprAssign(src, ir.ExprMem(result, src.size))) + return e, [] + + +def pop(ir, instr, src): + return pop_gen(ir, instr, src, instr.mode) + + +def popw(ir, instr, src): + return pop_gen(ir, instr, src, 16) + + +def sete(_, instr, dst): + e = [] + e.append( + 
m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_EQ", zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setnz(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_EQ", ~zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setl(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_S<", nf, of).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setg(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_S>", nf, of, zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setge(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_S>=", nf, of).zeroExtend(dst.size), + ) + ) + return e, [] + + +def seta(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U>", cf, zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setae(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U>=", cf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setb(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U<", cf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setbe(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U<=", cf, zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setns(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_NEG", ~nf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def sets(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_NEG", nf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def seto(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + of.zeroExtend(dst.size) + ) + ) + return e, [] + + +def setp(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + pf.zeroExtend(dst.size) + ) + ) + return e, [] + + +def setnp(_, instr, dst): + e = [] + e.append( + 
m2_expr.ExprAssign( + dst, + m2_expr.ExprCond( + pf, + m2_expr.ExprInt(0, dst.size), + m2_expr.ExprInt(1, dst.size) + ) + ) + ) + return e, [] + + +def setle(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_S<=", nf, of, zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setna(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U<=", cf, zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setnbe(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U>", cf, zf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setno(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprCond( + of, + m2_expr.ExprInt(0, dst.size), + m2_expr.ExprInt(1, dst.size) + ) + ) + ) + return e, [] + + +def setnb(_, instr, dst): + e = [] + e.append( + m2_expr.ExprAssign( + dst, + m2_expr.ExprOp("CC_U>=", cf).zeroExtend(dst.size), + ) + ) + return e, [] + + +def setalc(_, instr): + dst = mRAX[instr.mode][0:8] + e = [] + e.append( + m2_expr.ExprAssign(dst, m2_expr.ExprCond(cf, m2_expr.ExprInt(0xff, dst.size), + m2_expr.ExprInt(0, dst.size)))) + return e, [] + + +def bswap(_, instr, dst): + e = [] + if dst.size == 16: + result = m2_expr.ExprCompose(dst[8:16], dst[:8]) + elif dst.size == 32: + result = m2_expr.ExprCompose( + dst[24:32], dst[16:24], dst[8:16], dst[:8]) + elif dst.size == 64: + result = m2_expr.ExprCompose(dst[56:64], dst[48:56], dst[40:48], dst[32:40], + dst[24:32], dst[16:24], dst[8:16], dst[:8]) + else: + raise ValueError('the size DOES matter') + e.append(m2_expr.ExprAssign(dst, result)) + return e, [] + + +def cmps(ir, instr, size): + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + src1 = mRSI[instr.mode][:instr.v_admode()] + src2 = mRDI[instr.mode][:instr.v_admode()] + 
+ if ir.do_str_segm: + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + src1_sgm = ir.gen_segm_expr(DS, src1) + src2_sgm = ir.gen_segm_expr(ES, src2) + else: + src1_sgm = src1 + src2_sgm = src2 + + offset = m2_expr.ExprInt(size // 8, src1.size) + + e, _ = l_cmp(ir, instr, + ir.ExprMem(src1_sgm, size), + ir.ExprMem(src2_sgm, size)) + + + e0 = [] + e0.append(m2_expr.ExprAssign(src1, src1 + offset)) + e0.append(m2_expr.ExprAssign(src2, src2 + offset)) + e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) + + e1 = [] + e1.append(m2_expr.ExprAssign(src1, src1 - offset)) + e1.append(m2_expr.ExprAssign(src2, src2 - offset)) + e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) + + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) + return e, [e0, e1] + + +def scas(ir, instr, size): + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + src = mRDI[instr.mode][:instr.v_admode()] + + if ir.do_str_segm: + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + src_sgm = ir.gen_segm_expr(ES, src) + + else: + src_sgm = src + + offset = m2_expr.ExprInt(size // 8, src.size) + e, extra = l_cmp(ir, instr, + mRAX[instr.mode][:size], + ir.ExprMem(src_sgm, size)) + + e0 = [] + e0.append(m2_expr.ExprAssign(src, src + offset)) + + e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) + + e1 = [] + e1.append(m2_expr.ExprAssign(src, src - offset)) + e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) + + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) + + return e, [e0, 
e1] + + +def compose_eflag(s=32): + args = [] + + args = [cf, m2_expr.ExprInt(1, 1), pf, m2_expr.ExprInt(0, 1), af, + m2_expr.ExprInt(0, 1), zf, nf, tf, i_f, df, of, iopl] + + if s == 32: + args += [nt, m2_expr.ExprInt(0, 1), rf, vm, ac, vif, vip, i_d] + elif s == 16: + args += [nt, m2_expr.ExprInt(0, 1)] + else: + raise ValueError('unk size') + if s == 32: + args.append(m2_expr.ExprInt(0, 10)) + return m2_expr.ExprCompose(*args) + + +def pushfd(ir, instr): + return push(ir, instr, compose_eflag()) + + +def pushfq(ir, instr): + return push(ir, instr, compose_eflag().zeroExtend(64)) + + +def pushfw(ir, instr): + return pushw(ir, instr, compose_eflag(16)) + + +def popfd(ir, instr): + tmp = ir.ExprMem(mRSP[instr.mode], 32) + e = [] + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprSlice(tmp, 0, 1))) + e.append(m2_expr.ExprAssign(pf, m2_expr.ExprSlice(tmp, 2, 3))) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprSlice(tmp, 4, 5))) + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprSlice(tmp, 6, 7))) + e.append(m2_expr.ExprAssign(nf, m2_expr.ExprSlice(tmp, 7, 8))) + e.append(m2_expr.ExprAssign(tf, m2_expr.ExprSlice(tmp, 8, 9))) + e.append(m2_expr.ExprAssign(i_f, m2_expr.ExprSlice(tmp, 9, 10))) + e.append(m2_expr.ExprAssign(df, m2_expr.ExprSlice(tmp, 10, 11))) + e.append(m2_expr.ExprAssign(of, m2_expr.ExprSlice(tmp, 11, 12))) + e.append(m2_expr.ExprAssign(iopl, m2_expr.ExprSlice(tmp, 12, 14))) + e.append(m2_expr.ExprAssign(nt, m2_expr.ExprSlice(tmp, 14, 15))) + e.append(m2_expr.ExprAssign(rf, m2_expr.ExprSlice(tmp, 16, 17))) + e.append(m2_expr.ExprAssign(vm, m2_expr.ExprSlice(tmp, 17, 18))) + e.append(m2_expr.ExprAssign(ac, m2_expr.ExprSlice(tmp, 18, 19))) + e.append(m2_expr.ExprAssign(vif, m2_expr.ExprSlice(tmp, 19, 20))) + e.append(m2_expr.ExprAssign(vip, m2_expr.ExprSlice(tmp, 20, 21))) + e.append(m2_expr.ExprAssign(i_d, m2_expr.ExprSlice(tmp, 21, 22))) + e.append(m2_expr.ExprAssign(mRSP[instr.mode], + mRSP[instr.mode] + m2_expr.ExprInt(instr.mode // 8, mRSP[instr.mode].size))) + 
e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprCond(m2_expr.ExprSlice(tmp, 8, 9), + m2_expr.ExprInt( + EXCEPT_SOFT_BP, 32), + exception_flags + ) + ) + ) + return e, [] + + +def _tpl_eflags(tmp): + """Extract eflags from @tmp + @tmp: Expr instance with a size >= 16 + """ + return [m2_expr.ExprAssign(dest, tmp[base:base + dest.size]) + for base, dest in ((0, cf), (2, pf), (4, af), (6, zf), (7, nf), + (8, tf), (9, i_f), (10, df), (11, of), + (12, iopl), (14, nt))] + + +def popfw(ir, instr): + tmp = ir.ExprMem(mRSP[instr.mode], 16) + e = _tpl_eflags(tmp) + e.append( + m2_expr.ExprAssign(mRSP[instr.mode], mRSP[instr.mode] + m2_expr.ExprInt(2, mRSP[instr.mode].size))) + return e, [] + +pa_regs = [ + mRAX, mRCX, + mRDX, mRBX, + mRSP, mRBP, + mRSI, mRDI +] + + +def pusha_gen(ir, instr, size): + e = [] + cur_sp = mRSP[instr.mode] + for i, reg in enumerate(pa_regs): + stk_ptr = cur_sp + m2_expr.ExprInt(-(size // 8) * (i + 1), instr.mode) + e.append(m2_expr.ExprAssign(ir.ExprMem(stk_ptr, size), reg[size])) + e.append(m2_expr.ExprAssign(cur_sp, stk_ptr)) + return e, [] + + +def pusha(ir, instr): + return pusha_gen(ir, instr, 16) + + +def pushad(ir, instr): + return pusha_gen(ir, instr, 32) + + +def popa_gen(ir, instr, size): + e = [] + cur_sp = mRSP[instr.mode] + for i, reg in enumerate(reversed(pa_regs)): + if reg == mRSP: + continue + stk_ptr = cur_sp + m2_expr.ExprInt((size // 8) * i, instr.mode) + e.append(m2_expr.ExprAssign(reg[size], ir.ExprMem(stk_ptr, size))) + + stk_ptr = cur_sp + m2_expr.ExprInt((size // 8) * (i + 1), instr.mode) + e.append(m2_expr.ExprAssign(cur_sp, stk_ptr)) + + return e, [] + + +def popa(ir, instr): + return popa_gen(ir, instr, 16) + + +def popad(ir, instr): + return popa_gen(ir, instr, 32) + + +def call(ir, instr, dst): + e = [] + # opmode, admode = instr.opmode, instr.admode + s = dst.size + meip = mRIP[ir.IRDst.size] + opmode, admode = s, instr.v_admode() + myesp = mRSP[instr.mode][:opmode] + n = 
m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + if isinstance(dst, m2_expr.ExprOp): + if dst.op == "segm": + # Far call segm:addr + if instr.mode not in [16, 32]: + raise RuntimeError('not supported') + segm = dst.args[0] + base = dst.args[1] + m1 = segm.zeroExtend(CS.size) + m2 = base.zeroExtend(meip.size) + elif dst.op == "far": + # Far call far [eax] + addr = dst.args[0].arg + m1 = ir.ExprMem(addr, CS.size) + m2 = ir.ExprMem(addr + m2_expr.ExprInt(2, addr.size), meip.size) + else: + raise RuntimeError("bad call operator") + + e.append(m2_expr.ExprAssign(CS, m1)) + e.append(m2_expr.ExprAssign(meip, m2)) + + e.append(m2_expr.ExprAssign(ir.IRDst, m2)) + + c = myesp + m2_expr.ExprInt(-s // 8, s) + e.append(m2_expr.ExprAssign(ir.ExprMem(c, size=s).zeroExtend(s), + CS.zeroExtend(s))) + + c = myesp + m2_expr.ExprInt((-2 * s) // 8, s) + e.append(m2_expr.ExprAssign(ir.ExprMem(c, size=s).zeroExtend(s), + meip.zeroExtend(s))) + + c = myesp + m2_expr.ExprInt((-2 * s) // 8, s) + e.append(m2_expr.ExprAssign(myesp, c)) + return e, [] + + c = myesp + m2_expr.ExprInt(-s // 8, s) + e.append(m2_expr.ExprAssign(myesp, c)) + if ir.do_stk_segm: + c = ir.gen_segm_expr(SS, c) + + e.append(m2_expr.ExprAssign(ir.ExprMem(c, size=s), n)) + e.append(m2_expr.ExprAssign(meip, dst.zeroExtend(ir.IRDst.size))) + e.append(m2_expr.ExprAssign(ir.IRDst, dst.zeroExtend(ir.IRDst.size))) + return e, [] + + +def ret(ir, instr, src=None): + e = [] + meip = mRIP[ir.IRDst.size] + size, admode = instr.v_opmode(), instr.v_admode() + myesp = mRSP[instr.mode][:size] + + if src is None: + value = (myesp + (m2_expr.ExprInt(size // 8, size))) + else: + src = m2_expr.ExprInt(int(src), size) + value = (myesp + (m2_expr.ExprInt(size // 8, size) + src)) + + e.append(m2_expr.ExprAssign(myesp, value)) + result = myesp + if ir.do_stk_segm: + result = ir.gen_segm_expr(SS, result) + + e.append(m2_expr.ExprAssign(meip, ir.ExprMem( + result, size=size).zeroExtend(size))) + 
e.append(m2_expr.ExprAssign(ir.IRDst, + ir.ExprMem(result, size=size).zeroExtend(size))) + return e, [] + + +def retf(ir, instr, src=None): + e = [] + meip = mRIP[ir.IRDst.size] + size, admode = instr.v_opmode(), instr.v_admode() + if src is None: + src = m2_expr.ExprInt(0, instr.mode) + myesp = mRSP[instr.mode][:size] + + src = src.zeroExtend(size) + + result = myesp + if ir.do_stk_segm: + result = ir.gen_segm_expr(SS, result) + + e.append(m2_expr.ExprAssign(meip, ir.ExprMem( + result, size=size).zeroExtend(size))) + e.append(m2_expr.ExprAssign(ir.IRDst, + ir.ExprMem(result, size=size).zeroExtend(size))) + # e.append(m2_expr.ExprAssign(meip, ir.ExprMem(c, size = s))) + result = myesp + m2_expr.ExprInt(size // 8, size) + if ir.do_stk_segm: + result = ir.gen_segm_expr(SS, result) + + e.append(m2_expr.ExprAssign(CS, ir.ExprMem(result, size=16))) + + value = myesp + (m2_expr.ExprInt((2 * size) // 8, size) + src) + e.append(m2_expr.ExprAssign(myesp, value)) + return e, [] + + +def leave(ir, instr): + size = instr.mode + myesp = mRSP[size] + e = [] + e.append(m2_expr.ExprAssign(mRBP[size], ir.ExprMem(mRBP[size], size=size))) + e.append(m2_expr.ExprAssign(myesp, + m2_expr.ExprInt(size // 8, size) + mRBP[size])) + return e, [] + + +def enter(ir, instr, src1, src2): + size, admode = instr.v_opmode(), instr.v_admode() + myesp = mRSP[instr.mode][:size] + myebp = mRBP[instr.mode][:size] + + src1 = src1.zeroExtend(size) + + e = [] + esp_tmp = myesp - m2_expr.ExprInt(size // 8, size) + e.append(m2_expr.ExprAssign(ir.ExprMem(esp_tmp, size=size), + myebp)) + e.append(m2_expr.ExprAssign(myebp, esp_tmp)) + e.append(m2_expr.ExprAssign(myesp, + myesp - (src1 + m2_expr.ExprInt(size // 8, size)))) + return e, [] + + +def jmp(ir, instr, dst): + e = [] + meip = mRIP[ir.IRDst.size] + + if isinstance(dst, m2_expr.ExprOp): + if dst.op == "segm": + # Far jmp segm:addr + segm = dst.args[0] + base = dst.args[1] + m1 = segm.zeroExtend(CS.size) + m2 = base.zeroExtend(meip.size) + elif dst.op == 
"far": + # Far jmp far [eax] + addr = dst.args[0].arg + m1 = ir.ExprMem(addr, CS.size) + m2 = ir.ExprMem(addr + m2_expr.ExprInt(2, addr.size), meip.size) + else: + raise RuntimeError("bad jmp operator") + + e.append(m2_expr.ExprAssign(CS, m1)) + e.append(m2_expr.ExprAssign(meip, m2)) + e.append(m2_expr.ExprAssign(ir.IRDst, m2)) + + else: + # Classic jmp + e.append(m2_expr.ExprAssign(meip, dst)) + e.append(m2_expr.ExprAssign(ir.IRDst, dst)) + + if isinstance(dst, m2_expr.ExprMem): + dst = meip + return e, [] + + +def jz(ir, instr, dst): + #return gen_jcc(ir, instr, zf, dst, True) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, True) + + +def jcxz(ir, instr, dst): + return gen_jcc(ir, instr, mRCX[instr.mode][:16], dst, False) + + +def jecxz(ir, instr, dst): + return gen_jcc(ir, instr, mRCX[instr.mode][:32], dst, False) + + +def jrcxz(ir, instr, dst): + return gen_jcc(ir, instr, mRCX[instr.mode], dst, False) + + +def jnz(ir, instr, dst): + #return gen_jcc(ir, instr, zf, dst, False) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, False) + + + +def jp(ir, instr, dst): + return gen_jcc(ir, instr, pf, dst, True) + + +def jnp(ir, instr, dst): + return gen_jcc(ir, instr, pf, dst, False) + + +def ja(ir, instr, dst): + #return gen_jcc(ir, instr, cf | zf, dst, False) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U>", cf, zf), dst, True) + + +def jae(ir, instr, dst): + #return gen_jcc(ir, instr, cf, dst, False) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U>=", cf), dst, True) + + +def jb(ir, instr, dst): + #return gen_jcc(ir, instr, cf, dst, True) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U<", cf), dst, True) + + +def jbe(ir, instr, dst): + #return gen_jcc(ir, instr, cf | zf, dst, True) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U<=", cf, zf), dst, True) + + +def jge(ir, instr, dst): + #return gen_jcc(ir, instr, nf - of, dst, False) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S>=", nf, of), dst, True) + + +def jg(ir, instr, dst): + 
#return gen_jcc(ir, instr, zf | (nf - of), dst, False) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S>", nf, of, zf), dst, True) + + +def jl(ir, instr, dst): + #return gen_jcc(ir, instr, nf - of, dst, True) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S<", nf, of), dst, True) + + +def jle(ir, instr, dst): + #return gen_jcc(ir, instr, zf | (nf - of), dst, True) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S<=", nf, of, zf), dst, True) + + + +def js(ir, instr, dst): + #return gen_jcc(ir, instr, nf, dst, True) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, True) + + + +def jns(ir, instr, dst): + #return gen_jcc(ir, instr, nf, dst, False) + return gen_jcc(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, False) + + +def jo(ir, instr, dst): + return gen_jcc(ir, instr, of, dst, True) + + +def jno(ir, instr, dst): + return gen_jcc(ir, instr, of, dst, False) + + +def loop(ir, instr, dst): + e = [] + meip = mRIP[ir.IRDst.size] + admode = instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + c = myecx - m2_expr.ExprInt(1, myecx.size) + dst_o = m2_expr.ExprCond(c, + dst.zeroExtend(ir.IRDst.size), + n.zeroExtend(ir.IRDst.size)) + e.append(m2_expr.ExprAssign(myecx, c)) + e.append(m2_expr.ExprAssign(meip, dst_o)) + e.append(m2_expr.ExprAssign(ir.IRDst, dst_o)) + return e, [] + + +def loopne(ir, instr, dst): + e = [] + meip = mRIP[ir.IRDst.size] + admode = instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + c &= zf ^ m2_expr.ExprInt(1, 1) + + e.append(m2_expr.ExprAssign(myecx, myecx - m2_expr.ExprInt(1, myecx.size))) + dst_o = m2_expr.ExprCond(c, + dst.zeroExtend(ir.IRDst.size), + n.zeroExtend(ir.IRDst.size)) + e.append(m2_expr.ExprAssign(meip, dst_o)) + e.append(m2_expr.ExprAssign(ir.IRDst, dst_o)) + return 
e, [] + + +def loope(ir, instr, dst): + e = [] + meip = mRIP[ir.IRDst.size] + admode = instr.v_admode() + myecx = mRCX[instr.mode][:admode] + + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + c &= zf + e.append(m2_expr.ExprAssign(myecx, myecx - m2_expr.ExprInt(1, myecx.size))) + dst_o = m2_expr.ExprCond(c, + dst.zeroExtend(ir.IRDst.size), + n.zeroExtend(ir.IRDst.size)) + e.append(m2_expr.ExprAssign(meip, dst_o)) + e.append(m2_expr.ExprAssign(ir.IRDst, dst_o)) + return e, [] + +# XXX size to do; eflag + + +def div(ir, instr, src1): + e = [] + size = src1.size + if size == 8: + src2 = mRAX[instr.mode][:16] + elif size in [16, 32, 64]: + s1, s2 = mRDX[size], mRAX[size] + src2 = m2_expr.ExprCompose(s2, s1) + else: + raise ValueError('div arg not impl', src1) + + c_d = m2_expr.ExprOp('udiv', src2, src1.zeroExtend(src2.size)) + c_r = m2_expr.ExprOp('umod', src2, src1.zeroExtend(src2.size)) + + # if 8 bit div, only ax is assigned + if size == 8: + e.append(m2_expr.ExprAssign(src2, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) + else: + e.append(m2_expr.ExprAssign(s1, c_r[:size])) + e.append(m2_expr.ExprAssign(s2, c_d[:size])) + + loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) + + do_div = [] + do_div += e + do_div.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) + + do_except = [] + do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( + EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) + + e = [] + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(src1, 
loc_div_expr, loc_except_expr))) + + return e, [blk_div, blk_except] + + +# XXX size to do; eflag + +def idiv(ir, instr, src1): + e = [] + size = src1.size + + if size == 8: + src2 = mRAX[instr.mode][:16] + elif size in [16, 32, 64]: + s1, s2 = mRDX[size], mRAX[size] + src2 = m2_expr.ExprCompose(s2, s1) + else: + raise ValueError('div arg not impl', src1) + + c_d = m2_expr.ExprOp('sdiv', src2, src1.signExtend(src2.size)) + c_r = m2_expr.ExprOp('smod', src2, src1.signExtend(src2.size)) + + # if 8 bit div, only ax is assigned + if size == 8: + e.append(m2_expr.ExprAssign(src2, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) + else: + e.append(m2_expr.ExprAssign(s1, c_r[:size])) + e.append(m2_expr.ExprAssign(s2, c_d[:size])) + + loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) + + do_div = [] + do_div += e + do_div.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) + + do_except = [] + do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( + EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) + + e = [] + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(src1, loc_div_expr, loc_except_expr))) + + return e, [blk_div, blk_except] + + +# XXX size to do; eflag + + +def mul(_, instr, src1): + e = [] + size = src1.size + if src1.size in [16, 32, 64]: + result = m2_expr.ExprOp('*', + mRAX[size].zeroExtend(size * 2), + src1.zeroExtend(size * 2)) + e.append(m2_expr.ExprAssign(mRAX[size], result[:size])) + e.append(m2_expr.ExprAssign(mRDX[size], result[size:size * 2])) + + elif src1.size == 8: + result = m2_expr.ExprOp('*', + mRAX[instr.mode][:8].zeroExtend(16), + src1.zeroExtend(16)) + 
e.append(m2_expr.ExprAssign(mRAX[instr.mode][:16], result)) + else: + raise ValueError('unknow size') + + e.append(m2_expr.ExprAssign(of, m2_expr.ExprCond(result[size:size * 2], + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)))) + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprCond(result[size:size * 2], + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)))) + + return e, [] + + +def imul(_, instr, src1, src2=None, src3=None): + e = [] + size = src1.size + if src2 is None: + if size in [16, 32, 64]: + result = m2_expr.ExprOp('*', + mRAX[size].signExtend(size * 2), + src1.signExtend(size * 2)) + e.append(m2_expr.ExprAssign(mRAX[size], result[:size])) + e.append(m2_expr.ExprAssign(mRDX[size], result[size:size * 2])) + elif size == 8: + dst = mRAX[instr.mode][:16] + result = m2_expr.ExprOp('*', + mRAX[instr.mode][:8].signExtend(16), + src1.signExtend(16)) + + e.append(m2_expr.ExprAssign(dst, result)) + value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + e.append(m2_expr.ExprAssign(cf, value)) + value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + e.append(m2_expr.ExprAssign(of, value)) + + else: + if src3 is None: + src3 = src2 + src2 = src1 + result = m2_expr.ExprOp('*', + src2.signExtend(size * 2), + src3.signExtend(size * 2)) + e.append(m2_expr.ExprAssign(src1, result[:size])) + + value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + e.append(m2_expr.ExprAssign(cf, value)) + value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + e.append(m2_expr.ExprAssign(of, value)) + return e, [] + + +def cbw(_, instr): + # Only in 16 bit + e = [] + tempAL = mRAX[instr.v_opmode()][:8] + tempAX = mRAX[instr.v_opmode()][:16] + e.append(m2_expr.ExprAssign(tempAX, tempAL.signExtend(16))) + return e, [] + + +def cwde(_, 
instr): + # Only in 32/64 bit + e = [] + tempAX = mRAX[instr.v_opmode()][:16] + tempEAX = mRAX[instr.v_opmode()][:32] + e.append(m2_expr.ExprAssign(tempEAX, tempAX.signExtend(32))) + return e, [] + + +def cdqe(_, instr): + # Only in 64 bit + e = [] + tempEAX = mRAX[instr.mode][:32] + tempRAX = mRAX[instr.mode][:64] + e.append(m2_expr.ExprAssign(tempRAX, tempEAX.signExtend(64))) + return e, [] + + +def cwd(_, instr): + # Only in 16 bit + e = [] + tempAX = mRAX[instr.mode][:16] + tempDX = mRDX[instr.mode][:16] + result = tempAX.signExtend(32) + e.append(m2_expr.ExprAssign(tempAX, result[:16])) + e.append(m2_expr.ExprAssign(tempDX, result[16:32])) + return e, [] + + +def cdq(_, instr): + # Only in 32/64 bit + e = [] + tempEAX = mRAX[instr.v_opmode()] + tempEDX = mRDX[instr.v_opmode()] + result = tempEAX.signExtend(64) + e.append(m2_expr.ExprAssign(tempEDX, result[32:64])) + return e, [] + + +def cqo(_, instr): + # Only in 64 bit + e = [] + tempRAX = mRAX[instr.mode][:64] + tempRDX = mRDX[instr.mode][:64] + result = tempRAX.signExtend(128) + e.append(m2_expr.ExprAssign(tempRAX, result[:64])) + e.append(m2_expr.ExprAssign(tempRDX, result[64:128])) + return e, [] + + +def stos(ir, instr, size): + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + addr_o = mRDI[instr.mode][:instr.v_admode()] + addr = addr_o + addr_p = addr + m2_expr.ExprInt(size // 8, addr.size) + addr_m = addr - m2_expr.ExprInt(size // 8, addr.size) + if ir.do_str_segm: + mss = ES + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + addr = ir.gen_segm_expr(mss, addr) + + + b = mRAX[instr.mode][:size] + + e0 = [] + e0.append(m2_expr.ExprAssign(addr_o, addr_p)) + e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) + + e1 = [] + 
e1.append(m2_expr.ExprAssign(addr_o, addr_m)) + e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) + + e = [] + e.append(m2_expr.ExprAssign(ir.ExprMem(addr, size), b)) + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) + return e, [e0, e1] + + +def lods(ir, instr, size): + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + e = [] + + addr_o = mRSI[instr.mode][:instr.v_admode()] + addr = addr_o + addr_p = addr + m2_expr.ExprInt(size // 8, addr.size) + addr_m = addr - m2_expr.ExprInt(size // 8, addr.size) + if ir.do_str_segm: + mss = DS + if instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + addr = ir.gen_segm_expr(mss, addr) + + + b = mRAX[instr.mode][:size] + + e0 = [] + e0.append(m2_expr.ExprAssign(addr_o, addr_p)) + e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) + + e1 = [] + e1.append(m2_expr.ExprAssign(addr_o, addr_m)) + e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) + + e = [] + if instr.mode == 64 and b.size == 32: + e.append(m2_expr.ExprAssign(mRAX[instr.mode], + ir.ExprMem(addr, size).zeroExtend(64))) + else: + e.append(m2_expr.ExprAssign(b, ir.ExprMem(addr, size))) + + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) + return e, [e0, e1] + + +def movs(ir, instr, size): + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) + + dst = mRDI[instr.mode][:instr.v_admode()] + src = mRSI[instr.mode][:instr.v_admode()] + + e = [] + if ir.do_str_segm: + if 
instr.additional_info.g2.value: + raise NotImplementedError("add segm support") + src_sgm = ir.gen_segm_expr(DS, src) + dst_sgm = ir.gen_segm_expr(ES, dst) + + else: + src_sgm = src + dst_sgm = dst + + offset = m2_expr.ExprInt(size // 8, src.size) + + e.append(m2_expr.ExprAssign(ir.ExprMem(dst_sgm, size), + ir.ExprMem(src_sgm, size))) + + e0 = [] + e0.append(m2_expr.ExprAssign(src, src + offset)) + e0.append(m2_expr.ExprAssign(dst, dst + offset)) + e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) + + e1 = [] + e1.append(m2_expr.ExprAssign(src, src - offset)) + e1.append(m2_expr.ExprAssign(dst, dst - offset)) + e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) + + e.append(m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) + return e, [e0, e1] + + +def movsd(_, instr, dst, src): + # 64 bits access + if dst.is_id() and src.is_id(): + src = src[:64] + dst = dst[:64] + elif dst.is_mem() and src.is_id(): + dst = m2_expr.ExprMem(dst.ptr, 64) + src = src[:64] + else: + src = m2_expr.ExprMem(src.ptr, 64) + # Erase dst high bits + src = src.zeroExtend(dst.size) + return [m2_expr.ExprAssign(dst, src)], [] + + +def movsd_dispatch(ir, instr, dst=None, src=None): + if dst is None and src is None: + return movs(ir, instr, 32) + else: + return movsd(ir, instr, dst, src) + + +def float_prev(flt, popcount=1): + if not flt in float_list: + return None + i = float_list.index(flt) + if i < popcount: + # Drop value (ex: FSTP ST(0)) + return None + flt = float_list[i - popcount] + return flt + + +def float_pop(avoid_flt=None, popcount=1): + """ + Generate floatpop semantic (@popcount times), avoiding the avoid_flt@ float + @avoid_flt: float avoided in the generated semantic + @popcount: pop count + """ + avoid_flt = float_prev(avoid_flt, popcount) + e = [] + for i in range(8 - popcount): + if avoid_flt != float_list[i]: + 
e.append(m2_expr.ExprAssign(float_list[i], + float_list[i + popcount])) + fill_value = m2_expr.ExprOp("sint_to_fp", m2_expr.ExprInt(0, 64)) + for i in range(8 - popcount, 8): + e.append(m2_expr.ExprAssign(float_list[i], + fill_value)) + e.append( + m2_expr.ExprAssign(float_stack_ptr, + float_stack_ptr - m2_expr.ExprInt(popcount, 3))) + return e + +# XXX TODO + + +def fcom(_, instr, dst=None, src=None): + + if dst is None and src is None: + dst, src = float_st0, float_st1 + elif src is None: + src = mem2double(instr, dst) + dst = float_st0 + + e = [] + + e.append(m2_expr.ExprAssign(float_c0, m2_expr.ExprOp('fcom_c0', dst, src))) + e.append(m2_expr.ExprAssign(float_c1, m2_expr.ExprOp('fcom_c1', dst, src))) + e.append(m2_expr.ExprAssign(float_c2, m2_expr.ExprOp('fcom_c2', dst, src))) + e.append(m2_expr.ExprAssign(float_c3, m2_expr.ExprOp('fcom_c3', dst, src))) + + e += set_float_cs_eip(instr) + return e, [] + + +def ftst(_, instr): + dst = float_st0 + + e = [] + src = m2_expr.ExprOp('sint_to_fp', m2_expr.ExprInt(0, 64)) + e.append(m2_expr.ExprAssign(float_c0, m2_expr.ExprOp('fcom_c0', dst, src))) + e.append(m2_expr.ExprAssign(float_c1, m2_expr.ExprOp('fcom_c1', dst, src))) + e.append(m2_expr.ExprAssign(float_c2, m2_expr.ExprOp('fcom_c2', dst, src))) + e.append(m2_expr.ExprAssign(float_c3, m2_expr.ExprOp('fcom_c3', dst, src))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fxam(ir, instr): + """ + NaN: + C3, C2, C0 = 001; + Normal: + C3, C2, C0 = 010; + Infinity: + C3, C2, C0 = 011; + Zero: + C3, C2, C0 = 100; + Empty: + C3, C2, C0 = 101; + Denormal: + C3, C2, C0 = 110; + + C1 = sign bit of ST; (* 0 for positive, 1 for negative *) + """ + dst = float_st0 + + # Empty not handled + locs = {} + for name in ["NaN", "Normal", "Infinity", "Zero", "Denormal"]: + locs[name] = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) + + # if Denormal: + # if zero: + # do_zero + # else: + 
# do_denormal + # else: + # if Nan: + # do_nan + # else: + # if infinity: + # do_infinity + # else: + # do_normal + + irdst = m2_expr.ExprCond( + m2_expr.expr_is_IEEE754_denormal(dst), + m2_expr.ExprCond(m2_expr.expr_is_IEEE754_zero(dst), + locs["Zero"][1], + locs["Denormal"][1], + ), + m2_expr.ExprCond(m2_expr.expr_is_NaN(dst), + locs["NaN"][1], + m2_expr.ExprCond(m2_expr.expr_is_infinite(dst), + locs["Infinity"][1], + locs["Normal"][1], + ) + ) + ) + base = [m2_expr.ExprAssign(ir.IRDst, irdst), + m2_expr.ExprAssign(float_c1, dst.msb()) + ] + base += set_float_cs_eip(instr) + + out = [ + IRBlock(locs["Zero"][0], [AssignBlock({ + float_c0: m2_expr.ExprInt(0, float_c0.size), + float_c2: m2_expr.ExprInt(0, float_c2.size), + float_c3: m2_expr.ExprInt(1, float_c3.size), + ir.IRDst: loc_next_expr, + }, instr)]), + IRBlock(locs["Denormal"][0], [AssignBlock({ + float_c0: m2_expr.ExprInt(0, float_c0.size), + float_c2: m2_expr.ExprInt(1, float_c2.size), + float_c3: m2_expr.ExprInt(1, float_c3.size), + ir.IRDst: loc_next_expr, + }, instr)]), + IRBlock(locs["NaN"][0], [AssignBlock({ + float_c0: m2_expr.ExprInt(1, float_c0.size), + float_c2: m2_expr.ExprInt(0, float_c2.size), + float_c3: m2_expr.ExprInt(0, float_c3.size), + ir.IRDst: loc_next_expr, + }, instr)]), + IRBlock(locs["Infinity"][0], [AssignBlock({ + float_c0: m2_expr.ExprInt(1, float_c0.size), + float_c2: m2_expr.ExprInt(1, float_c2.size), + float_c3: m2_expr.ExprInt(0, float_c3.size), + ir.IRDst: loc_next_expr, + }, instr)]), + IRBlock(locs["Normal"][0], [AssignBlock({ + float_c0: m2_expr.ExprInt(0, float_c0.size), + float_c2: m2_expr.ExprInt(1, float_c2.size), + float_c3: m2_expr.ExprInt(0, float_c3.size), + ir.IRDst: loc_next_expr, + }, instr)]), + ] + return base, out + + +def ficom(_, instr, dst, src=None): + + dst, src = float_implicit_st0(dst, src) + + e = [] + + e.append(m2_expr.ExprAssign(float_c0, + m2_expr.ExprOp('fcom_c0', dst, + src.zeroExtend(dst.size)))) + e.append(m2_expr.ExprAssign(float_c1, + 
m2_expr.ExprOp('fcom_c1', dst, + src.zeroExtend(dst.size)))) + e.append(m2_expr.ExprAssign(float_c2, + m2_expr.ExprOp('fcom_c2', dst, + src.zeroExtend(dst.size)))) + e.append(m2_expr.ExprAssign(float_c3, + m2_expr.ExprOp('fcom_c3', dst, + src.zeroExtend(dst.size)))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fcomi(_, instr, dst=None, src=None): + # TODO unordered float + if dst is None and src is None: + dst, src = float_st0, float_st1 + elif src is None: + src = dst + dst = float_st0 + + e = [] + + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp('fcom_c0', dst, src))) + e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp('fcom_c2', dst, src))) + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('fcom_c3', dst, src))) + + e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fcomip(ir, instr, dst=None, src=None): + e, extra = fcomi(ir, instr, dst, src) + e += float_pop() + e += set_float_cs_eip(instr) + return e, extra + + +def fucomi(ir, instr, dst=None, src=None): + # TODO unordered float + return fcomi(ir, instr, dst, src) + + +def fucomip(ir, instr, dst=None, src=None): + # TODO unordered float + return fcomip(ir, instr, dst, src) + + +def fcomp(ir, instr, dst=None, src=None): + e, extra = fcom(ir, instr, dst, src) + e += float_pop() + e += set_float_cs_eip(instr) + return e, extra + + +def fcompp(ir, instr, dst=None, src=None): + e, extra = fcom(ir, instr, dst, src) + e += float_pop(popcount=2) + e += set_float_cs_eip(instr) + return e, extra + + +def ficomp(ir, instr, dst, src=None): + e, extra = ficom(ir, instr, dst, src) + e += float_pop() + e += set_float_cs_eip(instr) + return e, extra + + +def fucom(ir, instr, dst=None, src=None): + # TODO unordered float + return fcom(ir, instr, dst, src) + + +def fucomp(ir, instr, dst=None, src=None): + # TODO unordered float + return 
fcomp(ir, instr, dst, src) + + +def fucompp(ir, instr, dst=None, src=None): + # TODO unordered float + return fcompp(ir, instr, dst, src) + + +def comiss(_, instr, dst, src): + # TODO unordered float + + e = [] + + dst = m2_expr.ExprOp('sint_to_fp', dst[:32]) + src = m2_expr.ExprOp('sint_to_fp', src[:32]) + + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp('fcom_c0', dst, src))) + e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp('fcom_c2', dst, src))) + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('fcom_c3', dst, src))) + + e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + + e += set_float_cs_eip(instr) + return e, [] + + +def comisd(_, instr, dst, src): + # TODO unordered float + + e = [] + + dst = m2_expr.ExprOp('sint_to_fp', dst[:64]) + src = m2_expr.ExprOp('sint_to_fp', src[:64]) + + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp('fcom_c0', dst, src))) + e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp('fcom_c2', dst, src))) + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('fcom_c3', dst, src))) + + e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fld(_, instr, src): + + if src.size == 32: + src = m2_expr.ExprOp("fpconvert_fp64", src) + if isinstance(src, m2_expr.ExprMem) and src.size > 64: + raise NotImplementedError('convert from 80bits') + + e = [] + e.append(m2_expr.ExprAssign(float_st7, float_st6)) + e.append(m2_expr.ExprAssign(float_st6, float_st5)) + e.append(m2_expr.ExprAssign(float_st5, float_st4)) + e.append(m2_expr.ExprAssign(float_st4, float_st3)) + e.append(m2_expr.ExprAssign(float_st3, float_st2)) + e.append(m2_expr.ExprAssign(float_st2, float_st1)) + e.append(m2_expr.ExprAssign(float_st1, float_st0)) + e.append(m2_expr.ExprAssign(float_st0, src)) + 
e.append( + m2_expr.ExprAssign(float_stack_ptr, + float_stack_ptr + m2_expr.ExprInt(1, 3))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fst(_, instr, dst): + e = [] + + if isinstance(dst, m2_expr.ExprMem) and dst.size > 64: + raise NotImplementedError('convert to 80bits') + src = float_st0 + + if dst.size == 32: + src = m2_expr.ExprOp("fpconvert_fp32", src) + e.append(m2_expr.ExprAssign(dst, src)) + e += set_float_cs_eip(instr) + return e, [] + + +def fstp(ir, instr, dst): + e = [] + + if isinstance(dst, m2_expr.ExprMem) and dst.size > 64: + raise NotImplementedError('convert to 80bits') + + if isinstance(dst, m2_expr.ExprMem): + src = float_st0 + if dst.size == 32: + src = m2_expr.ExprOp("fpconvert_fp32", src) + e.append(m2_expr.ExprAssign(dst, src)) + else: + src = float_st0 + if float_list.index(dst) > 1: + # a = st0 -> st0 is dropped + # a = st1 -> st0 = st0, useless + e.append(m2_expr.ExprAssign(float_prev(dst), src)) + + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fist(_, instr, dst): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fp_to_sint%d' % dst.size, + float_st0))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fistp(ir, instr, dst): + e, extra = fist(ir, instr, dst) + e += float_pop(dst) + return e, extra + + +def fisttp(_, instr, dst): + e = [] + e.append(m2_expr.ExprAssign( + dst, + m2_expr.ExprOp('fp_to_sint%d' % dst.size, + m2_expr.ExprOp('fpround_towardszero', float_st0) + ))) + + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fild(ir, instr, src): + # XXXXX + src = m2_expr.ExprOp('sint_to_fp', src.signExtend(64)) + e = [] + e += set_float_cs_eip(instr) + e_fld, extra = fld(ir, instr, src) + e += e_fld + return e, extra + + +def fldz(ir, instr): + return fld(ir, instr, m2_expr.ExprOp('sint_to_fp', m2_expr.ExprInt(0, 64))) + + +def fld1(ir, instr): + return fld(ir, instr, m2_expr.ExprOp('sint_to_fp', m2_expr.ExprInt(1, 64))) + + +def fldl2t(ir, instr): + 
value_f = math.log(10) / math.log(2) + value = struct.unpack('Q', struct.pack('d', value_f))[0] + return fld(ir, instr, m2_expr.ExprOp( + 'sint_to_fp', + m2_expr.ExprInt(value, 64) + )) + + +def fldpi(ir, instr): + value_f = math.pi + value = struct.unpack('Q', struct.pack('d', value_f))[0] + return fld(ir, instr, m2_expr.ExprOp( + 'sint_to_fp', + m2_expr.ExprInt(value, 64) + )) + + +def fldln2(ir, instr): + value_f = math.log(2) + value = struct.unpack('Q', struct.pack('d', value_f))[0] + return fld(ir, instr, m2_expr.ExprOp('mem_64_to_double', + m2_expr.ExprInt(value, 64))) + + +def fldl2e(ir, instr): + x = struct.pack('d', 1 / math.log(2)) + x = struct.unpack('Q', x)[0] + return fld(ir, instr, m2_expr.ExprOp('mem_64_to_double', + m2_expr.ExprInt(x, 64))) + + +def fldlg2(ir, instr): + x = struct.pack('d', math.log10(2)) + x = struct.unpack('Q', x)[0] + return fld(ir, instr, m2_expr.ExprOp('mem_64_to_double', + m2_expr.ExprInt(x, 64))) + + +def fadd(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fadd', dst, src))) + + e += set_float_cs_eip(instr) + return e, [] + + +def fiadd(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fiadd', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fisub(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fisub', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fisubr(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fisub', src, dst))) + e += set_float_cs_eip(instr) + return e, [] + + +def fpatan(_, instr): + e = [] + a = float_st1 + 
e.append(m2_expr.ExprAssign(float_prev(a), + m2_expr.ExprOp('fpatan', float_st0, float_st1))) + e += set_float_cs_eip(instr) + e += float_pop(a) + return e, [] + + +def fprem(_, instr): + e = [] + e.append( + m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fprem', float_st0, float_st1))) + # Remaining bits (ex: used in argument reduction in tan) + quotient = m2_expr.ExprOp('fp_to_sint32', m2_expr.ExprOp('fpround_towardszero', m2_expr.ExprOp('fdiv', float_st0, float_st1))) + e += [m2_expr.ExprAssign(float_c0, quotient[2:3]), + m2_expr.ExprAssign(float_c3, quotient[1:2]), + m2_expr.ExprAssign(float_c1, quotient[0:1]), + # Consider the reduction is always completed + m2_expr.ExprAssign(float_c2, m2_expr.ExprInt(0, 1)), + ] + e += set_float_cs_eip(instr) + return e, [] + + +def fprem1(_, instr): + e = [] + e.append( + m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fprem1', float_st0, float_st1))) + e += set_float_cs_eip(instr) + return e, [] + + +def faddp(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fadd', dst, src))) + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fninit(_, instr): + e = [] + e += set_float_cs_eip(instr) + return e, [] + + +def fyl2x(_, instr): + e = [] + a = float_st1 + e.append( + m2_expr.ExprAssign(float_prev(a), m2_expr.ExprOp('fyl2x', float_st0, float_st1))) + e += set_float_cs_eip(instr) + e += float_pop(a) + return e, [] + + +def fnstenv(ir, instr, dst): + e = [] + # XXX TODO tag word, ... 
+ status_word = m2_expr.ExprCompose(m2_expr.ExprInt(0, 8), + float_c0, float_c1, float_c2, + float_stack_ptr, float_c3, + m2_expr.ExprInt(0, 1)) + + s = instr.mode + # The behaviour in 64bit is identical to 32 bit + # This will truncate addresses + size = min(32, s) + ad = ir.ExprMem(dst.ptr, size=16) + e.append(m2_expr.ExprAssign(ad, float_control)) + ad = ir.ExprMem( + dst.ptr + m2_expr.ExprInt( + size // (8 * 1), + dst.ptr.size + ), + size=16 + ) + e.append(m2_expr.ExprAssign(ad, status_word)) + ad = ir.ExprMem( + dst.ptr + m2_expr.ExprInt( + size // (8 * 3), + dst.ptr.size + ), + size=size + ) + e.append(m2_expr.ExprAssign(ad, float_eip[:size])) + ad = ir.ExprMem( + dst.ptr + m2_expr.ExprInt( + size // (8 * 4), + dst.ptr.size + ), + size=16 + ) + e.append(m2_expr.ExprAssign(ad, float_cs)) + ad = ir.ExprMem( + dst.ptr + m2_expr.ExprInt( + size // (8 * 5), + dst.ptr.size + ), + size=size + ) + e.append(m2_expr.ExprAssign(ad, float_address[:size])) + ad = ir.ExprMem( + dst.ptr + m2_expr.ExprInt( + size // (8 * 6), + dst.ptr.size + ), + size=16 + ) + e.append(m2_expr.ExprAssign(ad, float_ds)) + return e, [] + + +def fldenv(ir, instr, src): + e = [] + # Inspired from fnstenv (same TODOs / issues) + + s = instr.mode + # The behaviour in 64bit is identical to 32 bit + # This will truncate addresses + size = min(32, s) + + # Float control + ad = ir.ExprMem(src.ptr, size=16) + e.append(m2_expr.ExprAssign(float_control, ad)) + + # Status word + ad = ir.ExprMem( + src.ptr + m2_expr.ExprInt( + size // (8 * 1), + size=src.ptr.size + ), + size=16 + ) + e += [ + m2_expr.ExprAssign(x, y) for x, y in ((float_c0, ad[8:9]), + (float_c1, ad[9:10]), + (float_c2, ad[10:11]), + (float_stack_ptr, ad[11:14]), + (float_c3, ad[14:15])) + ] + + # EIP, CS, Address, DS + for offset, target in ( + (3, float_eip[:size]), + (4, float_cs), + (5, float_address[:size]), + (6, float_ds) + ): + ad = ir.ExprMem( + src.ptr + m2_expr.ExprInt( + size // ( 8 * offset), + size=src.ptr.size + ), + 
size=target.size + ) + e.append(m2_expr.ExprAssign(target, ad)) + + return e, [] + + +def fsub(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fsub', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fsubp(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fsub', dst, src))) + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fsubr(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fsub', src, dst))) + e += set_float_cs_eip(instr) + return e, [] + + +def fsubrp(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fsub', src, dst))) + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fmul(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fmul', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fimul(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fimul', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fdiv(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fdiv', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fdivr(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, 
m2_expr.ExprOp('fdiv', src, dst))) + e += set_float_cs_eip(instr) + return e, [] + + +def fdivrp(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fdiv', src, dst))) + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fidiv(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fidiv', dst, src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fidivr(_, instr, dst, src=None): + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fidiv', src, dst))) + e += set_float_cs_eip(instr) + return e, [] + + +def fdivp(_, instr, dst, src=None): + # Invalid emulation + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fdiv', dst, src))) + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def fmulp(_, instr, dst, src=None): + # Invalid emulation + dst, src = float_implicit_st0(dst, src) + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fmul', dst, src))) + e += set_float_cs_eip(instr) + e += float_pop(dst) + return e, [] + + +def ftan(_, instr, src): + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('ftan', src))) + e += set_float_cs_eip(instr) + return e, [] + + +def fxch(_, instr, src): + e = [] + src = mem2double(instr, src) + e.append(m2_expr.ExprAssign(float_st0, src)) + e.append(m2_expr.ExprAssign(src, float_st0)) + e += set_float_cs_eip(instr) + return e, [] + + +def fptan(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st7, float_st6)) + e.append(m2_expr.ExprAssign(float_st6, float_st5)) + 
e.append(m2_expr.ExprAssign(float_st5, float_st4)) + e.append(m2_expr.ExprAssign(float_st4, float_st3)) + e.append(m2_expr.ExprAssign(float_st3, float_st2)) + e.append(m2_expr.ExprAssign(float_st2, float_st1)) + e.append(m2_expr.ExprAssign(float_st1, m2_expr.ExprOp('ftan', float_st0))) + e.append( + m2_expr.ExprAssign( + float_st0, + m2_expr.ExprOp( + 'sint_to_fp', + m2_expr.ExprInt(1, 64) + ) + ) + ) + e.append( + m2_expr.ExprAssign(float_stack_ptr, + float_stack_ptr + m2_expr.ExprInt(1, 3))) + return e, [] + + +def frndint(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('frndint', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fsin(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fsin', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fcos(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fcos', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fsincos(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st7, float_st6)) + e.append(m2_expr.ExprAssign(float_st6, float_st5)) + e.append(m2_expr.ExprAssign(float_st5, float_st4)) + e.append(m2_expr.ExprAssign(float_st4, float_st3)) + e.append(m2_expr.ExprAssign(float_st3, float_st2)) + e.append(m2_expr.ExprAssign(float_st2, float_st1)) + e.append(m2_expr.ExprAssign(float_st1, m2_expr.ExprOp('fsin', float_st0))) + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fcos', float_st0))) + e.append( + m2_expr.ExprAssign(float_stack_ptr, + float_stack_ptr + m2_expr.ExprInt(1, 3))) + return e, [] + + +def fscale(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fscale', float_st0, + float_st1))) + e += set_float_cs_eip(instr) + return e, [] + + +def f2xm1(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('f2xm1', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fchs(_, instr): + e = [] + 
e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fchs', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fsqrt(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fsqrt', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fabs(_, instr): + e = [] + e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fabs', float_st0))) + e += set_float_cs_eip(instr) + return e, [] + + +def fnstsw(_, instr, dst): + args = [ + # Exceptions -> 0 + m2_expr.ExprInt(0, 8), + float_c0, + float_c1, + float_c2, + float_stack_ptr, + float_c3, + # B: FPU is not busy -> 0 + m2_expr.ExprInt(0, 1)] + e = [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*args))] + return e, [] + + +def fnstcw(_, instr, dst): + e = [] + e.append(m2_expr.ExprAssign(dst, float_control)) + return e, [] + + +def fldcw(_, instr, src): + e = [] + e.append(m2_expr.ExprAssign(float_control, src)) + return e, [] + + +def fwait(_, instr): + return [], [] + + +def fcmovb(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, cf, arg1, arg2, True) + + +def fcmove(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, zf, arg1, arg2, True) + + +def fcmovbe(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, cf | zf, arg1, arg2, True) + + +def fcmovu(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, pf, arg1, arg2, True) + + +def fcmovnb(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, cf, arg1, arg2, False) + + +def fcmovne(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, zf, arg1, arg2, False) + + +def fcmovnbe(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, cf | zf, arg1, arg2, False) + + +def fcmovnu(ir, instr, arg1, arg2): + return gen_fcmov(ir, instr, pf, arg1, arg2, False) + + +def nop(_, instr, a=None): + return [], [] + + +def prefetch0(_, instr, src=None): + # see 4-198 on this documentation + # 
https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def prefetch1(_, instr, src=None): + # see 4-198 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def prefetch2(_, instr, src=None): + # see 4-198 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def prefetchw(_, instr, src=None): + # see 4-201 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + +def prefetchnta(_, instr, src=None): + # see 4-201 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def lfence(_, instr, src=None): + # see 3-485 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def mfence(_, instr, src=None): + # see 3-516 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def sfence(_, instr, src=None): + # see 3-356 on this documentation + # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf + return [], [] + + +def ud2(_, instr, src=None): + e = 
[m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( + EXCEPT_ILLEGAL_INSN, exception_flags.size))] + return e, [] + + +def hlt(_, instr): + e = [] + except_int = EXCEPT_PRIV_INSN + e.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(except_int, 32))) + return e, [] + + +def rdtsc(_, instr): + e = [] + e.append(m2_expr.ExprAssign(tsc, tsc + m2_expr.ExprInt(1, 64))) + e.append(m2_expr.ExprAssign(mRAX[32], tsc[:32])) + e.append(m2_expr.ExprAssign(mRDX[32], tsc[32:])) + return e, [] + + +def daa(_, instr): + e = [] + r_al = mRAX[instr.mode][:8] + + cond1 = m2_expr.expr_is_unsigned_greater(r_al[:4], m2_expr.ExprInt(0x9, 4)) | af + e.append(m2_expr.ExprAssign(af, cond1)) + + cond2 = m2_expr.expr_is_unsigned_greater(m2_expr.ExprInt(6, 8), r_al) + cond3 = m2_expr.expr_is_unsigned_greater(r_al, m2_expr.ExprInt(0x99, 8)) | cf + + cf_c1 = m2_expr.ExprCond(cond1, + cf | (cond2), + m2_expr.ExprInt(0, 1)) + new_cf = m2_expr.ExprCond(cond3, + m2_expr.ExprInt(1, 1), + m2_expr.ExprInt(0, 1)) + e.append(m2_expr.ExprAssign(cf, new_cf)) + + al_c1 = m2_expr.ExprCond(cond1, + r_al + m2_expr.ExprInt(6, 8), + r_al) + + new_al = m2_expr.ExprCond(cond3, + al_c1 + m2_expr.ExprInt(0x60, 8), + al_c1) + e.append(m2_expr.ExprAssign(r_al, new_al)) + e += update_flag_znp(new_al) + return e, [] + + +def das(_, instr): + e = [] + r_al = mRAX[instr.mode][:8] + + cond1 = m2_expr.expr_is_unsigned_greater(r_al[:4], m2_expr.ExprInt(0x9, 4)) | af + e.append(m2_expr.ExprAssign(af, cond1)) + + cond2 = m2_expr.expr_is_unsigned_greater(m2_expr.ExprInt(6, 8), r_al) + cond3 = m2_expr.expr_is_unsigned_greater(r_al, m2_expr.ExprInt(0x99, 8)) | cf + + cf_c1 = m2_expr.ExprCond(cond1, + cf | (cond2), + m2_expr.ExprInt(0, 1)) + new_cf = m2_expr.ExprCond(cond3, + m2_expr.ExprInt(1, 1), + cf_c1) + e.append(m2_expr.ExprAssign(cf, new_cf)) + + al_c1 = m2_expr.ExprCond(cond1, + r_al - m2_expr.ExprInt(6, 8), + r_al) + + new_al = m2_expr.ExprCond(cond3, + al_c1 - m2_expr.ExprInt(0x60, 8), + al_c1) + 
e.append(m2_expr.ExprAssign(r_al, new_al)) + e += update_flag_znp(new_al) + return e, [] + + +def aam(ir, instr, src): + e = [] + assert src.is_int() + + value = int(src) + if value: + tempAL = mRAX[instr.mode][0:8] + newEAX = m2_expr.ExprCompose( + m2_expr.ExprOp("umod", tempAL, src), + m2_expr.ExprOp("udiv", tempAL, src), + mRAX[instr.mode][16:] + ) + e += [m2_expr.ExprAssign(mRAX[instr.mode], newEAX)] + e += update_flag_arith(newEAX) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + else: + e.append( + m2_expr.ExprAssign( + exception_flags, + m2_expr.ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size) + ) + ) + return e, [] + + +def aad(_, instr, src): + e = [] + tempAL = mRAX[instr.mode][0:8] + tempAH = mRAX[instr.mode][8:16] + newEAX = m2_expr.ExprCompose((tempAL + (tempAH * src)) & m2_expr.ExprInt(0xFF, 8), + m2_expr.ExprInt(0, 8), + mRAX[instr.mode][16:]) + e += [m2_expr.ExprAssign(mRAX[instr.mode], newEAX)] + e += update_flag_arith(newEAX) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + return e, [] + + +def _tpl_aaa(_, instr, op): + """Templating for aaa, aas with operation @op + @op: operation to apply + """ + e = [] + r_al = mRAX[instr.mode][:8] + r_ah = mRAX[instr.mode][8:16] + r_ax = mRAX[instr.mode][:16] + i0 = m2_expr.ExprInt(0, 1) + i1 = m2_expr.ExprInt(1, 1) + # cond: if (al & 0xf) > 9 OR af == 1 + cond = (r_al & m2_expr.ExprInt(0xf, 8)) - m2_expr.ExprInt(9, 8) + cond = ~cond.msb() & m2_expr.ExprCond(cond, i1, i0) + cond |= af & i1 + + to_add = m2_expr.ExprInt(0x106, size=r_ax.size) + if op == "-": + # Avoid ExprOp("-", A, B), should be ExprOp("+", A, ExprOp("-", B)) + first_part = r_ax - to_add + else: + first_part = m2_expr.ExprOp(op, r_ax, to_add) + new_ax = first_part & m2_expr.ExprInt(0xff0f, + size=r_ax.size) + # set AL + e.append(m2_expr.ExprAssign(r_ax, m2_expr.ExprCond(cond, new_ax, r_ax))) + e.append(m2_expr.ExprAssign(af, cond)) + e.append(m2_expr.ExprAssign(cf, cond)) + return e, [] + + +def aaa(ir, instr): + return 
_tpl_aaa(ir, instr, "+") + + +def aas(ir, instr): + return _tpl_aaa(ir, instr, "-") + + +def bsr_bsf(ir, instr, dst, src, op_func): + """ + IF SRC == 0 + ZF = 1 + DEST is left unchanged + ELSE + ZF = 0 + DEST = @op_func(SRC) + """ + loc_src_null, loc_src_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_src_not_null, loc_src_not_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) + + aff_dst = m2_expr.ExprAssign(ir.IRDst, loc_next_expr) + e = [m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(src, + loc_src_not_null_expr, + loc_src_null_expr))] + e_src_null = [] + e_src_null.append(m2_expr.ExprAssign(zf, m2_expr.ExprInt(1, zf.size))) + # XXX destination is undefined + e_src_null.append(aff_dst) + + e_src_not_null = [] + e_src_not_null.append(m2_expr.ExprAssign(zf, m2_expr.ExprInt(0, zf.size))) + e_src_not_null.append(m2_expr.ExprAssign(dst, op_func(src))) + e_src_not_null.append(aff_dst) + + return e, [IRBlock(loc_src_null, [AssignBlock(e_src_null, instr)]), + IRBlock(loc_src_not_null, [AssignBlock(e_src_not_null, instr)])] + + +def bsf(ir, instr, dst, src): + return bsr_bsf(ir, instr, dst, src, + lambda src: m2_expr.ExprOp("cnttrailzeros", src)) + + +def bsr(ir, instr, dst, src): + return bsr_bsf( + ir, instr, dst, src, + lambda src: m2_expr.ExprInt(src.size - 1, src.size) - m2_expr.ExprOp("cntleadzeros", src) + ) + + +def arpl(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(1 << 7, 32))) + return e, [] + + +def ins(_, instr, size): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(1 << 7, 32))) + return e, [] + + +def sidt(ir, instr, dst): + e = [] + if not isinstance(dst, m2_expr.ExprMem) or dst.size != 32: + raise ValueError('not exprmem 32bit instance!!') + ptr = dst.ptr + LOG_X86_SEM.warning("DEFAULT SIDT ADDRESS %s!!", dst) + e.append(m2_expr.ExprAssign(ir.ExprMem(ptr, 32), + 
m2_expr.ExprInt(0xe40007ff, 32))) + e.append( + m2_expr.ExprAssign(ir.ExprMem(ptr + m2_expr.ExprInt(4, ptr.size), 16), + m2_expr.ExprInt(0x8245, 16))) + return e, [] + + +def sldt(_, instr, dst): + LOG_X86_SEM.warning("DEFAULT SLDT ADDRESS %s!!", dst) + e = [m2_expr.ExprAssign(dst, m2_expr.ExprInt(0, dst.size))] + return e, [] + + +def cmovz(ir, instr, dst, src): + #return gen_cmov(ir, instr, zf, dst, src, True) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, src, True) + + +def cmovnz(ir, instr, dst, src): + #return gen_cmov(ir, instr, zf, dst, src, False) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, src, False) + + +def cmovpe(ir, instr, dst, src): + return gen_cmov(ir, instr, pf, dst, src, True) + + +def cmovnp(ir, instr, dst, src): + return gen_cmov(ir, instr, pf, dst, src, False) + + +def cmovge(ir, instr, dst, src): + #return gen_cmov(ir, instr, nf ^ of, dst, src, False) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S>=", nf, of), dst, src, True) + + +def cmovg(ir, instr, dst, src): + #return gen_cmov(ir, instr, zf | (nf ^ of), dst, src, False) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S>", nf, of, zf), dst, src, True) + + +def cmovl(ir, instr, dst, src): + #return gen_cmov(ir, instr, nf ^ of, dst, src, True) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S<", nf, of), dst, src, True) + + +def cmovle(ir, instr, dst, src): + #return gen_cmov(ir, instr, zf | (nf ^ of), dst, src, True) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S<=", nf, of, zf), dst, src, True) + + +def cmova(ir, instr, dst, src): + #return gen_cmov(ir, instr, cf | zf, dst, src, False) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_U>", cf, zf), dst, src, True) + + +def cmovae(ir, instr, dst, src): + #return gen_cmov(ir, instr, cf, dst, src, False) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_U>=", cf), dst, src, True) + + +def cmovbe(ir, instr, dst, src): + #return gen_cmov(ir, instr, cf | zf, dst, src, True) + return gen_cmov(ir, instr, 
m2_expr.ExprOp("CC_U<=", cf, zf), dst, src, True) + + +def cmovb(ir, instr, dst, src): + #return gen_cmov(ir, instr, cf, dst, src, True) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_U<", cf), dst, src, True) + + +def cmovo(ir, instr, dst, src): + return gen_cmov(ir, instr, of, dst, src, True) + + +def cmovno(ir, instr, dst, src): + return gen_cmov(ir, instr, of, dst, src, False) + + +def cmovs(ir, instr, dst, src): + #return gen_cmov(ir, instr, nf, dst, src, True) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, src, True) + + +def cmovns(ir, instr, dst, src): + #return gen_cmov(ir, instr, nf, dst, src, False) + return gen_cmov(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, src, False) + + +def icebp(_, instr): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(EXCEPT_SOFT_BP, 32))) + return e, [] +# XXX + + +def l_int(_, instr, src): + e = [] + # XXX + if src.arg in [1, 3]: + except_int = EXCEPT_SOFT_BP + else: + except_int = EXCEPT_INT_XX + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(except_int, 32))) + e.append(m2_expr.ExprAssign(interrupt_num, src)) + return e, [] + + +def l_sysenter(_, instr): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) + return e, [] + + +def l_syscall(_, instr): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) + return e, [] + +# XXX + + +def l_out(_, instr, src1, src2): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) + return e, [] + +# XXX + + +def l_outs(_, instr, size): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) + return e, [] + +# XXX actually, xlat performs al = (ds:[e]bx + ZeroExtend(al)) + + +def xlat(ir, instr): + e = [] + ptr = mRAX[instr.mode][0:8].zeroExtend(mRBX[instr.mode].size) + src = ir.ExprMem(mRBX[instr.mode] + ptr, 8) + 
e.append(m2_expr.ExprAssign(mRAX[instr.mode][0:8], src)) + return e, [] + + +def cpuid(_, instr): + e = [] + e.append( + m2_expr.ExprAssign(mRAX[instr.mode], + m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(0, instr.mode)))) + e.append( + m2_expr.ExprAssign(mRBX[instr.mode], + m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(1, instr.mode)))) + e.append( + m2_expr.ExprAssign(mRCX[instr.mode], + m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(2, instr.mode)))) + e.append( + m2_expr.ExprAssign(mRDX[instr.mode], + m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(3, instr.mode)))) + return e, [] + + +def bittest_get(ir, instr, src, index): + index = index.zeroExtend(src.size) + if isinstance(src, m2_expr.ExprMem): + b_mask = {16: 4, 32: 5, 64: 6} + b_decal = {16: 1, 32: 3, 64: 7} + ptr = src.ptr + segm = src.is_mem_segm() + if segm: + ptr = ptr.args[1] + + off_bit = index.zeroExtend( + src.size) & m2_expr.ExprInt((1 << b_mask[src.size]) - 1, + src.size) + off_byte = ((index.zeroExtend(ptr.size) >> m2_expr.ExprInt(3, ptr.size)) & + m2_expr.ExprInt(((1 << src.size) - 1) ^ b_decal[src.size], ptr.size)) + + addr = ptr + off_byte + if segm: + addr = ir.gen_segm_expr(src.ptr.args[0], addr) + + d = ir.ExprMem(addr, src.size) + else: + off_bit = m2_expr.ExprOp( + '&', index, m2_expr.ExprInt(src.size - 1, src.size)) + d = src + return d, off_bit + + +def bt(ir, instr, src, index): + e = [] + index = index.zeroExtend(src.size) + d, off_bit = bittest_get(ir, instr, src, index) + d = d >> off_bit + e.append(m2_expr.ExprAssign(cf, d[:1])) + return e, [] + + +def btc(ir, instr, src, index): + e = [] + d, off_bit = bittest_get(ir, instr, src, index) + e.append(m2_expr.ExprAssign(cf, (d >> off_bit)[:1])) + + m = m2_expr.ExprInt(1, src.size) << off_bit + e.append(m2_expr.ExprAssign(d, d ^ m)) + + return e, [] + + +def bts(ir, instr, src, index): + e = [] + d, off_bit = bittest_get(ir, instr, src, index) + 
e.append(m2_expr.ExprAssign(cf, (d >> off_bit)[:1])) + m = m2_expr.ExprInt(1, src.size) << off_bit + e.append(m2_expr.ExprAssign(d, d | m)) + + return e, [] + + +def btr(ir, instr, src, index): + e = [] + d, off_bit = bittest_get(ir, instr, src, index) + e.append(m2_expr.ExprAssign(cf, (d >> off_bit)[:1])) + m = ~(m2_expr.ExprInt(1, src.size) << off_bit) + e.append(m2_expr.ExprAssign(d, d & m)) + + return e, [] + + +def into(_, instr): + return [], [] + + +def l_in(_, instr, src1, src2): + e = [] + e.append(m2_expr.ExprAssign(exception_flags, + m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) + return e, [] + + +@sbuild.parse +def cmpxchg(arg1, arg2): + accumulator = mRAX[instr.v_opmode()][:arg1.size] + if (accumulator - arg1): + zf = i1(0) + accumulator = arg1 + else: + zf = i1(1) + arg1 = arg2 + + +@sbuild.parse +def cmpxchg8b(arg1): + accumulator = {mRAX[32], mRDX[32]} + if accumulator - arg1: + zf = i1(0) + mRAX[32] = arg1[:32] + mRDX[32] = arg1[32:] + else: + zf = i1(1) + arg1 = {mRBX[32], mRCX[32]} + + +@sbuild.parse +def cmpxchg16b(arg1): + accumulator = {mRAX[64], mRDX[64]} + if accumulator - arg1: + zf = i1(0) + mRAX[64] = arg1[:64] + mRDX[64] = arg1[64:] + else: + zf = i1(1) + arg1 = {mRBX[64], mRCX[64]} + + +def lds(ir, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) + DS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), + size=16) + e.append(m2_expr.ExprAssign(DS, DS_value)) + return e, [] + + +def les(ir, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) + ES_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), + size=16) + e.append(m2_expr.ExprAssign(ES, ES_value)) + return e, [] + + +def lss(ir, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) + SS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), + size=16) + e.append(m2_expr.ExprAssign(SS, 
SS_value)) + return e, [] + + +def lfs(ir, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) + FS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), + size=16) + e.append(m2_expr.ExprAssign(FS, FS_value)) + return e, [] + + +def lgs(ir, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) + GS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), + size=16) + e.append(m2_expr.ExprAssign(GS, GS_value)) + return e, [] + + +def lahf(_, instr): + e = [] + args = [cf, m2_expr.ExprInt(1, 1), pf, m2_expr.ExprInt(0, 1), af, + m2_expr.ExprInt(0, 1), zf, nf] + e.append( + m2_expr.ExprAssign(mRAX[instr.mode][8:16], m2_expr.ExprCompose(*args))) + return e, [] + + +def sahf(_, instr): + tmp = mRAX[instr.mode][8:16] + e = [] + e.append(m2_expr.ExprAssign(cf, tmp[0:1])) + e.append(m2_expr.ExprAssign(pf, tmp[2:3])) + e.append(m2_expr.ExprAssign(af, tmp[4:5])) + e.append(m2_expr.ExprAssign(zf, tmp[6:7])) + e.append(m2_expr.ExprAssign(nf, tmp[7:8])) + return e, [] + + +def lar(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('access_segment', src))) + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('access_segment_ok', src))) + return e, [] + + +def lsl(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('load_segment_limit', src))) + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('load_segment_limit_ok', src))) + return e, [] + + +def fclex(_, instr): + # XXX TODO + return [], [] + + +def fnclex(_, instr): + # XXX TODO + return [], [] + + +def l_str(_, instr, dst): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('load_tr_segment_selector', + m2_expr.ExprInt(0, 32)))) + return e, [] + + +def movd(_, instr, dst, src): + e = [] + if dst in regs_mm_expr: + e.append(m2_expr.ExprAssign( + dst, m2_expr.ExprCompose(src, m2_expr.ExprInt(0, 32)))) + elif dst in regs_xmm_expr: + 
e.append(m2_expr.ExprAssign( + dst, m2_expr.ExprCompose(src, m2_expr.ExprInt(0, 96)))) + else: + e.append(m2_expr.ExprAssign(dst, src[:32])) + return e, [] + + +def movdqu(_, instr, dst, src): + # XXX TODO alignment check + return [m2_expr.ExprAssign(dst, src)], [] + + +def movapd(_, instr, dst, src): + # XXX TODO alignment check + return [m2_expr.ExprAssign(dst, src)], [] + + +def andps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('&', dst, src))) + return e, [] + + +def andnps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('&', dst ^ dst.mask, src))) + return e, [] + + +def orps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('|', dst, src))) + return e, [] + + +def xorps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('^', dst, src))) + return e, [] + + +def rdmsr(ir, instr): + e = [m2_expr.ExprAssign(exception_flags,m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))] + return e, [] + + +def wrmsr(ir, instr): + e = [m2_expr.ExprAssign(exception_flags,m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))] + return e, [] + +# MMX/SSE/AVX operations +# + +def vec_op_clip(op, size, callback=None): + """ + Generate simd operations + @op: the operator + @size: size of an element + """ + def vec_op_clip_instr(ir, instr, dst, src): + if op == '-': + result = dst[:size] - src[:size] + else: + result = m2_expr.ExprOp(op, dst[:size], src[:size]) + if callback is not None: + result = callback(result) + return [m2_expr.ExprAssign(dst[:size], result)], [] + return vec_op_clip_instr + +# Generic vertical operation + + +def vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output): + assert reg_size % elt_size == 0 + n = reg_size // elt_size + if op == '-': + ops = [ + apply_on_output((dst[i * elt_size:(i + 1) * elt_size] + - src[i * elt_size:(i + 1) * elt_size])) + for i in range(0, n) + ] + else: + ops = [ + apply_on_output(m2_expr.ExprOp(op, dst[i * elt_size:(i 
+ 1) * elt_size], + src[i * elt_size:(i + 1) * elt_size])) + for i in range(0, n) + ] + + return m2_expr.ExprCompose(*ops) + + +def __vec_vertical_instr_gen(op, elt_size, sem, apply_on_output): + def vec_instr(ir, instr, dst, src): + e = [] + if isinstance(src, m2_expr.ExprMem): + src = ir.ExprMem(src.ptr, dst.size) + reg_size = dst.size + e.append(m2_expr.ExprAssign(dst, sem(op, elt_size, reg_size, dst, src, + apply_on_output))) + return e, [] + return vec_instr + + +def vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x): + return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem, + apply_on_output) + + +def _keep_mul_high(expr, signed=False): + assert expr.is_op("*") and len(expr.args) == 2 + + if signed: + arg1 = expr.args[0].signExtend(expr.size * 2) + arg2 = expr.args[1].signExtend(expr.size * 2) + else: + arg1 = expr.args[0].zeroExtend(expr.size * 2) + arg2 = expr.args[1].zeroExtend(expr.size * 2) + return m2_expr.ExprOp("*", arg1, arg2)[expr.size:] + +# Op, signed => associated comparison +_min_max_func = { + ("min", False): m2_expr.expr_is_unsigned_lower, + ("min", True): m2_expr.expr_is_signed_lower, + ("max", False): m2_expr.expr_is_unsigned_greater, + ("max", True): m2_expr.expr_is_signed_greater, +} +def _min_max(expr, signed): + assert (expr.is_op("min") or expr.is_op("max")) and len(expr.args) == 2 + return m2_expr.ExprCond( + _min_max_func[(expr.op, signed)](expr.args[1], expr.args[0]), + expr.args[1], + expr.args[0], + ) + +def _float_min_max(expr): + assert (expr.is_op("fmin") or expr.is_op("fmax")) and len(expr.args) == 2 + src1 = expr.args[0] + src2 = expr.args[1] + if expr.is_op("fmin"): + comp = m2_expr.expr_is_float_lower(src1, src2) + elif expr.is_op("fmax"): + comp = m2_expr.expr_is_float_lower(src2, src1) + + # x86 documentation (for MIN): + # IF ((SRC1 = 0.0) and (SRC2 = 0.0)) THEN DEST <-SRC2; + # ELSE IF (SRC1 = SNaN) THEN DEST <-SRC2; FI; + # ELSE IF (SRC2 = SNaN) THEN DEST <-SRC2; FI; + # ELSE IF (SRC1 < SRC2) THEN 
DEST <-SRC1; + # ELSE DEST<-SRC2; + # + # But this includes the NaN output of "SRC1 < SRC2" + # Associated text is more detailed, and this is the version impl here + return m2_expr.ExprCond( + m2_expr.expr_is_sNaN(src2), src2, + m2_expr.ExprCond( + m2_expr.expr_is_NaN(src2) | m2_expr.expr_is_NaN(src1), src2, + m2_expr.ExprCond(comp, src1, src2) + ) + ) + + +# Integer arithmetic +# + +# Additions +# + +# SSE +paddb = vec_vertical_instr('+', 8) +paddw = vec_vertical_instr('+', 16) +paddd = vec_vertical_instr('+', 32) +paddq = vec_vertical_instr('+', 64) + +# Substractions +# + +# SSE +psubb = vec_vertical_instr('-', 8) +psubw = vec_vertical_instr('-', 16) +psubd = vec_vertical_instr('-', 32) +psubq = vec_vertical_instr('-', 64) + +# Multiplications +# + +# SSE +pmullb = vec_vertical_instr('*', 8) +pmullw = vec_vertical_instr('*', 16) +pmulld = vec_vertical_instr('*', 32) +pmullq = vec_vertical_instr('*', 64) +pmulhub = vec_vertical_instr('*', 8, _keep_mul_high) +pmulhuw = vec_vertical_instr('*', 16, _keep_mul_high) +pmulhud = vec_vertical_instr('*', 32, _keep_mul_high) +pmulhuq = vec_vertical_instr('*', 64, _keep_mul_high) +pmulhb = vec_vertical_instr('*', 8, lambda x: _keep_mul_high(x, signed=True)) +pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True)) +pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True)) +pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True)) + +def pmuludq(ir, instr, dst, src): + e = [] + if dst.size == 64: + e.append(m2_expr.ExprAssign( + dst, + src[:32].zeroExtend(64) * dst[:32].zeroExtend(64) + )) + elif dst.size == 128: + e.append(m2_expr.ExprAssign( + dst[:64], + src[:32].zeroExtend(64) * dst[:32].zeroExtend(64) + )) + e.append(m2_expr.ExprAssign( + dst[64:], + src[64:96].zeroExtend(64) * dst[64:96].zeroExtend(64) + )) + else: + raise RuntimeError("Unsupported size %d" % dst.size) + return e, [] + +# Mix +# + +# SSE +def pmaddwd(ir, instr, dst, src): + sizedst = 32 + 
sizesrc = 16 + out = [] + for start in range(0, dst.size, sizedst): + base = start + mul1 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst) + base += sizesrc + mul2 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst) + out.append(mul1 + mul2) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def _absolute(expr): + """Return abs(@expr)""" + signed = expr.msb() + value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size) + return m2_expr.ExprCond(signed, value_unsigned, expr) + + +def psadbw(ir, instr, dst, src): + sizedst = 16 + sizesrc = 8 + out_dst = [] + for start in range(0, dst.size, 64): + out = [] + for src_start in range(0, 64, sizesrc): + beg = start + src_start + end = beg + sizesrc + # Not clear in the doc equations, but in the text, src and dst are: + # "8 unsigned byte integers" + out.append(_absolute(dst[beg: end].zeroExtend(sizedst) - src[beg: end].zeroExtend(sizedst))) + out_dst.append(m2_expr.ExprOp("+", *out)) + out_dst.append(m2_expr.ExprInt(0, 64 - sizedst)) + + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out_dst))], [] + +def _average(expr): + assert expr.is_op("avg") and len(expr.args) == 2 + + arg1 = expr.args[0].zeroExtend(expr.size * 2) + arg2 = expr.args[1].zeroExtend(expr.size * 2) + one = m2_expr.ExprInt(1, arg1.size) + # avg(unsigned) = (a + b + 1) >> 1, addition being at least on one more bit + return ((arg1 + arg2 + one) >> one)[:expr.size] + +pavgb = vec_vertical_instr('avg', 8, _average) +pavgw = vec_vertical_instr('avg', 16, _average) + +# Comparisons +# + +# SSE +pminsw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=True)) +pminub = vec_vertical_instr('min', 8, lambda x: _min_max(x, signed=False)) +pminuw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=False)) +pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False)) +pmaxub = vec_vertical_instr('max', 8, 
lambda x: _min_max(x, signed=False)) +pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False)) +pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False)) +pmaxsw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=True)) + +# Floating-point arithmetic +# + +# SSE +addss = vec_op_clip('fadd', 32) +addsd = vec_op_clip('fadd', 64) +addps = vec_vertical_instr('fadd', 32) +addpd = vec_vertical_instr('fadd', 64) +subss = vec_op_clip('fsub', 32) +subsd = vec_op_clip('fsub', 64) +subps = vec_vertical_instr('fsub', 32) +subpd = vec_vertical_instr('fsub', 64) +mulss = vec_op_clip('fmul', 32) +mulsd = vec_op_clip('fmul', 64) +mulps = vec_vertical_instr('fmul', 32) +mulpd = vec_vertical_instr('fmul', 64) +divss = vec_op_clip('fdiv', 32) +divsd = vec_op_clip('fdiv', 64) +divps = vec_vertical_instr('fdiv', 32) +divpd = vec_vertical_instr('fdiv', 64) + +# Comparisons (floating-point) + +minps = vec_vertical_instr('fmin', 32, _float_min_max) +minpd = vec_vertical_instr('fmin', 64, _float_min_max) +minss = vec_op_clip('fmin', 32, _float_min_max) +minsd = vec_op_clip('fmin', 64, _float_min_max) +maxps = vec_vertical_instr('fmax', 32, _float_min_max) +maxpd = vec_vertical_instr('fmax', 64, _float_min_max) +maxss = vec_op_clip('fmax', 32, _float_min_max) +maxsd = vec_op_clip('fmax', 64, _float_min_max) + +def _float_compare_to_mask(expr): + if expr.op == 'unord': + to_ext = m2_expr.expr_is_NaN(expr.args[0]) | m2_expr.expr_is_NaN(expr.args[1]) + elif expr.op == 'ord': + to_ext = ~m2_expr.expr_is_NaN(expr.args[0]) & ~m2_expr.expr_is_NaN(expr.args[1]) + else: + if expr.op == '==fu': + to_ext = m2_expr.expr_is_float_equal(expr.args[0], expr.args[1]) + on_NaN = m2_expr.ExprInt(0, 1) + elif expr.op == ' fp32 is needed + if double: + tmp_src = m2_expr.ExprOp('fpconvert_fp32', src[i*64:i*64 + 64]) + else: + tmp_src = src[i*32:i*32 + 32] + + e.append(m2_expr.ExprAssign( + dst[i*32:i*32 + 32], + m2_expr.ExprOp('fp_to_sint32', m2_expr.ExprOp( + 
'fpround_towardszero', + tmp_src + )))) + return e + +def cvttpd2pi(_, instr, dst, src): + return _cvtt_tpl(dst, src, [0, 1], double=True), [] + +def cvttpd2dq(_, instr, dst, src): + e = _cvtt_tpl(dst, src, [0, 1], double=True) + e.append(m2_expr.ExprAssign(dst[64:128], m2_expr.ExprInt(0, 64))) + return e, [] + +def cvttsd2si(_, instr, dst, src): + return _cvtt_tpl(dst, src, [0], double=True), [] + +def cvttps2dq(_, instr, dst, src): + return _cvtt_tpl(dst, src, [0, 1, 2, 3], double=False), [] + +def cvttps2pi(_, instr, dst, src): + return _cvtt_tpl(dst, src, [0, 1], double=False), [] + +def cvttss2si(_, instr, dst, src): + return _cvtt_tpl(dst, src, [0], double=False), [] + +def movss(_, instr, dst, src): + e = [] + if not isinstance(dst, m2_expr.ExprMem) and not isinstance(src, m2_expr.ExprMem): + # Source and Destination xmm + e.append(m2_expr.ExprAssign(dst[:32], src[:32])) + elif not isinstance(src, m2_expr.ExprMem) and isinstance(dst, m2_expr.ExprMem): + # Source XMM Destination Mem + e.append(m2_expr.ExprAssign(dst, src[:32])) + else: + # Source Mem Destination XMM + e.append(m2_expr.ExprAssign( + dst, m2_expr.ExprCompose(src, m2_expr.ExprInt(0, 96)))) + return e, [] + + +def ucomiss(_, instr, src1, src2): + e = [] + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp( + 'ucomiss_zf', src1[:32], src2[:32]))) + e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp( + 'ucomiss_pf', src1[:32], src2[:32]))) + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp( + 'ucomiss_cf', src1[:32], src2[:32]))) + + e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) + + return e, [] + +def ucomisd(_, instr, src1, src2): + e = [] + e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp( + 'ucomisd_zf', src1[:64], src2[:64]))) + e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp( + 'ucomisd_pf', src1[:64], src2[:64]))) + e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp( + 'ucomisd_cf', 
src1[:64], src2[:64]))) + + e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) + e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) + + return e, [] + + +def pshufb(_, instr, dst, src): + e = [] + if dst.size == 64: + bit_l = 3 + elif dst.size == 128: + bit_l = 4 + else: + raise NotImplementedError("bad size") + for i in range(0, src.size, 8): + index = src[ + i:i + bit_l].zeroExtend(dst.size) << m2_expr.ExprInt(3, dst.size) + value = (dst >> index)[:8] + e.append(m2_expr.ExprAssign(dst[i:i + 8], + m2_expr.ExprCond(src[i + 7:i + 8], + m2_expr.ExprInt(0, 8), + value))) + return e, [] + + +def pshufd(_, instr, dst, src, imm): + control = int(imm) + out = [] + for i in range(4): + shift = ((control >> (i * 2)) & 3) * 32 + # shift is 2 bits long, expr.size is 128 + # => shift + 32 <= src.size + out.append(src[shift: shift + 32]) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def pshuflw(_, instr, dst, src, imm): + control = int(imm) + out = [] + for i in range(4): + shift = ((control >> (i * 2)) & 3) * 16 + out.append(src[shift: shift + 16]) + out.append(src[64:]) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def pshufhw(_, instr, dst, src, imm): + control = int(imm) + out = [src[:64]] + for i in range(4): + shift = ((control >> (i * 2)) & 3) * 16 + out.append(src[shift + 64: shift + 16 + 64]) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def ps_rl_ll(ir, instr, dst, src, op, size): + mask = {16: 0xF, + 32: 0x1F, + 64: 0x3F}[size] + mask = m2_expr.ExprInt(mask, dst.size) + + # Saturate the counter to 2**size + count = src.zeroExtend(dst.size) + count = m2_expr.ExprCond(count & expr_simp(~mask), + m2_expr.ExprInt(size, dst.size), # saturation + count, # count < 2**size + ) + count = count[:size] + if src.is_int(): + count = expr_simp(count) + + out = [] + for i in range(0, dst.size, size): + out.append(m2_expr.ExprOp(op, dst[i:i + 
size], count)) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def psrlw(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, ">>", 16) + + +def psrld(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, ">>", 32) + + +def psrlq(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, ">>", 64) + + +def psllw(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, "<<", 16) + + +def pslld(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, "<<", 32) + + +def psllq(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, "<<", 64) + + +def psraw(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, "a>>", 16) + + +def psrad(ir, instr, dst, src): + return ps_rl_ll(ir, instr, dst, src, "a>>", 32) + + +def pslldq(_, instr, dst, src): + assert src.is_int() + e = [] + count = int(src) + if count > 15: + return [m2_expr.ExprAssign(dst, m2_expr.ExprInt(0, dst.size))], [] + else: + return [m2_expr.ExprAssign(dst, dst << m2_expr.ExprInt(8 * count, dst.size))], [] + + +def psrldq(_, instr, dst, src): + assert src.is_int() + count = int(src) + if count > 15: + return [m2_expr.ExprAssign(dst, m2_expr.ExprInt(0, dst.size))], [] + else: + return [m2_expr.ExprAssign(dst, dst >> m2_expr.ExprInt(8 * count, dst.size))], [] + + +def iret(ir, instr): + """IRET implementation + XXX: only support "no-privilege change" + """ + size = instr.v_opmode() + exprs, _ = retf(ir, instr, m2_expr.ExprInt(size // 8, size=size)) + tmp = mRSP[instr.mode][:size] + m2_expr.ExprInt((2 * size) // 8, size=size) + exprs += _tpl_eflags(tmp) + return exprs, [] + + +def pcmpeq(_, instr, dst, src, size): + e = [] + for i in range(0, dst.size, size): + test = m2_expr.expr_is_equal(dst[i:i + size], src[i:i + size]) + e.append(m2_expr.ExprAssign(dst[i:i + size], + m2_expr.ExprCond(test, + m2_expr.ExprInt(-1, size), + m2_expr.ExprInt(0, size)))) + return e, [] + + +def pcmpgt(_, instr, dst, src, size): + e = [] + for i in range(0, dst.size, size): 
+ test = m2_expr.expr_is_signed_greater(dst[i:i + size], src[i:i + size]) + e.append(m2_expr.ExprAssign(dst[i:i + size], + m2_expr.ExprCond(test, + m2_expr.ExprInt(-1, size), + m2_expr.ExprInt(0, size)))) + return e, [] + + +def pcmpeqb(ir, instr, dst, src): + return pcmpeq(ir, instr, dst, src, 8) + +def pcmpeqw(ir, instr, dst, src): + return pcmpeq(ir, instr, dst, src, 16) + +def pcmpeqd(ir, instr, dst, src): + return pcmpeq(ir, instr, dst, src, 32) + +def pcmpeqq(ir, instr, dst, src): + return pcmpeq(ir, instr, dst, src, 64) + + + + +def pcmpgtb(ir, instr, dst, src): + return pcmpgt(ir, instr, dst, src, 8) + +def pcmpgtw(ir, instr, dst, src): + return pcmpgt(ir, instr, dst, src, 16) + +def pcmpgtd(ir, instr, dst, src): + return pcmpgt(ir, instr, dst, src, 32) + +def pcmpgtq(ir, instr, dst, src): + return pcmpgt(ir, instr, dst, src, 64) + + + +def punpck(_, instr, dst, src, size, off): + e = [] + slices = [] + for i in range(dst.size // (2 * size)): + slices.append(dst[size * i + off: size * i + off + size]) + slices.append(src[size * i + off: size * i + off + size]) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*slices))) + return e, [] + + +def punpckhbw(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 8, dst.size // 2) + + +def punpckhwd(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 16, dst.size // 2) + + +def punpckhdq(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 32, dst.size // 2) + + +def punpckhqdq(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 64, dst.size // 2) + + +def punpcklbw(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 8, 0) + + +def punpcklwd(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 16, 0) + + +def punpckldq(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 32, 0) + + +def punpcklqdq(ir, instr, dst, src): + return punpck(ir, instr, dst, src, 64, 0) + + +def pinsr(_, instr, dst, src, imm, size): + e = [] + + mask = {8: 0xF, + 16: 0x7, + 32: 0x3, + 64: 
0x1}[size] + + sel = (int(imm) & mask) * size + e.append(m2_expr.ExprAssign(dst[sel:sel + size], src[:size])) + + return e, [] + + +def pinsrb(ir, instr, dst, src, imm): + return pinsr(ir, instr, dst, src, imm, 8) + + +def pinsrw(ir, instr, dst, src, imm): + return pinsr(ir, instr, dst, src, imm, 16) + + +def pinsrd(ir, instr, dst, src, imm): + return pinsr(ir, instr, dst, src, imm, 32) + + +def pinsrq(ir, instr, dst, src, imm): + return pinsr(ir, instr, dst, src, imm, 64) + + +def pextr(_, instr, dst, src, imm, size): + e = [] + + mask = {8: 0xF, + 16: 0x7, + 32: 0x3, + 64: 0x1}[size] + + sel = (int(imm) & mask) * size + e.append(m2_expr.ExprAssign(dst, src[sel:sel + size].zeroExtend(dst.size))) + + return e, [] + + +def pextrb(ir, instr, dst, src, imm): + return pextr(ir, instr, dst, src, imm, 8) + + +def pextrw(ir, instr, dst, src, imm): + return pextr(ir, instr, dst, src, imm, 16) + + +def pextrd(ir, instr, dst, src, imm): + return pextr(ir, instr, dst, src, imm, 32) + + +def pextrq(ir, instr, dst, src, imm): + return pextr(ir, instr, dst, src, imm, 64) + + +def unpckhps(_, instr, dst, src): + e = [] + src = m2_expr.ExprCompose(dst[64:96], src[64:96], dst[96:128], src[96:128]) + e.append(m2_expr.ExprAssign(dst, src)) + return e, [] + + +def unpckhpd(_, instr, dst, src): + e = [] + src = m2_expr.ExprCompose(dst[64:128], src[64:128]) + e.append(m2_expr.ExprAssign(dst, src)) + return e, [] + + +def unpcklps(_, instr, dst, src): + e = [] + src = m2_expr.ExprCompose(dst[0:32], src[0:32], dst[32:64], src[32:64]) + e.append(m2_expr.ExprAssign(dst, src)) + return e, [] + + +def unpcklpd(_, instr, dst, src): + e = [] + src = m2_expr.ExprCompose(dst[0:64], src[0:64]) + e.append(m2_expr.ExprAssign(dst, src)) + return e, [] + + +def movlpd(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst[:64], src[:64])) + return e, [] + + +def movlps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst[:64], src[:64])) + return e, [] + + +def movhpd(_, instr, 
dst, src): + e = [] + if src.size == 64: + e.append(m2_expr.ExprAssign(dst[64:128], src)) + elif dst.size == 64: + e.append(m2_expr.ExprAssign(dst, src[64:128])) + else: + raise RuntimeError("bad encoding!") + return e, [] + + +def movlhps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst[64:128], src[:64])) + return e, [] + + +def movhlps(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst[:64], src[64:128])) + return e, [] + + +def movdq2q(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, src[:64])) + return e, [] + + +def movq2dq(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst, src[:64].zeroExtend(dst.size))) + return e, [] + + +def sqrt_gen(_, instr, dst, src, size): + e = [] + out = [] + for i in range(src.size // size): + out.append(m2_expr.ExprOp('fsqrt', + src[i * size: (i + 1) * size])) + src = m2_expr.ExprCompose(*out) + e.append(m2_expr.ExprAssign(dst, src)) + return e, [] + + +def sqrtpd(ir, instr, dst, src): + return sqrt_gen(ir, instr, dst, src, 64) + + +def sqrtps(ir, instr, dst, src): + return sqrt_gen(ir, instr, dst, src, 32) + + +def sqrtsd(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst[:64], + m2_expr.ExprOp('fsqrt', + src[:64]))) + return e, [] + + +def sqrtss(_, instr, dst, src): + e = [] + e.append(m2_expr.ExprAssign(dst[:32], + m2_expr.ExprOp('fsqrt', + src[:32]))) + return e, [] + + +def pmovmskb(_, instr, dst, src): + e = [] + out = [] + for i in range(src.size // 8): + out.append(src[8 * i + 7:8 * (i + 1)]) + src = m2_expr.ExprCompose(*out) + e.append(m2_expr.ExprAssign(dst, src.zeroExtend(dst.size))) + return e, [] + + +def smsw(ir, instr, dst): + e = [] + LOG_X86_SEM.warning("DEFAULT SMSW %s!!", str(dst)) + e.append(m2_expr.ExprAssign(dst, m2_expr.ExprInt(0x80050033, 32)[:dst.size])) + return e, [] + + +def bndmov(ir, instr, dst, src): + # Implemented as a NOP, because BND side effects are not yet supported + return [], [] + +def palignr(ir, instr, dst, src, imm): 
+ # dst.src >> imm * 8 [:dst.size] + + shift = int(imm) * 8 + if shift == 0: + result = src + elif shift == src.size: + result = dst + elif shift > src.size: + result = dst >> m2_expr.ExprInt(shift - src.size, dst.size) + else: + # shift < src.size + result = m2_expr.ExprCompose( + src[shift:], + dst[:shift], + ) + + return [m2_expr.ExprAssign(dst, result)], [] + + +def _signed_saturation(expr, dst_size): + """Saturate the expr @expr for @dst_size bit + Signed saturation return MAX_INT / MIN_INT or value depending on the value + """ + assert expr.size > dst_size + + median = 1 << (dst_size - 1) + min_int = m2_expr.ExprInt(- median, dst_size) + max_int = m2_expr.ExprInt(median - 1, dst_size) + signed = expr.msb() + value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size) + # Re-use the sign bit + value = m2_expr.ExprCompose(expr[:dst_size - 1], signed) + + # Bit hack: to avoid a double signed comparison, use mask + # ie., in unsigned, 0xXY > 0x0f iff X is not null + + # if expr >s 0 + # if expr[dst_size - 1:] > 0: # bigger than max_int + # -> max_int + # else + # -> value + # else # negative + # if expr[dst_size:-1] > 0: # smaller than min_int + # -> value + # else + # -> min_int + + return m2_expr.ExprCond( + signed, + m2_expr.ExprCond(value_unsigned[dst_size - 1:], + min_int, + value), + m2_expr.ExprCond(expr[dst_size - 1:], + max_int, + value), + ) + + +def _unsigned_saturation(expr, dst_size): + """Saturate the expr @expr for @dst_size bit + Unsigned saturation return MAX_INT or value depending on the value + """ + assert expr.size > dst_size + + zero = m2_expr.ExprInt(0, dst_size) + max_int = m2_expr.ExprInt(-1, dst_size) + value = expr[:dst_size] + signed = expr.msb() + + + # Bit hack: to avoid a double signed comparison, use mask + # ie., in unsigned, 0xXY > 0x0f iff X is not null + + return m2_expr.ExprCond( + signed, + zero, + m2_expr.ExprCond(expr[dst_size:], + max_int, + value), + ) + + + +def packsswb(ir, instr, dst, src): + out = [] + for 
source in [dst, src]: + for start in range(0, dst.size, 16): + out.append(_signed_saturation(source[start:start + 16], 8)) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def packssdw(ir, instr, dst, src): + out = [] + for source in [dst, src]: + for start in range(0, dst.size, 32): + out.append(_signed_saturation(source[start:start + 32], 16)) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def packuswb(ir, instr, dst, src): + out = [] + for source in [dst, src]: + for start in range(0, dst.size, 16): + out.append(_unsigned_saturation(source[start:start + 16], 8)) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def _saturation_sub_unsigned(expr): + assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") + + # Compute the soustraction on one more bit to be able to distinguish cases: + # 0x48 - 0xd7 in 8 bit, should saturate + arg1 = expr.args[0].zeroExtend(expr.size + 1) + arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1) + return _unsigned_saturation(arg1 - arg2, expr.size) + +def _saturation_sub_signed(expr): + assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") + + # Compute the subtraction on two more bits, see _saturation_sub_unsigned + arg1 = expr.args[0].signExtend(expr.size + 2) + arg2 = expr.args[1].args[0].signExtend(expr.size + 2) + return _signed_saturation(arg1 - arg2, expr.size) + +def _saturation_add(expr): + assert expr.is_op("+") and len(expr.args) == 2 + + # Compute the addition on one more bit to be able to distinguish cases: + # 0x48 + 0xd7 in 8 bit, should saturate + + arg1 = expr.args[0].zeroExtend(expr.size + 1) + arg2 = expr.args[1].zeroExtend(expr.size + 1) + + # We can also use _unsigned_saturation with two additional bits (to + # distinguish minus and overflow case) + # The resulting expression being more complicated with an impossible case + # (signed=True), we rewrite the rule here + + return m2_expr.ExprCond((arg1 + 
arg2).msb(), m2_expr.ExprInt(-1, expr.size), + expr) + +def _saturation_add_signed(expr): + assert expr.is_op("+") and len(expr.args) == 2 + + # Compute the subtraction on two more bits, see _saturation_add_unsigned + + arg1 = expr.args[0].signExtend(expr.size + 2) + arg2 = expr.args[1].signExtend(expr.size + 2) + + return _signed_saturation(arg1 + arg2, expr.size) + + +# Saturate SSE operations + +psubusb = vec_vertical_instr('-', 8, _saturation_sub_unsigned) +psubusw = vec_vertical_instr('-', 16, _saturation_sub_unsigned) +paddusb = vec_vertical_instr('+', 8, _saturation_add) +paddusw = vec_vertical_instr('+', 16, _saturation_add) +psubsb = vec_vertical_instr('-', 8, _saturation_sub_signed) +psubsw = vec_vertical_instr('-', 16, _saturation_sub_signed) +paddsb = vec_vertical_instr('+', 8, _saturation_add_signed) +paddsw = vec_vertical_instr('+', 16, _saturation_add_signed) + + +# Others SSE operations + +def maskmovq(ir, instr, src, mask): + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) + blks = [] + + # For each possibility, check if a write is necessary + check_labels = [m2_expr.ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + for _ in range(0, mask.size, 8)] + # If the write has to be done, do it (otherwise, nothing happen) + write_labels = [m2_expr.ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + for _ in range(0, mask.size, 8)] + + # Build check blocks + for i, start in enumerate(range(0, mask.size, 8)): + bit = mask[start + 7: start + 8] + cur_label = check_labels[i] + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr + write_label = write_labels[i] + check = m2_expr.ExprAssign(ir.IRDst, + m2_expr.ExprCond(bit, + write_label, + next_check_label)) + blks.append(IRBlock(cur_label.loc_key, [AssignBlock([check], instr)])) + + # Build write blocks + dst_addr = mRDI[instr.mode] + for i, start in enumerate(range(0, mask.size, 8)): + cur_label = write_labels[i] + 
next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr + write_addr = dst_addr + m2_expr.ExprInt(i, dst_addr.size) + + # @8[DI/EDI/RDI + i] = src[byte i] + write_mem = m2_expr.ExprAssign(m2_expr.ExprMem(write_addr, 8), + src[start: start + 8]) + jump = m2_expr.ExprAssign(ir.IRDst, next_check_label) + blks.append(IRBlock(cur_label.loc_key, [AssignBlock([write_mem, jump], instr)])) + + # If mask is null, bypass all + e = [m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(mask, + check_labels[0], + loc_next_expr))] + return e, blks + + +def emms(ir, instr): + # Implemented as a NOP + return [], [] + +def endbr64(ir, instr): + # Implemented as a NOP + return [], [] + +def endbr32(ir, instr): + # Implemented as a NOP + return [], [] + +# Common value without too many option, 0x1fa0 +STMXCSR_VALUE = 0x1fa0 +def stmxcsr(ir, instr, dst): + return [m2_expr.ExprAssign(dst, m2_expr.ExprInt(STMXCSR_VALUE, dst.size))], [] + +def ldmxcsr(ir, instr, dst): + # Implemented as a NOP + return [], [] + + +def _select4(src, control): + # Implementation inspired from Intel Intrisics Guide + # @control is already resolved (was an immediate) + + if control == 0: + return src[:32] # 0 + elif control == 1: + return src[32:64] + elif control == 2: + return src[64:96] + elif control == 3: + return src[96:] + else: + raise ValueError("Control must be on 2 bits") + + +def shufps(ir, instr, dst, src, imm8): + out = [] + control = int(imm8) + for i in range(4): + if i < 2: + source = dst + else: + source = src + out.append(_select4(source, (control >> (i * 2)) & 3)) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + + +def shufpd(ir, instr, dst, src, imm8): + out = [] + control = int(imm8) + out.append(dst[64:] if control & 1 else dst[:64]) + out.append(src[64:] if control & 2 else src[:64]) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] + +def movmskps(ir, instr, dst, src): + out = [] + for i in range(4): + out.append(src[(32 * i) 
+ 31:(32 * i) + 32]) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out).zeroExtend(dst.size))], [] + +def movmskpd(ir, instr, dst, src): + out = [] + for i in range(2): + out.append(src[(64 * i) + 63:(64 * i) + 64]) + return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out).zeroExtend(dst.size))], [] + + +mnemo_func = {'mov': mov, + 'xchg': xchg, + 'movzx': movzx, + 'movsx': movsx, + 'movsxd': movsx, + 'lea': lea, + 'add': add, + 'xadd': xadd, + 'adc': adc, + 'sub': sub, + 'sbb': sbb, + 'neg': neg, + 'not': l_not, + 'cmp': l_cmp, + 'xor': xor, + 'pxor': pxor, + 'or': l_or, + 'and': l_and, + 'test': l_test, + 'rol': l_rol, + 'ror': l_ror, + 'rcl': rcl, + 'rcr': rcr, + 'sar': sar, + 'shr': shr, + 'sal': shl, + 'shl': shl, + 'shld': shld, + 'cmc': cmc, + 'clc': clc, + 'stc': stc, + 'cld': cld, + 'std': std, + 'cli': cli, + 'sti': sti, + 'bsf': bsf, + 'bsr': bsr, + 'inc': inc, + 'dec': dec, + 'push': push, + 'pushw': pushw, + 'pop': pop, + 'popw': popw, + 'sete': sete, + 'setnz': setnz, + 'setl': setl, + 'setg': setg, + 'setge': setge, + 'seta': seta, + 'setae': setae, + 'setb': setb, + 'setbe': setbe, + 'setns': setns, + 'sets': sets, + 'seto': seto, + 'setp': setp, + 'setpe': setp, + 'setnp': setnp, + 'setpo': setnp, + 'setle': setle, + 'setng': setle, + 'setna': setna, + 'setnbe': setnbe, + 'setno': setno, + 'setnc': setnb, + 'setz': sete, + 'setne': setnz, + 'setnb': setae, + 'setnae': setb, + 'setc': setb, + 'setnge': setl, + 'setnl': setge, + 'setnle': setg, + 'setalc': setalc, + 'bswap': bswap, + 'cmpsb': lambda ir, instr: cmps(ir, instr, 8), + 'cmpsw': lambda ir, instr: cmps(ir, instr, 16), + 'cmpsd': lambda ir, instr: cmps(ir, instr, 32), + 'cmpsq': lambda ir, instr: cmps(ir, instr, 64), + 'scasb': lambda ir, instr: scas(ir, instr, 8), + 'scasw': lambda ir, instr: scas(ir, instr, 16), + 'scasd': lambda ir, instr: scas(ir, instr, 32), + 'scasq': lambda ir, instr: scas(ir, instr, 64), + 'pushfd': pushfd, + 'pushfq': pushfq, + 'pushfw': pushfw, + 
'popfd': popfd, + 'popfq': popfd, + 'popfw': popfw, + 'pusha': pusha, + 'pushad': pushad, + 'popad': popad, + 'popa': popa, + 'call': call, + 'ret': ret, + 'retf': retf, + 'iret': iret, + 'iretd': iret, + 'leave': leave, + 'enter': enter, + 'jmp': jmp, + 'jz': jz, + 'je': jz, + 'jcxz': jcxz, + 'jecxz': jecxz, + 'jrcxz': jrcxz, + 'jnz': jnz, + 'jp': jp, + 'jpe': jp, + 'jnp': jnp, + 'ja': ja, + 'jae': jae, + 'jb': jb, + 'jbe': jbe, + 'jg': jg, + 'jge': jge, + 'jl': jl, + 'jle': jle, + 'js': js, + 'jns': jns, + 'jo': jo, + 'jno': jno, + 'loop': loop, + 'loopne': loopne, + 'loope': loope, + 'div': div, + 'mul': mul, + 'imul': imul, + 'idiv': idiv, + + 'cbw': cbw, + 'cwde': cwde, + 'cdqe': cdqe, + + 'cwd': cwd, + 'cdq': cdq, + 'cqo': cqo, + + 'daa': daa, + 'das': das, + 'aam': aam, + 'aad': aad, + 'aaa': aaa, + 'aas': aas, + 'shrd': shrd, + 'stosb': lambda ir, instr: stos(ir, instr, 8), + 'stosw': lambda ir, instr: stos(ir, instr, 16), + 'stosd': lambda ir, instr: stos(ir, instr, 32), + 'stosq': lambda ir, instr: stos(ir, instr, 64), + + 'lodsb': lambda ir, instr: lods(ir, instr, 8), + 'lodsw': lambda ir, instr: lods(ir, instr, 16), + 'lodsd': lambda ir, instr: lods(ir, instr, 32), + 'lodsq': lambda ir, instr: lods(ir, instr, 64), + + 'movsb': lambda ir, instr: movs(ir, instr, 8), + 'movsw': lambda ir, instr: movs(ir, instr, 16), + 'movsd': movsd_dispatch, + 'movsq': lambda ir, instr: movs(ir, instr, 64), + 'fcomp': fcomp, + 'fcompp': fcompp, + 'ficomp': ficomp, + 'fucom': fucom, + 'fucomp': fucomp, + 'fucompp': fucompp, + 'comiss': comiss, + 'comisd': comisd, + 'nop': nop, + 'ud2': ud2, + 'prefetch0': prefetch0, + 'prefetch1': prefetch1, + 'prefetch2': prefetch2, + 'prefetchw': prefetchw, + 'prefetchnta': prefetchnta, + 'lfence': lfence, + 'mfence': mfence, + 'sfence': sfence, + 'fnop': nop, # XXX + 'hlt': hlt, + 'rdtsc': rdtsc, + 'fst': fst, + 'fstp': fstp, + 'fist': fist, + 'fistp': fistp, + 'fisttp': fisttp, + 'fld': fld, + 'fldz': fldz, + 'fld1': fld1, + 'fldl2t': 
fldl2t, + 'fldpi': fldpi, + 'fldln2': fldln2, + 'fldl2e': fldl2e, + 'fldlg2': fldlg2, + 'fild': fild, + 'fadd': fadd, + 'fiadd': fiadd, + 'fisub': fisub, + 'fisubr': fisubr, + 'fpatan': fpatan, + 'fprem': fprem, + 'fprem1': fprem1, + 'fninit': fninit, + 'fyl2x': fyl2x, + 'faddp': faddp, + 'fsub': fsub, + 'fsubp': fsubp, + 'fsubr': fsubr, + 'fsubrp': fsubrp, + 'fmul': fmul, + 'fimul': fimul, + 'fmulp': fmulp, + 'fdiv': fdiv, + 'fdivr': fdivr, + 'fdivrp': fdivrp, + 'fidiv': fidiv, + 'fidivr': fidivr, + 'fdivp': fdivp, + 'fxch': fxch, + 'fptan': fptan, + 'frndint': frndint, + 'fsin': fsin, + 'fcos': fcos, + 'fsincos': fsincos, + 'fscale': fscale, + 'f2xm1': f2xm1, + 'fchs': fchs, + 'fsqrt': fsqrt, + 'fabs': fabs, + 'fnstsw': fnstsw, + 'fnstcw': fnstcw, + 'fldcw': fldcw, + 'fwait': fwait, + 'fcmovb': fcmovb, + 'fcmove': fcmove, + 'fcmovbe': fcmovbe, + 'fcmovu': fcmovu, + 'fcmovnb': fcmovnb, + 'fcmovne': fcmovne, + 'fcmovnbe': fcmovnbe, + 'fcmovnu': fcmovnu, + 'fnstenv': fnstenv, + 'fldenv': fldenv, + 'sidt': sidt, + 'sldt': sldt, + 'arpl': arpl, + 'cmovz': cmovz, + 'cmove': cmovz, + 'cmovnz': cmovnz, + 'cmovpe': cmovpe, + 'cmovnp': cmovnp, + 'cmovge': cmovge, + 'cmovnl': cmovge, + 'cmovg': cmovg, + 'cmovl': cmovl, + 'cmova': cmova, + 'cmovae': cmovae, + 'cmovbe': cmovbe, + 'cmovb': cmovb, + 'cmovnge': cmovl, + 'cmovle': cmovle, + 'cmovng': cmovle, + 'cmovo': cmovo, + 'cmovno': cmovno, + 'cmovs': cmovs, + 'cmovns': cmovns, + 'icebp': icebp, + 'int': l_int, + 'xlat': xlat, + 'bt': bt, + 'cpuid': cpuid, + 'fcom': fcom, + 'ftst': ftst, + 'fxam': fxam, + 'ficom': ficom, + 'fcomi': fcomi, + 'fcomip': fcomip, + 'fucomi': fucomi, + 'fucomip': fucomip, + 'insb': lambda ir, instr: ins(ir, instr, 8), + 'insw': lambda ir, instr: ins(ir, instr, 16), + 'insd': lambda ir, instr: ins(ir, instr, 32), + 'btc': btc, + 'bts': bts, + 'btr': btr, + 'into': into, + 'in': l_in, + 'outsb': lambda ir, instr: l_outs(ir, instr, 8), + 'outsw': lambda ir, instr: l_outs(ir, instr, 16), + 'outsd': 
lambda ir, instr: l_outs(ir, instr, 32), + + 'out': l_out, + "sysenter": l_sysenter, + "syscall": l_syscall, + "cmpxchg": cmpxchg, + "cmpxchg8b": cmpxchg8b, + "lds": lds, + "les": les, + "lss": lss, + "lfs": lfs, + "lgs": lgs, + "lahf": lahf, + "sahf": sahf, + "lar": lar, + "lsl": lsl, + "fclex": fclex, + "fnclex": fnclex, + "str": l_str, + "movd": movd, + "movdqu": movdqu, + "movdqa": movdqu, + "movapd": movapd, # XXX TODO alignment check + "movupd": movapd, # XXX TODO alignment check + "movaps": movapd, # XXX TODO alignment check + "movups": movapd, # XXX TODO alignment check + "andps": andps, + "andpd": andps, + "andnps": andnps, + "andnpd": andnps, + "orps": orps, + "orpd": orps, + "xorps": xorps, + "xorpd": xorps, + + "movq": movq, + + "pminsw": pminsw, + "cvtdq2pd": cvtdq2pd, + "cvtdq2ps": cvtdq2ps, + "cvtpd2dq": cvtpd2dq, + "cvtpd2pi": cvtpd2pi, + "cvtpd2ps": cvtpd2ps, + "cvtpi2pd": cvtpi2pd, + "cvtpi2ps": cvtpi2ps, + "cvtps2dq": cvtps2dq, + "cvtps2pd": cvtps2pd, + "cvtps2pi": cvtps2pi, + "cvtsd2si": cvtsd2si, + "cvtsd2ss": cvtsd2ss, + "cvtsi2sd": cvtsi2sd, + "cvtsi2ss": cvtsi2ss, + "cvtss2sd": cvtss2sd, + "cvtss2si": cvtss2si, + "cvttpd2pi": cvttpd2pi, + "cvttpd2dq": cvttpd2dq, + "cvttps2dq": cvttps2dq, + "cvttps2pi": cvttps2pi, + "cvttsd2si": cvttsd2si, + "cvttss2si": cvttss2si, + + + "bndmov": bndmov, + + + + + "movss": movss, + + "ucomiss": ucomiss, + "ucomisd": ucomisd, + + # + # MMX/AVX/SSE operations + + # Arithmetic (integers) + # + + # Additions + # SSE + "paddb": paddb, + "paddw": paddw, + "paddd": paddd, + "paddq": paddq, + + # Substractions + # SSE + "psubb": psubb, + "psubw": psubw, + "psubd": psubd, + "psubq": psubq, + + # Multiplications + # SSE + "pmullb": pmullb, + "pmullw": pmullw, + "pmulld": pmulld, + "pmullq": pmullq, + "pmulhub": pmulhub, + "pmulhuw": pmulhuw, + "pmulhud": pmulhud, + "pmulhuq": pmulhuq, + "pmulhb": pmulhb, + "pmulhw": pmulhw, + "pmulhd": pmulhd, + "pmulhq": pmulhq, + "pmuludq": pmuludq, + + # Mix + # SSE + "pmaddwd": 
pmaddwd, + "psadbw": psadbw, + "pavgb": pavgb, + "pavgw": pavgw, + + # Arithmetic (floating-point) + # + + # Additions + # SSE + "addss": addss, + "addsd": addsd, + "addps": addps, + "addpd": addpd, + + # Substractions + # SSE + "subss": subss, + "subsd": subsd, + "subps": subps, + "subpd": subpd, + + # Multiplications + # SSE + "mulss": mulss, + "mulsd": mulsd, + "mulps": mulps, + "mulpd": mulpd, + + # Divisions + # SSE + "divss": divss, + "divsd": divsd, + "divps": divps, + "divpd": divpd, + + # Comparisons (floating-point) + # + "minps": minps, + "minpd": minpd, + "minss": minss, + "minsd": minsd, + "maxps": maxps, + "maxpd": maxpd, + "maxss": maxss, + "maxsd": maxsd, + "cmpeqps": cmpeqps, + "cmpeqpd": cmpeqpd, + "cmpeqss": cmpeqss, + "cmpeqsd": cmpeqsd, + "cmpltps": cmpltps, + "cmpltpd": cmpltpd, + "cmpltss": cmpltss, + "cmpltsd": cmpltsd, + "cmpleps": cmpleps, + "cmplepd": cmplepd, + "cmpless": cmpless, + "cmplesd": cmplesd, + "cmpunordps": cmpunordps, + "cmpunordpd": cmpunordpd, + "cmpunordss": cmpunordss, + "cmpunordsd": cmpunordsd, + "cmpneqps": cmpneqps, + "cmpneqpd": cmpneqpd, + "cmpneqss": cmpneqss, + "cmpneqsd": cmpneqsd, + "cmpnltps": cmpnltps, + "cmpnltpd": cmpnltpd, + "cmpnltss": cmpnltss, + "cmpnltsd": cmpnltsd, + "cmpnleps": cmpnleps, + "cmpnlepd": cmpnlepd, + "cmpnless": cmpnless, + "cmpnlesd": cmpnlesd, + "cmpordps": cmpordps, + "cmpordpd": cmpordpd, + "cmpordss": cmpordss, + "cmpordsd": cmpordsd, + + # Logical (floating-point) + # + + "pand": pand, + "pandn": pandn, + "por": por, + + "rdmsr": rdmsr, + "wrmsr": wrmsr, + "pshufb": pshufb, + "pshufd": pshufd, + "pshuflw": pshuflw, + "pshufhw": pshufhw, + + "psrlw": psrlw, + "psrld": psrld, + "psrlq": psrlq, + "psllw": psllw, + "pslld": pslld, + "psllq": psllq, + "pslldq": pslldq, + "psrldq": psrldq, + "psraw": psraw, + "psrad": psrad, + + "palignr": palignr, + + "pmaxub": pmaxub, + "pmaxuw": pmaxuw, + "pmaxud": pmaxud, + "pmaxsw": pmaxsw, + + "pminub": pminub, + "pminuw": pminuw, + "pminud": 
pminud, + + "pcmpeqb": pcmpeqb, + "pcmpeqw": pcmpeqw, + "pcmpeqd": pcmpeqd, + "pcmpeqq": pcmpeqq, + + "pcmpgtb": pcmpgtb, + "pcmpgtw": pcmpgtw, + "pcmpgtd": pcmpgtd, + "pcmpgtq": pcmpgtq, + + "punpckhbw": punpckhbw, + "punpckhwd": punpckhwd, + "punpckhdq": punpckhdq, + "punpckhqdq": punpckhqdq, + + + "punpcklbw": punpcklbw, + "punpcklwd": punpcklwd, + "punpckldq": punpckldq, + "punpcklqdq": punpcklqdq, + + "pinsrb": pinsrb, + "pinsrw": pinsrw, + "pinsrd": pinsrd, + "pinsrq": pinsrq, + + "pextrb": pextrb, + "pextrw": pextrw, + "pextrd": pextrd, + "pextrq": pextrq, + + "unpckhps": unpckhps, + "unpckhpd": unpckhpd, + "unpcklps": unpcklps, + "unpcklpd": unpcklpd, + + "movlpd": movlpd, + "movlps": movlps, + "movhpd": movhpd, + "movhps": movhpd, + "movlhps": movlhps, + "movhlps": movhlps, + "movdq2q": movdq2q, + "movq2dq": movq2dq, + + "sqrtpd": sqrtpd, + "sqrtps": sqrtps, + "sqrtsd": sqrtsd, + "sqrtss": sqrtss, + + "pmovmskb": pmovmskb, + + "packsswb": packsswb, + "packssdw": packssdw, + "packuswb": packuswb, + + "psubusb": psubusb, + "psubusw": psubusw, + "paddusb": paddusb, + "paddusw": paddusw, + "psubsb": psubsb, + "psubsw": psubsw, + "paddsb": paddsb, + "paddsw": paddsw, + + "smsw": smsw, + "maskmovq": maskmovq, + "maskmovdqu": maskmovq, + "emms": emms, + "shufps": shufps, + "shufpd": shufpd, + "movmskps": movmskps, + "movmskpd": movmskpd, + "stmxcsr": stmxcsr, + "ldmxcsr": ldmxcsr, + "endbr64": endbr64, + "endbr32": endbr32, + } + + +class ir_x86_16(IntermediateRepresentation): + + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 16, loc_db) + self.do_stk_segm = False + self.do_ds_segm = False + self.do_str_segm = False + self.do_all_segm = False + self.pc = IP + self.sp = SP + self.IRDst = m2_expr.ExprId('IRDst', 16) + # Size of memory pointer access in IR + # 16 bit mode memory accesses may be greater than 16 bits + # 32 bit size may be enough + self.addrsize = 32 + + def mod_pc(self, instr, instr_ir, extra_ir): + pass + + def 
    def get_ir(self, instr):
        """Translate @instr into its IR

        The semantic function registered in mnemo_func for the lowercased
        mnemonic is called with the (pre-processed) operands. When the
        instruction belongs to repeat_mn and carries a repeat prefix
        (g1 bits masked by 6), the result is wrapped in a loop driven by the
        counter register.

        @instr: instruction to translate
        Returns a (list of assignments, list of extra IRBlock) couple.
        Raises NotImplementedError if the mnemonic is not in mnemo_func.
        """
        # Operand pre-processing: work on a copy, apply the float_replace
        # substitution then fix_mem_args_size
        args = instr.args[:]
        args = [arg.replace_expr(float_replace) for arg in args]
        args = fix_mem_args_size(instr, *args)
        # Segment selector to apply to memory operands, if any:
        # DS when do_ds_segm is set, overridden by the g2 prefix value when
        # do_all_segm is set
        my_ss = None
        if self.do_ds_segm:
            my_ss = DS
        if self.do_all_segm and instr.additional_info.g2.value:
            my_ss = {1: CS, 2: SS, 3: DS, 4: ES, 5: FS, 6: GS}[
                instr.additional_info.g2.value]
        if my_ss is not None:
            # Only memory operands which are not already segmented are
            # rewritten
            for i, a in enumerate(args):
                if a.is_mem() and not a.is_mem_segm():
                    args[i] = self.ExprMem(m2_expr.ExprOp('segm', my_ss,
                                                          a.ptr), a.size)

        if not instr.name.lower() in mnemo_func:
            raise NotImplementedError(
                "Mnemonic %s not implemented" % instr.name)

        instr_ir, extra_ir = mnemo_func[
            instr.name.lower()](self, instr, *args)
        self.mod_pc(instr, instr_ir, extra_ir)
        instr.additional_info.except_on_instr = False
        # No repeat prefix, or mnemonic which is not repeatable: done
        if instr.additional_info.g1.value & 6 == 0 or \
           not instr.name in repeat_mn:
            return instr_ir, extra_ir
        # NOTE(review): presumably the two-operand MOVSD is the SSE move and
        # not the string instruction, hence no loop -- confirm
        if instr.name == "MOVSD" and len(instr.args) == 2:
            return instr_ir, extra_ir

        instr.additional_info.except_on_instr = True
        # Loop counter: mRCX entry of the current mode, sliced to the
        # address size
        admode = instr.v_admode()
        c_reg = mRCX[instr.mode][:admode]

        zf_val = None
        # set if zf is tested (cmps, scas)
        for e in instr_ir:  # +[updt_c]:
            if e.dst == zf:
                zf_val = e.src

        # 1-bit condition built from (counter - 1)
        # NOTE(review): assumes ExprCond(cond, a, b) yields a when cond is
        # non-zero, i.e. cond_dec is 1 once the counter reaches 1 -- confirm
        cond_dec = m2_expr.ExprCond(c_reg - m2_expr.ExprInt(1, c_reg.size),
                                    m2_expr.ExprInt(0, 1), m2_expr.ExprInt(1, 1))
        # end condition
        if zf_val is None:
            c_cond = cond_dec
        elif instr.additional_info.g1.value & 2:  # REPNE and REPNZ
            c_cond = cond_dec | zf
        elif instr.additional_info.g1.value & 12:  # REPE, REP and REPZ
            c_cond = cond_dec | (zf ^ m2_expr.ExprInt(1, 1))

        # gen while
        # loc_do: block holding the instruction IR
        # loc_end: block updating the counter and testing the end condition
        # loc_skip / loc_next: both are the location following the instruction
        loc_do, loc_do_expr = self.gen_loc_key_and_expr(self.IRDst.size)
        loc_end, loc_end_expr = self.gen_loc_key_and_expr(self.IRDst.size)
        loc_skip = self.get_next_loc_key(instr)
        loc_skip_expr = m2_expr.ExprLoc(loc_skip, self.IRDst.size)
        loc_next = self.get_next_loc_key(instr)
        loc_next_expr = m2_expr.ExprLoc(loc_next, self.IRDst.size)

        # In the extra blocks, sources which targeted the next instruction
        # are redirected to the end-condition block
        fix_next_loc = {loc_next_expr: loc_end_expr}
        new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_loc))
                        for irblock in extra_ir]

        # End-condition block: decrement the counter, then either leave the
        # loop (loc_skip) or run the body again (loc_do)
        cond_bloc = []
        cond_bloc.append(m2_expr.ExprAssign(c_reg,
                                            c_reg - m2_expr.ExprInt(1,
                                                                    c_reg.size)))
        cond_bloc.append(m2_expr.ExprAssign(self.IRDst, m2_expr.ExprCond(c_cond,
                                                                         loc_skip_expr,
                                                                         loc_do_expr)))
        cond_bloc = IRBlock(loc_end, [AssignBlock(cond_bloc, instr)])
        e_do = instr_ir

        c = IRBlock(loc_do, [AssignBlock(e_do, instr)])
        # Entry: the body is only reached when the counter is not null
        e_n = [m2_expr.ExprAssign(self.IRDst, m2_expr.ExprCond(c_reg, loc_do_expr,
                                                               loc_skip_expr))]
        return e_n, [cond_bloc, c] + new_extra_ir
class AstNode(object):
    """
    Base class of the assembly AST nodes

    Arithmetic and bitwise operators are overloaded: combining nodes builds
    the matching AstOp node instead of computing a value.
    """
    def __neg__(self):
        # An integer literal is folded into a new literal; any other node is
        # wrapped in a unary '-' operation
        if isinstance(self, AstInt):
            value = AstInt(-self.value)
        else:
            value = AstOp('-', self)
        return value

    def __add__(self, other):
        return AstOp('+', self, other)

    def __sub__(self, other):
        return AstOp('-', self, other)

    def __div__(self, other):
        return AstOp('/', self, other)

    # Python 3 (and Python 2 with "from __future__ import division") resolves
    # the '/' operator through __truediv__ and never calls __div__: alias it
    # so that a division builds the same node on both interpreters
    __truediv__ = __div__

    def __mod__(self, other):
        return AstOp('%', self, other)

    def __mul__(self, other):
        return AstOp('*', self, other)

    def __lshift__(self, other):
        return AstOp('<<', self, other)

    def __rshift__(self, other):
        return AstOp('>>', self, other)

    def __xor__(self, other):
        return AstOp('^', self, other)

    def __or__(self, other):
        return AstOp('|', self, other)

    def __and__(self, other):
        return AstOp('&', self, other)
class asm_raw(AsmRaw):
    """[DEPRECATED API] use 'AsmRaw' instead"""

    def __init__(self, raw=b""):
        warnings.warn('DEPRECATION WARNING: use "AsmRaw" instead of "asm_raw"')
        # super() has to name the enclosing class: the previous reference to
        # 'asm_label' made every instantiation fail
        super(asm_raw, self).__init__(raw)
class asm_constraint_next(AsmConstraint):
    """[DEPRECATED API] use 'AsmConstraintNext' instead"""

    def __init__(self, loc_key):
        warnings.warn('DEPRECATION WARNING: use "AsmConstraintNext" instead of "asm_constraint_next"')
        # The constraint type must be given explicitly: AsmConstraint
        # defaults to c_to, which silently turned this "next" constraint into
        # a "to" one
        super(asm_constraint_next, self).__init__(
            loc_key,
            c_t=AsmConstraint.c_next
        )
") + else: + lbls.append(dst.to_string(loc_db) + " ") + lbls = '\t'.join(sorted(lbls)) + out.append(lbls) + return '\n'.join(out) + + def __str__(self): + return self.to_string() + + def addline(self, l): + self.lines.append(l) + + def addto(self, c): + assert isinstance(self.bto, set) + self.bto.add(c) + + def split(self, loc_db, offset): + loc_key = loc_db.get_or_create_offset_location(offset) + log_asmblock.debug('split at %x', offset) + offsets = [x.offset for x in self.lines] + offset = loc_db.get_location_offset(loc_key) + if offset not in offsets: + log_asmblock.warning( + 'cannot split bloc at %X ' % offset + + 'middle instruction? default middle') + offsets.sort() + return None + new_bloc = AsmBlock(loc_key) + i = offsets.index(offset) + + self.lines, new_bloc.lines = self.lines[:i], self.lines[i:] + flow_mod_instr = self.get_flow_instr() + log_asmblock.debug('flow mod %r', flow_mod_instr) + c = AsmConstraint(loc_key, AsmConstraint.c_next) + # move dst if flowgraph modifier was in original bloc + # (usecase: split delayslot bloc) + if flow_mod_instr: + for xx in self.bto: + log_asmblock.debug('lbl %s', xx) + c_next = set( + x for x in self.bto if x.c_t == AsmConstraint.c_next + ) + c_to = [x for x in self.bto if x.c_t != AsmConstraint.c_next] + self.bto = set([c] + c_to) + new_bloc.bto = c_next + else: + new_bloc.bto = self.bto + self.bto = set([c]) + return new_bloc + + def get_range(self): + """Returns the offset hull of an AsmBlock""" + if len(self.lines): + return (self.lines[0].offset, + self.lines[-1].offset + self.lines[-1].l) + else: + return 0, 0 + + def get_offsets(self): + return [x.offset for x in self.lines] + + def add_cst(self, loc_key, constraint_type): + """ + Add constraint between current block and block at @loc_key + @loc_key: LocKey instance of constraint target + @constraint_type: AsmConstraint c_to/c_next + """ + assert isinstance(loc_key, LocKey) + c = AsmConstraint(loc_key, constraint_type) + self.bto.add(c) + + def 
class asm_bloc(AsmBlock):
    """[DEPRECATED API] use 'AsmBlock' instead"""

    def __init__(self, loc_key, alignment=1):
        warnings.warn('DEPRECATION WARNING: use "AsmBlock" instead of "asm_bloc"')
        # The alias has to derive from AsmBlock: with 'object' as base class,
        # this call reached object.__init__, which rejects the extra
        # arguments, and no block was ever built
        super(asm_bloc, self).__init__(loc_key, alignment)
class asm_block_bad(AsmBlockBad):
    """[DEPRECATED API] use 'AsmBlockBad' instead

    @loc_key, @alignment, @errno: same as AsmBlockBad.__init__
    """

    def __init__(self, loc_key=None, alignment=1, errno=-1, *args, **kwargs):
        warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"')
        # Forward @errno as well: it used to be dropped here, so the error
        # type requested by the caller was lost
        super(asm_block_bad, self).__init__(loc_key, alignment, errno, *args, **kwargs)
"""Directed graph standing for a ASM Control Flow Graph with: + - nodes: AsmBlock + - edges: constraints between blocks, synchronized with AsmBlock's "bto" + + Specialized the .dot export and force the relation between block to be uniq, + and associated with a constraint. + + Offer helpers on AsmCFG management, such as research by loc_key, sanity + checking and mnemonic size guessing. + """ + + # Internal structure for pending management + AsmCFGPending = namedtuple("AsmCFGPending", + ["waiter", "constraint"]) + + def __init__(self, loc_db=None, *args, **kwargs): + super(AsmCFG, self).__init__(*args, **kwargs) + # Edges -> constraint + self.edges2constraint = {} + # Expected LocKey -> set( (src, dst), constraint ) + self._pendings = {} + # Loc_Key2block built on the fly + self._loc_key_to_block = {} + # loc_db + self.loc_db = loc_db + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.loc_db) + return graph + self + + + # Compatibility with old list API + def append(self, *args, **kwargs): + raise DeprecationWarning("AsmCFG is a graph, use add_node") + + def remove(self, *args, **kwargs): + raise DeprecationWarning("AsmCFG is a graph, use del_node") + + def __getitem__(self, *args, **kwargs): + raise DeprecationWarning("Order of AsmCFG elements is not reliable") + + def __contains__(self, _): + """ + DEPRECATED. Use: + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") + + def __iter__(self): + """ + DEPRECATED. 
Use: + - AsmCFG.blocks() to iter on blocks + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") + + def __len__(self): + """Return the number of blocks in AsmCFG""" + return len(self._nodes) + + @property + def blocks(self): + return viewvalues(self._loc_key_to_block) + + # Manage graph with associated constraints + def add_edge(self, src, dst, constraint): + """Add an edge to the graph + @src: LocKey instance, source + @dst: LocKey instance, destination + @constraint: constraint associated to this edge + """ + # Sanity check + assert isinstance(src, LocKey) + assert isinstance(dst, LocKey) + known_cst = self.edges2constraint.get((src, dst), None) + if known_cst is not None: + assert known_cst == constraint + return + + # Add the edge to src.bto if needed + block_src = self.loc_key_to_block(src) + if block_src: + if dst not in [cons.loc_key for cons in block_src.bto]: + block_src.bto.add(AsmConstraint(dst, constraint)) + + # Add edge + self.edges2constraint[(src, dst)] = constraint + super(AsmCFG, self).add_edge(src, dst) + + def add_uniq_edge(self, src, dst, constraint): + """ + Synonym for `add_edge` + """ + self.add_edge(src, dst, constraint) + + def del_edge(self, src, dst): + """Delete the edge @src->@dst and its associated constraint""" + src_blk = self.loc_key_to_block(src) + dst_blk = self.loc_key_to_block(dst) + assert src_blk is not None + assert dst_blk is not None + # Delete from src.bto + to_remove = [cons for cons in src_blk.bto if cons.loc_key == dst] + if to_remove: + assert len(to_remove) == 1 + src_blk.bto.remove(to_remove[0]) + + # Del edge + del self.edges2constraint[(src, dst)] + super(AsmCFG, self).del_edge(src, dst) + + def del_block(self, block): + super(AsmCFG, self).del_node(block.loc_key) + del self._loc_key_to_block[block.loc_key] + + + def add_node(self, node): + assert isinstance(node, LocKey) + return super(AsmCFG, self).add_node(node) + + def add_block(self, block): + """ + Add the block 
def merge(self, graph):
    """Import the content of @graph into this instance, keeping the
    constraints attached to its edges
    @graph: graph to merge into the current one
    """
    # Blocks go first
    for blk in graph.blocks:
        self.add_block(blk)
    # Then the nodes which are not linked to a block
    for loc_key in graph.nodes():
        self.add_node(loc_key)
    # Finally the edges, each one with its constraint
    for src, dst in graph._edges:
        # May fail if there is an incompatibility in edges constraints
        # between the two graphs
        self.add_edge(src, dst, constraint=graph.edges2constraint[(src, dst)])
def rebuild_edges(self):
    """Consider blocks '.bto' and rebuild edges according to them, ie:
    - update constraint type
    - add missing edge
    - remove no more used edge

    This method should be called if a block's '.bto' in nodes have been
    modified without notifying this instance to resynchronize edges.
    """
    for block in self.blocks:
        # Edges expected for this block; a set, as it is only used for
        # membership tests
        expected_edges = set()

        # Rebuild edges from bto
        for constraint in block.bto:
            dst = self._loc_key_to_block.get(constraint.loc_key, None)
            if dst is None:
                # Missing destination, add to pendings
                pending = self.AsmCFGPending(block, constraint.c_t)
                self._pendings.setdefault(
                    constraint.loc_key, set()
                ).add(pending)
                continue
            edge = (block.loc_key, dst.loc_key)
            expected_edges.add(edge)
            if edge in self._edges:
                # Already known edge, constraint may have changed
                self.edges2constraint[edge] = constraint.c_t
            else:
                # An edge is missing
                self.add_edge(edge[0], edge[1], constraint.c_t)

        # Remove useless edges; iterate on a snapshot because del_edge
        # modifies the graph
        for succ in list(self.successors(block.loc_key)):
            edge = (block.loc_key, succ)
            if edge not in expected_edges:
                self.del_edge(*edge)
def getby_offset(self, offset):
    """Return the asmblock containing @offset, None if there is no such
    block
    @offset: offset to look for

    A block contains @offset if it lies between the offset of its first
    line (included) and the end of its last line (excluded).
    """
    for block in self.blocks:
        lines = block.lines
        if not lines:
            # A block without any line covers no offset; indexing its
            # lines would raise an IndexError
            continue
        first, last = lines[0], lines[-1]
        if first.offset <= offset < last.offset + last.l:
            return block
    return None
def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs):
    """Consider @self' bto destinations and split block in @self if one of
    these destinations jumps in the middle of this block.
    In order to work, there must be only one block in @self per loc_key in
    @loc_db (which is true if @self come from the same disasmEngine).

    @loc_db: LocationDB instance associated with @self'loc_keys
    @dis_block_callback: (optional) if set, this callback will be called on
    new block destinations
    @kwargs: (optional) named arguments to pass to dis_block_callback
    """
    # Get all possible destinations not yet resolved, with a resolved
    # offset
    block_dst = []
    for loc_key in self.pendings:
        offset = loc_db.get_location_offset(loc_key)
        if offset is not None:
            block_dst.append(offset)

    todo = set(self.blocks)
    rebuild_needed = False

    while todo:
        # Find a block with a destination inside another one
        cur_block = todo.pop()
        range_start, range_stop = cur_block.get_range()

        for off in block_dst:
            # Only a destination strictly inside the block needs a split
            if not (range_start < off < range_stop):
                continue

            # `cur_block` must be split at offset `off`
            new_b = cur_block.split(loc_db, off)
            log_asmblock.debug("Split block %x", off)
            if new_b is None:
                log_asmblock.error("Cannot split %x!!", off)
                continue

            # Remove pending from cur_block
            # Links from new_b will be generated in rebuild_edges
            for dst in new_b.bto:
                if dst.loc_key not in self.pendings:
                    continue
                self.pendings[dst.loc_key] = set(
                    pending for pending in self.pendings[dst.loc_key]
                    if pending.waiter != cur_block
                )

            # The new block destinations may need to be disassembled
            if dis_block_callback:
                # Use the @loc_db argument everywhere: the instance's own
                # loc_db is optional and may be None
                offsets_to_dis = set(
                    loc_db.get_location_offset(constraint.loc_key)
                    for constraint in new_b.bto
                )
                dis_block_callback(cur_bloc=new_b,
                                   offsets_to_dis=offsets_to_dis,
                                   loc_db=loc_db, **kwargs)

            # Update structure
            rebuild_needed = True
            self.add_block(new_b)

            # The new block must be considered
            todo.add(new_b)
            # cur_block has been shortened by the split
            range_start, range_stop = cur_block.get_range()

    # Rebuild edges to match new blocks'bto
    if rebuild_needed:
        self.rebuild_edges()
def _merge_blocks(dg, graph):
    """Graph simplification merging AsmBlock with one and only one son with this
    son if this son has one and only one parent

    @dg: first argument given to the pass, unused here
    @graph: graph to simplify, modified in place
    """

    # Loc_keys of the blocks removed from the graph during this pass
    removed = set()

    for match in _expgraph.match(graph):

        # Get matching loc_keys
        lbl_block, lbl_succ = match[_parent], match[_son]

        # Ignore already deleted blocks. The test is done on loc_keys, which
        # is what `removed` stores, and before resolving the blocks: a
        # deleted loc_key has no block in the graph anymore
        if lbl_block in removed or lbl_succ in removed:
            continue

        block = graph.loc_key_to_block(lbl_block)
        succ = graph.loc_key_to_block(lbl_succ)

        # Remove block last instruction if needed
        last_instr = block.lines[-1]
        if last_instr.delayslot > 0:
            # TODO: delayslot
            raise RuntimeError("Not implemented yet")

        if last_instr.is_subcall():
            continue
        if last_instr.breakflow() and last_instr.dstflow():
            block.lines.pop()

        # Merge block: the parent takes the lines and the outgoing edges of
        # its son
        block.lines += succ.lines
        for nextb in graph.successors_iter(lbl_succ):
            graph.add_edge(lbl_block, nextb, graph.edges2constraint[(lbl_succ, nextb)])

        graph.del_block(succ)
        removed.add(lbl_succ)
class BlockChain(object):

    """Manage blocks linked with an asm_constraint_next"""

    def __init__(self, loc_db, blocks):
        """
        @loc_db: location database used to read and fix blocks' offsets
        @blocks: list of blocks of the chain, in placement order
        """
        self.loc_db = loc_db
        self.blocks = blocks
        self.place()

    @property
    def pinned(self):
        """Return True iff at least one block is pinned"""
        return self.pinned_block_idx is not None

    def _set_pinned_block_idx(self):
        """Set pinned_block_idx to the index of the only block with a known
        offset, None if there is no such block.
        Raise a ValueError if several blocks have a known offset."""
        self.pinned_block_idx = None
        for i, block in enumerate(self.blocks):
            loc_key = block.loc_key
            if self.loc_db.get_location_offset(loc_key) is not None:
                if self.pinned_block_idx is not None:
                    raise ValueError("Multiples pinned block detected")
                self.pinned_block_idx = i

    @staticmethod
    def _padded_size(block):
        """Return @block max_size rounded up to a multiple of its alignment"""
        return block.max_size + \
            (block.alignment - block.max_size) % block.alignment

    def place(self):
        """Compute BlockChain min_offset and max_offset using pinned block and
        blocks' size
        """
        self._set_pinned_block_idx()
        self.max_size = 0
        for block in self.blocks:
            self.max_size += block.max_size + block.alignment - 1

        # Check if chain has one block pinned
        if not self.pinned:
            return

        pinned_idx = self.pinned_block_idx
        pinned_block = self.blocks[pinned_idx]
        offset_base = self.loc_db.get_location_offset(pinned_block.loc_key)
        assert(offset_base % pinned_block.alignment == 0)

        # Blocks placed before the pinned one lower the minimum offset.
        # A `[:idx - 1:-1]` slice must not be used here: it selects the
        # blocks from the pinned one to the end of the chain
        self.offset_min = offset_base
        for block in self.blocks[:pinned_idx]:
            self.offset_min -= self._padded_size(block)

        # The pinned block and its followers raise the maximum offset
        self.offset_max = offset_base
        for block in self.blocks[pinned_idx:]:
            self.offset_max += self._padded_size(block)

    def merge(self, chain):
        """Best effort merge two block chains
        Return the list of resulting blockchains"""
        self.blocks += chain.blocks
        self.place()
        return [self]

    def fix_blocks(self, modified_loc_keys):
        """Propagate a pinned to its blocks' neighbour
        @modified_loc_keys: store new pinned loc_keys"""

        if not self.pinned:
            raise ValueError('Trying to fix unpinned block')

        pinned_idx = self.pinned_block_idx
        pinned_block = self.blocks[pinned_idx]
        pinned_offset = self.loc_db.get_location_offset(pinned_block.loc_key)
        if pinned_offset % pinned_block.alignment != 0:
            raise RuntimeError('Bad alignment')

        # Propagate offset to blocks before pinned block, from the nearest
        # to the farthest; each block is placed relatively to the one which
        # follows it
        offset = pinned_offset
        for block in reversed(self.blocks[:pinned_idx]):
            new_offset = offset - block.size
            new_offset = new_offset - new_offset % pinned_block.alignment
            fix_loc_offset(self.loc_db,
                           block.loc_key,
                           new_offset,
                           modified_loc_keys)
            offset = new_offset

        # Propagate offset to blocks after pinned block
        offset = pinned_offset + pinned_block.size

        last_block = pinned_block
        for block in self.blocks[pinned_idx + 1:]:
            offset += (- offset) % last_block.alignment
            fix_loc_offset(self.loc_db,
                           block.loc_key,
                           offset,
                           modified_loc_keys)
            offset += block.size
            last_block = block
        return modified_loc_keys
def group_constrained_blocks(loc_db, asmcfg):
    """
    Build the list of BlockChains of @asmcfg: blocks linked by an
    asm_constraint_next are grouped in the same chain
    @loc_db: location database given to each created BlockChain
    @asmcfg: an AsmCfg instance
    """
    log_asmblock.info('group_constrained_blocks')

    # Blocks not yet attached to a chain
    pending = list(asmcfg.blocks)
    # Loc_key of the first block of a chain -> blocks of this chain
    chains_by_head = {}

    while pending:
        # Start a new chain
        chain_blocks = [pending.pop()]

        # Follow the next constraints while the son is still pending
        while True:
            next_loc_key = chain_blocks[-1].get_next()
            if next_loc_key is None:
                break
            candidate = asmcfg.loc_key_to_block(next_loc_key)
            if candidate is None or candidate not in pending:
                break
            chain_blocks.append(candidate)
            pending.remove(candidate)

        # The son may be the head of an already built chain: absorb it
        if next_loc_key is not None and next_loc_key in chains_by_head:
            chain_blocks += chains_by_head.pop(next_loc_key)

        chains_by_head[chain_blocks[0].loc_key] = chain_blocks

    return [BlockChain(loc_db, blocks) for blocks in chains_by_head.values()]
def get_block_loc_keys(block):
    """Return the set of locations referenced by the lines of @block
    @block: block whose lines are inspected
    """
    found = set()
    for line in block.lines:
        if not isinstance(line, AsmRaw):
            # Regular instruction: look into each argument
            for operand in line.args:
                found.update(get_expr_locs(operand))
            continue
        # Raw data: only a list of expressions can reference locations
        if isinstance(line.raw, list):
            for raw_expr in line.raw:
                found.update(get_expr_locs(raw_expr))
    return found
assemble_block(mnemo, block, loc_db, conservative=False): + """Assemble a @block using @loc_db + @conservative: (optional) use original bytes when possible + """ + offset_i = 0 + + for instr in block.lines: + if isinstance(instr, AsmRaw): + if isinstance(instr.raw, list): + # Fix special AsmRaw + data = b"" + for expr in instr.raw: + expr_int = fix_expr_val(expr, loc_db) + data += pck[expr_int.size](expr_int.arg) + instr.data = data + + instr.offset = offset_i + offset_i += instr.l + continue + + # Assemble an instruction + saved_args = list(instr.args) + instr.offset = loc_db.get_location_offset(block.loc_key) + offset_i + + # Replace instruction's arguments by resolved ones + instr.args = instr.resolve_args_with_symbols(loc_db) + + if instr.dstflow(): + instr.fixDstOffset() + + old_l = instr.l + cached_candidate, _ = conservative_asm(mnemo, instr, loc_db, + conservative) + + # Restore original arguments + instr.args = saved_args + + # We need to update the block size + block.size = block.size - old_l + len(cached_candidate) + instr.data = cached_candidate + instr.l = len(cached_candidate) + + offset_i += instr.l + + +def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is + reached""" + + log_asmblock.debug("asmbloc_final") + + # Init structures + blocks_using_loc_key = {} + for block in asmcfg.blocks: + exprlocs = get_block_loc_keys(block) + loc_keys = set(expr.loc_key for expr in exprlocs) + for loc_key in loc_keys: + blocks_using_loc_key.setdefault(loc_key, set()).add(block) + + block2chain = {} + for chain in blockChains: + for block in chain.blocks: + block2chain[block] = chain + + # Init worklist + blocks_to_rework = set(asmcfg.blocks) + + # Fix and re-assemble blocks until fixed point is reached + while True: + + # Propagate pinned blocks into chains + modified_loc_keys = set() + for chain in blockChains: + chain.fix_blocks(modified_loc_keys) + + for loc_key in 
def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None):
    """Resolve and assemble @asmcfg using @loc_db into interval
    @dst_interval
    @mnemo: mnemonic class used to assemble the instructions
    @asmcfg: graph to assemble; it must pass its sanity_check
    @loc_db: location database, updated with the resolved offsets
    @dst_interval: (optional) interval in which blocks are placed

    Return a dictionary offset -> assembled bytes.
    Raise a RuntimeError if two assembled lines use the same bytes.
    """

    asmcfg.sanity_check()

    asmcfg.guess_blocks_size(mnemo)
    blockChains = group_constrained_blocks(loc_db, asmcfg)
    resolved_blockChains = resolve_symbol(
        blockChains,
        loc_db,
        dst_interval
    )

    asmblock_final(mnemo, asmcfg, resolved_blockChains, loc_db)
    patches = {}
    # Bytes already emitted, used to detect overlapping lines
    output_interval = interval()

    for block in asmcfg.blocks:
        offset = loc_db.get_location_offset(block.loc_key)
        for instr in block.lines:
            if not instr.data:
                # Empty line
                continue
            assert len(instr.data) == instr.l
            patches[offset] = instr.data
            instruction_interval = interval([(offset, offset + instr.l - 1)])
            if not (instruction_interval & output_interval).empty:
                raise RuntimeError("overlapping bytes %X" % int(offset))
            # Remember these bytes; without this the check above would
            # always compare against an empty interval
            output_interval += instruction_interval
            instr.offset = offset
            offset += instr.l
    return patches
+ - dont_dis: stop the current disassembly branch if reached + - split_dis: force a basic block end if reached, + with a next constraint on its successor + - dont_dis_retcall_funcs: stop disassembly after a call to one + of the given functions + + + On/Off + - follow_call: recursively disassemble CALL destinations + - dontdis_retcall: stop on CALL return addresses + - dont_dis_nulstart_bloc: stop if a block begin with a few \x00 + + + Number + - lines_wd: maximum block's size (in number of instruction) + - blocs_wd: maximum number of distinct disassembled block + + + callback(arch, attrib, pool_bin, cur_bloc, offsets_to_dis, + loc_db) + - dis_block_callback: callback after each new disassembled block + """ + + def __init__(self, arch, attrib, bin_stream, **kwargs): + """Instantiate a new disassembly engine + @arch: targeted architecture + @attrib: architecture attribute + @bin_stream: bytes source + @kwargs: (optional) custom options + """ + self.arch = arch + self.attrib = attrib + self.bin_stream = bin_stream + self.loc_db = LocationDB() + + # Setup options + self.dont_dis = [] + self.split_dis = [] + self.follow_call = False + self.dontdis_retcall = False + self.lines_wd = None + self.blocs_wd = None + self.dis_block_callback = None + self.dont_dis_nulstart_bloc = False + self.dont_dis_retcall_funcs = set() + + # Override options if needed + self.__dict__.update(kwargs) + + def get_job_done(self): + warnings.warn("""DEPRECATION WARNING: "job_done" is not needed anymore, support is dropped.""") + return set() + + def set_job_done(self, _): + warnings.warn("""DEPRECATION WARNING: "job_done" is not needed anymore, support is dropped.""") + return + + def get_dis_bloc_callback(self): + warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") + return self.dis_block_callback + + def set_dis_bloc_callback(self, function): + warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") + self.dis_block_callback = 
function + + @property + def symbol_pool(self): + warnings.warn("""DEPRECATION WARNING: use 'loc_db'""") + return self.loc_db + + # Deprecated + job_done = property(get_job_done, set_job_done) + dis_bloc_callback = property(get_dis_bloc_callback, set_dis_bloc_callback) + + def _dis_block(self, offset, job_done=None): + """Disassemble the block at offset @offset + @job_done: a set of already disassembled addresses + Return the created AsmBlock and future offsets to disassemble + """ + + if job_done is None: + job_done = set() + lines_cpt = 0 + in_delayslot = False + delayslot_count = self.arch.delayslot + offsets_to_dis = set() + add_next_offset = False + loc_key = self.loc_db.get_or_create_offset_location(offset) + cur_block = AsmBlock(loc_key) + log_asmblock.debug("dis at %X", int(offset)) + while not in_delayslot or delayslot_count > 0: + if in_delayslot: + delayslot_count -= 1 + + if offset in self.dont_dis: + if not cur_block.lines: + job_done.add(offset) + # Block is empty -> bad block + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_FORBIDDEN) + else: + # Block is not empty, stop the desassembly pass and add a + # constraint to the next block + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) + break + + if lines_cpt > 0 and offset in self.split_dis: + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) + offsets_to_dis.add(offset) + break + + lines_cpt += 1 + if self.lines_wd is not None and lines_cpt > self.lines_wd: + log_asmblock.debug("lines watchdog reached at %X", int(offset)) + break + + if offset in job_done: + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) + break + + off_i = offset + error = None + try: + instr = self.arch.dis(self.bin_stream, self.attrib, offset) + except Disasm_Exception as e: + log_asmblock.warning(e) + instr = 
None + error = AsmBlockBad.ERROR_CANNOT_DISASM + except IOError as e: + log_asmblock.warning(e) + instr = None + error = AsmBlockBad.ERROR_IO + + + if instr is None: + log_asmblock.warning("cannot disasm at %X", int(off_i)) + if not cur_block.lines: + job_done.add(offset) + # Block is empty -> bad block + cur_block = AsmBlockBad(loc_key, errno=error) + else: + # Block is not empty, stop the desassembly pass and add a + # constraint to the next block + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) + cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) + break + + # XXX TODO nul start block option + if (self.dont_dis_nulstart_bloc and + not cur_block.lines and + instr.b.count(b'\x00') == instr.l): + log_asmblock.warning("reach nul instr at %X", int(off_i)) + # Block is empty -> bad block + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) + break + + # special case: flow graph modificator in delayslot + if in_delayslot and instr and (instr.splitflow() or instr.breakflow()): + add_next_offset = True + break + + job_done.add(offset) + log_asmblock.debug("dis at %X", int(offset)) + + offset += instr.l + log_asmblock.debug(instr) + log_asmblock.debug(instr.args) + + cur_block.addline(instr) + if not instr.breakflow(): + continue + # test split + if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): + add_next_offset = True + if instr.dstflow(): + instr.dstflow2label(self.loc_db) + destinations = instr.getdstflow(self.loc_db) + known_dsts = [] + for dst in destinations: + if not dst.is_loc(): + continue + loc_key = dst.loc_key + loc_key_offset = self.loc_db.get_location_offset(loc_key) + known_dsts.append(loc_key) + if loc_key_offset in self.dont_dis_retcall_funcs: + add_next_offset = False + if (not instr.is_subcall()) or self.follow_call: + cur_block.bto.update([AsmConstraint(loc_key, AsmConstraint.c_to) for loc_key in known_dsts]) + + # get in delayslot mode + in_delayslot = True + delayslot_count = 
def dis_multiblock(self, offset, blocks=None):
    """Disassemble every block reachable from @offset regarding
    specific disasmEngine conditions
    Return an AsmCFG instance containing disassembled blocks
    @offset: starting offset
    @blocks: (optional) AsmCFG instance of already disassembled blocks to
             merge with
    """
    log_asmblock.info("dis bloc all")
    job_done = set()
    if blocks is None:
        blocks = AsmCFG(self.loc_db)
    todo = [offset]

    bloc_cpt = 0
    while todo:
        bloc_cpt += 1
        if self.blocs_wd is not None and bloc_cpt > self.blocs_wd:
            log_asmblock.debug("blocks watchdog reached at %X", int(offset))
            break

        target_offset = todo.pop(0)
        # The None test has to come before the int() conversion: int(None)
        # raises TypeError, which made the original check unreachable
        if target_offset is None:
            continue
        target_offset = int(target_offset)
        if target_offset in job_done:
            continue
        cur_block, nexts = self._dis_block(target_offset, job_done)
        todo += nexts
        blocks.add_block(cur_block)

    blocks.apply_splitting(self.loc_db,
                           dis_block_callback=self.dis_block_callback,
                           mn=self.arch, attrib=self.attrib,
                           pool_bin=self.bin_stream)
    return blocks
class bin_stream(object):
    """Base class of byte sources.

    The default `_getbytes` slices `self.bin`; `getbits` additionally relies
    on a `getlen` method. Neither `self.bin` nor `getlen` is defined here, so
    subclasses are expected to provide them.
    On top of that, the class offers an optional read cache ("atomic mode"),
    bit extraction and fixed-width unsigned integer readers.
    """

    # Cache must be initialized by entering atomic mode
    _cache = None
    # Not referenced by any method of this class
    CACHE_SIZE = 10000
    # By default, no atomic mode
    _atomic_mode = False

    def __init__(self, *args, **kargs):
        self.endianness = LITTLE_ENDIAN

    def __repr__(self):
        return "<%s !!>" % self.__class__.__name__

    def __str__(self):
        if PY3:
            return repr(self)
        return self.__bytes__()

    def hexdump(self, offset, l):
        """Placeholder: does nothing and returns None"""
        return

    def enter_atomic_mode(self):
        """Enter atomic mode. In this mode, read may be cached"""
        assert not self._atomic_mode
        self._atomic_mode = True
        self._cache = {}

    def leave_atomic_mode(self):
        """Leave atomic mode"""
        assert self._atomic_mode
        self._atomic_mode = False
        self._cache = None

    def _getbytes(self, start, length):
        """Uncached read of @length bytes starting at @start"""
        return self.bin[start:start + length]

    def getbytes(self, start, l=1):
        """Return the bytes from the bit stream
        @start: starting offset (in byte)
        @l: (optional) number of bytes to read

        Wrapper on _getbytes, with atomic mode handling.
        """
        if self._atomic_mode:
            val = self._cache.get((start, l), None)
            if val is None:
                val = self._getbytes(start, l)
                self._cache[(start, l)] = val
        else:
            val = self._getbytes(start, l)
        return val

    def getbits(self, start, n):
        """Return the bits from the bit stream
        @start: the offset in bits
        @n: number of bits to read

        Raise IOError if more bits are requested than the stream holds, or
        if no byte can be read.
        """
        # Trivial case
        if n == 0:
            return 0

        # Get initial bytes
        available_bits = self.getlen() * 8
        if n > available_bits:
            # Report the size through getlen(): self.bin is not guaranteed
            # to support len() for every stream flavour
            raise IOError('not enough bits %r %r' % (n, available_bits))
        byte_start = start // 8
        byte_stop = (start + n + 7) // 8
        temp = self.getbytes(byte_start, byte_stop - byte_start)
        if not temp:
            raise IOError('cannot get bytes')

        # Init
        start = start % 8
        out = 0
        while n:
            # Get needed bits, working on maximum 8 bits at a time
            cur_byte_idx = start // 8
            new_bits = ord(temp[cur_byte_idx:cur_byte_idx + 1])
            to_keep = 8 - start % 8
            new_bits &= (1 << to_keep) - 1
            cur_len = min(to_keep, n)
            new_bits >>= (to_keep - cur_len)

            # Update output
            out <<= cur_len
            out |= new_bits

            # Update counters
            n -= cur_len
            start += cur_len
        return out

    def get_u8(self, addr, endianness=None):
        """
        Return u8 from address @addr
        endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN
        """
        if endianness is None:
            endianness = self.endianness
        data = self.getbytes(addr, 1)
        # Unpack like the wider readers do, instead of handing back raw bytes
        if endianness == LITTLE_ENDIAN:
            return upck8le(data)
        else:
            return upck8be(data)

    def get_u16(self, addr, endianness=None):
        """
        Return u16 from address @addr
        endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN
        """
        if endianness is None:
            endianness = self.endianness
        data = self.getbytes(addr, 2)
        if endianness == LITTLE_ENDIAN:
            return upck16le(data)
        else:
            # 2 bytes were read: the 16 bit unpacker is the matching one
            return upck16be(data)

    def get_u32(self, addr, endianness=None):
        """
        Return u32 from address @addr
        endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN
        """
        if endianness is None:
            endianness = self.endianness
        data = self.getbytes(addr, 4)
        if endianness == LITTLE_ENDIAN:
            return upck32le(data)
        else:
            return upck32be(data)

    def get_u64(self, addr, endianness=None):
        """
        Return u64 from address @addr
        endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN
        """
        if endianness is None:
            endianness = self.endianness
        data = self.getbytes(addr, 8)
        if endianness == LITTLE_ENDIAN:
            return upck64le(data)
        else:
            return upck64be(data)
class bin_stream_container(bin_stream):
    """Byte source reading through the `virt` attribute of a container.

    The readable size is taken once, at construction, from
    `binary.virt.max_addr()`.
    """

    def __init__(self, binary, offset=0):
        bin_stream.__init__(self)
        self.bin = binary
        self.l = binary.virt.max_addr()
        self.offset = offset

    def is_addr_in(self, ad):
        """Forward the membership question to the container"""
        virtual_view = self.bin.virt
        return virtual_view.is_addr_in(ad)

    def getlen(self):
        """Return the size recorded at construction"""
        return self.l

    def readbs(self, l=1):
        """Read @l bytes at the current offset and advance past them

        Raise IOError if the read would end after the recorded size or if
        the current offset is negative.
        """
        begin = self.offset
        end = begin + l
        if end > self.l:
            raise IOError("not enough bytes")
        if begin < 0:
            raise IOError("Negative offset")
        # The offset moves before the container is queried
        self.offset = end
        return self.bin.virt.get(begin, end)

    def _getbytes(self, start, l=1):
        """Read @l bytes at @start, turning ValueError into IOError"""
        try:
            data = self.bin.virt.get(start, start + l)
        except ValueError:
            raise IOError("cannot get bytes")
        return data

    def __bytes__(self):
        first = self.offset
        return self.bin.virt.get(first, first + self.l)

    def setoffset(self, val):
        """Move the current offset to @val"""
        self.offset = val
class bin_stream_ida(bin_stream_str):
    """
    bin_stream implementation for IDA

    Don't generate xrange using address computation:
    It can raise error on overflow 7FFFFFFF with 32 bit python
    """
    def _getbytes(self, start, l=1):
        """Read @l bytes one at a time through IDA

        Raise IOError as soon as one of the addresses is not mapped.
        """
        out = []
        for ad in range(l):
            offset = ad + start + self.base_address
            if not is_mapped(offset):
                raise IOError("not enough bytes")
            out.append(int_to_byte(Byte(offset)))
        return b''.join(out)

    def readbs(self, l=1):
        """Read @l bytes at the current offset and advance past them

        Raise IOError if the read would end after the recorded length.
        """
        if self.offset + l > self.l:
            raise IOError("not enough bytes")
        # Read the @l requested bytes: the offset advances by @l below, so
        # fetching a single byte would silently skip the others
        content = self.getbytes(self.offset, l)
        self.offset += l
        return content

    def __str__(self):
        raise NotImplementedError('Not fully functional')

    def setoffset(self, val):
        """Move the current offset to @val"""
        self.offset = val

    def getlen(self):
        """Return the value computed on first call (cached in _getlen)"""
        # Lazy version
        if hasattr(self, "_getlen"):
            return self._getlen
        # NOTE(review): the offset correction is subtracted from the address
        # handed to SegEnd, not from its result -- confirm this is intended
        max_addr = SegEnd(list(Segments())[-1] - (self.offset - self.base_address))
        self._getlen = max_addr
        return max_addr
class bitobj(object):
    """Sequence of bits with a read cursor.

    Bits are stored most significant first, as a list of 0/1 integers.
    `getbits` consumes bits at the cursor, `putbits` appends bits at the end
    and `tostring` packs the whole list back into bytes.
    """

    def __init__(self, s=b""):
        """
        @s: (optional) bytes used as initial content; every byte contributes
            exactly 8 bits, leading zero bytes included
        """
        import binascii

        if not s:
            bits = []
        else:
            value = int(binascii.hexlify(s), 16)
            # zfill keeps the leading zero bits bin() would drop
            bits = [int(x) for x in bin(value)[2:].zfill(len(s) * 8)]
        self.bits = bits
        self.offset = 0

    def __len__(self):
        """Number of bits not consumed yet"""
        return len(self.bits) - self.offset

    def getbits(self, n):
        """Consume @n bits and return them as an integer

        Raise ValueError if fewer than @n bits remain.
        """
        if not n:
            return 0
        if n > len(self.bits) - self.offset:
            raise ValueError('not enough bits %r %r' % (n, len(self.bits)))
        b = self.bits[self.offset:self.offset + n]
        b = int("".join(str(x) for x in b), 2)
        self.offset += n
        return b

    def putbits(self, b, n):
        """Append the integer @b, left-padded with zeros up to @n bits"""
        if not n:
            return
        bits = list(bin(b)[2:])
        bits = [int(x) for x in bits]
        bits = [0 for x in range(n - len(bits))] + bits
        self.bits += bits

    def tostring(self):
        """Return every stored bit packed into bytes

        Raise ValueError if the number of bits is not a multiple of 8.
        """
        import binascii

        if len(self.bits) % 8:
            raise ValueError(
                'num bits must be 8 bit aligned: %d' % len(self.bits)
            )
        if not self.bits:
            # int("", 2) is an error: an empty buffer packs to no byte
            return b""
        b = int("".join(str(x) for x in self.bits), 2)
        b = "%X" % b
        b = '0' * (len(self.bits) // 4 - len(b)) + b
        return binascii.unhexlify(b.encode())

    def reset(self):
        """Move the read cursor back to the first bit"""
        self.offset = 0

    def copy_state(self):
        """Return a new object with the same cursor

        The list of bits is shared with the source object, not duplicated.
        """
        b = self.__class__()
        b.bits = self.bits
        b.offset = self.offset
        return b
class reg_info_dct(object):
    """Register table built from a dictionary mapping an index to a
    register expression (each expression exposing a `name`)."""

    def __init__(self, reg_expr):
        index_by_name = {}
        index_by_expr = {}
        for index, expr in viewitems(reg_expr):
            index_by_name[expr.name] = index
            index_by_expr[expr] = index

        self.dct_str_inv = index_by_name
        self.dct_expr = reg_expr
        self.dct_expr_inv = index_by_expr

        names = [expr.name for expr in viewvalues(reg_expr)]
        self.parser = literal_list(names).setParseAction(self.cb_parse)

    def cb_parse(self, tokens):
        """Parse action: wrap the register named by the single token"""
        assert len(tokens) == 1
        index = self.dct_str_inv[tokens[0]]
        return AstId(self.dct_expr[index])

    def reg2expr(self, s):
        """Return the expression of the register named by s[0]"""
        return self.dct_expr[self.dct_str_inv[s[0]]]

    def expr2regi(self, e):
        """Return the index under which expression @e is registered"""
        index = self.dct_expr_inv[e]
        return index
def ast_parse_op(tokens):
    """Fold a flat token sequence into an expression

    A single token is returned untouched, two tokens form a unary
    operation ('-', '+' or '!'), and longer sequences alternate operands and
    binary operators, folded from the left. A subtraction is always
    rewritten as the addition of the negated right operand.
    Raise NotImplementedError if the sequence cannot be fully folded.
    """
    count = len(tokens)
    if count == 1:
        return tokens[0]
    if count == 2 and tokens[0] in ['-', '+', '!']:
        return m2_expr.ExprOp(tokens[0], tokens[1])
    if count == 3:
        if tokens[1] == '-':
            # a - b => a + (-b), rewritten in place in the token sequence
            tokens[1] = '+'
            tokens[2] = - tokens[2]
        return m2_expr.ExprOp(tokens[1], tokens[0], tokens[2])

    # Reversed copy: pop() then yields the tokens in their original order
    pending = tokens[::-1]
    while len(pending) >= 3:
        left = pending.pop()
        operator = pending.pop()
        right = pending.pop()
        if operator == '-':
            # a - b => a + (-b)
            operator = '+'
            right = - right
        pending.append(m2_expr.ExprOp(operator, left, right))
    if len(pending) != 1:
        raise NotImplementedError('strange op')
    return pending[0]
def cb_op_sign(tokens):
    """Parse action of the unary sign operator

    @tokens: a single group holding the operator and its operand

    Return the negated operand for '-', and the operand itself otherwise
    (a unary '+' must not change the value).
    """
    assert len(tokens) == 1
    op, value = tokens[0]
    if op == '-':
        return -value
    return value
def int2bin(i, l):
    """Return the @l low bits of integer @i as a string of '0'/'1'

    @i: integer to convert; a negative value is rendered in two's
        complement on @l bits
    @l: width of the result; a width of 0 (or less) gives an empty string
    """
    if l <= 0:
        # s[-0:] would return the whole digit string instead of nothing
        return ''
    # Masking keeps the low bits and removes the '-' sign bin() would emit
    return format(i & ((1 << l) - 1), 'b').zfill(l)
= int(a) + fbits |= a + fmask |= 1 + lmask = (1 << l) - 1 + # gen conditional field + if cls: + for b in cls: + if 'flen' in b.__dict__: + flen = getattr(b, 'flen') + + self.strbits = strbits + self.l = l + self.cls = cls + self.fname = fname + self.order = order + self.fbits = fbits + self.fmask = fmask + self.flen = flen + self.value = value + self.kargs = kargs + + lmask = property(lambda self:(1 << self.l) - 1) + + def __getitem__(self, item): + return getattr(self, item) + + def __repr__(self): + o = self.__class__.__name__ + if self.fname: + o += "_%s" % self.fname + o += "_%(strbits)s" % self + if self.cls: + o += '_' + '_'.join([x.__name__ for x in self.cls]) + return o + + def gen(self, parent): + c_name = 'nbsi' + if self.cls: + c_name += '_' + '_'.join([x.__name__ for x in self.cls]) + bases = list(self.cls) + else: + bases = [] + # bsi added at end of list + # used to use first function of added class + bases += [bsi] + k = c_name, tuple(bases) + if k in self.all_new_c: + new_c = self.all_new_c[k] + else: + new_c = type(c_name, tuple(bases), {}) + self.all_new_c[k] = new_c + c = new_c(parent, + self.strbits, self.l, self.cls, + self.fname, self.order, self.lmask, self.fbits, + self.fmask, self.value, self.flen, **self.kargs) + return c + + def check_fbits(self, v): + return v & self.fmask == self.fbits + + @classmethod + def flen(cls, v): + raise NotImplementedError('not fully functional') + + +class dum_arg(object): + + def __init__(self, e=None): + self.expr = e + + +class bsopt(bs): + + def ispresent(self): + return True + + +class bsi(object): + + def __init__(self, parent, strbits, l, cls, fname, order, + lmask, fbits, fmask, value, flen, **kargs): + self.parent = parent + self.strbits = strbits + self.l = l + self.cls = cls + self.fname = fname + self.order = order + self.fbits = fbits + self.fmask = fmask + self.flen = flen + self.value = value + self.kargs = kargs + self.__dict__.update(self.kargs) + + lmask = property(lambda self:(1 << self.l) 
class bs_divert(object):
    """Holder of keyword arguments, readable as attributes.

    Every keyword given at construction is stored in `self.args` and can be
    read back as `self.<keyword>`.
    """
    prio = default_prio

    def __init__(self, **kargs):
        self.args = kargs

    def __getattr__(self, item):
        """Fallback lookup in the stored keyword arguments

        Only reached when the regular attribute lookup fails.
        Raise AttributeError (naming @item) if it is not a stored keyword.
        """
        if item in self.__dict__:
            return self.__dict__[item]
        # Fetch `args` without going through __getattr__ again: on an
        # instance whose __init__ has not run (copy, unpickling), `self.args`
        # would re-enter this method without end
        try:
            args = object.__getattribute__(self, 'args')
        except AttributeError:
            raise AttributeError(item)
        if item in args:
            return args.get(item)
        raise AttributeError(item)
class m_arg(object):
    """Base of instruction arguments parsed from assembly text.

    Relies on a `parser` attribute and on `asm_ast_to_expr`, which
    subclasses have to provide.
    """

    def fromstring(self, text, loc_db, parser_result=None):
        """Set self.expr from @text and return the matched (start, stop)

        @text: string to scan
        @loc_db: passed to asm_ast_to_expr
        @parser_result: (optional) mapping from a parser to an already
                        computed (expr, start, stop); when it is non-empty
                        it is used instead of scanning @text

        Return (None, None) if the parser matches nothing.
        """
        if parser_result:
            self.expr, begin, end = parser_result[self.parser]
            return begin, end

        try:
            first_match = next(self.parser.scanString(text))
        except StopIteration:
            return None, None

        parsed, begin, end = first_match
        self.expr = self.asm_ast_to_expr(parsed[0], loc_db)
        return begin, end

    def asm_ast_to_expr(self, arg, loc_db, **kwargs):
        """Convert a parsed argument into an expression (to be overridden)"""
        raise NotImplementedError("Virtual")
def swap16(v):
    """Return the unsigned 16-bit integer *v* with its two bytes swapped.

    The value is packed big-endian and read back little-endian, which
    reverses the byte order.
    """
    # NOTE(review): the scraped text lost everything between '<' and '>';
    # the pack/unpack pair is reconstructed from the leftover tokens.
    return struct.unpack('<H', struct.pack('>H', v))[0]


def swap32(v):
    """Return the unsigned 32-bit integer *v* with its four bytes reversed.

    Same technique as swap16, using the 32-bit ``I`` format.
    """
    return struct.unpack('<I', struct.pack('>I', v))[0]
def getfieldby_name(fields, fname):
    """Return the unique element of *fields* whose ``fname`` equals *fname*.

    Elements without an ``fname`` attribute are ignored.

    @fields: iterable of field objects
    @fname: field name to look for
    Raises ValueError when no field, or more than one field, matches.
    """
    matches = [x for x in fields if hasattr(x, 'fname') and x.fname == fname]
    if not matches:
        # Previously reported as "more than one field", which was misleading.
        raise ValueError('no field with name: %s' % fname)
    if len(matches) > 1:
        raise ValueError('more than one field with name: %s' % fname)
    return matches[0]
if name == "cls_mn" or name.startswith('mn_'): + return type.__new__(mcs, name, bases, dct) + alias = dct.get('alias', False) + + fields = bases[0].mod_fields(dct['fields']) + if not 'name' in dct: + dct["name"] = bases[0].getmn(name) + if 'args' in dct: + # special case for permuted arguments + o = [] + p = [] + for i, a in enumerate(dct['args']): + o.append((i, a)) + if a in fields: + p.append((fields.index(a), a)) + p.sort() + p = [x[1] for x in p] + p = [dct['args'].index(x) for x in p] + dct['args_permut'] = perm_inv(p) + # order fields + f_ordered = [x for x in enumerate(fields)] + f_ordered.sort(key=lambda x: (x[1].prio, x[0])) + candidates = bases[0].gen_modes(mcs, name, bases, dct, fields) + for i, fc in f_ordered: + if isinstance(fc, bs_divert): + candidates = fc.divert(i, candidates) + for cls, name, bases, dct, fields in candidates: + ndct = dict(dct) + fields = [f for f in fields if f] + ndct['fields'] = fields + ndct['mn_len'] = sum([x.l for x in fields]) + c = type.__new__(cls, name, bases, ndct) + c.alias = alias + c.check_mnemo(fields) + c.num = bases[0].num + bases[0].num += 1 + bases[0].all_mn.append(c) + mode = dct['mode'] + bases[0].all_mn_mode[mode].append(c) + bases[0].all_mn_name[c.name].append(c) + i = c() + i.init_class() + bases[0].all_mn_inst[c].append(i) + add_candidate(bases, c) + # gen byte lookup + o = "" + for f in i.fields_order: + if not isinstance(f, bsi): + raise ValueError('f is not bsi') + if f.l == 0: + continue + o += f.strbits + return c + + +class instruction(object): + __slots__ = ["name", "mode", "args", + "l", "b", "offset", "data", + "additional_info", "delayslot"] + + def __init__(self, name, mode, args, additional_info=None): + self.name = name + self.mode = mode + self.args = args + self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None + + def gen_args(self, args): + out = ', '.join([str(x) for x in args]) + return out + + def __str__(self): + return self.to_string() + + def 
def resolve_args_with_symbols(self, symbols=None):
    """Return a copy of ``self.args`` with location expressions resolved.

    Every location found in an argument is replaced as follows:
    * a location named ``$`` becomes the instruction offset;
    * a location named ``_`` becomes the offset of the next instruction;
    * any other location becomes an ``ExprInt`` of its known offset.
    Each rewritten argument is then simplified with ``expr_simp``.

    @symbols: location database used for the lookups; a fresh
              ``LocationDB`` is created when omitted
    Raises ValueError when a location has neither a name nor an offset,
    or has a name but no offset.
    """
    if symbols is None:
        symbols = LocationDB()
    args_out = []
    for expr in self.args:
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)
            # special symbols
            if b'$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if b'_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue
            arg_int = symbols.get_location_offset(loc_key)
            if arg_int is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(arg_int, exprloc.size)
                continue
            # From here on the offset is unknown. The former code looked the
            # offset up a second time, so its "offset found" branch could
            # never run; only the two error cases remain.
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names
            )

        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
candidates = set() + + fname_values = pre_dis_info + todo = [ + (dict(fname_values), branch, offset * 8) + for branch in list(viewitems(cls.bintree)) + ] + for fname_values, branch, offset_b in todo: + (l, fmask, fbits, fname, flen), vals = branch + + if flen is not None: + l = flen(attrib, fname_values) + if l is not None: + try: + v = cls.getbits(bs, attrib, offset_b, l) + except IOError: + # Raised if offset is out of bound + continue + offset_b += l + if v & fmask != fbits: + continue + if fname is not None and not fname in fname_values: + fname_values[fname] = v + for nb, v in viewitems(vals): + if 'mn' in nb: + candidates.update(v) + else: + todo.append((dict(fname_values), (nb, v), offset_b)) + + return [c for c in candidates] + + def reset_class(self): + for f in self.fields_order: + if f.strbits and isbin(f.strbits): + f.value = int(f.strbits, 2) + elif 'default_val' in f.kargs: + f.value = int(f.kargs['default_val'], 2) + else: + f.value = None + if f.fname: + setattr(self, f.fname, f) + + def init_class(self): + args = [] + fields_order = [] + to_decode = [] + off = 0 + for i, fc in enumerate(self.fields): + f = fc.gen(self) + f.offset = off + off += f.l + fields_order.append(f) + to_decode.append((i, f)) + + if isinstance(f, m_arg): + args.append(f) + if f.fname: + setattr(self, f.fname, f) + if hasattr(self, 'args_permut'): + args = [args[self.args_permut[i]] + for i in range(len(self.args_permut))] + to_decode.sort(key=lambda x: (x[1].order, x[0])) + to_decode = [fields_order.index(f[1]) for f in to_decode] + self.args = args + self.fields_order = fields_order + self.to_decode = to_decode + + def add_pre_dis_info(self, prefix=None): + return True + + @classmethod + def getbits(cls, bs, attrib, offset_b, l): + return bs.getbits(offset_b, l) + + @classmethod + def getbytes(cls, bs, offset, l): + return bs.getbytes(offset, l) + + @classmethod + def pre_dis(cls, v_o, attrib, offset): + return {}, v_o, attrib, offset, 0 + + def post_dis(self): + return 
@classmethod
def dis(cls, bs_o, mode_o=None, offset=0):
    """Disassemble one instruction of *bs_o* located at *offset*.

    @bs_o: a bin_stream, or data that is wrapped in a bin_stream_str
    @mode_o: mode handed to pre_dis()
    @offset: offset of the instruction in the stream

    Returns the ``cls.instruction`` built from the decoded candidate. When
    several candidates decode, the first one flagged as alias is returned.

    Raises Disasm_Exception when no candidate is guessed or none decodes,
    and NotImplementedError when several candidates decode and none of them
    is an alias.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    out = []
    out_c = []
    alias = False

    bs_o.enter_atomic_mode()
    # try/finally guarantees the stream leaves atomic mode exactly once,
    # including when a helper raises unexpectedly.
    try:
        pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
            bs_o, mode_o, offset)
        candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
        if not candidates:
            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)

        if hasattr(bs, 'getlen'):
            bs_l = bs.getlen()
        else:
            bs_l = len(bs)

        for c in candidates:
            # The separator is passed as an argument: the message itself
            # must hold one placeholder per logged value.
            log.debug("%s %s %s", "*" * 40, mode, c.mode)
            log.debug(c.fields)

            c = cls.all_mn_inst[c][0]

            c.reset_class()
            c.mode = mode

            if not c.add_pre_dis_info(pre_dis_info):
                continue

            todo = {}
            getok = True
            fname_values = dict(pre_dis_info)
            offset_b = offset * 8

            total_l = 0
            for i, f in enumerate(c.fields_order):
                if f.flen is not None:
                    l = f.flen(mode, fname_values)
                else:
                    l = f.l
                if l is not None:
                    total_l += l
                    f.l = l
                    f.is_present = True
                    log.debug("FIELD %s %s %s %s", f.__class__, f.fname,
                              offset_b, l)
                    if bs_l * 8 - offset_b < l:
                        # Not enough bits left in the stream
                        getok = False
                        break
                    bv = cls.getbits(bs, mode, offset_b, l)
                    offset_b += l
                    if f.fname not in fname_values:
                        fname_values[f.fname] = bv
                    todo[i] = bv
                else:
                    f.is_present = False
                    todo[i] = None

            if not getok:
                continue

            c.l = prefix_len + total_l // 8
            # Reset per candidate: previously `ret` was unbound (or kept the
            # value of the previous candidate) when no field was present.
            ret = True
            for i in c.to_decode:
                f = c.fields_order[i]
                if f.is_present:
                    ret = f.decode(todo[i])
                    if not ret:
                        log.debug("cannot decode %r", f)
                        break

            if not ret:
                continue
            for a in c.args:
                a.expr = expr_simp(a.expr)

            c.b = cls.getbytes(bs, offset_o, c.l)
            c.offset = offset_o
            c = c.post_dis()
            if c is None:
                continue
            c_args = [a.expr for a in c.args]
            instr = cls.instruction(c.name, mode, c_args,
                                    additional_info=c.additional_info())
            instr.l = prefix_len + total_l // 8
            instr.b = cls.getbytes(bs, offset_o, instr.l)
            instr.offset = offset_o
            instr.get_info(c)
            if c.alias:
                alias = True
            out.append(instr)
            out_c.append(c)
    finally:
        bs_o.leave_atomic_mode()

    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')

        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError(
            'Multiple disas: \n' +
            "\n".join(str(x) for x in out)
        )
    return out[0]
NotImplementedError('not fully functional') + f.expr = expr_simp(f.expr) + args_expr.append(f.expr) + args_str = args_str[stop:].strip(' ') + if args_str.startswith(','): + args_str = args_str[1:] + args_str = args_str.strip(' ') + if args_str: + cannot_parse = True + if cannot_parse: + continue + + out.append(c) + out_args.append(args_expr) + break + + if len(out) == 0: + raise ValueError('cannot fromstring %r' % text) + if len(out) != 1: + log.debug('fromstring multiple args ret default') + c = out[0] + c_args = out_args[0] + + instr = cls.instruction(c.name, mode, c_args, + additional_info=c.additional_info()) + return instr + + def dup_info(self, infos): + return + + @classmethod + def get_cls_instance(cls, cc, mode, infos=None): + c = cls.all_mn_inst[cc][0] + + c.reset_class() + c.add_pre_dis_info() + c.dup_info(infos) + + c.mode = mode + yield c + + @classmethod + def asm(cls, instr, symbols=None): + """ + Re asm instruction by searching mnemo using name and args. We then + can modify args and get the hex of a modified instruction + """ + clist = cls.all_mn_name[instr.name] + clist = [x for x in clist] + vals = [] + candidates = [] + args = instr.resolve_args_with_symbols(symbols) + + for cc in clist: + + for c in cls.get_cls_instance( + cc, instr.mode, instr.additional_info): + + cannot_parse = False + if len(c.args) != len(instr.args): + continue + + # only fix args expr + for i in range(len(c.args)): + c.args[i].expr = args[i] + + v = c.value(instr.mode) + if not v: + log.debug("cannot encode %r", c) + cannot_parse = True + if cannot_parse: + continue + vals += v + candidates.append((c, v)) + if len(vals) == 0: + raise ValueError( + 'cannot asm %r %r' % + (instr.name, [str(x) for x in instr.args]) + ) + if len(vals) != 1: + log.debug('asm multiple args ret default') + + vals = cls.filter_asm_candidates(instr, candidates) + return vals + + @classmethod + def filter_asm_candidates(cls, instr, candidates): + o = [] + for _, v in candidates: + o += v + 
def decoded2bytes(self, result):
    """Encode each field list of *result* and return the distinct encodings.

    @result: list of decoded field lists; each list is sorted in place
             before being handed to ``self.encodefields``
    Returns the encodings without duplicates, in first-seen order.
    ``None`` encodings are dropped. An empty *result* gives ``[]``.
    """
    if not result:
        return []

    out = []
    seen = set()
    for decoded in result:
        decoded.sort()

        o = self.encodefields(decoded)
        if o is None or o in seen:
            continue
        # First-seen order keeps the output reproducible between runs;
        # going through list(set(...)) returned an arbitrary order.
        seen.add(o)
        out.append(o)
    return out
todo test + if not (isinstance(arg, m2_expr.Expr) or + isinstance(arg.expr, m2_expr.Expr)): + raise ValueError('zarb arg type') + x = str(arg) + args.append(x) + + o += self.gen_args(args) + return o + + def parse_prefix(self, v): + return 0 + + def set_dst_symbol(self, loc_db): + dst = self.getdstflow(loc_db) + args = [] + for d in dst: + if isinstance(d, m2_expr.ExprInt): + l = loc_db.get_or_create_offset_location(int(d)) + + a = m2_expr.ExprId(l.name, d.size) + else: + a = d + args.append(a) + self.args_symb = args + + def getdstflow(self, loc_db): + return [self.args[0].expr] + + +class imm_noarg(object): + intsize = 32 + intmask = (1 << intsize) - 1 + + def int2expr(self, v): + if (v & ~self.intmask) != 0: + return None + return m2_expr.ExprInt(v, self.intsize) + + def expr2int(self, e): + if not isinstance(e, m2_expr.ExprInt): + return None + v = int(e) + if v & ~self.intmask != 0: + return None + return v + + def fromstring(self, text, loc_db, parser_result=None): + if parser_result: + e, start, stop = parser_result[self.parser] + else: + try: + e, start, stop = next(self.parser.scanString(text)) + except StopIteration: + return None, None + if e == [None]: + return None, None + + assert(isinstance(e, m2_expr.Expr)) + if isinstance(e, tuple): + self.expr = self.int2expr(e[1]) + elif isinstance(e, m2_expr.Expr): + self.expr = e + else: + raise TypeError('zarb expr') + if self.expr is None: + log.debug('cannot fromstring int %r', text) + return None, None + return start, stop + + def decodeval(self, v): + return v + + def encodeval(self, v): + if v > self.lmask: + return False + return v + + def decode(self, v): + v = v & self.lmask + v = self.decodeval(v) + e = self.int2expr(v) + if not e: + return False + self.expr = e + return True + + def encode(self): + v = self.expr2int(self.expr) + if v is None: + return False + v = self.encodeval(v) + if v is False: + return False + if v > self.lmask: + return False + self.value = v + return True + + +class 
def swap_uint(size, i):
    """Return *i* truncated to *size* bits with its byte order reversed.

    @size: width in bits; 8, 16, 32 or 64
    @i: integer to swap; only its low *size* bits are used
    Raises ValueError for any other *size*.
    """
    if size == 8:
        # A single byte has nothing to swap
        return i & 0xff
    code = {16: 'H', 32: 'I', 64: 'Q'}.get(size)
    if code is None:
        raise ValueError('unknown int len %r' % size)
    masked = i & ((1 << size) - 1)
    # Pack big-endian, read back little-endian: the bytes come out reversed.
    # NOTE(review): the scraped text lost the '<'...'>' part of each call;
    # the pack/unpack pair is reconstructed from the leftover tokens.
    return struct.unpack('<' + code, struct.pack('>' + code, masked))[0]
class CTypeId(CTypeBase):
    """C type id:
    int
    unsigned int
    """

    def __init__(self, *names):
        # Type specifier order does not matter
        # so the canonical form is ordered
        self.names = tuple(sorted(names))
        super(CTypeId, self).__init__()

    def __hash__(self):
        return hash((self.__class__, self.names))

    def __eq__(self, other):
        return (self.eq_base(other) and
                self.names == other.names)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        # The format string needs one placeholder for the joined names: with
        # an empty literal the `%` raises TypeError, and since the base
        # __init__ calls str(self), no instance could be created.
        # NOTE(review): the "<Id:...>" tag is a reconstruction (the scrape
        # dropped the '<...>' text) -- confirm the exact wording upstream.
        return "<Id:%s>" % ', '.join(self.names)
self.size)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.target == other.target and + self.size == other.size) + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return "" % (self.size, str(self.target)) + + +class CTypePtr(CTypeBase): + """C type for pointer: + typedef int* XXX; + """ + + def __init__(self, target): + assert isinstance(target, CTypeBase) + self.target = target + super(CTypePtr, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.target)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.target == other.target) + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return "" % str(self.target) + + +class CTypeStruct(CTypeBase): + """C type for structure""" + + def __init__(self, name, fields=None): + assert name is not None + self.name = name + if fields is None: + fields = () + for field_name, field in fields: + assert field_name is not None + assert isinstance(field, CTypeBase) + self.fields = tuple(fields) + super(CTypeStruct, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.name, self.fields)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.name == other.name and + self.fields == other.fields) + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + out = [] + out.append("" % self.name) + for name, field in self.fields: + out.append("\t%-10s %s" % (name, field)) + return '\n'.join(out) + + +class CTypeUnion(CTypeBase): + """C type for union""" + + def __init__(self, name, fields=None): + assert name is not None + self.name = name + if fields is None: + fields = [] + for field_name, field in fields: + assert field_name is not None + assert isinstance(field, CTypeBase) + self.fields = tuple(fields) + super(CTypeUnion, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.name, self.fields)) + + def __eq__(self, 
class CTypeFunc(CTypeBase):
    """C type for functions"""

    def __init__(self, name, abi=None, type_ret=None, args=None):
        """
        @name: function name
        @abi: calling convention information, if any
        @type_ret: CTypeBase instance of the returned type, if any
        @args: iterable of (argument name, CTypeBase instance)
        """
        if type_ret:
            assert isinstance(type_ret, CTypeBase)
        if args:
            for arg_name, arg in args:
                assert isinstance(arg, CTypeBase)
            args = tuple(args)
        else:
            args = tuple()
        self.name = name
        self.abi = abi
        self.type_ret = type_ret
        self.args = args
        super(CTypeFunc, self).__init__()

    def __hash__(self):
        return hash((self.__class__, self.name, self.abi,
                     self.type_ret, self.args))

    def __eq__(self, other):
        return (self.eq_base(other) and
                self.name == other.name and
                self.abi == other.abi and
                self.type_ret == other.type_ret and
                self.args == other.args)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        # Four values are formatted, so four placeholders are required: with
        # an empty literal the `%` raises TypeError, and since the base
        # __init__ calls str(self), no instance could be created.
        # NOTE(review): the "<Func:...>" layout is a reconstruction (the
        # scrape dropped the '<...>' text) -- confirm the wording upstream.
        return "<Func:%s (%s) %s(%s)>" % (
            self.type_ret,
            self.abi,
            self.name,
            ", ".join(["%s %s" % (name, arg) for (name, arg) in self.args])
        )
super(CTypeSizeof, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.target)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.target == other.target) + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return "" % self.target + + +class CTypeOp(CTypeBase): + """C type for operator (+ * ...)""" + + def __init__(self, operator, *args): + self.operator = operator + self.args = tuple(args) + super(CTypeOp, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.operator, self.args)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.operator == other.operator and + self.args == other.args) + + def __str__(self): + return "" % (self.operator, + ', '.join([str(arg) for arg in self.args])) + + +class FuncNameIdentifier(c_ast.NodeVisitor): + """Visit an c_ast to find IdentifierType""" + + def __init__(self): + super(FuncNameIdentifier, self).__init__() + self.node_name = None + + def visit_TypeDecl(self, node): + """Retrieve the name in a function declaration: + Only one IdentifierType is present""" + self.node_name = node + + +class CAstTypes(object): + """Store all defined C types and typedefs""" + INTERNAL_PREFIX = "__GENTYPE__" + ANONYMOUS_PREFIX = "__ANONYMOUS__" + + def __init__(self, knowntypes=None, knowntypedefs=None): + if knowntypes is None: + knowntypes = {} + if knowntypedefs is None: + knowntypedefs = {} + + self._types = dict(knowntypes) + self._typedefs = dict(knowntypedefs) + self.cpt = 0 + self.loc_to_decl_info = {} + self.parser = c_parser.CParser() + self._cpt_decl = 0 + + + self.ast_to_typeid_rules = { + c_ast.Struct: self.ast_to_typeid_struct, + c_ast.Union: self.ast_to_typeid_union, + c_ast.IdentifierType: self.ast_to_typeid_identifiertype, + c_ast.TypeDecl: self.ast_to_typeid_typedecl, + c_ast.Decl: self.ast_to_typeid_decl, + c_ast.Typename: self.ast_to_typeid_typename, + c_ast.FuncDecl: self.ast_to_typeid_funcdecl, + 
c_ast.Enum: self.ast_to_typeid_enum, + c_ast.PtrDecl: self.ast_to_typeid_ptrdecl, + c_ast.EllipsisParam: self.ast_to_typeid_ellipsisparam, + c_ast.ArrayDecl: self.ast_to_typeid_arraydecl, + } + + self.ast_parse_rules = { + c_ast.Struct: self.ast_parse_struct, + c_ast.Union: self.ast_parse_union, + c_ast.Typedef: self.ast_parse_typedef, + c_ast.TypeDecl: self.ast_parse_typedecl, + c_ast.IdentifierType: self.ast_parse_identifiertype, + c_ast.Decl: self.ast_parse_decl, + c_ast.PtrDecl: self.ast_parse_ptrdecl, + c_ast.Enum: self.ast_parse_enum, + c_ast.ArrayDecl: self.ast_parse_arraydecl, + c_ast.FuncDecl: self.ast_parse_funcdecl, + c_ast.FuncDef: self.ast_parse_funcdef, + c_ast.Pragma: self.ast_parse_pragma, + } + + def gen_uniq_name(self): + """Generate uniq name for unnamed strucs/union""" + cpt = self.cpt + self.cpt += 1 + return self.INTERNAL_PREFIX + "%d" % cpt + + def gen_anon_name(self): + """Generate name for anonymous strucs/union""" + cpt = self.cpt + self.cpt += 1 + return self.ANONYMOUS_PREFIX + "%d" % cpt + + def is_generated_name(self, name): + """Return True if the name is internal""" + return name.startswith(self.INTERNAL_PREFIX) + + def is_anonymous_name(self, name): + """Return True if the name is anonymous""" + return name.startswith(self.ANONYMOUS_PREFIX) + + def add_type(self, type_id, type_obj): + """Add new C type + @type_id: Type descriptor (CTypeBase instance) + @type_obj: Obj* instance""" + assert isinstance(type_id, CTypeBase) + if type_id in self._types: + assert self._types[type_id] == type_obj + else: + self._types[type_id] = type_obj + + def add_typedef(self, type_new, type_src): + """Add new typedef + @type_new: CTypeBase instance of the new type name + @type_src: CTypeBase instance of the target type""" + assert isinstance(type_src, CTypeBase) + self._typedefs[type_new] = type_src + + def get_type(self, type_id): + """Get ObjC corresponding to the @type_id + @type_id: Type descriptor (CTypeBase instance) + """ + assert 
def digest_decl(self, c_str):
    """Strip the calling-convention markers from *c_str*.

    Each ``__cdecl__`` / ``__stdcall__`` occurrence is removed. For every
    marker followed by an identifier, ``self.loc_to_decl_info`` maps the
    (line, column) of that identifier to the marker. Both numbers start at
    1 and refer to the stripped text.

    @c_str: C source string
    Returns the stripped string.
    """
    char_id = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
    end = len(c_str)

    # Seek decl
    index_decl = []
    for decl in ['__cdecl__', '__stdcall__']:
        decl_len = len(decl)
        index = 0
        while True:
            index = c_str.find(decl, index)
            if index == -1:
                break
            decl_off = index

            # Skip up to the identifier following the marker. The bound
            # checks keep a marker sitting at the very end of the text from
            # raising IndexError.
            index += decl_len
            while index < end and c_str[index] not in char_id:
                index += 1
            id_start = index

            while index < end and c_str[index] in char_id:
                index += 1
            id_stop = index

            index_decl.append((decl_off, decl_len, id_start, id_stop, decl))

    index_decl.sort()

    # Remove decl; `off` is the number of characters removed so far
    off = 0
    offsets = []
    for decl_off, decl_len, id_start, id_stop, decl in index_decl:
        decl_off -= off
        c_str = c_str[:decl_off] + c_str[decl_off + decl_len:]
        off += decl_len
        if id_stop > id_start:
            # Only markers actually followed by an identifier are recorded
            offsets.append((id_start - off, id_stop - off, decl))

    index = 0
    lineno = 1

    # Index to lineno, column
    for id_start, id_stop, decl in offsets:
        lineno += c_str.count('\n', index, id_start)
        last_cr = c_str.rfind('\n', 0, id_start)
        # column starts at 1
        column = id_start - last_cr
        index = id_start
        self.loc_to_decl_info[(lineno, column)] = decl
    return c_str
add_c_decl(self, c_str): + """ + Adds types from a C string types declaring + Note: will ignore lines containing code refs ie: + '# 23 "miasm.h"' + Returns the C ast + @c_str: C string containing C types declarations + """ + c_str = self.digest_decl(c_str) + + ast = c_to_ast(self.parser, c_str) + self.add_c_decl_from_ast(ast) + + return ast + + def ast_eval_int(self, ast): + """Eval a C ast object integer + + @ast: parsed pycparser.c_ast object + """ + + if isinstance(ast, c_ast.BinaryOp): + left = self.ast_eval_int(ast.left) + right = self.ast_eval_int(ast.right) + is_pure_int = (isinstance(left, int) and + isinstance(right, int)) + + if is_pure_int: + if ast.op == '*': + result = left * right + elif ast.op == '/': + assert left % right == 0 + result = left // right + elif ast.op == '+': + result = left + right + elif ast.op == '-': + result = left - right + elif ast.op == '<<': + result = left << right + elif ast.op == '>>': + result = left >> right + else: + raise NotImplementedError("Not implemented!") + else: + result = CTypeOp(ast.op, left, right) + + elif isinstance(ast, c_ast.UnaryOp): + if ast.op == 'sizeof' and isinstance(ast.expr, c_ast.Typename): + subobj = self.ast_to_typeid(ast.expr) + result = CTypeSizeof(subobj) + else: + raise NotImplementedError("Not implemented!") + + elif isinstance(ast, c_ast.Constant): + result = int(ast.value, 0) + elif isinstance(ast, c_ast.Cast): + # TODO: Can trunc integers? 
+ result = self.ast_eval_int(ast.expr) + else: + raise NotImplementedError("Not implemented!") + return result + + def ast_to_typeid_struct(self, ast): + """Return the CTypeBase of an Struct ast""" + name = self.gen_uniq_name() if ast.name is None else ast.name + args = [] + if ast.decls: + for arg in ast.decls: + if arg.name is None: + arg_name = self.gen_anon_name() + else: + arg_name = arg.name + args.append((arg_name, self.ast_to_typeid(arg))) + decl = CTypeStruct(name, args) + return decl + + def ast_to_typeid_union(self, ast): + """Return the CTypeBase of an Union ast""" + name = self.gen_uniq_name() if ast.name is None else ast.name + args = [] + if ast.decls: + for arg in ast.decls: + if arg.name is None: + arg_name = self.gen_anon_name() + else: + arg_name = arg.name + args.append((arg_name, self.ast_to_typeid(arg))) + decl = CTypeUnion(name, args) + return decl + + def ast_to_typeid_identifiertype(self, ast): + """Return the CTypeBase of an IdentifierType ast""" + return CTypeId(*ast.names) + + def ast_to_typeid_typedecl(self, ast): + """Return the CTypeBase of a TypeDecl ast""" + return self.ast_to_typeid(ast.type) + + def ast_to_typeid_decl(self, ast): + """Return the CTypeBase of a Decl ast""" + return self.ast_to_typeid(ast.type) + + def ast_to_typeid_typename(self, ast): + """Return the CTypeBase of a TypeName ast""" + return self.ast_to_typeid(ast.type) + + def get_funcname(self, ast): + """Return the name of a function declaration ast""" + funcnameid = FuncNameIdentifier() + funcnameid.visit(ast) + node_name = funcnameid.node_name + if node_name.coord is not None: + lineno, column = node_name.coord.line, node_name.coord.column + decl_info = self.loc_to_decl_info.get((lineno, column), None) + else: + decl_info = None + return node_name.declname, decl_info + + def ast_to_typeid_funcdecl(self, ast): + """Return the CTypeBase of an FuncDecl ast""" + type_ret = self.ast_to_typeid(ast.type) + name, decl_info = self.get_funcname(ast.type) + if ast.args: + 
args = [] + for arg in ast.args.params: + typeid = self.ast_to_typeid(arg) + if isinstance(typeid, CTypeEllipsis): + arg_name = None + else: + arg_name = arg.name + args.append((arg_name, typeid)) + else: + args = [] + + obj = CTypeFunc(name, decl_info, type_ret, args) + decl = CTypeFunc(name) + if not self.is_known_type(decl): + self.add_type(decl, obj) + return obj + + def ast_to_typeid_enum(self, ast): + """Return the CTypeBase of an Enum ast""" + name = self.gen_uniq_name() if ast.name is None else ast.name + return CTypeEnum(name) + + def ast_to_typeid_ptrdecl(self, ast): + """Return the CTypeBase of a PtrDecl ast""" + return CTypePtr(self.ast_to_typeid(ast.type)) + + def ast_to_typeid_ellipsisparam(self, _): + """Return the CTypeBase of an EllipsisParam ast""" + return CTypeEllipsis() + + def ast_to_typeid_arraydecl(self, ast): + """Return the CTypeBase of an ArrayDecl ast""" + target = self.ast_to_typeid(ast.type) + if ast.dim is None: + value = None + else: + value = self.ast_eval_int(ast.dim) + return CTypeArray(target, value) + + def ast_to_typeid(self, ast): + """Return the CTypeBase of the @ast + @ast: pycparser.c_ast instance""" + cls = ast.__class__ + if not cls in self.ast_to_typeid_rules: + raise NotImplementedError("Strange type %r" % ast) + return self.ast_to_typeid_rules[cls](ast) + + # Ast parse type declarators + + def ast_parse_decl(self, ast): + """Parse ast Decl""" + return self.ast_parse_declaration(ast.type) + + def ast_parse_typedecl(self, ast): + """Parse ast Typedecl""" + return self.ast_parse_declaration(ast.type) + + def ast_parse_struct(self, ast): + """Parse ast Struct""" + obj = self.ast_to_typeid(ast) + if ast.decls and ast.name is not None: + # Add struct to types if named + decl = CTypeStruct(ast.name) + if not self.is_known_type(decl): + self.add_type(decl, obj) + return obj + + def ast_parse_union(self, ast): + """Parse ast Union""" + obj = self.ast_to_typeid(ast) + if ast.decls and ast.name is not None: + # Add union to types 
if named + decl = CTypeUnion(ast.name) + if not self.is_known_type(decl): + self.add_type(decl, obj) + return obj + + def ast_parse_typedef(self, ast): + """Parse ast TypeDef""" + decl = CTypeId(ast.name) + obj = self.ast_parse_declaration(ast.type) + if (isinstance(obj, (CTypeStruct, CTypeUnion)) and + self.is_generated_name(obj.name)): + # Add typedef name to default name + # for a question of clarity + obj.name += "__%s" % ast.name + self.add_typedef(decl, obj) + # Typedef does not return any object + return None + + def ast_parse_identifiertype(self, ast): + """Parse ast IdentifierType""" + return CTypeId(*ast.names) + + def ast_parse_ptrdecl(self, ast): + """Parse ast PtrDecl""" + return CTypePtr(self.ast_parse_declaration(ast.type)) + + def ast_parse_enum(self, ast): + """Parse ast Enum""" + return self.ast_to_typeid(ast) + + def ast_parse_arraydecl(self, ast): + """Parse ast ArrayDecl""" + return self.ast_to_typeid(ast) + + def ast_parse_funcdecl(self, ast): + """Parse ast FuncDecl""" + return self.ast_to_typeid(ast) + + def ast_parse_funcdef(self, ast): + """Parse ast FuncDef""" + return self.ast_to_typeid(ast.decl) + + def ast_parse_pragma(self, _): + """Prama does not return any object""" + return None + + def ast_parse_declaration(self, ast): + """Add one ast type declaration to the type manager + (packed style in type manager) + + @ast: parsed pycparser.c_ast object + """ + cls = ast.__class__ + if not cls in self.ast_parse_rules: + raise NotImplementedError("Strange declaration %r" % cls) + return self.ast_parse_rules[cls](ast) + + def ast_parse_declarations(self, ast): + """Add ast types declaration to the type manager + (packed style in type manager) + + @ast: parsed pycparser.c_ast object + """ + for ext in ast.ext: + ret = self.ast_parse_declaration(ext) + + def parse_c_type(self, c_str): + """Parse a C string representing a C type and return the associated + Miasm C object. 
+ @c_str: C string of a C type + """ + + new_str = "%s __MIASM_INTERNAL_%s;" % (c_str, self._cpt_decl) + ret = self.parser.cparser.parse(input=new_str, lexer=self.parser.clex) + self._cpt_decl += 1 + return ret diff --git a/miasm/core/graph.py b/miasm/core/graph.py new file mode 100644 index 00000000..f585379b --- /dev/null +++ b/miasm/core/graph.py @@ -0,0 +1,1017 @@ +from collections import defaultdict, namedtuple + +from future.utils import viewitems, viewvalues +import re + + +class DiGraph(object): + + """Implementation of directed graph""" + + # Stand for a cell in a dot node rendering + DotCellDescription = namedtuple("DotCellDescription", + ["text", "attr"]) + + def __init__(self): + self._nodes = set() + self._edges = [] + # N -> Nodes N2 with a edge (N -> N2) + self._nodes_succ = {} + # N -> Nodes N2 with a edge (N2 -> N) + self._nodes_pred = {} + + def __repr__(self): + out = [] + for node in self._nodes: + out.append(str(node)) + for src, dst in self._edges: + out.append("%s -> %s" % (src, dst)) + return '\n'.join(out) + + def nodes(self): + return self._nodes + + def edges(self): + return self._edges + + def merge(self, graph): + """Merge the current graph with @graph + @graph: DiGraph instance + """ + for node in graph._nodes: + self.add_node(node) + for edge in graph._edges: + self.add_edge(*edge) + + def __add__(self, graph): + """Wrapper on `.merge`""" + self.merge(graph) + return self + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__() + return graph + self + + def __eq__(self, graph): + if not isinstance(graph, self.__class__): + return False + if self._nodes != graph.nodes(): + return False + return sorted(self._edges) == sorted(graph.edges()) + + def __ne__(self, other): + return not self.__eq__(other) + + def add_node(self, node): + """Add the node @node to the graph. + If the node was already present, return False. 
+ Otherwise, return True + """ + if node in self._nodes: + return False + self._nodes.add(node) + self._nodes_succ[node] = [] + self._nodes_pred[node] = [] + return True + + def del_node(self, node): + """Delete the @node of the graph; Also delete every edge to/from this + @node""" + + if node in self._nodes: + self._nodes.remove(node) + for pred in self.predecessors(node): + self.del_edge(pred, node) + for succ in self.successors(node): + self.del_edge(node, succ) + + def add_edge(self, src, dst): + if not src in self._nodes: + self.add_node(src) + if not dst in self._nodes: + self.add_node(dst) + self._edges.append((src, dst)) + self._nodes_succ[src].append(dst) + self._nodes_pred[dst].append(src) + + def add_uniq_edge(self, src, dst): + """Add an edge from @src to @dst if it doesn't already exist""" + if (src not in self._nodes_succ or + dst not in self._nodes_succ[src]): + self.add_edge(src, dst) + + def del_edge(self, src, dst): + self._edges.remove((src, dst)) + self._nodes_succ[src].remove(dst) + self._nodes_pred[dst].remove(src) + + def discard_edge(self, src, dst): + """Remove edge between @src and @dst if it exits""" + if (src, dst) in self._edges: + self.del_edge(src, dst) + + def predecessors_iter(self, node): + if not node in self._nodes_pred: + return + for n_pred in self._nodes_pred[node]: + yield n_pred + + def predecessors(self, node): + return [x for x in self.predecessors_iter(node)] + + def successors_iter(self, node): + if not node in self._nodes_succ: + return + for n_suc in self._nodes_succ[node]: + yield n_suc + + def successors(self, node): + return [x for x in self.successors_iter(node)] + + def leaves_iter(self): + for node in self._nodes: + if not self._nodes_succ[node]: + yield node + + def leaves(self): + return [x for x in self.leaves_iter()] + + def heads_iter(self): + for node in self._nodes: + if not self._nodes_pred[node]: + yield node + + def heads(self): + return [x for x in self.heads_iter()] + + def find_path(self, src, dst, 
cycles_count=0, done=None): + """ + Searches for paths from @src to @dst + @src: loc_key of basic block from which it should start + @dst: loc_key of basic block where it should stop + @cycles_count: maximum number of times a basic block can be processed + @done: dictionary of already processed loc_keys, it's value is number of times it was processed + @out: list of paths from @src to @dst + """ + if done is None: + done = {} + if dst in done and done[dst] > cycles_count: + return [[]] + if src == dst: + return [[src]] + out = [] + for node in self.predecessors(dst): + done_n = dict(done) + done_n[dst] = done_n.get(dst, 0) + 1 + for path in self.find_path(src, node, cycles_count, done_n): + if path and path[0] == src: + out.append(path + [dst]) + return out + + def find_path_from_src(self, src, dst, cycles_count=0, done=None): + """ + This function does the same as function find_path. + But it searches the paths from src to dst, not vice versa like find_path. + This approach might be more efficient in some cases. + @src: loc_key of basic block from which it should start + @dst: loc_key of basic block where it should stop + @cycles_count: maximum number of times a basic block can be processed + @done: dictionary of already processed loc_keys, it's value is number of times it was processed + @out: list of paths from @src to @dst + """ + + if done is None: + done = {} + if src == dst: + return [[src]] + if src in done and done[src] > cycles_count: + return [[]] + out = [] + for node in self.successors(src): + done_n = dict(done) + done_n[src] = done_n.get(src, 0) + 1 + for path in self.find_path_from_src(node, dst, cycles_count, done_n): + if path and path[len(path)-1] == dst: + out.append([src] + path) + return out + + def nodeid(self, node): + """ + Returns uniq id for a @node + @node: a node of the graph + """ + return hash(node) & 0xFFFFFFFFFFFFFFFF + + def node2lines(self, node): + """ + Returns an iterator on cells of the dot @node. 
+ A DotCellDescription or a list of DotCellDescription are accepted + @node: a node of the graph + """ + yield self.DotCellDescription(text=str(node), attr={}) + + def node_attr(self, node): + """ + Returns a dictionary of the @node's attributes + @node: a node of the graph + """ + return {} + + def edge_attr(self, src, dst): + """ + Return a dictionary of attributes for the edge between @src and @dst + @src: the source node of the edge + @dst: the destination node of the edge + """ + return {} + + @staticmethod + def _fix_chars(token): + return "&#%04d;" % ord(token.group()) + + @staticmethod + def _attr2str(default_attr, attr): + return ' '.join( + '%s="%s"' % (name, value) + for name, value in + viewitems(dict(default_attr, + **attr)) + ) + + def dot(self): + """Render dot graph with HTML""" + + escape_chars = re.compile('[' + re.escape('{}') + '&|<>' + ']') + td_attr = {'align': 'left'} + nodes_attr = {'shape': 'Mrecord', + 'fontname': 'Courier New'} + + out = ["digraph asm_graph {"] + + # Generate basic nodes + out_nodes = [] + for node in self.nodes(): + node_id = self.nodeid(node) + out_node = '%s [\n' % node_id + out_node += self._attr2str(nodes_attr, self.node_attr(node)) + out_node += 'label =<' + + node_html_lines = [] + + for lineDesc in self.node2lines(node): + out_render = "" + if isinstance(lineDesc, self.DotCellDescription): + lineDesc = [lineDesc] + for col in lineDesc: + out_render += "" % ( + self._attr2str(td_attr, col.attr), + escape_chars.sub(self._fix_chars, str(col.text))) + node_html_lines.append(out_render) + + node_html_lines = ('' + + ('').join(node_html_lines) + + '') + + out_node += node_html_lines + "
%s
> ];" + out_nodes.append(out_node) + + out += out_nodes + + # Generate links + for src, dst in self.edges(): + attrs = self.edge_attr(src, dst) + + attrs = ' '.join( + '%s="%s"' % (name, value) + for name, value in viewitems(attrs) + ) + + out.append('%s -> %s' % (self.nodeid(src), self.nodeid(dst)) + + '[' + attrs + '];') + + out.append("}") + return '\n'.join(out) + + @staticmethod + def _reachable_nodes(head, next_cb): + """Generic algorithm to compute all nodes reachable from/to node + @head""" + + todo = set([head]) + reachable = set() + while todo: + node = todo.pop() + if node in reachable: + continue + reachable.add(node) + yield node + for next_node in next_cb(node): + todo.add(next_node) + + def predecessors_stop_node_iter(self, node, head): + if node == head: + return + for next_node in self.predecessors_iter(node): + yield next_node + + def reachable_sons(self, head): + """Compute all nodes reachable from node @head. Each son is an + immediate successor of an arbitrary, already yielded son of @head""" + return self._reachable_nodes(head, self.successors_iter) + + def reachable_parents(self, leaf): + """Compute all parents of node @leaf. Each parent is an immediate + predecessor of an arbitrary, already yielded parent of @leaf""" + return self._reachable_nodes(leaf, self.predecessors_iter) + + def reachable_parents_stop_node(self, leaf, head): + """Compute all parents of node @leaf. Each parent is an immediate + predecessor of an arbitrary, already yielded parent of @leaf. + Do not compute reachables past @head node""" + return self._reachable_nodes( + leaf, + lambda node_cur: self.predecessors_stop_node_iter( + node_cur, head + ) + ) + + + @staticmethod + def _compute_generic_dominators(head, reachable_cb, prev_cb, next_cb): + """Generic algorithm to compute either the dominators or postdominators + of the graph. 
+ @head: the head/leaf of the graph + @reachable_cb: sons/parents of the head/leaf + @prev_cb: return predecessors/successors of a node + @next_cb: return successors/predecessors of a node + """ + + nodes = set(reachable_cb(head)) + dominators = {} + for node in nodes: + dominators[node] = set(nodes) + + dominators[head] = set([head]) + todo = set(nodes) + + while todo: + node = todo.pop() + + # Heads state must not be changed + if node == head: + continue + + # Compute intersection of all predecessors'dominators + new_dom = None + for pred in prev_cb(node): + if not pred in nodes: + continue + if new_dom is None: + new_dom = set(dominators[pred]) + new_dom.intersection_update(dominators[pred]) + + # We are not a head to we have at least one dominator + assert(new_dom is not None) + + new_dom.update(set([node])) + + # If intersection has changed, add sons to the todo list + if new_dom == dominators[node]: + continue + + dominators[node] = new_dom + for succ in next_cb(node): + todo.add(succ) + return dominators + + def compute_dominators(self, head): + """Compute the dominators of the graph""" + return self._compute_generic_dominators(head, + self.reachable_sons, + self.predecessors_iter, + self.successors_iter) + + def compute_postdominators(self, leaf): + """Compute the postdominators of the graph""" + return self._compute_generic_dominators(leaf, + self.reachable_parents, + self.successors_iter, + self.predecessors_iter) + + + + + def compute_dominator_tree(self, head): + """ + Computes the dominator tree of a graph + :param head: head of graph + :return: DiGraph + """ + idoms = self.compute_immediate_dominators(head) + dominator_tree = DiGraph() + for node in idoms: + dominator_tree.add_edge(idoms[node], node) + + return dominator_tree + + @staticmethod + def _walk_generic_dominator(node, gen_dominators, succ_cb): + """Generic algorithm to return an iterator of the ordered list of + @node's dominators/post_dominator. 
+ + The function doesn't return the self reference in dominators. + @node: The start node + @gen_dominators: The dictionary containing at least node's + dominators/post_dominators + @succ_cb: return predecessors/successors of a node + + """ + # Init + done = set() + if node not in gen_dominators: + # We are in a branch which doesn't reach head + return + node_gen_dominators = set(gen_dominators[node]) + todo = set([node]) + + # Avoid working on itself + node_gen_dominators.remove(node) + + # For each level + while node_gen_dominators: + new_node = None + + # Worklist pattern + while todo: + node = todo.pop() + if node in done: + continue + if node in node_gen_dominators: + new_node = node + break + + # Avoid loops + done.add(node) + + # Look for the next level + for pred in succ_cb(node): + todo.add(pred) + + # Return the node; it's the next starting point + assert(new_node is not None) + yield new_node + node_gen_dominators.remove(new_node) + todo = set([new_node]) + + def walk_dominators(self, node, dominators): + """Return an iterator of the ordered list of @node's dominators + The function doesn't return the self reference in dominators. + @node: The start node + @dominators: The dictionary containing at least node's dominators + """ + return self._walk_generic_dominator(node, + dominators, + self.predecessors_iter) + + def walk_postdominators(self, node, postdominators): + """Return an iterator of the ordered list of @node's postdominators + The function doesn't return the self reference in postdominators. 
+ @node: The start node + @postdominators: The dictionary containing at least node's + postdominators + + """ + return self._walk_generic_dominator(node, + postdominators, + self.successors_iter) + + def compute_immediate_dominators(self, head): + """Compute the immediate dominators of the graph""" + dominators = self.compute_dominators(head) + idoms = {} + + for node in dominators: + for predecessor in self.walk_dominators(node, dominators): + if predecessor in dominators[node] and node != predecessor: + idoms[node] = predecessor + break + return idoms + + def compute_immediate_postdominators(self,tail): + """Compute the immediate postdominators of the graph""" + postdominators = self.compute_postdominators(tail) + ipdoms = {} + + for node in postdominators: + for successor in self.walk_postdominators(node, postdominators): + if successor in postdominators[node] and node != successor: + ipdoms[node] = successor + break + return ipdoms + + def compute_dominance_frontier(self, head): + """ + Compute the dominance frontier of the graph + + Source: Cooper, Keith D., Timothy J. Harvey, and Ken Kennedy. + "A simple, fast dominance algorithm." + Software Practice & Experience 4 (2001), p. 9 + """ + idoms = self.compute_immediate_dominators(head) + frontier = {} + + for node in idoms: + if len(self._nodes_pred[node]) >= 2: + for predecessor in self.predecessors_iter(node): + runner = predecessor + if runner not in idoms: + continue + while runner != idoms[node]: + if runner not in frontier: + frontier[runner] = set() + + frontier[runner].add(node) + runner = idoms[runner] + return frontier + + def _walk_generic_first(self, head, flag, succ_cb): + """ + Generic algorithm to compute breadth or depth first search + for a node. 
+ @head: the head of the graph + @flag: denotes if @todo is used as queue or stack + @succ_cb: returns a node's predecessors/successors + :return: next node + """ + todo = [head] + done = set() + + while todo: + node = todo.pop(flag) + if node in done: + continue + done.add(node) + + for succ in succ_cb(node): + todo.append(succ) + + yield node + + def walk_breadth_first_forward(self, head): + """Performs a breadth first search on the graph from @head""" + return self._walk_generic_first(head, 0, self.successors_iter) + + def walk_depth_first_forward(self, head): + """Performs a depth first search on the graph from @head""" + return self._walk_generic_first(head, -1, self.successors_iter) + + def walk_breadth_first_backward(self, head): + """Performs a breadth first search on the reversed graph from @head""" + return self._walk_generic_first(head, 0, self.predecessors_iter) + + def walk_depth_first_backward(self, head): + """Performs a depth first search on the reversed graph from @head""" + return self._walk_generic_first(head, -1, self.predecessors_iter) + + def has_loop(self): + """Return True if the graph contains at least a cycle""" + todo = list(self.nodes()) + # tested nodes + done = set() + # current DFS nodes + current = set() + while todo: + node = todo.pop() + if node in done: + continue + + if node in current: + # DFS branch end + for succ in self.successors_iter(node): + if succ in current: + return True + # A node cannot be in current AND in done + current.remove(node) + done.add(node) + else: + # Launch DFS from node + todo.append(node) + current.add(node) + todo += self.successors(node) + + return False + + def compute_natural_loops(self, head): + """ + Computes all natural loops in the graph. + + Source: Aho, Alfred V., Lam, Monica S., Sethi, R. and Jeffrey Ullman. 
+ "Compilers: Principles, Techniques, & Tools, Second Edition" + Pearson/Addison Wesley (2007), Chapter 9.6.6 + :param head: head of the graph + :return: yield a tuple of the form (back edge, loop body) + """ + for a, b in self.compute_back_edges(head): + body = self._compute_natural_loop_body(b, a) + yield ((a, b), body) + + def compute_back_edges(self, head): + """ + Computes all back edges from a node to a + dominator in the graph. + :param head: head of graph + :return: yield a back edge + """ + dominators = self.compute_dominators(head) + + # traverse graph + for node in self.walk_depth_first_forward(head): + for successor in self.successors_iter(node): + # check for a back edge to a dominator + if successor in dominators[node]: + edge = (node, successor) + yield edge + + def _compute_natural_loop_body(self, head, leaf): + """ + Computes the body of a natural loop by a depth-first + search on the reversed control flow graph. + :param head: leaf of the loop + :param leaf: header of the loop + :return: set containing loop body + """ + todo = [leaf] + done = {head} + + while todo: + node = todo.pop() + if node in done: + continue + done.add(node) + + for predecessor in self.predecessors_iter(node): + todo.append(predecessor) + return done + + def compute_strongly_connected_components(self): + """ + Partitions the graph into strongly connected components. + + Iterative implementation of Gabow's path-based SCC algorithm. + Source: Gabow, Harold N. + "Path-based depth-first search for strong and biconnected components." + Information Processing Letters 74.3 (2000), pp. 
109--110 + + The iterative implementation is inspired by Mark Dickinson's + code: + http://code.activestate.com/recipes/ + 578507-strongly-connected-components-of-a-directed-graph/ + :return: yield a strongly connected component + """ + stack = [] + boundaries = [] + counter = len(self.nodes()) + + # init index with 0 + index = {v: 0 for v in self.nodes()} + + # state machine for worklist algorithm + VISIT, HANDLE_RECURSION, MERGE = 0, 1, 2 + NodeState = namedtuple('NodeState', ['state', 'node']) + + for node in self.nodes(): + # next node if node was already visited + if index[node]: + continue + + todo = [NodeState(VISIT, node)] + done = set() + + while todo: + current = todo.pop() + + if current.node in done: + continue + + # node is unvisited + if current.state == VISIT: + stack.append(current.node) + index[current.node] = len(stack) + boundaries.append(index[current.node]) + + todo.append(NodeState(MERGE, current.node)) + # follow successors + for successor in self.successors_iter(current.node): + todo.append(NodeState(HANDLE_RECURSION, successor)) + + # iterative handling of recursion algorithm + elif current.state == HANDLE_RECURSION: + # visit unvisited successor + if index[current.node] == 0: + todo.append(NodeState(VISIT, current.node)) + else: + # contract cycle if necessary + while index[current.node] < boundaries[-1]: + boundaries.pop() + + # merge strongly connected component + else: + if index[current.node] == boundaries[-1]: + boundaries.pop() + counter += 1 + scc = set() + + while index[current.node] <= len(stack): + popped = stack.pop() + index[popped] = counter + scc.add(popped) + + done.add(current.node) + + yield scc + + +class DiGraphSimplifier(object): + + """Wrapper on graph simplification passes. + + Instance handle passes lists. 
+ """ + + def __init__(self): + self.passes = [] + + def enable_passes(self, passes): + """Add @passes to passes to applied + @passes: sequence of function (DiGraphSimplifier, DiGraph) -> None + """ + self.passes += passes + + def apply_simp(self, graph): + """Apply enabled simplifications on graph @graph + @graph: DiGraph instance + """ + while True: + new_graph = graph.copy() + for simp_func in self.passes: + simp_func(self, new_graph) + + if new_graph == graph: + break + graph = new_graph + return new_graph + + def __call__(self, graph): + """Wrapper on 'apply_simp'""" + return self.apply_simp(graph) + + +class MatchGraphJoker(object): + + """MatchGraphJoker are joker nodes of MatchGraph, that is to say nodes which + stand for any node. Restrictions can be added to jokers. + + If j1, j2 and j3 are MatchGraphJoker, one can quickly build a matcher for + the pattern: + | + +----v----+ + | (j1) | + +----+----+ + | + +----v----+ + | (j2) |<---+ + +----+--+-+ | + | +------+ + +----v----+ + | (j3) | + +----+----+ + | + v + Using: + >>> matcher = j1 >> j2 >> j3 + >>> matcher += j2 >> j2 + Or: + >>> matcher = j1 >> j2 >> j2 >> j3 + + """ + + def __init__(self, restrict_in=True, restrict_out=True, filt=None, + name=None): + """Instantiate a MatchGraphJoker, with restrictions + @restrict_in: (optional) if set, the number of predecessors of the + matched node must be the same than the joker node in the + associated MatchGraph + @restrict_out: (optional) counterpart of @restrict_in for successors + @filt: (optional) function(graph, node) -> boolean for filtering + candidate node + @name: (optional) helper for displaying the current joker + """ + if filt is None: + filt = lambda graph, node: True + self.filt = filt + if name is None: + name = str(id(self)) + self._name = name + self.restrict_in = restrict_in + self.restrict_out = restrict_out + + def __rshift__(self, joker): + """Helper for describing a MatchGraph from @joker + J1 >> J2 stands for an edge going to J2 from J1 
+ @joker: MatchGraphJoker instance + """ + assert isinstance(joker, MatchGraphJoker) + + graph = MatchGraph() + graph.add_node(self) + graph.add_node(joker) + graph.add_edge(self, joker) + + # For future "A >> B" idiom construction + graph._last_node = joker + + return graph + + def __str__(self): + info = [] + if not self.restrict_in: + info.append("In:*") + if not self.restrict_out: + info.append("Out:*") + return "Joker %s %s" % (self._name, + "(%s)" % " ".join(info) if info else "") + + +class MatchGraph(DiGraph): + + """MatchGraph intends to be the counterpart of match_expr, but for DiGraph + + This class provides API to match a given DiGraph pattern, with addidionnal + restrictions. + The implemented algorithm is a naive approach. + + The recommended way to instantiate a MatchGraph is the use of + MatchGraphJoker. + """ + + def __init__(self, *args, **kwargs): + super(MatchGraph, self).__init__(*args, **kwargs) + # Construction helper + self._last_node = None + + # Construction helpers + def __rshift__(self, joker): + """Construction helper, adding @joker to the current graph as a son of + _last_node + @joker: MatchGraphJoker instance""" + assert isinstance(joker, MatchGraphJoker) + assert isinstance(self._last_node, MatchGraphJoker) + + self.add_node(joker) + self.add_edge(self._last_node, joker) + self._last_node = joker + return self + + def __add__(self, graph): + """Construction helper, merging @graph with self + @graph: MatchGraph instance + """ + assert isinstance(graph, MatchGraph) + + # Reset helpers flag + self._last_node = None + graph._last_node = None + + # Merge graph into self + for node in graph.nodes(): + self.add_node(node) + for edge in graph.edges(): + self.add_edge(*edge) + + return self + + # Graph matching + def _check_node(self, candidate, expected, graph, partial_sol=None): + """Check if @candidate can stand for @expected in @graph, given @partial_sol + @candidate: @graph's node + @expected: MatchGraphJoker instance + @graph: DiGraph 
def _propagate_sol(self, node, partial_sol, graph, todo, propagator):
    """
    Try to extend the current @partial_sol by one joker, walking from @node
    with @propagator.

    @node: joker already present in @partial_sol
    @partial_sol: dictionary joker -> @graph's node (left untouched)
    @graph: graph being matched
    @todo: list of partial solutions; each new, not already queued, extended
    solution is appended to it
    @propagator: callable(graph, node) returning the neighbours to explore
    """
    matched = partial_sol[node]
    for joker in propagator(self, node):
        if joker not in partial_sol:
            # The propagator is called again for every joker: it may return
            # a one-shot iterator
            for neighbour in propagator(graph, matched):
                if not self._check_node(neighbour, joker, graph,
                                        partial_sol):
                    continue
                extended = partial_sol.copy()
                extended[joker] = neighbour
                if extended not in todo:
                    todo.append(extended)
INT_EQ = 0  # Equivalent
INT_B_IN_A = 1  # B in A
INT_A_IN_B = -1  # A in B
INT_DISJOIN = 2  # Disjoint
INT_JOIN = 3  # Overlap
INT_JOIN_AB = 4  # B starts at the end of A
INT_JOIN_BA = 5  # A starts at the end of B


def cmp_interval(inter1, inter2):
    """Compare @inter1 and @inter2 and return the associated INT_* case

    Bounds are inclusive. The tests are applied in sequence and a later
    matching test overrides an earlier one.
    @inter1, @inter2: (start, stop) couples
    """
    if inter1 == inter2:
        return INT_EQ

    inter1_start, inter1_stop = inter1
    inter2_start, inter2_stop = inter2
    result = INT_JOIN
    if inter1_start <= inter2_start and inter1_stop >= inter2_stop:
        result = INT_B_IN_A
    if inter2_start <= inter1_start and inter2_stop >= inter1_stop:
        result = INT_A_IN_B
    if inter1_stop + 1 == inter2_start:
        result = INT_JOIN_AB
    if inter2_stop + 1 == inter1_start:
        result = INT_JOIN_BA
    if inter1_start > inter2_stop + 1 or inter2_start > inter1_stop + 1:
        result = INT_DISJOIN
    return result


class interval(object):
    """Stands for intervals with integer bounds

    Offers common methods to work with interval. The content is kept as a
    sorted list of disjoint, non adjacent (start, stop) couples with inclusive
    bounds (see cannon_list)."""

    # Results of cmp_interval for which two couples must be merged
    _TO_MERGE = (INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B)

    def __init__(self, bounds=None):
        """Instance an interval object
        @bounds: (optional) list of (int, int) and/or interval instance
        """
        if bounds is None:
            bounds = []
        elif isinstance(bounds, interval):
            bounds = bounds.intervals
        self.is_cannon = False
        self.intervals = bounds
        self.cannon()

    def __iter__(self):
        """Iterate on intervals"""
        for inter in self.intervals:
            yield inter

    @staticmethod
    def cannon_list(tmp):
        """
        Return a canonized list of intervals: sorted, without empty couples
        (start > stop), overlapping and adjacent couples being merged
        @tmp: list of (int, int)
        """
        tmp = sorted([x for x in tmp if x[0] <= x[1]])
        out = []
        if not tmp:
            return out
        # Couples are consumed from the highest to the lowest one, so 'out' is
        # built in decreasing order and reversed at the end
        out.append(tmp.pop())
        while tmp:
            x = tmp.pop()
            rez = cmp_interval(out[-1], x)

            if rez in (INT_EQ, INT_B_IN_A):
                # Already covered by the last couple
                continue
            elif rez == INT_DISJOIN:
                out.append(x)
            elif rez in interval._TO_MERGE:
                # Absorb every already emitted couple touching the new one
                u, v = x
                while out and cmp_interval(out[-1], (u, v)) in interval._TO_MERGE:
                    u = min(u, out[-1][0])
                    v = max(v, out[-1][1])
                    out.pop()
                out.append((u, v))
            else:
                raise ValueError('unknown state', rez)
        return out[::-1]

    def cannon(self):
        "Apply .cannon_list() on self contained intervals"
        if self.is_cannon is True:
            return
        self.intervals = interval.cannon_list(self.intervals)
        self.is_cannon = True

    def __repr__(self):
        if self.intervals:
            o = " U ".join(["[0x%X 0x%X]" % (x[0], x[1])
                            for x in self.intervals])
        else:
            o = "[]"
        return o

    def __contains__(self, other):
        """Return True if @other is covered by self
        @other: interval instance (each of its couples must fit in a single
        couple of self) or a value compared to the bounds
        """
        if isinstance(other, interval):
            return all(
                any(cmp_interval(mine, theirs) in (INT_EQ, INT_B_IN_A)
                    for mine in self.intervals)
                for theirs in other.intervals
            )
        return any(start <= other <= stop for start, stop in self.intervals)

    def __eq__(self, i):
        # Let Python handle foreign types instead of failing on '.intervals'
        if not isinstance(i, interval):
            return NotImplemented
        return self.intervals == i.intervals

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __add__(self, i):
        """Return the union of self and @i
        @i: interval instance or list of (int, int)
        """
        if isinstance(i, interval):
            i = i.intervals
        i = interval(self.intervals + i)
        return i

    def __sub__(self, v):
        """Return self without the parts covered by @v
        @v: interval instance
        """
        to_test = self.intervals[:]
        i = -1
        to_del = v.intervals[:]
        while i < len(to_test) - 1:
            i += 1
            x = to_test[i]
            # Empty couple, produced by a previous split
            if x[0] > x[1]:
                del to_test[i]
                i -= 1
                continue

            # Couples to remove ending before x cannot touch x nor what
            # follows it
            while to_del and to_del[0][1] < x[0]:
                del to_del[0]

            for y in to_del:
                if y[0] > x[1]:
                    break
                rez = cmp_interval(x, y)
                if rez == INT_DISJOIN:
                    continue
                elif rez == INT_EQ:
                    del to_test[i]
                    i -= 1
                    break
                elif rez == INT_A_IN_B:
                    del to_test[i]
                    i -= 1
                    break
                elif rez == INT_B_IN_A:
                    # y is strictly inside x: keep both sides, they will be
                    # tested again
                    del to_test[i]
                    i1 = (x[0], y[0] - 1)
                    i2 = (y[1] + 1, x[1])
                    to_test[i:i] = [i1, i2]
                    i -= 1
                    break
                elif rez in [INT_JOIN_AB, INT_JOIN_BA]:
                    continue
                elif rez == INT_JOIN:
                    # Partial overlap: keep the uncovered side and test it
                    # again
                    del to_test[i]
                    if x[0] < y[0]:
                        to_test[i:i] = [(x[0], y[0] - 1)]
                    else:
                        to_test[i:i] = [(y[1] + 1, x[1])]
                    i -= 1
                    break
                else:
                    raise ValueError('unknown state', rez)
        return interval(to_test)

    def __and__(self, v):
        """Return the intersection of self and @v
        @v: interval instance
        """
        out = []
        for x in self.intervals:
            if x[0] > x[1]:
                continue
            for y in v.intervals:
                rez = cmp_interval(x, y)

                if rez in (INT_DISJOIN, INT_JOIN_AB, INT_JOIN_BA):
                    # Nothing in common
                    continue
                elif rez in (INT_EQ, INT_A_IN_B):
                    out.append(x)
                elif rez == INT_B_IN_A:
                    out.append(y)
                elif rez == INT_JOIN:
                    if x[0] < y[0]:
                        out.append((y[0], x[1]))
                    else:
                        out.append((x[0], y[1]))
                else:
                    raise ValueError('unknown state', rez)
        return interval(out)

    def hull(self):
        "Return the first and the last bounds of intervals"
        if not self.intervals:
            return None, None
        return self.intervals[0][0], self.intervals[-1][1]

    @property
    def empty(self):
        """Return True iff the interval is empty"""
        return not self.intervals

    def show(self, img_x=1350, img_y=20, dry_run=False):
        """
        show image representing the interval
        @img_x, @img_y: size of the generated image
        @dry_run: if set, build the image but do not display it

        Nothing is done if the imaging library is missing or if the interval
        is empty.
        """
        try:
            from PIL import Image, ImageDraw
        except ImportError:
            try:
                # Legacy (pre-Pillow) PIL module layout
                import Image
                import ImageDraw
            except ImportError:
                print('cannot import python PIL imaging')
                return

        i_min, i_max = self.hull()
        if i_min is None:
            # Empty interval: there are no bounds to scale on
            return

        img = Image.new('RGB', (img_x, img_y), (100, 100, 100))
        draw = ImageDraw.Draw(img)

        print("%s %s" % (hex(i_min), hex(i_max)))

        # A single point hull would otherwise lead to a division by zero
        span = max(i_max - i_min, 1)
        addr2x = lambda addr: ((addr - i_min) * img_x) // span
        for a, b in self.intervals:
            draw.rectangle((addr2x(a), 0, addr2x(b), img_y), (200, 0, 0))

        if dry_run is False:
            img.show()

    @property
    def length(self):
        """
        Return the cumulated length of intervals
        """
        # Do not use __len__ because we may return a value > 32 bits
        return sum((stop - start + 1) for start, stop in self.intervals)
+ + An entry in a LocationDB is uniquely identified with a LocKey. + Additional information which can be associated with a LocKey are: + - an offset (uniq per LocationDB) + - several names (each are uniqs per LocationDB) + + As a schema: + loc_key 1 <-> 0..1 offset + 1 <-> 0..n name + + >>> loc_db = LocationDB() + # Add a location with no additional information + >>> loc_key1 = loc_db.add_location() + # Add a location with an offset + >>> loc_key2 = loc_db.add_location(offset=0x1234) + # Add a location with several names + >>> loc_key3 = loc_db.add_location(name="first_name") + >>> loc_db.add_location_name(loc_key3, "second_name") + # Associate an offset to an existing location + >>> loc_db.set_location_offset(loc_key3, 0x5678) + # Remove a name from an existing location + >>> loc_db.remove_location_name(loc_key3, "second_name") + + # Get back offset + >>> loc_db.get_location_offset(loc_key1) + None + >>> loc_db.get_location_offset(loc_key2) + 0x1234 + >>> loc_db.get_location_offset("first_name") + 0x5678 + + # Display a location + >>> loc_db.pretty_str(loc_key1) + loc_key_1 + >>> loc_db.pretty_str(loc_key2) + loc_1234 + >>> loc_db.pretty_str(loc_key3) + first_name + """ + + def __init__(self): + # Known LocKeys + self._loc_keys = set() + + # Association tables + self._loc_key_to_offset = {} + self._loc_key_to_names = {} + self._name_to_loc_key = {} + self._offset_to_loc_key = {} + + # Counter for new LocKey generation + self._loc_key_num = 0 + + def get_location_offset(self, loc_key): + """ + Return the offset of @loc_key if any, None otherwise. 
def get_or_create_offset_location(self, offset):
    """
    Return the LocKey already bound to @offset; when there is none, add a
    new location for @offset and return its LocKey.
    @offset: target offset
    """
    existing = self._offset_to_loc_key.get(offset)
    if existing is None:
        # Unknown offset: register a fresh location for it
        return self.add_location(offset=offset)
    return existing
def set_location_offset(self, loc_key, offset, force=False):
    """Associate the offset @offset to the LocKey @loc_key

    @loc_key: LocKey already known by this LocationDB
    @offset: offset to associate
    @force: if set, silently replace the offset already associated to
    @loc_key, if any

    Raise KeyError if @offset already belongs to another LocKey, and
    ValueError if @loc_key already has a different offset while @force is not
    set.
    """
    assert loc_key in self._loc_keys

    # An offset can only be owned by one location
    owner = self.get_offset_location(offset)
    offset_is_taken = owner is not None and owner != loc_key
    if offset_is_taken:
        raise KeyError("%r is already associated to a different loc_key "
                       "(%r)" % (offset, owner))

    # A location can only have one offset
    previous = self._loc_key_to_offset.get(loc_key)
    if previous is not None and previous != offset:
        if force:
            self.unset_location_offset(loc_key)
        else:
            raise ValueError(
                "%r already has an offset (0x%x). Use 'force=True'"
                " for silent overriding" % (loc_key, previous)
            )

    self._offset_to_loc_key[offset] = loc_key
    self._loc_key_to_offset[loc_key] = offset
def add_location(self, name=None, offset=None, strict=True):
    """Add a new location in the locationDB. Returns the corresponding LocKey.
    If @name is set, also associate a name to this new location.
    If @offset is set, also associate an offset to this new location.

    Strict mode (set by @strict, default):
      If a location with @offset or @name already exists, a ValueError is
      raised.
    Otherwise:
      If a location with @name already exists, its LocKey is returned. The
      location gets @offset if it had none; a ValueError is raised if it
      already has a different offset. If @offset is not specified, the
      existing location is returned as is.
      Else, if a location with @offset already exists, @name (if any) is added
      to it and its LocKey is returned.
    """

    name = force_bytes(name)
    # Deprecation handling: an integer used to be accepted as first argument
    if is_int(name):
        assert offset is None or offset == name
        warnings.warn("Deprecated API: use 'add_location(offset=)' instead."
                      " An additional 'name=' can be provided to also "
                      "associate a name (there is no more default name)")
        offset = name
        name = None

    # Argument cleaning
    offset_loc_key = None
    if offset is not None:
        offset = int(offset)
        offset_loc_key = self.get_offset_location(offset)

    # Test for collisions
    name_loc_key = None
    if name is not None:
        name_loc_key = self.get_name_location(name)

    if strict:
        if name_loc_key is not None:
            raise ValueError("An entry for %r already exists (%r), and "
                             "strict mode is enabled" % (
                                 name, name_loc_key
                             ))
        if offset_loc_key is not None:
            raise ValueError("An entry for 0x%x already exists (%r), and "
                             "strict mode is enabled" % (
                                 offset, offset_loc_key
                             ))
    else:
        # Non-strict mode
        if name_loc_key is not None:
            # Offset currently bound to the already known name (the lookup
            # goes from LocKey to offset, not the other way around)
            known_offset = self.get_location_offset(name_loc_key)
            if known_offset is None:
                if offset is not None:
                    self.set_location_offset(name_loc_key, offset)
            elif offset is not None and known_offset != offset:
                raise ValueError(
                    "Location with name '%s' already have an offset: 0x%x "
                    "(!= 0x%x)" % (name, known_offset, offset)
                )
            # Name already known, compatible offset -> nothing more to do
            return name_loc_key

        elif offset_loc_key is not None:
            if name is not None:
                # Check for already known name are checked above
                # add_location_name returns nothing: the LocKey itself must
                # be returned
                self.add_location_name(offset_loc_key, name)
            return offset_loc_key

    # No collision, this is a brand new location
    loc_key = LocKey(self._loc_key_num)
    self._loc_key_num += 1
    self._loc_keys.add(loc_key)

    if offset is not None:
        assert offset not in self._offset_to_loc_key
        self._offset_to_loc_key[offset] = loc_key
        self._loc_key_to_offset[loc_key] = offset

    if name is not None:
        self._name_to_loc_key[name] = loc_key
        self._loc_key_to_names[loc_key] = set([name])

    return loc_key
def merge(self, location_db):
    """Merge with another LocationDB @location_db

    Every location of @location_db is added to self in non-strict mode, with
    its offset and all its names.

    WARNING: old reference to @location_db information (such as LocKeys)
    must be retrieved from the updated version of this instance. The
    dedicated "get_*" APIs may be used for this task
    """
    # LocKeys of both databases will certainly collide: locations are
    # re-created one by one instead of copying the internal tables
    for foreign_key in location_db.loc_keys:
        names = location_db.get_location_names(foreign_key)
        offset = location_db.get_location_offset(foreign_key)

        # One name (if any) is given at creation, the others are added after
        first_name = list(names)[0] if names else None
        local_key = self.add_location(
            name=first_name, offset=offset, strict=False
        )

        known = self.get_location_names(local_key)
        for extra in names:
            if extra in known or extra == first_name:
                continue
            self.add_location_name(local_key, name=extra)
def rename_location(self, loc_key, newname):
    """[DEPRECATED API], see 'add_location_name' and 'remove_location_name'

    Remove every name currently associated to @loc_key, then associate
    @newname to it.
    @loc_key: LocKey instance
    @newname: name to associate
    """
    warnings.warn("Deprecated API: use 'add_location_name' and "
                  "'remove_location_name'")
    for name in self.get_location_names(loc_key):
        self.remove_location_name(loc_key, name)
    # The new name must be added, not the last removed one; this also works
    # for a location which had no name at all
    self.add_location_name(loc_key, newname)
def objc_to_str(objc, result=None):
    """Return the C declaration string of the type @objc

    The type is unwrapped level by level (array, pointer, function) while the
    declarator string grows around @result, until a named type is reached.
    @objc: ObjC instance
    @result: (optional) declarator to start from, typically an identifier

    An ObjCInt gives "int" and an ObjCEllipsis gives "...", whatever has been
    accumulated so far. Raise TypeError on an unknown type.
    """
    declarator = "" if result is None else result
    while True:
        if isinstance(objc, ObjCArray):
            declarator = declarator + "[%d]" % objc.elems
            objc = objc.objtype
            continue

        if isinstance(objc, ObjCPtr):
            target = objc.objtype
            # Anonymous pointer to function: use the function name
            if not declarator and isinstance(target, ObjCFunc):
                declarator = target.name
            # Parentheses are added unless the pointed type is one of these
            if isinstance(target, (ObjCPtr, ObjCDecl, ObjCStruct, ObjCUnion)):
                declarator = "*%s" % declarator
            else:
                declarator = "(*%s)" % declarator
            objc = target
            continue

        if isinstance(objc, (ObjCDecl, ObjCStruct, ObjCUnion)):
            # Named type: end of the chain
            if declarator:
                return "%s %s" % (objc, declarator)
            return str(objc)

        if isinstance(objc, ObjCFunc):
            rendered_args = [
                objc_to_str(arg, name) for name, arg in objc.args
            ]
            declarator = declarator + "(%s)" % ", ".join(rendered_args)
            objc = objc.type_ret
            continue

        if isinstance(objc, ObjCInt):
            return "int"
        if isinstance(objc, ObjCEllipsis):
            return "..."
        raise TypeError("Unknown c type")
@total_ordering
class ObjCDecl(ObjC):
    """C Declaration identified"""

    def __init__(self, name, align, size):
        """Init ObjCDecl

        @name: name of the declared type
        @align: alignment (in bytes)
        @size: size (in bytes)
        """
        super(ObjCDecl, self).__init__(align, size)
        self._name = name

    @property
    def name(self):
        return self._name

    def __hash__(self):
        base_hash = super(ObjCDecl, self).__hash__()
        return hash((base_hash, self._name))

    def __repr__(self):
        return '<%s %s>' % (self.__class__.__name__, self.name)

    def __str__(self):
        return str(self.name)

    def __eq__(self, other):
        # Same class / alignment / size first, then same name
        return self.cmp_base(other) == 0 and self.name == other.name

    def __lt__(self, other):
        # The generic comparison decides unless it is a tie
        order = self.cmp_base(other)
        if order != 0:
            return order < 0
        return self.name < other.name
@total_ordering
class ObjCArray(ObjC):
    """C array (test[XX])"""

    def __init__(self, objtype, elems):
        """Init ObjCArray

        @objtype: ObjC of the array elements
        @elems: number of elements in the array

        The array has the alignment of @objtype and a size of
        @elems * @objtype.size.
        """

        super(ObjCArray, self).__init__(objtype.align, elems * objtype.size)
        self._elems = elems
        self._objtype = objtype

    objtype = property(lambda self: self._objtype)
    elems = property(lambda self: self._elems)

    def __hash__(self):
        return hash((super(ObjCArray, self).__hash__(), self._elems, hash(self._objtype)))

    def __repr__(self):
        return '<%r[%d]>' % (self.objtype, self.elems)

    def __eq__(self, other):
        ret = self.cmp_base(other)
        if ret:
            return False
        if self.objtype != other.objtype:
            return False
        return self.elems == other.elems

    def __lt__(self, other):
        # Compare on the same keys, in the same order, as __eq__: generic
        # comparison, then element type, then number of elements
        ret = self.cmp_base(other)
        if ret:
            # Decided by the generic comparison; @other may not even be an
            # array here, so its objtype must not be read
            return ret < 0
        if self.objtype != other.objtype:
            return self.objtype < other.objtype
        return self.elems < other.elems
class ObjCEllipsis(ObjC):
    """C ellipsis ("..." in a function prototype)

    It has neither alignment nor size: both are None. The align and size
    properties are the ones inherited from ObjC.
    """

    def __init__(self):
        super(ObjCEllipsis, self).__init__(None, None)
def access_simplifier(expr):
    """Expression visitor to simplify a C access represented in Miasm

    @expr: Miasm expression representing the C access

    Rewrites handled (anything else is returned untouched):
      &(X[0])    => X
      (&X)[0]    => X
      &(*X)      => X
      *(&X)      => X
      (*X).f     => X->f

    Example:

    IN: (In c: ['*(&((&((*(ptr_Test)).a))[0]))'])
    [ExprOp('deref', ExprOp('addr', ExprOp('[]', ExprOp('addr',
    ExprOp('field', ExprOp('deref', ExprId('ptr_Test', 64)),
    ExprId('a', 64))), ExprInt(0x0, 64))))]

    OUT: (In c: ['(ptr_Test)->a'])
    [ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64))]
    """

    if expr.is_op("addr"):
        target = expr.args[0]
        # &(X[0]) => X
        if target.is_op("[]") and target.args[1] == ExprInt(0, 64):
            return target.args[0]
        # &(*X) => X
        if target.is_op("deref"):
            return target.args[0]
        return expr

    if expr.is_op("[]"):
        # (&X)[0] => X
        if expr.args[0].is_op("addr") and expr.args[1] == ExprInt(0, 64):
            return expr.args[0].args[0]
        return expr

    if expr.is_op("deref"):
        # *(&X) => X
        if expr.args[0].is_op("addr"):
            return expr.args[0].args[0]
        return expr

    if expr.is_op("field") and expr.args[0].is_op("deref"):
        # (*X).f => X->f
        return ExprOp("->", expr.args[0].args[0], expr.args[1])

    return expr
class CGen(object):
    """Generic object to represent a C expression

    Subclasses have to implement to_c() and to_expr().
    """

    # Bit size given to the ExprId/ExprInt built by the subclasses
    default_size = 64

    def __init__(self, ctype):
        self._ctype = ctype

    ctype = property(lambda self: self._ctype,
                     doc="Type (ObjC instance) of the current object")

    def __hash__(self):
        # Only the class takes part in the base hash
        return hash(self.__class__)

    def __eq__(self, other):
        if self.__class__ != other.__class__:
            return False
        return self._ctype == other.ctype

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def to_c(self):
        """Generate corresponding C"""

        raise NotImplementedError("Virtual")

    def to_expr(self):
        """Generate Miasm expression representing the C access"""

        raise NotImplementedError("Virtual")
class CGenId(CGen):
    """Named C object (identifier) together with its type"""

    def __init__(self, ctype, name):
        self._name = name
        assert isinstance(name, str)
        super(CGenId, self).__init__(ctype)

    name = property(lambda self: self._name, doc="Name of the Id")

    def __hash__(self):
        base_hash = super(CGenId, self).__hash__()
        return hash((base_hash, self._name))

    def __eq__(self, other):
        # The base comparison checks the class and the ctype
        if not super(CGenId, self).__eq__(other):
            return False
        return self._name == other.name

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            self.name)

    def to_c(self):
        """Generate corresponding C"""

        return "%s" % (self.name)

    def to_expr(self):
        """Generate Miasm expression representing the C access"""

        identifier = ExprId(self.name, self.default_size)
        return identifier
class CGenArray(CGen):
    """
    C Array

    This object does *not* deref the source, it only does object casting.

    IN:
    - obj
    OUT:
    - X* => X*
    - ..[][] => ..[]
    - X[] => X*
    """

    def __init__(self, base, elems, void_p_align, void_p_size):
        """Init CGenArray

        @base: CGen object being indexed; its ctype must be an ObjCPtr or
               an ObjCArray, a TypeError is raised otherwise
        @elems: index emitted in the generated subscript
        @void_p_align: alignment of the ObjCPtr built for the X[] => X* case
        @void_p_size: size of the ObjCPtr built for the X[] => X* case
        """
        ctype = base.ctype
        if isinstance(ctype, ObjCPtr):
            # X* => X*
            pass
        elif isinstance(ctype, ObjCArray) and isinstance(ctype.objtype, ObjCArray):
            # ..[][] => ..[]
            ctype = ctype.objtype
        elif isinstance(ctype, ObjCArray):
            # X[] => X*
            ctype = ObjCPtr(ctype.objtype, void_p_align, void_p_size)
        else:
            raise TypeError("Strange case")
        self._base = base
        self._elems = elems
        super(CGenArray, self).__init__(ctype)

    @property
    def base(self):
        """Base object supporting the array"""
        return self._base

    @property
    def elems(self):
        """Index used in the generated subscript"""
        return self._elems

    def __hash__(self):
        return hash((super(CGenArray, self).__hash__(), self._base, self._elems))

    def __eq__(self, other):
        # super() must be bound to this class: CGenArray does not derive
        # from CGenField, so super(CGenField, self) raises a TypeError.
        return (super(CGenArray, self).__eq__(other) and
                self._base == other.base and
                self._elems == other.elems)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            self.base)

    def to_c(self):
        """Generate corresponding C"""

        if isinstance(self.ctype, ObjCPtr):
            out_str = "&((%s)[%d])" % (self.base.to_c(), self.elems)
        elif isinstance(self.ctype, ObjCArray):
            out_str = "(%s)[%d]" % (self.base.to_c(), self.elems)
        else:
            raise RuntimeError("Strange case")
        return out_str

    def to_expr(self):
        """Generate Miasm expression representing the C access"""

        if isinstance(self.ctype, ObjCPtr):
            return ExprOp("addr",
                          ExprOp("[]",
                                 self.base.to_expr(),
                                 ExprInt(self.elems, self.default_size)))
        elif isinstance(self.ctype, ObjCArray):
            return ExprOp("[]",
                          self.base.to_expr(),
                          ExprInt(self.elems, self.default_size))
        else:
            raise RuntimeError("Strange case")


class CGenDeref(CGen):
    """
    C dereference

    IN:
    - ptr
    OUT:
    - X* => X
    """

    def __init__(self, ptr):
        """Init CGenDeref

        @ptr: CGen object whose ctype is an ObjCPtr; the resulting ctype is
              the pointed type
        """
        assert isinstance(ptr.ctype, ObjCPtr)
        self._ptr = ptr
        super(CGenDeref, self).__init__(ptr.ctype.objtype)

    @property
    def ptr(self):
        """Pointer object"""
        return self._ptr

    def __hash__(self):
        return hash((super(CGenDeref, self).__hash__(), self._ptr))

    def __eq__(self, other):
        # super() must be bound to this class: CGenDeref does not derive
        # from CGenField, so super(CGenField, self) raises a TypeError.
        return (super(CGenDeref, self).__eq__(other) and
                self._ptr == other.ptr)

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__,
                            self.ptr)

    def to_c(self):
        """Generate corresponding C"""

        if not isinstance(self.ptr.ctype, ObjCPtr):
            raise RuntimeError()
        return "*(%s)" % (self.ptr.to_c())

    def to_expr(self):
        """Generate Miasm expression representing the C access"""

        if not isinstance(self.ptr.ctype, ObjCPtr):
            raise RuntimeError()
        return ExprOp("deref", self.ptr.to_expr())
def parse_access(c_access):
    """Parse C access

    The access is wrapped as the single statement of a dummy ``main``
    function, the result is parsed with pycparser and the AST node of that
    statement is returned.

    @c_access: C access string
    """

    wrapped_source = '''
    int main() {
    %s;
    }
    ''' % c_access

    c_ast_root = c_parser.CParser().parse(wrapped_source, filename='')
    # The dummy main is the last top-level declaration; its body holds
    # the access as first item.
    main_func = c_ast_root.ext[-1]
    return main_func.body.block_items[0]
If true, raise exception if such a + pointer is encountered + """ + + self.expr_types = expr_types + self.types_mngr = types_mngr + self.enforce_strict_access = enforce_strict_access + + def updt_expr_types(self, expr_types): + """Update expr_types + @expr_types: Dictionary associating name to type + """ + + self.expr_types = expr_types + + def cgen_access(self, cgenobj, base_type, offset, deref, lvl=0): + """Return the access(es) which lead to the element at @offset of an + object of type @base_type + + In case of no @deref, stops recursion as soon as we reached the base of + an object. + In other cases, we need to go down to the final dereferenced object + + @cgenobj: current object access + @base_type: type of main object + @offset: offset (in bytes) of the target sub object + @deref: get type for a pointer or a deref + @lvl: actual recursion level + + + IN: + - base_type: struct Toto{ + int a + int b + } + - base_name: var + - 4 + OUT: + - CGenField(var, b) + + + + IN: + - base_type: int a + - 0 + OUT: + - CGenAddr(a) + + IN: + - base_type: X = int* a + - 0 + OUT: + - CGenAddr(X) + + IN: + - X = int* a + - 8 + OUT: + - ASSERT + + + IN: + - struct toto{ + int a + int b[10] + } + - 8 + OUT: + - CGenArray(CGenField(toto, b), 1) + """ + if base_type.size == 0: + missing_definition(base_type) + return set() + + + void_type = self.types_mngr.void_ptr + if isinstance(base_type, ObjCStruct): + if not 0 <= offset < base_type.size: + return set() + + if offset == 0 and not deref: + # In this case, return the struct* + return set([cgenobj]) + + for fieldname, subtype, field_offset, size in base_type.fields: + if not field_offset <= offset < field_offset + size: + continue + fieldptr = CGenField(CGenDeref(cgenobj), fieldname, subtype, + void_type.align, void_type.size) + new_type = self.cgen_access(fieldptr, subtype, + offset - field_offset, + deref, lvl + 1) + break + else: + return set() + elif isinstance(base_type, ObjCArray): + if base_type.objtype.size == 0: + 
def reduce_known_expr(self, node, ctxt, **kwargs):
    """Generate access for known expr

    @node: reducer node, its expression is looked up in @ctxt
    @ctxt: dictionary linking known expressions to an iterable of types

    Return one CGenId per known type, or None if the expression is unknown.
    """
    expr = node.expr
    if expr not in ctxt:
        return None
    accesses = set()
    for objc in ctxt[expr]:
        accesses.add(CGenId(objc, str(expr)))
    return accesses
def reduce_op(self, node, lvl=0, **kwargs):
    """Generate access for ExprOp

    Only the binary form ``base + integer`` is handled: the second operand
    must have ObjCInt as its single possible type and be an integer
    expression. For each pointer or array typed access of the first
    operand, the accesses reaching the byte offset given by the integer
    are collected through cgen_access.

    @node: reducer node of the operation
    @lvl: recursion level forwarded to cgen_access

    Return a set of accesses (possibly empty), or None if the rule does
    not apply.
    """
    if not node.expr.is_op("+") or len(node.args) != 2:
        return None
    arg0, arg1 = node.args
    if self.get_solo_type(arg1) != ObjCInt:
        return None
    if arg0.info is None:
        return None
    if not arg1.expr.is_int():
        return None
    ptr_offset = int(arg1.expr)

    out = set()
    for info in arg0.info:
        if isinstance(info.ctype, ObjCArray):
            # The offset is resolved inside the array itself
            field_type = info.ctype
        elif isinstance(info.ctype, ObjCPtr):
            # Array-like: int* ptr; ptr[1] = X
            field_type = info.ctype.objtype
        else:
            continue
        out.update(self.cgen_access(info, field_type, ptr_offset, False, lvl))
    return out
def get_accesses(self, expr, expr_context=None):
    """Generate C access(es) for the native Miasm expression @expr
    @expr: native Miasm expression
    @expr_context: a dictionary linking known expressions to their
    types. An expression is linked to a tuple of types. When None,
    self.expr_types is used.

    Return the reduction result, or an empty set if the reduction gave
    nothing.
    """
    ctxt = self.expr_types if expr_context is None else expr_context
    info = self.reduce(expr, ctxt=ctxt).info
    return set() if info is None else info
def reduce_op_array(self, node, **kwargs):
    """Reduce array operator

    @node: reducer node of a ``base[index]`` access; the index must be an
           ExprInt and the base must already be reduced to a
           (expression, type) tuple

    Return a tuple (expression, type of the element), or None if @node is
    not a '[]' operation.
    Raise NotImplementedError if the base is neither a pointer nor an
    array.
    """

    if not node.expr.is_op('[]'):
        return None
    assert len(node.args) == 2
    assert isinstance(node.args[1].expr, ExprInt)
    cst = node.args[1].expr
    src, src_type = node.args[0].info
    objtype = src_type.objtype
    expr = src + cst * ExprInt(objtype.size, cst.size)
    if isinstance(src_type, ObjCPtr):
        if isinstance(objtype, ObjCArray):
            # Pointer to array: index with the size of the array element
            # and read that element
            objtype = objtype.objtype
            expr = src + cst * ExprInt(objtype.size, cst.size)
        expr = ExprMem(expr, objtype.size * 8)
    elif isinstance(src_type, ObjCArray):
        # Sub-arrays and structures are kept as an address, other
        # elements are read from memory.
        # NOTE(review): reduce_op_memberof also keeps ObjCUnion as an
        # address; confirm whether unions should be handled the same here.
        if not isinstance(objtype, (ObjCArray, ObjCStruct)):
            expr = ExprMem(expr, objtype.size * 8)
    else:
        # The original message had no placeholder, so the formatting
        # itself raised a TypeError instead of this exception.
        raise NotImplementedError("Unknown access %s" % (node.expr,))
    return (expr, objtype)
def get_expr(self, expr, c_context):
    """Translate a Miasm expression @expr (representing a C access) into a
    tuple composed of a native Miasm expression and its C type.
    @expr: Miasm expression (representing a C access)
    @c_context: a dictionary linking known tokens (strings) to their
    types. A token is linked to only one type.

    Return (None, None) if the reduction gave nothing.
    """
    info = self.reduce(expr, ctxt=c_context).info
    return (None, None) if info is None else info
in type_id.fields: + objc = self._get_objc(field, resolved, to_fix, lvl + 1) + resolved[field] = objc + align_max = max(align_max, objc.align) + new_offset = self.struct_compute_field_offset(objc, offset) + if new_offset - offset: + pad_name = "__PAD__%d__" % pad_index + pad_index += 1 + size = new_offset - offset + pad_objc = self._get_objc(CTypeArray(self.padding, size), resolved, to_fix, lvl + 1) + args.append((pad_name, pad_objc, offset, pad_objc.size)) + offset = new_offset + args.append((name, objc, offset, objc.size)) + offset += objc.size + + align, size = self.struct_compute_align_size(align_max, offset) + out = ObjCStruct(type_id.name, align, size, args) + + elif isinstance(type_id, CTypePtr): + target = type_id.target + out = ObjCPtr(None, self.void_ptr.align, self.void_ptr.size) + fixed = False + + elif isinstance(type_id, CTypeArray): + target = type_id.target + objc = self._get_objc(target, resolved, to_fix, lvl + 1) + resolved[target] = objc + if type_id.size is None: + # case: toto[] + # return ObjCPtr + out = ObjCPtr(objc, self.void_ptr.align, self.void_ptr.size) + else: + size = self.size_to_int(type_id.size) + if size is None: + raise RuntimeError('Enable to compute objc size') + else: + out = ObjCArray(objc, size) + assert out.size is not None and out.align is not None + elif isinstance(type_id, CTypeEnum): + # Enum are integer + return self.leaf_types.types.get(CTypeId('int')) + elif isinstance(type_id, CTypeFunc): + type_ret = self._get_objc( + type_id.type_ret, resolved, to_fix, lvl + 1) + resolved[type_id.type_ret] = type_ret + args = [] + for name, arg in type_id.args: + objc = self._get_objc(arg, resolved, to_fix, lvl + 1) + resolved[arg] = objc + args.append((name, objc)) + out = ObjCFunc(type_id.name, type_id.abi, type_ret, args, + self.void_ptr.align, self.void_ptr.size) + elif isinstance(type_id, CTypeEllipsis): + out = ObjCEllipsis() + else: + raise TypeError("Unknown type %r" % type_id.__class__) + if not isinstance(out, 
def check_objc(self, objc, done=None):
    """Ensure each sub ObjC is resolved
    @objc: ObjC instance
    @done: set of the ObjC already visited (breaks recursive types)

    Return True; an AssertionError is raised if an object of unknown kind
    is reached.
    """
    if done is None:
        done = set()
    if objc in done:
        return True
    done.add(objc)

    if isinstance(objc, (ObjCDecl, ObjCInt, ObjCEllipsis)):
        # Leaf types: nothing to follow
        return True
    if isinstance(objc, (ObjCPtr, ObjCArray)):
        children = [objc.objtype]
    elif isinstance(objc, (ObjCStruct, ObjCUnion)):
        children = [field for _, field, _, _ in objc.fields]
    elif isinstance(objc, ObjCFunc):
        children = [objc.type_ret] + [arg for _, arg in objc.args]
    else:
        raise AssertionError("Unresolved or unknown ObjC: %r" % (objc,))

    # The recursion is done outside of any assert statement: asserts are
    # removed under "python -O", which would silently skip the traversal.
    for child in children:
        if not self.check_objc(child, done):
            raise AssertionError("Unresolved ObjC: %r" % (child,))
    return True
def __init__(self, types_mngr, expr_types=None,
             C_types=None,
             simplify_c=access_simplifier,
             enforce_strict_access=True):
    """Init CHandler

    @types_mngr: types manager, shared with both translators
    @expr_types: (optional) dictionary linking known expressions to their
                 types; defaults to an empty dictionary
    @C_types: (optional) dictionary linking known C tokens (strings) to
              their type; defaults to an empty dictionary
    @simplify_c: visitor applied to the generated C access expressions
    @enforce_strict_access: forwarded to the expression -> C access
                            generator
    """
    # Resolve the defaults *before* building the translators, so that they
    # receive the same dictionary as the handler instead of None.
    if expr_types is None:
        expr_types = {}
    if C_types is None:
        C_types = {}

    self.exprc2expr = self.exprCToExpr_cls(expr_types, types_mngr)
    self.access_c_gen = self.exprToAccessC_cls(expr_types,
                                               types_mngr,
                                               enforce_strict_access)
    self.types_mngr = types_mngr
    self.simplify_c = simplify_c
    self.expr_types = expr_types
    self.C_types = C_types
def expr_to_c(self, expr, expr_context=None):
    """Convert a Miasm @expr into its C equivalent strings
    @expr: Miasm expression
    @expr_context: a dictionary linking known expressions to a set of types

    Returns the set of C strings, i.e. the first item of every entry
    produced by expr_to_c_and_types.
    """
    c_strings = set()
    for c_and_type in self.expr_to_c_and_types(expr, expr_context):
        # Only the C string is kept, the associated type is dropped
        c_strings.add(c_and_type[0])
    return c_strings
def guess_next_new_label(loc_db):
    """Generate a new label
    @loc_db: the LocationDB instance

    Candidate names are b"loc_" followed by an 8-digit uppercase
    hexadecimal counter starting at 0. The first name unknown to @loc_db
    is registered through add_location and the result is returned.
    """
    index = 0
    candidate = b"loc_%.8X" % index
    # Skip every name which already has a location
    while loc_db.get_name_location(candidate) is not None:
        index += 1
        candidate = b"loc_%.8X" % index
    return loc_db.add_location(candidate)
def parse_txt(mnemo, attrib, txt, loc_db=None):
    """Parse an assembly listing. Returns a couple (asmcfg, loc_db), where
    asmcfg is an AsmCfg instance and loc_db the associated LocationDB

    The work is done in two passes: the text is first turned into a flat
    list of items (labels, raw data, directives, instructions), then this
    list is walked by a small state machine which groups the items into
    blocks and links them.

    @mnemo: architecture used
    @attrib: architecture attribute
    @txt: assembly listing
    @loc_db: (optional) the LocationDB instance used to handle labels
    of the listing

    Raises ValueError on an unknown directive, and RuntimeError when a
    flow-breaking instruction is found while a delay slot is pending or
    when an item of an unexpected class is met.
    """

    if loc_db is None:
        loc_db = asmblock.LocationDB()

    C_NEXT = asmblock.AsmConstraint.c_next
    C_TO = asmblock.AsmConstraint.c_to

    # First pass: text -> list of labels / directives / raw data / instructions
    lines = []
    # parse each line
    for line in txt.split('\n'):
        # empty
        if EMPTY_RE.match(line):
            continue
        # comment
        if COMMENT_RE.match(line):
            continue
        # labels to forget
        if FORGET_LABEL_RE.match(line):
            continue
        # label (this uses LABEL_RE, so any "name:" line matches here, not
        # only the ones beginning with .L)
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1).encode()
            label = loc_db.get_or_create_name_location(label_name)
            lines.append(label)
            continue
        # directive
        if DIRECTIVE_START_RE.match(line):
            match_re = DIRECTIVE_RE.match(line)
            directive = match_re.group(1)
            # Section selection directives are ignored
            if directive in ['text', 'data', 'bss']:
                continue
            if directive in ['string', 'ascii']:
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                # Keep what lies between the first and the last double quote
                raw = line[line.find(r'"') + 1:line.rfind(r'"')]
                raw = codecs.escape_decode(raw)[0]
                # Only .string appends the terminating null byte
                if directive == 'string':
                    raw += b"\x00"
                lines.append(asmblock.AsmRaw(raw))
                continue
            if directive == 'ustring':
                # XXX HACK
                line = line.replace(r'\n', '\n').replace(r'\r', '\r')
                raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00"
                raw = codecs.escape_decode(raw)[0]
                # Widen each byte to two bytes by appending a null byte
                out = b''
                for i in range(len(raw)):
                    out += raw[i:i+1] + b'\x00'
                lines.append(asmblock.AsmRaw(out))
                continue
            if directive in declarator:
                # Data declaration: comma separated expressions located
                # after the first space following the directive
                data_raw = line[match_re.end():].split(' ', 1)[1]
                data_raw = data_raw.split(',')
                size = declarator[directive]
                expr_list = []

                # parser

                for element in data_raw:
                    element = element.strip()
                    element_parsed = base_expr.parseString(element)[0]
                    element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size)
                    expr_list.append(element_expr)

                raw_data = asmblock.AsmRaw(expr_list)
                raw_data.element_size = size
                lines.append(raw_data)
                continue
            if directive == 'comm':
                # TODO
                continue
            if directive == 'split':  # custom command
                lines.append(DirectiveSplit())
                continue
            if directive == 'dontsplit':  # custom command
                lines.append(DirectiveDontSplit())
                continue
            if directive == "align":
                # Base 0: the value may be decimal or carry a 0x/0o/0b prefix
                align_value = int(line[match_re.end():], 0)
                lines.append(DirectiveAlign(align_value))
                continue
            # Silently ignored directives ('align' can no longer reach this
            # test, it is handled just above)
            if directive in ['file', 'intel_syntax', 'globl', 'local',
                             'type', 'size', 'align', 'ident', 'section']:
                continue
            if directive[0:4] == 'cfi_':
                continue

            raise ValueError("unknown directive %s" % directive)

        # label
        # NOTE(review): LABEL_RE has already been tried on this very line
        # before the directive handling, so this branch looks unreachable
        # -- confirm before relying on it.
        match_re = LABEL_RE.match(line)
        if match_re:
            label_name = match_re.group(1).encode()
            label = loc_db.get_or_create_name_location(label_name)
            lines.append(label)
            continue

        # code
        # Drop a trailing ';' comment, then surrounding spaces and tabs
        if ';' in line:
            line = line[:line.find(';')]
        line = line.strip(' ').strip('\t')
        instr = mnemo.fromstring(line, loc_db, attrib)
        lines.append(instr)

    asmblock.log_asmblock.info("___pre asm oki___")
    # make asmcfg

    # Second pass: group the items into blocks
    cur_block = None
    state = STATE_NO_BLOC
    i = 0
    asmcfg = asmblock.AsmCFG(loc_db)
    # Block which must receive a C_NEXT constraint to the following block
    block_to_nlink = None
    # Number of items left before the current block gets closed
    delayslot = 0
    while i < len(lines):
        if delayslot:
            delayslot -= 1
            if delayslot == 0:
                state = STATE_NO_BLOC
        line = lines[i]
        # no current block
        if state == STATE_NO_BLOC:
            if isinstance(line, DirectiveDontSplit):
                block_to_nlink = cur_block
                i += 1
                continue
            elif isinstance(line, DirectiveSplit):
                block_to_nlink = None
                i += 1
                continue
            elif not isinstance(line, LocKey):
                # First line must be a label. If it's not the case, generate
                # it.
                # The item is not consumed here: it is handled on the next
                # iteration, once the state is STATE_IN_BLOC.
                loc = guess_next_new_label(loc_db)
                cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment)
            else:
                cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment)
                i += 1
            # Generate the current bloc
            asmcfg.add_block(cur_block)
            state = STATE_IN_BLOC
            if block_to_nlink:
                block_to_nlink.addto(
                    asmblock.AsmConstraint(
                        cur_block.loc_key,
                        C_NEXT
                    )
                )
            block_to_nlink = None
            continue

        # in block
        elif state == STATE_IN_BLOC:
            if isinstance(line, DirectiveSplit):
                state = STATE_NO_BLOC
                block_to_nlink = None
            elif isinstance(line, DirectiveDontSplit):
                state = STATE_NO_BLOC
                block_to_nlink = cur_block
            elif isinstance(line, DirectiveAlign):
                cur_block.alignment = line.alignment
            elif isinstance(line, asmblock.AsmRaw):
                cur_block.addline(line)
                block_to_nlink = cur_block
            elif isinstance(line, LocKey):
                # A label closes the current block; the label itself is
                # left in place and opens the next block on the next
                # iteration.
                if block_to_nlink:
                    cur_block.addto(
                        asmblock.AsmConstraint(line, C_NEXT)
                    )
                    block_to_nlink = None
                state = STATE_NO_BLOC
                continue
            # instruction
            elif isinstance(line, instruction):
                cur_block.addline(line)
                block_to_nlink = cur_block
                if not line.breakflow():
                    i += 1
                    continue
                if delayslot:
                    raise RuntimeError("Cannot have breakflow in delayslot")
                if line.dstflow():
                    # Only ExprId destinations which are not registers get a
                    # C_TO constraint
                    for dst in line.getdstflow(loc_db):
                        if not isinstance(dst, ExprId):
                            continue
                        if dst in mnemo.regs.all_regs_ids:
                            continue
                        cur_block.addto(asmblock.AsmConstraint(dst.name, C_TO))

                if not line.splitflow():
                    block_to_nlink = None

                # +1 because the counter is decremented at the top of the
                # next iteration, before anything else is processed
                delayslot = line.delayslot + 1
            else:
                raise RuntimeError("unknown class %s" % line.__class__)
        i += 1

    for block in asmcfg.blocks:
        # Fix multiple constraints
        block.fix_constraints()

        # Log block
        asmblock.log_asmblock.info(block)
    return asmcfg, loc_db
class MiasmTransformer(ast.NodeTransformer):
    """AST visitor translating DSL to Miasm expression

    memX[Y]       -> ExprMem(Y, X)
    iX(Y)         -> ExprInt(Y, X)
    X if Y else Z -> ExprCond(Y, X, Z)
    'X'(Y)        -> ExprOp('X', Y)
    ('X' % Y)(Z)  -> ExprOp('X' % Y, Z)
    {a, b}        -> ExprCompose(a, b)
    """

    # Parsers
    parse_integer = re.compile("^i([0-9]+)$")
    parse_mem = re.compile("^mem([0-9]+)$")

    # The node classes used for literals and subscripts changed across
    # Python versions (ast.Str / ast.Num / ast.Index vs. ast.Constant and a
    # bare expression). Ask the running interpreter instead of naming them.
    _str_node_cls = type(ast.parse("''", mode="eval").body)
    _int_node_cls = type(ast.parse("0", mode="eval").body)
    _index_node_cls = getattr(ast, "Index", None)
    # 'starargs' / 'kwargs' are only fields of ast.Call on old interpreters
    _call_legacy_fields = tuple(
        field for field in ("starargs", "kwargs")
        if field in ast.Call._fields
    )

    # Helpers
    @classmethod
    def _is_str_node(cls, node):
        """Return True if @node is a string literal"""
        if not isinstance(node, cls._str_node_cls):
            return False
        # ast.Constant also stands for numbers, None, ...: check the payload.
        # Legacy ast.Str nodes have no 'value' and are always strings.
        return isinstance(getattr(node, "value", ""), str)

    @classmethod
    def _is_op_name(cls, node):
        """Return True if @node is 'op' or ('op' % fmt)"""
        if cls._is_str_node(node):
            return True
        return (isinstance(node, ast.BinOp) and
                isinstance(node.op, ast.Mod) and
                cls._is_str_node(node.left))

    @classmethod
    def _build_call(cls, func_name, args):
        """Return the AST of the call @func_name(*@args)"""
        legacy = dict((field, None) for field in cls._call_legacy_fields)
        return ast.Call(func=ast.Name(id=func_name, ctx=ast.Load()),
                        args=args,
                        keywords=[],
                        **legacy)

    # Visitors
    def visit_Call(self, node):
        """iX(Y) -> ExprInt(Y, X),
        'X'(Y) -> ExprOp('X', Y), ('X' % Y)(Z) -> ExprOp('X' % Y, Z)"""

        # Recursive visit
        node = self.generic_visit(node)
        func = node.func
        if isinstance(func, ast.Name):
            # iX(Y) -> ExprInt(Y, X)
            integer = self.parse_integer.search(func.id)
            if integer is not None:
                # Replace in the node; the size becomes the last argument
                func.id = "ExprInt"
                node.args.append(self._int_node_cls(int(integer.group(1))))

        elif self._is_op_name(func):
            # 'op'(args...) -> ExprOp('op', args...)
            # ('op' % (fmt))(args...) -> ExprOp('op' % (fmt), args...)
            node.func = ast.Name(id="ExprOp", ctx=ast.Load())
            node.args[0:0] = [func]

        return node

    def visit_Subscript(self, node):
        """memX[Y] -> ExprMem(Y, X)"""

        # Recursive visit
        node = self.generic_visit(node)

        # Detect the syntax
        if not isinstance(node.value, ast.Name):
            return node
        mem = self.parse_mem.search(node.value.id)
        if mem is None:
            return node

        # Before Python 3.9 the index is wrapped in an ast.Index node; since
        # 3.9 node.slice is the index expression itself.
        index = node.slice
        if (self._index_node_cls is not None and
                isinstance(index, self._index_node_cls)):
            index = index.value

        # Do replacement
        addr = self.visit(index)
        size = self._int_node_cls(int(mem.group(1)))
        return self._build_call('ExprMem', [addr, size])

    def visit_IfExp(self, node):
        """X if Y else Z -> ExprCond(Y, X, Z)"""
        # Recursive visit
        node = self.generic_visit(node)

        # Build the new ExprCond
        return self._build_call('ExprCond',
                                [self.visit(node.test),
                                 self.visit(node.body),
                                 self.visit(node.orelse)])

    def visit_Set(self, node):
        "{a, b} -> ExprCompose(a, b)"
        if len(node.elts) == 0:
            return node

        # Recursive visit
        node = self.generic_visit(node)

        return self._build_call('ExprCompose', node.elts)
+ The context in which the function will be parsed must be supplied on + instantiation + """ + + def __init__(self, ctx): + """Create a SemBuilder + @ctx: context dictionary used during parsing + """ + # Init + self.transformer = MiasmTransformer() + self._ctx = dict(m2_expr.__dict__) + self._ctx["IRBlock"] = IRBlock + self._ctx["AssignBlock"] = AssignBlock + self._functions = {} + + # Update context + self._ctx.update(ctx) + + @property + def functions(self): + """Return a dictionary name -> func of parsed functions""" + return self._functions.copy() + + @staticmethod + def _create_labels(loc_else=False): + """Return the AST standing for label creations + @loc_else (optional): if set, create a label 'loc_else'""" + loc_end = "loc_end = ir.get_next_loc_key(instr)" + loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" + out = ast.parse(loc_end).body + out += ast.parse(loc_end_expr).body + loc_if = "loc_if = ir.loc_db.add_location()" + loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" + out += ast.parse(loc_if).body + out += ast.parse(loc_if_expr).body + if loc_else: + loc_else = "loc_else = ir.loc_db.add_location()" + loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" + out += ast.parse(loc_else).body + out += ast.parse(loc_else_expr).body + return out + + def _parse_body(self, body, argument_names): + """Recursive function transforming a @body to a block expression + Return: + - AST to append to body (real python statements) + - a list of blocks, ie list of affblock, ie list of ExprAssign (AST)""" + + # Init + ## Real instructions + real_body = [] + ## Final blocks + blocks = [[[]]] + + for statement in body: + + if isinstance(statement, ast.Assign): + src = self.transformer.visit(statement.value) + dst = self.transformer.visit(statement.targets[0]) + + if (isinstance(dst, ast.Name) and + dst.id not in argument_names and + dst.id not in self._ctx and + dst.id not in self._local_ctx): + + # Real variable declaration + 
statement.value = src + real_body.append(statement) + self._local_ctx[dst.id] = src + continue + + dst.ctx = ast.Load() + + res = ast.Call(func=ast.Name(id='ExprAssign', + ctx=ast.Load()), + args=[dst, src], + keywords=[], + starargs=None, + kwargs=None) + + blocks[-1][-1].append(res) + + elif (isinstance(statement, ast.Expr) and + isinstance(statement.value, ast.Str)): + # String (docstring, comment, ...) -> keep it + real_body.append(statement) + + elif isinstance(statement, ast.If): + # Create jumps : ir.IRDst = loc_if if cond else loc_end + # if .. else .. are also handled + cond = statement.test + real_body += self._create_labels(loc_else=True) + + loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load()) + loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load()) + loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \ + if statement.orelse else loc_end + dst = ast.Call(func=ast.Name(id='ExprCond', + ctx=ast.Load()), + args=[cond, + loc_if, + loc_else], + keywords=[], + starargs=None, + kwargs=None) + + if (isinstance(cond, ast.UnaryOp) and + isinstance(cond.op, ast.Not)): + ## if not cond -> switch exprCond + dst.args[1:] = dst.args[1:][::-1] + dst.args[0] = cond.operand + + IRDst = ast.Attribute(value=ast.Name(id='ir', + ctx=ast.Load()), + attr='IRDst', ctx=ast.Load()) + blocks[-1][-1].append(ast.Call(func=ast.Name(id='ExprAssign', + ctx=ast.Load()), + args=[IRDst, dst], + keywords=[], + starargs=None, + kwargs=None)) + + # Create the new blocks + elements = [(statement.body, 'loc_if')] + if statement.orelse: + elements.append((statement.orelse, 'loc_else')) + for content, loc_name in elements: + sub_blocks, sub_body = self._parse_body(content, + argument_names) + if len(sub_blocks) > 1: + raise RuntimeError("Imbricated if unimplemented") + + ## Close the last block + jmp_end = ast.Call(func=ast.Name(id='ExprAssign', + ctx=ast.Load()), + args=[IRDst, loc_end], + keywords=[], + starargs=None, + kwargs=None) + sub_blocks[-1][-1].append(jmp_end) + + + instr = 
def parse(self, func):
    """Function decorator, returning a correct method from a pseudo-Python
    one

    @func: function written in the DSL; its source is fetched with
           inspect.getsource, so it must be available as text

    The generated function takes two extra leading arguments, 'ir' and
    'instr', and returns a tuple (assignments of the current instruction,
    list of extra blocks). It is also recorded in self._functions under
    the name of @func.
    """

    # Get the function AST
    parsed = ast.parse(inspect.getsource(func))
    fc_ast = parsed.body[0]
    argument_names = [get_arg_name(name) for name in fc_ast.args.args]

    # Init local cache
    self._local_ctx = {}

    # Translate (blocks[0][0] is the current instr)
    blocks, body = self._parse_body(fc_ast.body, argument_names)

    # Build the new function
    fc_ast.args.args[0:0] = [
        gen_arg('ir', ast.Param()),
        gen_arg('instr', ast.Param())
    ]
    cur_instr = blocks[0][0]
    if len(blocks[-1][0]) == 0:
        ## Last block can be empty
        blocks.pop()
    other_blocks = blocks[1:]
    body.append(ast.Return(value=ast.Tuple(elts=[ast.List(elts=cur_instr,
                                                          ctx=ast.Load()),
                                                 ast.List(elts=other_blocks,
                                                          ctx=ast.Load())],
                                           ctx=ast.Load())))

    # Start from an empty parsed module rather than calling ast.Module
    # directly: the set of mandatory Module fields depends on the Python
    # version (e.g. 'type_ignores' since 3.8) and a hand-built node missing
    # one of them is rejected by compile().
    ret = ast.parse('')
    ret.body = [ast.FunctionDef(name=fc_ast.name,
                                args=fc_ast.args,
                                body=body,
                                decorator_list=[])]

    # To display the generated function, use codegen.to_source
    # codegen: https://github.com/andreif/codegen

    # Compile according to the context
    fixed = ast.fix_missing_locations(ret)
    codeobj = compile(fixed, '', 'exec')
    # Work on a copy so the generated function does not leak into the
    # shared parsing context
    ctx = self._ctx.copy()
    eval(codeobj, ctx)

    # Get the function back
    self._functions[fc_ast.name] = ctx[fc_ast.name]
    return ctx[fc_ast.name]
+ + +The easiest way to use the API to declare and manipulate new structures is to +subclass MemStruct and define a list of (, ): + + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + +And access the fields: + + mstruct = MyStruct(jitter.vm, addr) + mstruct.num = 3 + assert mstruct.num == 3 + mstruct.other.val = addr2 + # Also works: + mstruct.other = addr2 + mstruct.other.deref = OtherStruct(jitter.vm, addr) + +MemUnion and MemBitField can also be subclassed, the `fields` field being +in the format expected by, respectively, Union and BitField. + +The `addr` argument can be omitted if an allocator is set, in which case the +structure will be automatically allocated in memory: + + my_heap = miasm.os_dep.common.heap() + # the allocator is a func(VmMngr) -> integer_address + set_allocator(my_heap) + +Note that some structures (e.g. MemStr or MemArray) do not have a static +size and cannot be allocated automatically. +""" + +from builtins import range, zip +from builtins import int as int_types +import itertools +import logging +import struct +from future.utils import PY3 +from future.utils import viewitems, with_metaclass + +log = logging.getLogger(__name__) +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# Cache for dynamically generated MemTypes +DYN_MEM_STRUCT_CACHE = {} + +def set_allocator(alloc_func): + """Shorthand to set the default allocator of MemType. See + MemType.set_allocator doc for more information. 
def get_str(vm, addr, enc, max_char=None, end=u'\x00'):
    """Get a @end (by default '\\x00') terminated @enc encoded string from a
    VmMngr.

    For example:
        - get_str(vm, addr, "ascii") will read "foo\\x00" in memory and
          return u"foo"
        - get_str(vm, addr, "utf-16le") will read "f\\x00o\\x00o\\x00\\x00\\x00"
          in memory and return u"foo" as well.

    Setting @max_char=<n> and @end='' allows to read non null terminated
    strings from memory: exactly @max_char bytes are read and decoded.

    @vm: VmMngr instance
    @addr: the address at which to read the string
    @enc: the encoding of the string to read.
    @max_char: max number of bytes to get in memory
    @end: the unencoded ending sequence of the string, by default "\\x00".
        Unencoded here means that the actual ending sequence that this function
        will look for is end.encode(enc), not directly @end.

    Raises ValueError if @end is empty and @max_char is not given, as the
    read would have no bound.
    """
    end_char = end.encode(enc)
    step = len(end_char)

    if step == 0:
        # No terminator to look for. The chunked loop below cannot be used:
        # with a zero step it would never advance in memory.
        if max_char is None:
            raise ValueError("max_char is required when end is empty")
        return vm.get_mem(addr, max_char).decode(enc)

    s = []
    i = 0
    # Memory is read by chunks of the size of the encoded terminator
    while max_char is None or i < max_char:
        c = vm.get_mem(addr + i, step)
        if c == end_char:
            break
        s.append(c)
        i += step
    return b''.join(s).decode(enc)

def raw_str(s, enc, end=u'\x00'):
    """Returns a string representing @s as an @end (by default \\x00)
    terminated @enc encoded string.

    @s: the unicode str to serialize
    @enc: the encoding to apply to @s and @end before serialization.
    @end: the ending string/character to append to the string _before encoding_
        and serialization (by default '\\x00')
    """
    return (s + end).encode(enc)

def set_str(vm, addr, s, enc, end=u'\x00'):
    """Encode a string to an @end (by default \\x00) terminated @enc encoded
    string and set it in a VmMngr memory.

    @vm: VmMngr instance
    @addr: start address to serialize the string to
    @s: the unicode str to serialize
    @enc: the encoding to apply to @s and @end before serialization.
    @end: the ending string/character to append to the string _before encoding_
        and serialization (by default '\\x00')
    """
    s = raw_str(s, enc, end=end)
    vm.set_mem(addr, s)

def raw_len(py_unic_str, enc, end=u'\x00'):
    """Returns the length in bytes of @py_unic_str in memory (once @end has been
    added and the full str has been encoded). It returns exactly the room
    necessary to call set_str with similar arguments.

    @py_unic_str: the unicode str to work with
    @enc: the encoding to encode @py_unic_str to
    @end: the ending string/character to append to the string _before encoding_
        (by default \\x00)
    """
    # @end must be forwarded: it used to be dropped, so the result was only
    # right for the default terminator
    return len(raw_str(py_unic_str, enc, end=end))
class Type(object):
    """Base class describing a type and the way its values are written to
    and read from virtual memory.

    Each Type subclass is linked to a MemType subclass (e.g. Struct to
    MemStruct, Ptr to MemPtr, etc.).

    When nothing is specified, MemValue is used to access the type in memory.
    MemValue instances have one `.val` field, setting and getting it call
    the set and get of the Type.

    Subclasses can either override _pack and _unpack, or get and set if data
    serialization requires more work (see Struct implementation for an example).

    TODO: move any trace of vm and addr out of these classes?
    """

    # Type substituted to the Self marker, None while unset
    _self_type = None

    def _pack(self, val):
        """Serializes the python value @val to a raw str"""
        raise NotImplementedError()

    def _unpack(self, raw_str):
        """Deserializes a raw str to an object representing the python value
        of this field.
        """
        raise NotImplementedError()

    def set(self, vm, addr, val):
        """Set a VmMngr memory from a value.

        @vm: VmMngr instance
        @addr: the start address in memory to set
        @val: the python value to serialize in @vm at @addr
        """
        vm.set_mem(addr, self._pack(val))

    def get(self, vm, addr):
        """Get the python value of a field from a VmMngr memory at @addr."""
        return self._unpack(vm.get_mem(addr, self.size))

    @property
    def lval(self):
        """Returns a class with a (vm, addr) constructor that allows to
        interact with this type in memory.

        In compilation terms, it returns a class allowing to instantiate an
        lvalue of this type.

        The class is built once per type and kept in DYN_MEM_STRUCT_CACHE.

        @return: a MemType subclass.
        """
        try:
            return DYN_MEM_STRUCT_CACHE[self]
        except KeyError:
            pass
        DYN_MEM_STRUCT_CACHE[self] = mem_cls = self._build_pinned_type()
        return mem_cls

    def _build_pinned_type(self):
        """Builds the MemType subclass allowing to interact with this type.

        Called by self.lval when it is not in cache.
        """
        bases = (self._get_pinned_base_class(),)
        return type("Mem%r" % self, bases, {'_type': self})

    def _get_pinned_base_class(self):
        """Return the MemType subclass that maps this type in memory"""
        return MemValue

    def _get_self_type(self):
        """Used for the Self trick."""
        return self._self_type

    def _set_self_type(self, self_type):
        """If this field refers to MemSelf/Self, replace it with @self_type
        (a Type instance) when using it. Generally not used outside this
        module.
        """
        self._self_type = self_type

    @property
    def size(self):
        """Return the size in bytes of the serialized version of this field"""
        raise NotImplementedError()

    def __len__(self):
        return self.size

    def __neq__(self, other):
        # Not a special method name (Python only looks up __ne__); kept so
        # the attribute stays available
        return not self == other

    def __eq__(self, other):
        raise NotImplementedError("Abstract method")

    def __ne__(self, other):
        return not self == other
+ """ + + def __init__(self, fmt): + self._fmt = fmt + + def _pack(self, fields): + return struct.pack(self._fmt, *fields) + + def _unpack(self, raw_str): + return struct.unpack(self._fmt, raw_str) + + @property + def size(self): + return struct.calcsize(self._fmt) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self._fmt) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._fmt == other._fmt + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self.__class__, self._fmt)) + + +class Num(RawStruct): + """Represents a number (integer or float). The number is encoded with + a struct-style format which must represent only one value. + + TODO: use u32, i16, etc. for format. + """ + + def _pack(self, number): + return super(Num, self)._pack([number]) + + def _unpack(self, raw_str): + upck = super(Num, self)._unpack(raw_str) + if len(upck) != 1: + raise ValueError("Num format string unpacks to multiple values, " + "should be 1") + return upck[0] + + +class Ptr(Num): + """Special case of number of which value indicates the address of a + MemType. + + Mapped to MemPtr (see its doc for more info): + + assert isinstance(mystruct.ptr, MemPtr) + mystruct.ptr = 0x4000 # Assign the Ptr numeric value + mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value + assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value + mystruct.ptr.deref # Get the pointed MemType + mystruct.ptr.deref = other # Set the pointed MemType + """ + + def __init__(self, fmt, dst_type, *type_args, **type_kwargs): + """ + @fmt: (str) Num compatible format that will be the Ptr representation + in memory + @dst_type: (MemType or Type) the Type this Ptr points to. + If a Type is given, it is transformed into a MemType with + TheType.lval. + *type_args, **type_kwargs: arguments to pass to the the pointed + MemType when instantiating it (e.g. for MemStr encoding or + MemArray field_type). 
+ """ + if (not isinstance(dst_type, Type) and + not (isinstance(dst_type, type) and + issubclass(dst_type, MemType)) and + not dst_type == MemSelf): + raise ValueError("dst_type of Ptr must be a MemType type, a " + "Type instance, the MemSelf marker or a class " + "name.") + super(Ptr, self).__init__(fmt) + if isinstance(dst_type, Type): + # Patch the field to propagate the MemSelf replacement + dst_type._get_self_type = lambda: self._get_self_type() + # dst_type cannot be patched here, since _get_self_type of the outer + # class has not yet been set. Patching dst_type involves calling + # dst_type.lval, which will only return a type that does not point + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. + # In short, dst_type = dst_type.lval is not valid here, it is done + # lazily in _fix_dst_type + self._dst_type = dst_type + self._type_args = type_args + self._type_kwargs = type_kwargs + + def _fix_dst_type(self): + if self._dst_type in [MemSelf, SELF_TYPE_INSTANCE]: + if self._get_self_type() is not None: + self._dst_type = self._get_self_type() + else: + raise ValueError("Unsupported usecase for (Mem)Self, sorry") + self._dst_type = to_type(self._dst_type) + + @property + def dst_type(self): + """Return the type (MemType subtype) this Ptr points to.""" + self._fix_dst_type() + return self._dst_type + + def set(self, vm, addr, val): + """A Ptr field can be set with a MemPtr or an int""" + if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): + self.set_val(vm, addr, val.val) + else: + super(Ptr, self).set(vm, addr, val) + + def get(self, vm, addr): + return self.lval(vm, addr) + + def get_val(self, vm, addr): + """Get the numeric value of a Ptr""" + return super(Ptr, self).get(vm, addr) + + def set_val(self, vm, addr, val): + """Set the numeric value of a Ptr""" + return super(Ptr, self).set(vm, addr, val) + + def deref_get(self, vm, addr): + """Deserializes the data in @vm 
(VmMngr) at @addr to self.dst_type.
+        Equivalent to a pointer dereference rvalue in C.
+        """
+        dst_addr = self.get_val(vm, addr)
+        return self.dst_type.lval(vm, dst_addr,
+                                  *self._type_args, **self._type_kwargs)
+
+    def deref_set(self, vm, addr, val):
+        """Serializes the @val MemType subclass instance in @vm (VmMngr) at
+        @addr. Equivalent to a pointer dereference assignment in C.
+
+        A type mismatch between self.dst_type and @val's type is only logged
+        as a warning; @val is written anyway.
+        """
+        # Sanity check
+        if self.dst_type != val.get_type():
+            # self.dst_type has been resolved by the comparison above; it is
+            # logged as-is (through %s) because Type instances do not have a
+            # __name__ attribute.
+            log.warning("Original type was %s, overridden by value of type %s",
+                        self._dst_type, val.__class__.__name__)
+
+        # Actual job
+        dst_addr = self.get_val(vm, addr)
+        vm.set_mem(dst_addr, bytes(val))
+
+    def _get_pinned_base_class(self):
+        return MemPtr
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.dst_type)
+
+    def __eq__(self, other):
+        return super(Ptr, self).__eq__(other) and \
+                self.dst_type == other.dst_type and \
+                self._type_args == other._type_args and \
+                self._type_kwargs == other._type_kwargs
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        return hash((super(Ptr, self).__hash__(), self.dst_type,
+            self._type_args))
+
+
+class Struct(Type):
+    """Equivalent to a C struct type. Composed of a name, and a
+    (, ) list describing the fields
+    of the struct.
+
+    Mapped to MemStruct.
+
+    NOTE: The `.lval` property of Struct creates classes on the fly. If an
+    equivalent structure is created by subclassing MemStruct, an exception
+    is raised to prevent creating multiple classes designating the same type.
+
+    Example:
+        s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))])
+
+        Toto1 = s.lval
+
+        # This raises an exception, because it describes the same structure as
+        # Toto1
+        class Toto(MemStruct):
+            fields = [("f1", Num("I")), ("f2", Num("I"))]
+
+    Anonymous Struct, Union or BitField can be used if their field name
+    evaluates to False ("" or None).
Such anonymous Struct field will generate + fields to the parent Struct, e.g.: + bla = Struct("Bla", [ + ("a", Num("B")), + ("", Union([("b1", Num("B")), ("b2", Num("H"))])), + ("", Struct("", [("c1", Num("B")), ("c2", Num("B"))])), + ] + Will have a b1, b2 and c1, c2 field directly accessible. The anonymous + fields are renamed to "__anon_", with an incremented number. + + In such case, bla.fields will not contain b1, b2, c1 and c2 (only the 3 + actual fields, with the anonymous ones renamed), but bla.all_fields will + return the 3 fields + b1, b2, c1 and c2 (and an information telling if it + has been generated from an anonymous Struct/Union). + + bla.get_field(vm, addr, "b1") will work. + """ + + def __init__(self, name, fields): + self.name = name + # generates self._fields and self._fields_desc + self._gen_fields(fields) + + def _gen_fields(self, fields): + """Precompute useful metadata on self.fields.""" + self._fields_desc = {} + offset = 0 + + # Build a proper (name, Field()) list, handling cases where the user + # supplies a MemType subclass instead of a Type instance + real_fields = [] + uniq_count = 0 + for fname, field in fields: + field = to_type(field) + + # For reflexion + field._set_self_type(self) + + # Anonymous Struct/Union + if not fname and isinstance(field, Struct): + # Generate field information + updated_fields = { + name: { + # Same field type than the anon field subfield + 'field': fd['field'], + # But the current offset is added + 'offset': fd['offset'] + offset, + } + for name, fd in viewitems(field._fields_desc) + } + + # Add the newly generated fields from the anon field + self._fields_desc.update(updated_fields) + real_fields += [(name, fld, True) + for name, fld in field.fields] + + # Rename the anonymous field + fname = '__anon_%x' % uniq_count + uniq_count += 1 + + self._fields_desc[fname] = {"field": field, "offset": offset} + real_fields.append((fname, field, False)) + offset = self._next_offset(field, offset) + + # fields is 
immutable + self._fields = tuple(real_fields) + + def _next_offset(self, field, orig_offset): + return orig_offset + field.size + + @property + def fields(self): + """Returns a sequence of (name, field) describing the fields of this + Struct, in order of offset. + + Fields generated from anonymous Unions or Structs are excluded from + this sequence. + """ + return tuple((name, field) for name, field, anon in self._fields + if not anon) + + @property + def all_fields(self): + """Returns a sequence of (, , ), + where is_anon is True when a field is generated from an anonymous + Struct or Union, and False for the fields that have been provided as is. + """ + return self._fields + + def set(self, vm, addr, val): + raw = bytes(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + return self.lval(vm, addr) + + def get_field(self, vm, addr, name): + """Get a field value by @name and base structure @addr in @vm VmMngr.""" + if name not in self._fields_desc: + raise ValueError("'%s' type has no field '%s'" % (self, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + return field.get(vm, addr + offset) + + def set_field(self, vm, addr, name, val): + """Set a field value by @name and base structure @addr in @vm VmMngr. + @val is the python value corresponding to this field type. 
+ """ + if name not in self._fields_desc: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + field.set(vm, addr + offset, val) + + @property + def size(self): + return sum(field.size for _, field in self.fields) + + def get_offset(self, field_name): + """ + @field_name: (str, optional) the name of the field to get the + offset of + """ + if field_name not in self._fields_desc: + raise ValueError("This structure has no %s field" % field_name) + return self._fields_desc[field_name]['offset'] + + def get_field_type(self, name): + """Return the Type subclass instance describing field @name.""" + return self._fields_desc[name]['field'] + + def _get_pinned_base_class(self): + return MemStruct + + def __repr__(self): + return "struct %s" % self.name + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.fields == other.fields and \ + self.name == other.name + + def __ne__(self, other): + return not self == other + + def __hash__(self): + # Only hash name, not fields, because if a field is a Ptr to this + # Struct type, an infinite loop occurs + return hash((self.__class__, self.name)) + + +class Union(Struct): + """Represents a C union. + + Allows to put multiple fields at the same offset in a MemStruct, + similar to unions in C. The Union will have the size of the largest of its + fields. + + Mapped to MemUnion. 
+ + Example: + + class Example(MemStruct): + fields = [("uni", Union([ + ("f1", Num("= self.size): + raise IndexError("Index %s out of bounds" % idx) + + def _get_pinned_base_class(self): + if self.is_sized(): + return MemSizedArray + else: + return MemArray + + def __repr__(self): + return "[%r; %s]" % (self.field_type, self.array_len or "unsized") + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.field_type == other.field_type and \ + self.array_len == other.array_len + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self.__class__, self.field_type, self.array_len)) + + +class Bits(Type): + """Helper class for BitField, not very useful on its own. Represents some + bits of a Num. + + The @backing_num is used to know how to serialize/deserialize data in vm, + but getting/setting this fields only assign bits from @bit_offset to + @bit_offset + @bits. Masking and shifting is handled by the class, the aim + is to provide a transparent way to set and get some bits of a num. 
+ """ + + def __init__(self, backing_num, bits, bit_offset): + if not isinstance(backing_num, Num): + raise ValueError("backing_num should be a Num instance") + self._num = backing_num + self._bits = bits + self._bit_offset = bit_offset + + def set(self, vm, addr, val): + val_mask = (1 << self._bits) - 1 + val_shifted = (val & val_mask) << self._bit_offset + num_size = self._num.size * 8 + + full_num_mask = (1 << num_size) - 1 + num_mask = (~(val_mask << self._bit_offset)) & full_num_mask + + num_val = self._num.get(vm, addr) + res_val = (num_val & num_mask) | val_shifted + self._num.set(vm, addr, res_val) + + def get(self, vm, addr): + val_mask = (1 << self._bits) - 1 + num_val = self._num.get(vm, addr) + res_val = (num_val >> self._bit_offset) & val_mask + return res_val + + @property + def size(self): + return self._num.size + + @property + def bit_size(self): + """Number of bits read/written by this class""" + return self._bits + + @property + def bit_offset(self): + """Offset in bits (beginning at 0, the LSB) from which to read/write + bits. + """ + return self._bit_offset + + def __repr__(self): + return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, + self._bit_offset, self._bit_offset + self._bits) + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and self._bits == other._bits and \ + self._bit_offset == other._bit_offset + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self.__class__, self._num, self._bits, self._bit_offset)) + + +class BitField(Union): + """A C-like bitfield. + + Constructed with a list [(, )] and a + @backing_num. The @backing_num is a Num instance that determines the total + size of the bitfield and the way the bits are serialized/deserialized (big + endian int, little endian short...). Can be seen (and implemented) as a + Union of Bits fields. + + Mapped to MemBitField. 
+
+    Creates fields that allow to access the bitfield fields easily. Example:
+
+        class Example(MemStruct):
+            fields = [("bf", BitField(Num("B"), [
+                          ("f1", 2),
+                          ("f2", 4),
+                          ("f3", 1)
+                         ])
+            )]
+
+        ex = Example(vm, addr)
+        ex.memset()
+        ex.f2 = 2
+        ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated
+        assert ex.f1 == 3
+        assert ex.f2 == 2
+        assert ex.f3 == 0 # previously memset()
+        assert ex.bf == 3 + (2 << 2)
+    """
+
+    def __init__(self, backing_num, bit_list):
+        """@backing num: Num instance, @bit_list: [(name, n_bits)]
+
+        Raises a ValueError if the bit lengths of @bit_list add up to more
+        bits than @backing_num can hold.
+        """
+        self._num = backing_num
+        fields = []
+        offset = 0
+        for name, bits in bit_list:
+            fields.append((name, Bits(self._num, bits, offset)))
+            offset += bits
+        # self._num.size is in bytes, offset is in bits
+        if offset > self._num.size * 8:
+            raise ValueError("sum of bit lengths is > to the backing num size")
+        super(BitField, self).__init__(fields)
+
+    def set(self, vm, addr, val):
+        self._num.set(vm, addr, val)
+
+    def _get_pinned_base_class(self):
+        return MemBitField
+
+    def __eq__(self, other):
+        return self.__class__ == other.__class__ and \
+                self._num == other._num and super(BitField, self).__eq__(other)
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        return hash((super(BitField, self).__hash__(), self._num))
+
+    def __repr__(self):
+        fields_repr = ', '.join("%s: %r" % (name, field.bit_size)
+                                for name, field in self.fields)
+        return "%s(%s)" % (self.__class__.__name__, fields_repr)
+
+
+class Str(Type):
+    """A string type that handles encoding. This type is unsized (no static
+    size).
+
+    The @encoding is passed to the constructor, and is one of the keys of
+    Str.encodings, currently:
+        - ascii
+        - latin1
+        - ansi (= latin1)
+        - utf8 (= utf-8le)
+        - utf16 (= utf-16le, Windows UCS-2 compatible)
+    New encodings can be added with Str.add_encoding.
+    If an unknown encoding is passed to the constructor, Str will try to add it
+    to the available ones with Str.add_encoding.
+
+    Mapped to MemStr.
+    """
+
+    # Dict of {name: (getter, setter, raw_len)}
+    # Where:
+    #   - getter(vm, addr) -> unicode
+    #   - setter(vm, addr, unicode)
+    #   - raw_len(unicode_str) -> int (length of the str value once encoded in
+    #                                  memory)
+    # See enc_triplet()
+    #
+    # NOTE: this appears like it could be implemented only with
+    # (getter, raw_str), but this would cause trouble for length-prefixed str
+    # encoding (Pascal-style strings).
+    encodings = {
+        "ascii": enc_triplet("ascii"),
+        "latin1": enc_triplet("latin1"),
+        "ansi": enc_triplet("latin1"),
+        "utf8": enc_triplet("utf8"),
+        "utf16": enc_triplet("utf-16le"),
+    }
+
+    def __init__(self, encoding="ansi"):
+        if encoding not in self.encodings:
+            self.add_encoding(encoding)
+        self._enc = encoding
+
+    @classmethod
+    def add_encoding(cls, enc_name, str_enc=None, getter=None, setter=None,
+                     raw_len=None):
+        """Add an available Str encoding.
+
+        @enc_name: the name that will be used to designate this encoding in the
+            Str constructor
+        @str_enc: (optional) the actual str encoding name if it differs from
+            @enc_name
+        @getter: (optional) func(vm, addr) -> unicode, to force usage of this
+            function to retrieve the str from memory
+        @setter: (optional) func(vm, addr, unicode), to force usage of this
+            function to set the str in memory
+        @raw_len: (optional) func(unicode_str) -> int (length of the str value
+            once encoded in memory), to force usage of this function to compute
+            the length of this string once in memory
+        """
+        default = enc_triplet(str_enc or enc_name)
+        actual = (
+            getter or default[0],
+            setter or default[1],
+            raw_len or default[2],
+        )
+        cls.encodings[enc_name] = actual
+
+    def get(self, vm, addr):
+        """Get the string value from memory"""
+        get_str = self.encodings[self.enc][0]
+        return get_str(vm, addr)
+
+    def set(self, vm, addr, s):
+        """Set the string value in memory"""
+        set_str = self.encodings[self.enc][1]
+        set_str(vm, addr, s)
+
+    @property
+    def size(self):
+        """This type is unsized."""
+        raise ValueError("Str is
unsized") + + def value_size(self, py_str): + """Returns the in-memory size of a @py_str for this Str type (handles + encoding, i.e. will not return the same size for "utf16" and "ansi"). + """ + raw_len = self.encodings[self.enc][2] + return raw_len(py_str) + + @property + def enc(self): + """This Str's encoding name (as a str).""" + return self._enc + + def _get_pinned_base_class(self): + return MemStr + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self.enc) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._enc == other._enc + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash((self.__class__, self._enc)) + + +class Void(Type): + """Represents the C void type. + + Mapped to MemVoid. + """ + + def _build_pinned_type(self): + return MemVoid + + def __eq__(self, other): + return self.__class__ == other.__class__ + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self.__class__) + + def __repr__(self): + return self.__class__.__name__ + + +class Self(Void): + """Special marker to reference a type inside itself. + + Mapped to MemSelf. + + Example: + class ListNode(MemStruct): + fields = [ + ("next", Ptr(" allocated_address + allocator = None + + _type = None + + def __init__(self, vm, addr=None, type_=None): + self._vm = vm + if addr is None: + self._addr = self.alloc(vm, self.get_size()) + else: + self._addr = addr + if type_ is not None: + self._type = type_ + if self._type is None: + raise ValueError("Subclass MemType and define cls._type or pass " + "a type to the constructor") + + @classmethod + def alloc(cls, vm, size): + """Returns an allocated page of size @size if cls.allocator is set. + Raises ValueError otherwise. + """ + if cls.allocator is None: + raise ValueError("Cannot provide None address to MemType() if" + "%s.set_allocator has not been called." 
+ % __name__) + return cls.allocator(vm, size) + + @classmethod + def set_allocator(cls, alloc_func): + """Set an allocator for this class; allows to instantiate statically + sized MemTypes (i.e. sizeof() is implemented) without specifying the + address (the object is allocated by @alloc_func in the vm). + + You may call set_allocator on specific MemType classes if you want + to use a different allocator. + + @alloc_func: func(VmMngr) -> integer_address + """ + cls.allocator = alloc_func + + def get_addr(self, field=None): + """Return the address of this MemType or one of its fields. + + @field: (str, optional) used by subclasses to specify the name or index + of the field to get the address of + """ + if field is not None: + raise NotImplementedError("Getting a field's address is not " + "implemented for this class.") + return self._addr + + @classmethod + def get_type(cls): + """Returns the Type subclass instance representing the C type of this + MemType. + """ + return cls._type + + @classmethod + def sizeof(cls): + """Return the static size of this type. By default, it is the size + of the underlying Type. + """ + return cls._type.size + + def get_size(self): + """Return the dynamic size of this structure (e.g. the size of an + instance). Defaults to sizeof for this base class. + + For example, MemStr defines get_size but not sizeof, as an instance + has a fixed size (at least its value has), but all the instance do not + have the same size. + """ + return self.sizeof() + + def memset(self, byte=b'\x00'): + """Fill the memory space of this MemType with @byte ('\x00' by + default). The size is retrieved with self.get_size() (dynamic size). + """ + # TODO: multibyte patterns + if not isinstance(byte, bytes) or len(byte) != 1: + raise ValueError("byte must be a 1-lengthed str") + self._vm.set_mem(self.get_addr(), byte * self.get_size()) + + def cast(self, other_type): + """Cast this MemType to another MemType (same address, same vm, + but different type). 
Return the casted MemType. + + @other_type: either a Type instance (other_type.lval is used) or a + MemType subclass + """ + if isinstance(other_type, Type): + other_type = other_type.lval + return other_type(self._vm, self.get_addr()) + + def cast_field(self, field, other_type, *type_args, **type_kwargs): + """ABSTRACT: Same as cast, but the address of the returned MemType + is the address at which @field is in the current MemType. + + @field: field specification, for example its name for a struct, or an + index in an array. See the subclass doc. + @other_type: either a Type instance (other_type.lval is used) or a + MemType subclass + """ + raise NotImplementedError("Abstract") + + def raw(self): + """Raw binary (str) representation of the MemType as it is in + memory. + """ + return self._vm.get_mem(self.get_addr(), self.get_size()) + + def __len__(self): + return self.get_size() + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __bytes__(self): + return self.raw() + + def __repr__(self): + return "Mem%r" % self._type + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.get_type() == other.get_type() and \ + bytes(self) == bytes(other) + + def __ne__(self, other): + return not self == other + + +class MemValue(MemType): + """Simple MemType that gets and sets the Type through the `.val` + attribute. + """ + + @property + def val(self): + return self._type.get(self._vm, self._addr) + + @val.setter + def val(self, value): + self._type.set(self._vm, self._addr, value) + + def __repr__(self): + return "%r: %r" % (self.__class__, self.val) + + +class MemStruct(with_metaclass(_MetaMemStruct, MemType)): + """Base class to easily implement VmMngr backed C-like structures in miasm. + Represents a structure in virtual memory. + + The mechanism is the following: + - set a "fields" class field to be a list of + (, ) + - instances of this class will have properties to interact with these + fields. 
+ + Example: + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + + mstruct = MyStruct(vm, addr) + + # Field assignment modifies virtual memory + mstruct.num = 3 + assert mstruct.num == 3 + memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), + 4))[0] + assert memval == mstruct.num + + # Memset sets the whole structure + mstruct.memset() + assert mstruct.num == 0 + mstruct.memset('\x11') + assert mstruct.num == 0x11111111 + + other = OtherStruct(vm, addr2) + mstruct.other = other.get_addr() + assert mstruct.other.val == other.get_addr() + assert mstruct.other.deref == other + assert mstruct.other.deref.foo == 0x1234 + + Note that: + MyStruct = Struct("MyStruct", ).lval + is equivalent to the previous MyStruct declaration. + + See the various Type-s doc for more information. See MemStruct.gen_fields + doc for more information on how to handle recursive types and cyclic + dependencies. + """ + fields = None + + def get_addr(self, field_name=None): + """ + @field_name: (str, optional) the name of the field to get the + address of + """ + if field_name is not None: + offset = self._type.get_offset(field_name) + else: + offset = 0 + return self._addr + offset + + @classmethod + def get_offset(cls, field_name): + """Shorthand for self.get_type().get_offset(field_name).""" + return cls.get_type().get_offset(field_name) + + def get_field(self, name): + """Get a field value by name. + + useless most of the time since fields are accessible via self.. + """ + return self._type.get_field(self._vm, self.get_addr(), name) + + def set_field(self, name, val): + """Set a field value by name. @val is the python value corresponding to + this field type. 
+ + useless most of the time since fields are accessible via self.. + """ + return self._type.set_field(self._vm, self.get_addr(), name, val) + + def cast_field(self, field, other_type): + """In this implementation, @field is a field name""" + if isinstance(other_type, Type): + other_type = other_type.lval + return other_type(self._vm, self.get_addr(field)) + + # Field generation method, voluntarily public to be able to gen fields + # after class definition + @classmethod + def gen_fields(cls, fields=None): + """Generate the fields of this class (so that they can be accessed with + self.) from a @fields list, as described in the class doc. + + Useful in case of a type cyclic dependency. For example, the following + is not possible in python: + + class A(MemStruct): + fields = [("b", Ptr("I", B))] + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + With gen_fields, the following is the legal equivalent: + + class A(MemStruct): + pass + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + A.gen_fields([("b", Ptr("I", B))]) + """ + if fields is not None: + if cls.fields is not None: + raise ValueError("Cannot regen fields of a class. Setting " + "cls.fields at class definition and calling " + "gen_fields are mutually exclusive.") + cls.fields = fields + + if cls._type is None: + if cls.fields is None: + raise ValueError("Cannot create a MemStruct subclass without" + " a cls._type or a cls.fields") + cls._type = cls._gen_type(cls.fields) + + if cls._type in DYN_MEM_STRUCT_CACHE: + # FIXME: Maybe a warning would be better? + raise RuntimeError("Another MemType has the same type as this " + "one. Use it instead.") + + # Register this class so that another one will not be created when + # calling cls._type.lval + DYN_MEM_STRUCT_CACHE[cls._type] = cls + + cls._gen_attributes() + + @classmethod + def _gen_attributes(cls): + # Generate self. 
getter and setters + for name, _, _ in cls._type.all_fields: + setattr(cls, name, property( + lambda self, name=name: self.get_field(name), + lambda self, val, name=name: self.set_field(name, val) + )) + + @classmethod + def _gen_type(cls, fields): + return Struct(cls.__name__, fields) + + def __repr__(self): + out = [] + for name, field in self._type.fields: + val_repr = repr(self.get_field(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + + +class MemUnion(MemStruct): + """Same as MemStruct but all fields have a 0 offset in the struct.""" + @classmethod + def _gen_type(cls, fields): + return Union(fields) + + +class MemBitField(MemUnion): + """MemUnion of Bits(...) fields.""" + @classmethod + def _gen_type(cls, fields): + return BitField(fields) + + +class MemSelf(MemStruct): + """Special Marker class for reference to current class in a Ptr or Array + (mostly Array of Ptr). See Self doc. + """ + def __repr__(self): + return self.__class__.__name__ + + +class MemVoid(MemType): + """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when + casted to another type. Allows to implement C's "void*" pattern. 
+ """ + _type = Void() + + def __repr__(self): + return self.__class__.__name__ + + +class MemPtr(MemValue): + """Mem version of a Ptr, provides two properties: + - val, to set and get the numeric value of the Ptr + - deref, to set and get the pointed type + """ + @property + def val(self): + return self._type.get_val(self._vm, self._addr) + + @val.setter + def val(self, value): + return self._type.set_val(self._vm, self._addr, value) + + @property + def deref(self): + return self._type.deref_get(self._vm, self._addr) + + @deref.setter + def deref(self, val): + return self._type.deref_set(self._vm, self._addr, val) + + def __repr__(self): + return "*%s" % hex(self.val) + + +class MemStr(MemValue): + """Implements a string representation in memory. + + The string value can be got or set (with python str/unicode) through the + self.val attribute. String encoding/decoding is handled by the class, + + This type is dynamically sized only (get_size is implemented, not sizeof). + """ + + def get_size(self): + """This get_size implementation is quite unsafe: it reads the string + underneath to determine the size, it may therefore read a lot of memory + and provoke mem faults (analogous to strlen). + """ + val = self.val + return self.get_type().value_size(val) + + @classmethod + def from_str(cls, vm, py_str): + """Allocates a MemStr with the global allocator with value py_str. + Raises a ValueError if allocator is not set. + """ + size = cls._type.value_size(py_str) + addr = cls.alloc(vm, size) + memstr = cls(vm, addr) + memstr.val = py_str + return memstr + + def raw(self): + raw = self._vm.get_mem(self.get_addr(), self.get_size()) + return raw + + def __repr__(self): + return "%r: %r" % (self.__class__, self.val) + + +class MemArray(MemType): + """An unsized array of type @field_type (a Type subclass instance). + This class has no static or dynamic size. 
+ + It can be indexed for setting and getting elements, example: + + array = Array(Num("I")).lval(vm, addr)) + array[2] = 5 + array[4:8] = [0, 1, 2, 3] + print array[20] + """ + + @property + def field_type(self): + """Return the Type subclass instance that represents the type of + this MemArray items. + """ + return self.get_type().field_type + + def get_addr(self, idx=0): + return self._addr + self.get_type().get_offset(idx) + + @classmethod + def get_offset(cls, idx): + """Shorthand for self.get_type().get_offset(idx).""" + return cls.get_type().get_offset(idx) + + def __getitem__(self, idx): + return self.get_type().get_item(self._vm, self._addr, idx) + + def __setitem__(self, idx, item): + self.get_type().set_item(self._vm, self._addr, idx, item) + + def raw(self): + raise ValueError("%s is unsized, which prevents from getting its full " + "raw representation. Use MemSizedArray instead." % + self.__class__) + + def __repr__(self): + return "[%r, ...] [%r]" % (self[0], self.field_type) + + +class MemSizedArray(MemArray): + """A fixed size MemArray. + + This type is dynamically sized. Generate a fixed @field_type and @array_len + array which has a static size by using Array(type, size).lval. 
+ """ + + @property + def array_len(self): + """The length, in number of elements, of this array.""" + return self.get_type().array_len + + def get_size(self): + return self.get_type().size + + def __iter__(self): + for i in range(self.get_type().array_len): + yield self[i] + + def raw(self): + return self._vm.get_mem(self.get_addr(), self.get_size()) + + def __repr__(self): + item_reprs = [repr(item) for item in self] + if self.array_len > 0 and '\n' in item_reprs[0]: + items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' + else: + items = ', '.join(item_reprs) + return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) + diff --git a/miasm/core/utils.py b/miasm/core/utils.py new file mode 100644 index 00000000..9856d4f2 --- /dev/null +++ b/miasm/core/utils.py @@ -0,0 +1,234 @@ +from __future__ import print_function +from builtins import range +import struct +import inspect +from collections import MutableMapping as DictMixin + +from operator import itemgetter +import codecs + +from future.utils import viewitems + +import collections + +upck8 = lambda x: struct.unpack('B', x)[0] +upck16 = lambda x: struct.unpack('H', x)[0] +upck32 = lambda x: struct.unpack('I', x)[0] +upck64 = lambda x: struct.unpack('Q', x)[0] +pck8 = lambda x: struct.pack('B', x) +pck16 = lambda x: struct.pack('H', x) +pck32 = lambda x: struct.pack('I', x) +pck64 = lambda x: struct.pack('Q', x) + +# Little endian +upck8le = lambda x: struct.unpack('B', x)[0] +upck16be = lambda x: struct.unpack('>H', x)[0] +upck32be = lambda x: struct.unpack('>I', x)[0] +upck64be = lambda x: struct.unpack('>Q', x)[0] +pck8be = lambda x: struct.pack('>B', x) +pck16be = lambda x: struct.pack('>H', x) +pck32be = lambda x: struct.pack('>I', x) +pck64be = lambda x: struct.pack('>Q', x) + + +LITTLE_ENDIAN = 1 +BIG_ENDIAN = 2 + + +pck = {8: pck8, + 16: pck16, + 32: pck32, + 64: pck64} + + +def get_caller_name(caller_num=0): + """Get the nth caller's name + @caller_num: 0 = the caller of get_caller_name, 1 = 
class BoundedDict(DictMixin):

    """Limited in size dictionary.

    To reduce combinatory cost, once an upper limit @max_size is reached,
    only the most accessed elements are kept; the others are suppressed.
    Accesses are counted on both reads (__getitem__) and writes
    (__setitem__).

    One can define a callback called when an element is removed
    """

    def __init__(self, max_size, min_size=None, initialdata=None,
                 delete_cb=None):
        """Create a BoundedDict
        @max_size: maximum size of the dictionary
        @min_size: (optional) number of most used element to keep when
                   resizing; a falsy value selects @max_size // 3
        @initialdata: (optional) dict instance with initial data (copied)
        @delete_cb: (optional) callback called with the key of each element
                    being removed
        """
        self._data = initialdata.copy() if initialdata else {}
        self._min_size = min_size if min_size else max_size // 3
        self._max_size = max_size
        self._size = len(self._data)
        # Do not use collections.Counter as it is quite slow
        self._counter = {k: 1 for k in self._data}
        self._delete_cb = delete_cb

    def __setitem__(self, asked_key, value):
        """Store @value under @asked_key, shrinking the dictionary first if
        inserting a new key reaches the size bound"""
        if asked_key in self._data:
            self._counter[asked_key] += 1
        else:
            # Update internal size and use's counter
            self._size += 1

            # Bound can only be reached on a new element
            if self._size >= self._max_size:
                # Plain .items() is enough for sorted() on Python 2 and 3
                most_common = sorted(
                    self._counter.items(),
                    key=itemgetter(1),
                    reverse=True
                )
                # One slot is reserved for the element being inserted
                kept = self._min_size - 1

                # Handle callback
                if self._delete_cb is not None:
                    for key, _ in most_common[kept:]:
                        self._delete_cb(key)

                # Keep only the most used elements
                self._data = {key: self._data[key]
                              for key, _ in most_common[:kept]}
                self._size = self._min_size

                # Reset use's counter
                self._counter = {k: 1 for k in self._data}

            # Avoid rechecking in dict: set to 1 here, add 1 otherwise
            self._counter[asked_key] = 1

        self._data[asked_key] = value

    def __contains__(self, key):
        # Do not call has_key to avoid adding function call overhead
        return key in self._data

    def has_key(self, key):
        "Return True if @key is stored in the dictionary"
        return key in self._data

    def keys(self):
        "Return the list of dict's keys"
        return list(self._data)

    @property
    def data(self):
        "Return the current instance as a dictionary"
        return self._data

    def __getitem__(self, key):
        """Return the value stored under @key and count the access
        Raise KeyError if @key is not stored"""
        # Retrieve data first to raise the proper exception on error
        data = self._data[key]
        # Should never raise, since the key is in self._data
        self._counter[key] += 1
        return data

    def __delitem__(self, key):
        """Remove @key, notifying the delete callback beforehand
        Raise KeyError if @key is not stored"""
        # Check first: the callback must not be fired for an element which
        # is not in the dictionary
        if key not in self._data:
            raise KeyError(key)
        if self._delete_cb is not None:
            self._delete_cb(key)
        del self._data[key]
        self._size -= 1
        del self._counter[key]

    def __del__(self):
        """Ensure the callback is called when last reference is lost"""
        # The attribute is missing if __init__ raised before setting it
        delete_cb = getattr(self, "_delete_cb", None)
        if delete_cb:
            for key in self._data:
                delete_cb(key)

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        return iter(self._data)
+# +"Intermediate language implementation" diff --git a/miasm/expression/expression.py b/miasm/expression/expression.py new file mode 100644 index 00000000..f0151e98 --- /dev/null +++ b/miasm/expression/expression.py @@ -0,0 +1,2035 @@ +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# This module implements the Miasm IR components and the related basic operations.
def should_parenthesize_child(child, parent):
    """Tell whether @child must be wrapped in parentheses when it is
    displayed as an operand of @parent
    @child: Expr, the operand being displayed
    @parent: Expr, the expression containing @child
    """
    # These kinds of expression are never wrapped
    never_wrapped = (ExprId, ExprInt, ExprCompose, ExprMem, ExprSlice)
    if isinstance(child, never_wrapped):
        return False

    child_is_op = isinstance(child, ExprOp)
    # Operations which are not infix are never wrapped either
    if child_is_op and not child.is_infix():
        return False

    if isinstance(child, ExprCond) or isinstance(parent, ExprSlice):
        return True

    if child_is_op and isinstance(parent, ExprOp):
        # Infix operation inside an operation: compare priorities, using
        # -1 for an unknown child operator and PRIORITY_MAX + 1 for an
        # unknown parent operator
        child_priority = priorities.get(child.op, -1)
        parent_priority = priorities.get(parent.op, PRIORITY_MAX + 1)
        return child_priority < parent_priority

    return True
def visit_chk(visitor):
    """Function decorator launching callback on Expression visit

    The returned function takes (expr, callback, test_visit):
    - if @test_visit is not None and rejects @expr, @expr is returned as is
      and neither @visitor nor @callback is called
    - otherwise @visitor is called; a None result is returned as is, any
      other result is passed to @callback, whose result is returned
    """
    def accept_all(_):
        return True

    def wrapped(expr, callback, test_visit=accept_all):
        if test_visit is not None and not test_visit(expr):
            return expr
        visited = visitor(expr, callback, test_visit)
        return None if visited is None else callback(visited)

    return wrapped
+ elif nfrom.src1 == nto: + return "True" + elif nfrom.src2 == nto: + return "False" + + return "" + + +@total_ordering +class LocKey(object): + def __init__(self, key): + self._key = key + + key = property(lambda self: self._key) + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ is not other.__class__: + return False + return self.key == other.key + + def __ne__(self, other): + # required Python 2.7.14 + return not self == other + + def __lt__(self, other): + return self.key < other.key + + def __repr__(self): + return "<%s %d>" % (self.__class__.__name__, self._key) + + def __str__(self): + return "loc_key_%d" % self.key + +# IR definitions + +class Expr(object): + + "Parent class for Miasm Expressions" + + __slots__ = ["_hash", "_repr", "_size"] + + args2expr = {} + canon_exprs = set() + use_singleton = True + + def set_size(self, _): + raise ValueError('size is not mutable') + + def __init__(self, size): + """Instantiate an Expr with size @size + @size: int + """ + # Common attribute + self._size = size + + # Lazy cache needs + self._hash = None + self._repr = None + + size = property(lambda self: self._size) + + @staticmethod + def get_object(expr_cls, args): + if not expr_cls.use_singleton: + return object.__new__(expr_cls) + + expr = Expr.args2expr.get((expr_cls, args)) + if expr is None: + expr = object.__new__(expr_cls) + Expr.args2expr[(expr_cls, args)] = expr + return expr + + def get_is_canon(self): + return self in Expr.canon_exprs + + def set_is_canon(self, value): + assert value is True + Expr.canon_exprs.add(self) + + is_canon = property(get_is_canon, set_is_canon) + + # Common operations + + def __str__(self): + raise NotImplementedError("Abstract Method") + + def __getitem__(self, i): + if not isinstance(i, slice): + raise TypeError("Expression: Bad slice: %s" % i) + start, stop, step = i.indices(self.size) + if step != 1: + raise ValueError("Expression: Bad slice: 
%s" % i) + return ExprSlice(self, start, stop) + + def get_size(self): + raise DeprecationWarning("use X.size instead of X.get_size()") + + def is_function_call(self): + """Returns true if the considered Expr is a function call + """ + return False + + def __repr__(self): + if self._repr is None: + self._repr = self._exprrepr() + return self._repr + + def __hash__(self): + if self._hash is None: + self._hash = self._exprhash() + return self._hash + + def __eq__(self, other): + if self is other: + return True + elif self.use_singleton: + # In case of Singleton, pointer comparison is sufficient + # Avoid computation of hash and repr + return False + + if self.__class__ is not other.__class__: + return False + if hash(self) != hash(other): + return False + return repr(self) == repr(other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __add__(self, other): + return ExprOp('+', self, other) + + def __sub__(self, other): + return ExprOp('+', self, ExprOp('-', other)) + + def __div__(self, other): + return ExprOp('/', self, other) + + def __floordiv__(self, other): + return self.__div__(other) + + def __mod__(self, other): + return ExprOp('%', self, other) + + def __mul__(self, other): + return ExprOp('*', self, other) + + def __lshift__(self, other): + return ExprOp('<<', self, other) + + def __rshift__(self, other): + return ExprOp('>>', self, other) + + def __xor__(self, other): + return ExprOp('^', self, other) + + def __or__(self, other): + return ExprOp('|', self, other) + + def __and__(self, other): + return ExprOp('&', self, other) + + def __neg__(self): + return ExprOp('-', self) + + def __pow__(self, other): + return ExprOp("**", self, other) + + def __invert__(self): + return ExprOp('^', self, self.mask) + + def copy(self): + "Deep copy of the expression" + return self.visit(lambda x: x) + + def __deepcopy__(self, _): + return self.copy() + + def replace_expr(self, dct): + """Find and replace sub expression using dct + @dct: dictionary 
associating replaced Expr to its new Expr value + """ + return self.visit(lambda expr: dct.get(expr, expr)) + + def canonize(self): + "Canonize the Expression" + + def must_canon(expr): + return not expr.is_canon + + def canonize_visitor(expr): + if expr.is_canon: + return expr + if isinstance(expr, ExprOp): + if expr.is_associative(): + # ((a+b) + c) => (a + b + c) + args = [] + for arg in expr.args: + if isinstance(arg, ExprOp) and expr.op == arg.op: + args += arg.args + else: + args.append(arg) + args = canonize_expr_list(args) + new_e = ExprOp(expr.op, *args) + else: + new_e = expr + else: + new_e = expr + new_e.is_canon = True + return new_e + + return self.visit(canonize_visitor, must_canon) + + def msb(self): + "Return the Most Significant Bit" + return self[self.size - 1:self.size] + + def zeroExtend(self, size): + """Zero extend to size + @size: int + """ + assert self.size <= size + if self.size == size: + return self + return ExprOp('zeroExt_%d' % size, self) + + def signExtend(self, size): + """Sign extend to size + @size: int + """ + assert self.size <= size + if self.size == size: + return self + return ExprOp('signExt_%d' % size, self) + + def graph_recursive(self, graph): + """Recursive method used by graph + @graph: miasm.core.graph.DiGraph instance + Update @graph instance to include sons + This is an Abstract method""" + + raise ValueError("Abstract method") + + def graph(self): + """Return a DiGraph instance standing for Expr tree + Instance's display functions have been override for better visibility + Wrapper on graph_recursive""" + + # Create recursively the graph + graph = DiGraphExpr() + self.graph_recursive(graph) + + return graph + + def set_mask(self, value): + raise ValueError('mask is not mutable') + + mask = property(lambda self: ExprInt(-1, self.size)) + + def is_int(self, value=None): + return False + + def is_id(self, name=None): + return False + + def is_loc(self, label=None): + return False + + def is_aff(self): + return False + 
class ExprInt(Expr):

    """An ExprInt represent a constant in Miasm IR.

    Some use cases:
    - Constant 0x42
    - Constant -0x30
    - Constant 0x12345678 on 32bits
    """

    __slots__ = Expr.__slots__ + ["_arg"]

    def __new__(cls, arg, size):
        """Create an ExprInt from a modint or num/size
        @arg: 'intable' number
        @size: int size"""

        # A modint carries its own size, which must match the requested one
        if is_modint(arg):
            assert size == arg.size
        # Avoid a common blunder
        assert not isinstance(arg, ExprInt)

        # Ensure arg is always a moduint: the type for @size is defined on
        # demand when it is not registered yet
        value = int(arg)
        if size not in mod_size2uint:
            define_uint(size)
        value = mod_size2uint[size](value)

        # Get the Singleton instance
        instance = Expr.get_object(cls, (value, size))

        # Save parameters (__init__ is called with parameters unchanged)
        instance._arg = value
        return instance

    def __init__(self, arg, size):
        """Create an ExprInt from a modint or num/size
        @arg: 'intable' number
        @size: int size"""
        # Work for ._arg is done in __new__
        super(ExprInt, self).__init__(size)

    @property
    def arg(self):
        "The constant, as stored by __new__ (read-only)"
        return self._arg

    def __reduce__(self):
        return self.__class__, (int(self._arg), self._size)

    def _get_int(self):
        "Return self integer representation"
        masked = self._arg & size2mask(self._size)
        return int(masked)

    def __str__(self):
        if self._arg < 0:
            return str("-0x%X" % (-self._get_int()))
        return str("0x%X" % self._get_int())

    def get_r(self, mem_read=False, cst_read=False):
        "Return {self} if @cst_read is set, an empty set otherwise"
        return set([self]) if cst_read else set()

    def get_w(self):
        "A constant is never written: return an empty set"
        return set()

    def _exprhash(self):
        return hash((EXPRINT, self._arg, self._size))

    def _exprrepr(self):
        return "%s(0x%X, %d)" % (
            self.__class__.__name__,
            self._get_int(),
            self._size
        )

    def __contains__(self, expr):
        return self == expr

    @visit_chk
    def visit(self, callback, test_visit=None):
        # No sub-expression to visit
        return self

    def copy(self):
        return ExprInt(self._arg, self._size)

    def depth(self):
        return 1

    def graph_recursive(self, graph):
        graph.add_node(self)

    def __int__(self):
        return int(self.arg)

    def __long__(self):
        return int(self.arg)

    def is_int(self, value=None):
        "Return True; if @value is given, the constant must also match it"
        return not (value is not None and self._arg != value)
class ExprLoc(Expr):

    """An ExprLoc represent a Label in Miasm IR.
    """

    __slots__ = Expr.__slots__ + ["_loc_key"]

    def __new__(cls, loc_key, size):
        # Instance retrieval is delegated to Expr.get_object
        return Expr.get_object(cls, (loc_key, size))

    def __init__(self, loc_key, size):
        """Create a label expression
        @loc_key: LocKey instance, label loc_key
        @size: int, expression's size
        """
        assert isinstance(loc_key, LocKey)
        super(ExprLoc, self).__init__(size)
        self._loc_key = loc_key

    @property
    def loc_key(self):
        "The LocKey of this label (read-only)"
        return self._loc_key

    def __reduce__(self):
        return self.__class__, (self._loc_key, self._size)

    def __str__(self):
        return str(self._loc_key)

    def get_r(self, mem_read=False, cst_read=False):
        "A label reads nothing: return an empty set"
        return set()

    def get_w(self):
        "A label writes nothing: return an empty set"
        return set()

    def _exprhash(self):
        return hash((EXPRLOC, self._loc_key, self._size))

    def _exprrepr(self):
        return "%s(%r, %d)" % (
            self.__class__.__name__,
            self._loc_key,
            self._size
        )

    def __contains__(self, expr):
        return self == expr

    @visit_chk
    def visit(self, callback, test_visit=None):
        # No sub-expression to visit
        return self

    def copy(self):
        return ExprLoc(self._loc_key, self._size)

    def depth(self):
        return 1

    def graph_recursive(self, graph):
        graph.add_node(self)

    def is_loc(self, loc_key=None):
        "Return True; if @loc_key is given, the label must also match it"
        return not (loc_key is not None and self._loc_key != loc_key)
+ + Some use cases: + - var1 <- 2 + """ + + __slots__ = Expr.__slots__ + ["_dst", "_src"] + + def __init__(self, dst, src): + """Create an ExprAssign for dst <- src + @dst: Expr, assignment destination + @src: Expr, assignment source + """ + # dst & src must be Expr + assert isinstance(dst, Expr) + assert isinstance(src, Expr) + + if dst.size != src.size: + raise ValueError( + "sanitycheck: ExprAssign args must have same size! %s" % + ([(str(arg), arg.size) for arg in [dst, src]])) + + super(ExprAssign, self).__init__(self.dst.size) + + dst = property(lambda self: self._dst) + src = property(lambda self: self._src) + + + def __reduce__(self): + state = self._dst, self._src + return self.__class__, state + + def __new__(cls, dst, src): + if dst.is_slice() and dst.arg.size == src.size: + new_dst, new_src = dst.arg, src + elif dst.is_slice(): + # Complete the source with missing slice parts + new_dst = dst.arg + rest = [(ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) + for r in dst.slice_rest()] + all_a = [(src, dst.start, dst.stop)] + rest + all_a.sort(key=lambda x: x[1]) + args = [expr for (expr, _, _) in all_a] + new_src = ExprCompose(*args) + else: + new_dst, new_src = dst, src + expr = Expr.get_object(cls, (new_dst, new_src)) + expr._dst, expr._src = new_dst, new_src + return expr + + def __str__(self): + return "%s = %s" % (str(self._dst), str(self._src)) + + def get_r(self, mem_read=False, cst_read=False): + elements = self._src.get_r(mem_read, cst_read) + if isinstance(self._dst, ExprMem) and mem_read: + elements.update(self._dst.ptr.get_r(mem_read, cst_read)) + return elements + + def get_w(self): + if isinstance(self._dst, ExprMem): + return set([self._dst]) # [memreg] + else: + return self._dst.get_w() + + def _exprhash(self): + return hash((EXPRASSIGN, hash(self._dst), hash(self._src))) + + def _exprrepr(self): + return "%s(%r, %r)" % (self.__class__.__name__, self._dst, self._src) + + def __contains__(self, expr): + return (self == expr or + 
class ExprAff(ExprAssign):
    """
    DEPRECATED class.
    Use ExprAssign instead of ExprAff

    Behaves as ExprAssign, except that a deprecation message is emitted
    through warnings.warn each time an instance is initialized.
    """

    def __init__(self, dst, src):
        """Warn about the deprecation, then initialize as an ExprAssign
        @dst: Expr, assignment destination
        @src: Expr, assignment source
        """
        warnings.warn('DEPRECATION WARNING: use ExprAssign instead of ExprAff')
        super(ExprAff, self).__init__(dst, src)
+ """ + + __slots__ = Expr.__slots__ + ["_cond", "_src1", "_src2"] + + def __init__(self, cond, src1, src2): + """Create an ExprCond + @cond: Expr, condition + @src1: Expr, value if condition is evaled to not zero + @src2: Expr, value if condition is evaled zero + """ + + # cond, src1, src2 must be Expr + assert isinstance(cond, Expr) + assert isinstance(src1, Expr) + assert isinstance(src2, Expr) + + self._cond, self._src1, self._src2 = cond, src1, src2 + assert src1.size == src2.size + super(ExprCond, self).__init__(self.src1.size) + + cond = property(lambda self: self._cond) + src1 = property(lambda self: self._src1) + src2 = property(lambda self: self._src2) + + def __reduce__(self): + state = self._cond, self._src1, self._src2 + return self.__class__, state + + def __new__(cls, cond, src1, src2): + return Expr.get_object(cls, (cond, src1, src2)) + + def __str__(self): + return "%s?(%s,%s)" % (str_protected_child(self._cond, self), str(self._src1), str(self._src2)) + + def get_r(self, mem_read=False, cst_read=False): + out_src1 = self.src1.get_r(mem_read, cst_read) + out_src2 = self.src2.get_r(mem_read, cst_read) + return self.cond.get_r(mem_read, + cst_read).union(out_src1).union(out_src2) + + def get_w(self): + return set() + + def _exprhash(self): + return hash((EXPRCOND, hash(self.cond), + hash(self._src1), hash(self._src2))) + + def _exprrepr(self): + return "%s(%r, %r, %r)" % (self.__class__.__name__, + self._cond, self._src1, self._src2) + + def __contains__(self, expr): + return (self == expr or + self.cond.__contains__(expr) or + self.src1.__contains__(expr) or + self.src2.__contains__(expr)) + + @visit_chk + def visit(self, callback, test_visit=None): + cond = self._cond.visit(callback, test_visit) + src1 = self._src1.visit(callback, test_visit) + src2 = self._src2.visit(callback, test_visit) + if cond == self._cond and src1 == self._src1 and src2 == self._src2: + return self + return ExprCond(cond, src1, src2) + + def copy(self): + return 
class ExprMem(Expr):

    """An ExprMem stand for a memory access

    Use cases:
    - Memory read
    - Memory write
    """

    __slots__ = Expr.__slots__ + ["_ptr"]

    def __new__(cls, ptr, size=None):
        # Same deprecated default as __init__: a missing size means 32
        if size is None:
            warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprMem(ptr, SIZE)')
            size = 32

        return Expr.get_object(cls, (ptr, size))

    def __init__(self, ptr, size=None):
        """Create an ExprMem
        @ptr: Expr, memory access address
        @size: int, memory access size (omitting it is deprecated: a
               warning is emitted and 32 is used)
        """
        if size is None:
            warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprMem(ptr, SIZE)')
            size = 32

        # ptr must be Expr
        assert isinstance(ptr, Expr)
        assert isinstance(size, int_types)

        # Still checked when the assertions above are disabled
        if not isinstance(ptr, Expr):
            raise ValueError(
                'ExprMem: ptr must be an Expr (not %s)' % type(ptr))

        super(ExprMem, self).__init__(size)
        self._ptr = ptr

    @property
    def ptr(self):
        "The address expression of the memory access (read-only)"
        return self._ptr

    def get_arg(self):
        "Deprecated accessor: warn, then return the pointer"
        warnings.warn('DEPRECATION WARNING: use exprmem.ptr instead of exprmem.arg')
        return self.ptr

    def set_arg(self, value):
        "Deprecated mutator: warn, then assign @value to self.ptr"
        warnings.warn('DEPRECATION WARNING: use exprmem.ptr instead of exprmem.arg')
        # NOTE(review): ptr is declared in this class as a property without
        # setter - confirm this assignment can succeed
        self.ptr = value

    arg = property(get_arg, set_arg)

    def __reduce__(self):
        return self.__class__, (self._ptr, self._size)

    def __str__(self):
        return "@%d[%s]" % (self.size, str(self.ptr))

    def get_r(self, mem_read=False, cst_read=False):
        """Return the memory access itself, plus the elements read by its
        pointer if @mem_read is set"""
        if not mem_read:
            return set([self])
        return set(self._ptr.get_r(mem_read, cst_read).union(set([self])))

    def get_w(self):
        return set([self])  # [memreg]

    def _exprhash(self):
        return hash((EXPRMEM, hash(self._ptr), self._size))

    def _exprrepr(self):
        return "%s(%r, %r)" % (
            self.__class__.__name__,
            self._ptr,
            self._size
        )

    def __contains__(self, expr):
        return self == expr or self._ptr.__contains__(expr)

    @visit_chk
    def visit(self, callback, test_visit=None):
        new_ptr = self._ptr.visit(callback, test_visit)
        # Rebuild only if the pointer has been modified by the visit
        if new_ptr == self._ptr:
            return self
        return ExprMem(new_ptr, self.size)

    def copy(self):
        return ExprMem(self.ptr.copy(), size=self.size)

    def is_mem_segm(self):
        """Returns True if is ExprMem and ptr is_op_segm"""
        return self._ptr.is_op_segm()

    def depth(self):
        return self._ptr.depth() + 1

    def graph_recursive(self, graph):
        graph.add_node(self)
        pointer = self._ptr
        pointer.graph_recursive(graph)
        graph.add_uniq_edge(self, pointer)

    def is_mem(self):
        return True
%s" % + ([(str(arg), arg.size) for arg in args])) + + if not isinstance(op, str): + raise ValueError("ExprOp: 'op' argument must be a string") + + assert isinstance(args, tuple) + self._op, self._args = op, args + + # Set size for special cases + if self._op in [ + TOK_EQUAL, 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', + 'fxam_c0', 'fxam_c1', 'fxam_c2', 'fxam_c3', + "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", + "ucomiss_zf", "ucomiss_pf", "ucomiss_cf", + "ucomisd_zf", "ucomisd_pf", "ucomisd_cf"]: + size = 1 + elif self._op in [TOK_INF, TOK_INF_SIGNED, + TOK_INF_UNSIGNED, TOK_INF_EQUAL, + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, + TOK_EQUAL, TOK_POS, + TOK_POS_STRICT, + ]: + size = 1 + elif self._op.startswith("fp_to_sint"): + size = int(self._op[len("fp_to_sint"):]) + elif self._op.startswith("fpconvert_fp"): + size = int(self._op[len("fpconvert_fp"):]) + elif self._op in [ + "FLAG_ADD_CF", "FLAG_SUB_CF", + "FLAG_ADD_OF", "FLAG_SUB_OF", + "FLAG_EQ", "FLAG_EQ_CMP", + "FLAG_SIGN_SUB", "FLAG_SIGN_ADD", + "FLAG_EQ_AND", + "FLAG_EQ_ADDWC", "FLAG_EQ_SUBWC", + "FLAG_SIGN_ADDWC", "FLAG_SIGN_SUBWC", + "FLAG_ADDWC_CF", "FLAG_ADDWC_OF", + "FLAG_SUBWC_CF", "FLAG_SUBWC_OF", + ]: + size = 1 + + elif self._op.startswith('signExt_'): + size = int(self._op[8:]) + elif self._op.startswith('zeroExt_'): + size = int(self._op[8:]) + elif self._op in ['segm']: + size = self._args[1].size + else: + if None in sizes: + size = None + else: + # All arguments have the same size + size = list(sizes)[0] + + super(ExprOp, self).__init__(size) + + op = property(lambda self: self._op) + args = property(lambda self: self._args) + + def __reduce__(self): + state = tuple([self._op] + list(self._args)) + return self.__class__, state + + def __new__(cls, op, *args): + return Expr.get_object(cls, (op, args)) + + def __str__(self): + if self._op == '-': # Unary minus + return '-' + str_protected_child(self._args[0], self) + if self.is_associative() or self.is_infix(): + 
return (' ' + self._op + ' ').join([str_protected_child(arg, self) + for arg in self._args]) + return (self._op + '(' + + ', '.join([str(arg) for arg in self._args]) + ')') + + def get_r(self, mem_read=False, cst_read=False): + return reduce(lambda elements, arg: + elements.union(arg.get_r(mem_read, cst_read)), self._args, set()) + + def get_w(self): + raise ValueError('op cannot be written!', self) + + def _exprhash(self): + h_hargs = [hash(arg) for arg in self._args] + return hash((EXPROP, self._op, tuple(h_hargs))) + + def _exprrepr(self): + return "%s(%r, %s)" % (self.__class__.__name__, self._op, + ', '.join(repr(arg) for arg in self._args)) + + def __contains__(self, expr): + if self == expr: + return True + for arg in self._args: + if arg.__contains__(expr): + return True + return False + + def is_function_call(self): + return self._op.startswith('call') + + def is_infix(self): + return self._op in [ + '-', '+', '*', '^', '&', '|', '>>', '<<', + 'a>>', '>>>', '<<<', '/', '%', '**', + TOK_INF_UNSIGNED, + TOK_INF_SIGNED, + TOK_INF_EQUAL_UNSIGNED, + TOK_INF_EQUAL_SIGNED, + TOK_EQUAL + ] + + def is_associative(self): + "Return True iff current operation is associative" + return (self._op in ['+', '*', '^', '&', '|']) + + def is_commutative(self): + "Return True iff current operation is commutative" + return (self._op in ['+', '*', '^', '&', '|']) + + @visit_chk + def visit(self, callback, test_visit=None): + args = [arg.visit(callback, test_visit) for arg in self._args] + modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) + if modified: + return ExprOp(self._op, *args) + return self + + def copy(self): + args = [arg.copy() for arg in self._args] + return ExprOp(self._op, *args) + + def depth(self): + depth = [arg.depth() for arg in self._args] + return max(depth) + 1 + + def graph_recursive(self, graph): + graph.add_node(self) + for arg in self._args: + arg.graph_recursive(graph) + graph.add_uniq_edge(self, arg) + + def is_op(self, op=None): + 
class ExprSlice(Expr):
    """Slice [start:stop] of an expression; its size is stop - start"""

    __slots__ = Expr.__slots__ + ["_arg", "_start", "_stop"]

    def __init__(self, arg, start, stop):
        """Create an ExprSlice
        @arg: Expr, expression being sliced
        @start: int, lower bound of the slice
        @stop: int, upper bound of the slice, strictly greater than @start
        """
        # arg must be Expr
        assert isinstance(arg, Expr)
        for bound in (start, stop):
            assert isinstance(bound, int_types)
        assert start < stop

        self._arg = arg
        self._start = start
        self._stop = stop
        super(ExprSlice, self).__init__(stop - start)

    # Read-only views on the slots
    @property
    def arg(self):
        return self._arg

    @property
    def start(self):
        return self._start

    @property
    def stop(self):
        return self._stop

    def __reduce__(self):
        # Rebuilt from the class and the three constructor arguments
        return self.__class__, (self._arg, self._start, self._stop)

    def __new__(cls, arg, start, stop):
        return Expr.get_object(cls, (arg, start, stop))

    def __str__(self):
        sliced = str_protected_child(self._arg, self)
        return "%s[%d:%d]" % (sliced, self._start, self._stop)

    def get_r(self, mem_read=False, cst_read=False):
        # Reading a slice reads what the sliced expression reads
        return self._arg.get_r(mem_read, cst_read)

    def get_w(self):
        return self._arg.get_w()

    def _exprhash(self):
        key = (EXPRSLICE, hash(self._arg), self._start, self._stop)
        return hash(key)

    def _exprrepr(self):
        fields = (self.__class__.__name__, self._arg, self._start, self._stop)
        return "%s(%r, %d, %d)" % fields

    def __contains__(self, expr):
        if self == expr:
            return True
        return self._arg.__contains__(expr)

    @visit_chk
    def visit(self, callback, test_visit=None):
        visited = self._arg.visit(callback, test_visit)
        if visited == self._arg:
            # Nothing changed below: keep the current object
            return self
        return ExprSlice(visited, self._start, self._stop)

    def copy(self):
        return ExprSlice(self._arg.copy(), self._start, self._stop)

    def depth(self):
        return 1 + self._arg.depth()

    def slice_rest(self):
        "Return the completion of the current slice"
        full_size = self._arg.size
        low, high = self._start, self._stop
        if low >= full_size or high > full_size:
            raise ValueError('bad slice rest %s %s %s' %
                             (full_size, low, high))

        if low == high:
            return [(0, full_size)]

        # Part below the slice, then part above it
        rest = [(0, low)] if low != 0 else []
        if high < full_size:
            rest.append((high, full_size))
        return rest

    def graph_recursive(self, graph):
        graph.add_node(self)
        self._arg.graph_recursive(graph)
        graph.add_uniq_edge(self, self._arg)

    def is_slice(self, start=None, stop=None):
        # A bound left to None is not checked
        mismatch = (
            (start is not None and self._start != start) or
            (stop is not None and self._stop != stop)
        )
        return not mismatch
def compare_exprs(expr1, expr2):
    """Compare 2 expressions for canonization
    @expr1: Expr
    @expr2: Expr
    0  => ==
    1  => expr1 > expr2
    -1 => expr1 < expr2

    Expressions of different classes are ordered through EXPR_ORDER_DICT;
    expressions of the same class are compared field by field, the first
    non null comparison being the result.
    """
    kind = expr1.__class__
    other_kind = expr2.__class__
    if kind != other_kind:
        return cmp_elts(EXPR_ORDER_DICT[kind], EXPR_ORDER_DICT[other_kind])
    if expr1 == expr2:
        return 0

    # From here, both expressions share the same class
    if kind == ExprInt:
        return (cmp_elts(expr1.size, expr2.size) or
                cmp_elts(expr1.arg, expr2.arg))
    if kind == ExprId:
        name1 = force_bytes(expr1.name)
        name2 = force_bytes(expr2.name)
        return cmp_elts(name1, name2) or cmp_elts(expr1.size, expr2.size)
    if kind == ExprLoc:
        return (cmp_elts(expr1.loc_key, expr2.loc_key) or
                cmp_elts(expr1.size, expr2.size))
    if kind == ExprAssign:
        raise NotImplementedError(
            "Comparison from an ExprAssign not yet implemented"
        )
    if kind == ExprCond:
        return (compare_exprs(expr1.cond, expr2.cond) or
                compare_exprs(expr1.src1, expr2.src1) or
                compare_exprs(expr1.src2, expr2.src2))
    if kind == ExprMem:
        return (compare_exprs(expr1.ptr, expr2.ptr) or
                cmp_elts(expr1.size, expr2.size))
    if kind == ExprOp:
        if expr1.op != expr2.op:
            return cmp_elts(expr1.op, expr2.op)
        return compare_expr_list(expr1.args, expr2.args)
    if kind == ExprSlice:
        return (compare_exprs(expr1.arg, expr2.arg) or
                cmp_elts(expr1.start, expr2.start) or
                cmp_elts(expr1.stop, expr2.stop))
    if kind == ExprCompose:
        return compare_expr_list_compose(expr1.args, expr2.args)
    raise NotImplementedError(
        "Comparison between %r %r not implemented" % (expr1, expr2)
    )
def match_expr(expr, pattern, tks, result=None):
    """Try to match the expression @expr against @pattern, in which the
    expressions listed in @tks act as jokers.

    On failure, False is returned. On success, the returned value is either
    the matching context or True (terminal expression matched without
    joker). The context may be an empty dictionary, so callers have to test
    the result with `is False`.

    @expr : Expr to match
    @pattern : Expr pattern, may contain jokers
    @tks : list of ExprId, available jokers
    @result : dictionary of ExprId -> Expr, output matching context
    """

    if result is None:
        result = {}

    if pattern in tks:
        # pattern is a Joker
        return test_set(expr, pattern, tks, result)

    if expr.is_int() or expr.is_id() or expr.is_loc():
        # Terminal expressions
        return test_set(expr, pattern, tks, result)

    elif expr.is_op():

        # expr need to be the same operation than pattern
        if not pattern.is_op():
            return False
        if expr.op != pattern.op:
            return False
        if len(expr.args) != len(pattern.args):
            return False

        # Perform permutation only if the current operation is commutative
        if expr.is_commutative():
            permutations = itertools.permutations(expr.args)
        else:
            permutations = [expr.args]

        # For each permutations of arguments
        for permut in permutations:
            good = True
            # We need to use a copy of result to not override it
            myresult = dict(result)
            for sub_expr, sub_pattern in zip(permut, pattern.args):
                ret = match_expr(sub_expr, sub_pattern, tks, myresult)
                # If the current permutation do not match EVERY terms
                if ret is False:
                    good = False
                    break
            if good is True:
                # We found a possibility
                for joker, value in viewitems(myresult):
                    # Updating result in place (to keep pointer in recursion)
                    result[joker] = value
                return result
        return False

    # Recursive tests

    elif expr.is_mem():
        if not pattern.is_mem():
            return False
        if expr.size != pattern.size:
            return False
        return match_expr(expr.ptr, pattern.ptr, tks, result)

    elif expr.is_slice():
        if not pattern.is_slice():
            return False
        if expr.start != pattern.start or expr.stop != pattern.stop:
            return False
        return match_expr(expr.arg, pattern.arg, tks, result)

    elif expr.is_cond():
        if not pattern.is_cond():
            return False
        if match_expr(expr.cond, pattern.cond, tks, result) is False:
            return False
        if match_expr(expr.src1, pattern.src1, tks, result) is False:
            return False
        if match_expr(expr.src2, pattern.src2, tks, result) is False:
            return False
        return result

    elif expr.is_compose():
        if not pattern.is_compose():
            return False
        # zip() stops at the shortest sequence: without this check, two
        # compositions with a different number of parts would match as soon
        # as their common prefix does
        if len(expr.args) != len(pattern.args):
            return False
        for sub_expr, sub_pattern in zip(expr.args, pattern.args):
            if match_expr(sub_expr, sub_pattern, tks, result) is False:
                return False
        return result

    elif expr.is_aff():
        if not pattern.is_aff():
            return False
        if match_expr(expr.src, pattern.src, tks, result) is False:
            return False
        if match_expr(expr.dst, pattern.dst, tks, result) is False:
            return False
        return result

    else:
        raise NotImplementedError("match_expr: Unknown type: %s" % type(expr))
def get_expr_mem(expr):
    """Retrieve memory accesses of an @expr
    @expr: Expr
    Return the set of ExprMem met while visiting @expr"""
    mems = set()

    def collect_mem(node):
        # Record the memory accesses; the node itself is given back
        # untouched
        if isinstance(node, ExprMem):
            mems.add(node)
        return node

    expr.visit(collect_mem)
    return mems
def expr_is_signed_greater_or_equal(op1, op2):
    """
    Signed cmp
    if op1 >= op2:
       Return ExprInt(1, 1)
    else:
       Return ExprInt(0, 1)
    """

    nf = _expr_compute_nf(op1, op2)
    of = _expr_compute_of(op1, op2)
    # Exact negation of expr_is_signed_lower (nf ^ of): the equality case is
    # included, hence ">=" and not ">"
    return ~(nf ^ of)
def expr_is_qNaN(expr):
    """Return 1 or 0 on 1 bit if expr represent a qNaN (quiet) value according to
    IEEE754
    """
    info = size_to_IEEE754_info[expr.size]
    # The significand occupies the bits [0, info["significand"]) and the
    # exponent starts right above it. The quiet bit is the top bit of the
    # significand, so its index is info["significand"] - 1; the bit at index
    # info["significand"] belongs to the exponent and is always set for a NaN.
    quiet_bit = info["significand"] - 1
    significand_top = expr[quiet_bit: quiet_bit + 1]
    return expr_is_NaN(expr) & significand_top
def expr_is_float_equal(op1, op2):
    r"""Return 1 on 1 bit if @op1 == @op2, 0 otherwise.
    /!\ Assume @op1 and @op2 are not NaN
    Comparison is the floating point one, defined in IEEE754
    """
    # The docstring is a raw string: "\ " is not a valid escape sequence
    sign1, sign2 = op1.msb(), op2.msb()
    # Everything but the sign bit
    magn1, magn2 = op1[:-1], op2[:-1]
    return ExprCond(
        magn1 ^ magn2,
        # Magnitudes differ: the values cannot be equal
        ExprInt(0, 1),
        ExprCond(
            magn1,
            # magn1 == magn2, are the signs equal?
            ~(sign1 ^ sign2),
            # Special case: -0.0 == +0.0
            ExprInt(1, 1)
        )
    )
def parity(a):
    """Return 1 if the low byte of @a holds an even number of bits set,
    0 otherwise
    @a: integer, only its 8 least significant bits are used
    """
    low_byte = a & 0xFF
    flag = 1
    # Each bit set flips the flag; bits to 0 leave it untouched
    for shift in range(8):
        flag ^= (low_byte >> shift) & 1
    return flag
def get_missing_interval(all_intervals, i_min=0, i_max=32):
    """Return a list of missing interval in all_interval
    @all_interval: iterable of (int, int), not modified
    @i_min: int, minimal missing interval bound
    @i_max: int, maximal missing interval bound

    The returned intervals are sorted and stay in [@i_min, @i_max], even
    when the input intervals overlap each other or go beyond the bounds.
    """

    missing_i = []
    last_pos = i_min
    for start, stop in sorted(all_intervals):
        if start >= i_max:
            # Sorted input: nothing left below the upper bound
            break
        if last_pos < start:
            missing_i.append((last_pos, start))
        # Never move backward: an interval may be nested in a previous one
        last_pos = max(last_pos, stop)

    if last_pos < i_max:
        missing_i.append((last_pos, i_max))
    return missing_i
+ Returns: + - variables with their corresponding values + - original expression with variables translated + """ + + def __init__(self, expr, var_prefix="v"): + """Set the expression @expr to handle and launch variable identification + process + @expr: Expr instance + @var_prefix: (optional) prefix of the variable name, default is 'v'""" + + # Init + self.var_indice = itertools.count() + self.var_asked = set() + self._vars = {} # VarID -> Expr + self.var_prefix = var_prefix + + # Launch recurrence + self.find_variables_rec(expr) + + # Compute inter-variable dependencies + has_change = True + while has_change: + has_change = False + for var_id, var_value in list(viewitems(self._vars)): + cur = var_value + + # Do not replace with itself + to_replace = { + v_val:v_id + for v_id, v_val in viewitems(self._vars) + if v_id != var_id + } + var_value = var_value.replace_expr(to_replace) + + if cur != var_value: + # Force @self._vars update + has_change = True + self._vars[var_id] = var_value + break + + # Replace in the original equation + self._equation = expr.replace_expr( + { + v_val: v_id for v_id, v_val + in viewitems(self._vars) + } + ) + + # Compute variables dependencies + self._vars_ordered = collections.OrderedDict() + todo = set(self._vars) + needs = {} + + ## Build initial needs + for var_id, var_expr in viewitems(self._vars): + ### Handle corner cases while using Variable Identifier on an + ### already computed equation + needs[var_id] = [ + var_name + for var_name in var_expr.get_r(mem_read=True) + if self.is_var_identifier(var_name) and \ + var_name in todo and \ + var_name != var_id + ] + + ## Build order list + while todo: + done = set() + for var_id in todo: + all_met = True + for need in needs[var_id]: + if need not in self._vars_ordered: + # A dependency is not met + all_met = False + break + if not all_met: + continue + + # All dependencies are already met, add current + self._vars_ordered[var_id] = self._vars[var_id] + done.add(var_id) + + # Update the 
todo list + for element_done in done: + todo.remove(element_done) + + def is_var_identifier(self, expr): + "Return True iff @expr is a variable identifier" + if not isinstance(expr, m2_expr.ExprId): + return False + return expr in self._vars + + def find_variables_rec(self, expr): + """Recursive method called by find_variable to expand @expr. + Set @var_names and @var_values. + This implementation is faster than an expression visitor because + we do not rebuild each expression. + """ + + if (expr in self.var_asked): + # Expr has already been asked + if expr not in viewvalues(self._vars): + # Create var + identifier = m2_expr.ExprId( + "%s%s" % ( + self.var_prefix, + next(self.var_indice) + ), + size = expr.size + ) + self._vars[identifier] = expr + + # Recursion stop case + return + else: + # First time for @expr + self.var_asked.add(expr) + + if isinstance(expr, m2_expr.ExprOp): + for a in expr.args: + self.find_variables_rec(a) + + elif isinstance(expr, m2_expr.ExprInt): + pass + + elif isinstance(expr, m2_expr.ExprId): + pass + + elif isinstance(expr, m2_expr.ExprLoc): + pass + + elif isinstance(expr, m2_expr.ExprMem): + self.find_variables_rec(expr.ptr) + + elif isinstance(expr, m2_expr.ExprCompose): + for arg in expr.args: + self.find_variables_rec(arg) + + elif isinstance(expr, m2_expr.ExprSlice): + self.find_variables_rec(expr.arg) + + elif isinstance(expr, m2_expr.ExprCond): + self.find_variables_rec(expr.cond) + self.find_variables_rec(expr.src1) + self.find_variables_rec(expr.src2) + + else: + raise NotImplementedError("Type not handled: %s" % expr) + + @property + def vars(self): + return self._vars_ordered + + @property + def equation(self): + return self._equation + + def __str__(self): + "Display variables and final equation" + out = "" + for var_id, var_expr in viewitems(self.vars): + out += "%s = %s\n" % (var_id, var_expr) + out += "Final: %s" % self.equation + return out + + +class ExprRandom(object): + """Return an expression randomly generated""" 
+ + # Identifiers length + identifier_len = 5 + # Identifiers' name charset + identifier_charset = string.ascii_letters + # Number max value + number_max = 0xFFFFFFFF + # Available operations + operations_by_args_number = {1: ["-"], + 2: ["<<", "<<<", ">>", ">>>"], + "2+": ["+", "*", "&", "|", "^"], + } + # Maximum number of argument for operations + operations_max_args_number = 5 + # If set, output expression is a perfect tree + perfect_tree = True + # Max argument size in slice, relative to slice size + slice_add_size = 10 + # Maximum number of layer in compose + compose_max_layer = 5 + # Maximum size of memory address in bits + memory_max_address_size = 32 + # Re-use already generated elements to mimic a more realistic behavior + reuse_element = True + generated_elements = {} # (depth, size) -> [Expr] + + @classmethod + def identifier(cls, size=32): + """Return a random identifier + @size: (optional) identifier size + """ + return m2_expr.ExprId("".join([random.choice(cls.identifier_charset) + for _ in range(cls.identifier_len)]), + size=size) + + @classmethod + def number(cls, size=32): + """Return a random number + @size: (optional) number max bits + """ + num = random.randint(0, cls.number_max % (2**size)) + return m2_expr.ExprInt(num, size) + + @classmethod + def atomic(cls, size=32): + """Return an atomic Expression + @size: (optional) Expr size + """ + available_funcs = [cls.identifier, cls.number] + return random.choice(available_funcs)(size=size) + + @classmethod + def operation(cls, size=32, depth=1): + """Return an ExprOp + @size: (optional) Operation size + @depth: (optional) Expression depth + """ + operand_type = random.choice(list(cls.operations_by_args_number)) + if isinstance(operand_type, str) and "+" in operand_type: + number_args = random.randint( + int(operand_type[:-1]), + cls.operations_max_args_number + ) + else: + number_args = operand_type + + args = [cls._gen(size=size, depth=depth - 1) + for _ in range(number_args)] + operand = 
random.choice(cls.operations_by_args_number[operand_type]) + return m2_expr.ExprOp(operand, + *args) + + @classmethod + def slice(cls, size=32, depth=1): + """Return an ExprSlice + @size: (optional) Operation size + @depth: (optional) Expression depth + """ + start = random.randint(0, size) + stop = start + size + return cls._gen(size=random.randint(stop, stop + cls.slice_add_size), + depth=depth - 1)[start:stop] + + @classmethod + def compose(cls, size=32, depth=1): + """Return an ExprCompose + @size: (optional) Operation size + @depth: (optional) Expression depth + """ + # First layer + upper_bound = random.randint(1, size) + args = [cls._gen(size=upper_bound, depth=depth - 1)] + + # Next layers + while (upper_bound < size): + if len(args) == (cls.compose_max_layer - 1): + # We reach the maximum size + new_upper_bound = size + else: + new_upper_bound = random.randint(upper_bound + 1, size) + + args.append(cls._gen(size=new_upper_bound - upper_bound)) + upper_bound = new_upper_bound + return m2_expr.ExprCompose(*args) + + @classmethod + def memory(cls, size=32, depth=1): + """Return an ExprMem + @size: (optional) Operation size + @depth: (optional) Expression depth + """ + + address_size = random.randint(1, cls.memory_max_address_size) + return m2_expr.ExprMem(cls._gen(size=address_size, + depth=depth - 1), + size=size) + + @classmethod + def _gen(cls, size=32, depth=1): + """Internal function for generating sub-expression according to options + @size: (optional) Operation size + @depth: (optional) Expression depth + /!\ @generated_elements is left modified + """ + # Perfect tree handling + if not cls.perfect_tree: + depth = random.randint(max(0, depth - 2), depth) + + # Element re-use + if cls.reuse_element and random.choice([True, False]) and \ + (depth, size) in cls.generated_elements: + return random.choice(cls.generated_elements[(depth, size)]) + + # Recursion stop + if depth == 0: + return cls.atomic(size=size) + + # Build a more complex expression + 
@classmethod
def get(cls, size=32, depth=1, clean=True):
    """Return a randomly generated expression
    @size: (optional) Operation size
    @depth: (optional) Expression depth
    @clean: (optional) Clean expression cache between two calls

    When @clean is set, cls.generated_elements is reset before the generation
    and again afterwards, including when the generation raises.
    """
    if not clean:
        # Keep (and feed) the current cache of generated elements
        return cls._gen(size=size, depth=depth)

    # Init state
    cls.generated_elements = {}
    try:
        return cls._gen(size=size, depth=depth)
    finally:
        # Clear state, even on error, so that a failed generation does not
        # leave partial elements behind for a later call with clean=False
        cls.generated_elements = {}
def _product_values(args, build):
    """Yield the ConstrainedValue of an expression built from sub-expressions
    @args: sub-expressions whose possible values have to be combined
    @build: callable receiving the list of chosen sub-values (one per element
            of @args, in the same order) and returning the resulting Expr

    One ConstrainedValue is produced per element of the cartesian product of
    the sub-expressions' possible values; its constraints are the union of
    the constraints attached to the chosen sub-values.
    """
    consvals_args = [list(possible_values(arg)) for arg in args]
    for possibility in itertools.product(*consvals_args):
        # Merge constraint of each sub-element
        constraints = frozenset(itertools.chain.from_iterable(
            consval.constraints for consval in possibility
        ))
        values = [consval.value for consval in possibility]
        yield ConstrainedValue(constraints, build(values))


def possible_values(expr):
    """Return possible values for expression @expr, associated with their
    condition constraint as a ConstrainedValues instance
    @expr: Expr instance

    Raise RuntimeError if the type of @expr is not supported.
    """

    consvals = ConstrainedValues()

    # Terminal expression
    if isinstance(expr, (m2_expr.ExprInt, m2_expr.ExprId, m2_expr.ExprLoc)):
        consvals.add(ConstrainedValue(frozenset(), expr))
    # Unary expression
    elif isinstance(expr, m2_expr.ExprSlice):
        consvals.update(ConstrainedValue(consval.constraints,
                                         consval.value[expr.start:expr.stop])
                        for consval in possible_values(expr.arg))
    elif isinstance(expr, m2_expr.ExprMem):
        consvals.update(ConstrainedValue(consval.constraints,
                                         m2_expr.ExprMem(consval.value,
                                                         expr.size))
                        for consval in possible_values(expr.ptr))
    elif isinstance(expr, m2_expr.ExprAssign):
        consvals.update(possible_values(expr.src))
    # Special case: constraint insertion
    elif isinstance(expr, m2_expr.ExprCond):
        # src1 is the value when the condition is not zero, src2 when it is
        src1cond = CondConstraintNotZero(expr.cond)
        src2cond = CondConstraintZero(expr.cond)
        consvals.update(ConstrainedValue(consval.constraints.union([src1cond]),
                                         consval.value)
                        for consval in possible_values(expr.src1))
        consvals.update(ConstrainedValue(consval.constraints.union([src2cond]),
                                         consval.value)
                        for consval in possible_values(expr.src2))
    # N-ary expressions: combine every possibility of every argument
    elif isinstance(expr, m2_expr.ExprOp):
        consvals.update(_product_values(
            expr.args,
            lambda values: m2_expr.ExprOp(expr.op, *values)
        ))
    elif isinstance(expr, m2_expr.ExprCompose):
        consvals.update(_product_values(
            expr.args,
            lambda values: m2_expr.ExprCompose(*values)
        ))
    else:
        raise RuntimeError("Unsupported type for expr: %s" % type(expr))

    return consvals
class ExprNode(object):
    """Clone of Expression object with additional information"""

    def __init__(self, expr):
        # Mirrored expression
        self.expr = expr
        # Information attached to the node. ExprReducer.categorize stores the
        # result of the reduction rules here; it is initialised to None so
        # that the __repr__ of the subclasses, which all read self.info,
        # also work on a node which has not been categorized yet.
        self.info = None
def expr2node(self, expr):
    """Build ExprNode mirror of @expr

    @expr: Expression to analyze

    Sub-expressions are mirrored recursively and attached to the attribute of
    the node named like the one of the expression (ptr, arg, args, cond,
    src1, src2).
    Raise TypeError if the type of @expr is unknown.
    """

    if isinstance(expr, ExprId):
        node = ExprNodeId(expr)
    elif isinstance(expr, ExprLoc):
        node = ExprNodeLoc(expr)
    elif isinstance(expr, ExprInt):
        node = ExprNodeInt(expr)
    elif isinstance(expr, ExprMem):
        son = self.expr2node(expr.ptr)
        node = ExprNodeMem(expr)
        node.ptr = son
    elif isinstance(expr, ExprSlice):
        son = self.expr2node(expr.arg)
        node = ExprNodeSlice(expr)
        node.arg = son
    elif isinstance(expr, ExprOp):
        sons = [self.expr2node(arg) for arg in expr.args]
        node = ExprNodeOp(expr)
        node.args = sons
    elif isinstance(expr, ExprCompose):
        sons = [self.expr2node(arg) for arg in expr.args]
        node = ExprNodeCompose(expr)
        node.args = sons
    elif isinstance(expr, ExprCond):
        node = ExprNodeCond(expr)
        node.cond = self.expr2node(expr.cond)
        node.src1 = self.expr2node(expr.src1)
        node.src2 = self.expr2node(expr.src2)
    else:
        # Format the message: giving the type as a second argument to
        # TypeError leaves the "%r" placeholder unexpanded
        raise TypeError("Unknown Expr Type %r" % type(expr))
    return node
import operator


@total_ordering
class moduint(object):
    """Unsigned integer of fixed size, with wrap-around arithmetic

    Concrete classes are built by define_uint, which provides the class
    attributes:
    - size: number of bits
    - limit: 1 << size
    The value, reduced modulo limit, is stored in the attribute arg.

    For an operation between two moduint, the result has the class of the
    widest operand; with any other operand, it has the class of self.
    """

    def __init__(self, arg):
        self.arg = int(arg) % self.__class__.limit
        assert(self.arg >= 0 and self.arg < self.__class__.limit)

    def __repr__(self):
        return self.__class__.__name__ + '(' + hex(self.arg) + ')'

    def __hash__(self):
        return hash(self.arg)

    @classmethod
    def maxcast(cls, c2):
        """Return the widest class between @cls and the class of @c2
        @c2: moduint instance
        On equal sizes, the class of @c2 is returned.
        """
        c2 = c2.__class__
        if cls.size > c2.size:
            return cls
        else:
            return c2

    def _binop(self, y, func):
        """Return func(self, @y), wrapped in the result class
        @y: moduint instance or raw value
        @func: binary function applied on the raw values
        """
        if isinstance(y, moduint):
            cls = self.maxcast(y)
            return cls(func(self.arg, y.arg))
        return self.__class__(func(self.arg, y))

    def _rbinop(self, y, func):
        """Return func(@y, self), wrapped in the result class
        Reflected counterpart of _binop: @y is the left operand.
        """
        if isinstance(y, moduint):
            cls = self.maxcast(y)
            return cls(func(y.arg, self.arg))
        return self.__class__(func(y, self.arg))

    def __eq__(self, y):
        if isinstance(y, moduint):
            return self.arg == y.arg
        return self.arg == y

    def __ne__(self, y):
        # required Python 2.7.14
        return not self == y

    def __lt__(self, y):
        if isinstance(y, moduint):
            return self.arg < y.arg
        return self.arg < y

    def __add__(self, y):
        return self._binop(y, operator.add)

    def __and__(self, y):
        return self._binop(y, operator.and_)

    def __div__(self, y):
        """Return the quotient of self by @y, truncated toward zero

        The result is a raw integer: it is not wrapped in a moduint class.
        """
        # Python: 8 / -7 == -2 (C-like: -1)
        # int(float) trick cannot be used, due to information loss
        den = int(y)
        num = int(self)
        result_sign = 1 if (den * num) >= 0 else -1
        return (abs(num) // abs(den)) * result_sign

    def __floordiv__(self, y):
        return self.__div__(y)

    def __int__(self):
        return int(self.arg)

    def __long__(self):
        return int(self.arg)

    def __index__(self):
        return int(self.arg)

    def __invert__(self):
        return self.__class__(~self.arg)

    def __lshift__(self, y):
        return self._binop(y, operator.lshift)

    def __mod__(self, y):
        # See __div__ for implementation choice
        cls = self.__class__
        if isinstance(y, moduint):
            cls = self.maxcast(y)
        return cls(self.arg - y * (self // y))

    def __mul__(self, y):
        return self._binop(y, operator.mul)

    def __neg__(self):
        return self.__class__(-self.arg)

    def __or__(self, y):
        return self._binop(y, operator.or_)

    def __radd__(self, y):
        return self.__add__(y)

    def __rand__(self, y):
        return self.__and__(y)

    def __rdiv__(self, y):
        # Unlike __div__, the raw values are divided with Python's floor
        # division and the result is wrapped
        return self._rbinop(y, operator.floordiv)

    def __rfloordiv__(self, y):
        return self.__rdiv__(y)

    def __rlshift__(self, y):
        return self._rbinop(y, operator.lshift)

    def __rmod__(self, y):
        return self._rbinop(y, operator.mod)

    def __rmul__(self, y):
        return self.__mul__(y)

    def __ror__(self, y):
        return self.__or__(y)

    def __rrshift__(self, y):
        return self._rbinop(y, operator.rshift)

    def __rshift__(self, y):
        return self._binop(y, operator.rshift)

    def __rsub__(self, y):
        return self._rbinop(y, operator.sub)

    def __rxor__(self, y):
        return self.__xor__(y)

    def __sub__(self, y):
        return self._binop(y, operator.sub)

    def __xor__(self, y):
        return self._binop(y, operator.xor)

    def __hex__(self):
        return hex(self.arg)

    def __abs__(self):
        # Raw integer, not wrapped
        return abs(self.arg)

    def __rpow__(self, v):
        # Raw result of @v ** self, not wrapped
        return v ** self.arg

    def __pow__(self, v):
        return self.__class__(self.arg ** v)


class modint(moduint):
    """Signed counterpart of moduint

    The stored value lies in [-limit // 2, limit // 2). Concrete classes are
    built by define_int.
    """

    def __init__(self, arg):
        if isinstance(arg, moduint):
            arg = arg.arg
        limit = self.__class__.limit
        a = arg % limit
        # The upper half of the unsigned range stands for negative values
        if a >= limit // 2:
            a -= limit
        self.arg = a
        # The upper bound is limit // 2 (not limit): the value has just been
        # folded into the signed range
        assert(
            self.arg >= -limit // 2 and
            self.arg < limit // 2
        )


def is_modint(a):
    """Return True if @a is a moduint (or modint) instance"""
    return isinstance(a, moduint)


def size2mask(size):
    """Return the mask with the @size lowest bits set"""
    return (1 << size) - 1


# size -> class, for unsigned and signed classes
mod_size2uint = {}
mod_size2int = {}

# class -> size, for unsigned and signed classes
mod_uint2size = {}
mod_int2size = {}


def define_int(size):
    """Build the 'modint' instance corresponding to size @size

    The class, named 'int<size>', is published in the module globals and
    registered in mod_size2int / mod_int2size.
    """
    name = 'int%d' % size
    cls = type(name, (modint,), {"size": size, "limit": 1 << size})
    globals()[name] = cls
    mod_size2int[size] = cls
    mod_int2size[cls] = size
    return cls


def define_uint(size):
    """Build the 'moduint' instance corresponding to size @size

    The class, named 'uint<size>', is published in the module globals and
    registered in mod_size2uint / mod_uint2size.
    """
    name = 'uint%d' % size
    cls = type(name, (moduint,), {"size": size, "limit": 1 << size})
    globals()[name] = cls
    mod_size2uint[size] = cls
    mod_uint2size[cls] = size
    return cls


def define_common_int():
    "Define common int: signed and unsigned classes for sizes 1 to 256"
    common_int = range(1, 257)

    for i in common_int:
        define_int(i)

    for i in common_int:
        define_uint(i)


define_common_int()
def parse_loc_key(t):
    """Parse action of expr_loc: build the ExprLoc described by tokens @t
    @t: parsed tokens; the first one is given to LocKey, the second one is
        the size of the ExprLoc
    """
    assert len(t) == 2
    key, size = t[0], t[1]
    return ExprLoc(LocKey(key), size)
def str_to_expr(str_in):
    """Parse the @str_in and return the corresponding Expression
    @str_in: repr string of an Expression

    Raise RuntimeError if @str_in cannot be parsed.
    """

    try:
        value = expr.parseString(str_in)
    except Exception:
        # Catch Exception rather than using a bare except: KeyboardInterrupt
        # and SystemExit must not be turned into a parsing error
        raise RuntimeError("Cannot parse expression %s" % str_in)
    assert len(value) == 1
    return value[0]
+ + Available passes lists are: + - commons: common passes such as constant folding + - heavy : rare passes (for instance, in case of obfuscation) + """ + + # Common passes + PASS_COMMONS = { + m2_expr.ExprOp: [ + simplifications_common.simp_cst_propagation, + simplifications_common.simp_cond_op_int, + simplifications_common.simp_cond_factor, + simplifications_common.simp_add_multiple, + # CC op + simplifications_common.simp_cc_conds, + simplifications_common.simp_subwc_cf, + simplifications_common.simp_subwc_of, + simplifications_common.simp_sign_subwc_cf, + simplifications_common.simp_double_zeroext, + simplifications_common.simp_double_signext, + simplifications_common.simp_zeroext_eq_cst, + simplifications_common.simp_ext_eq_ext, + + simplifications_common.simp_cmp_int, + simplifications_common.simp_sign_inf_zeroext, + simplifications_common.simp_cmp_int_int, + simplifications_common.simp_ext_cst, + simplifications_common.simp_zeroext_and_cst_eq_cst, + simplifications_common.simp_test_signext_inf, + simplifications_common.simp_test_zeroext_inf, + simplifications_common.simp_cond_inf_eq_unsigned_zero, + + ], + + m2_expr.ExprSlice: [ + simplifications_common.simp_slice, + simplifications_common.simp_slice_of_ext, + simplifications_common.simp_slice_of_op_ext, + ], + m2_expr.ExprCompose: [simplifications_common.simp_compose], + m2_expr.ExprCond: [ + simplifications_common.simp_cond, + simplifications_common.simp_cond_zeroext, + simplifications_common.simp_cond_add, + # CC op + simplifications_common.simp_cond_flag, + simplifications_common.simp_cmp_int_arg, + + simplifications_common.simp_cond_eq_zero, + simplifications_common.simp_x_and_cst_eq_cst, + simplifications_common.simp_cond_logic_ext, + simplifications_common.simp_cond_sign_bit, + simplifications_common.simp_cond_eq_1_0, + ], + m2_expr.ExprMem: [simplifications_common.simp_mem], + + } + + + # Heavy passes + PASS_HEAVY = {} + + # Cond passes + PASS_COND = { + m2_expr.ExprSlice: [ + 
def apply_simp(self, expression):
    """Apply enabled simplifications on expression
    @expression: Expr instance
    Return an Expr instance

    The callbacks registered for the class of @expression are applied in
    order; the process stops as soon as one of them returns an expression of
    another class.
    """

    cls = expression.__class__
    # Only pay for the before/after comparison when debug messages are
    # actually emitted. Comparing the logger level with ">= DEBUG" was true
    # for every configured level, WARNING (the default set above) included.
    debug_enabled = log_exprsimp.isEnabledFor(logging.DEBUG)
    for simp_func in self.expr_simp_cb.get(cls, []):
        # Apply simplifications
        before = expression
        expression = simp_func(self, expression)

        if debug_enabled and before != expression:
            log_exprsimp.debug("[%s] %s => %s", simp_func, before, expression)

        # If class changes, stop to prevent wrong simplifications
        if expression.__class__ is not cls:
            break

    return expression
def expr_simp_wrapper(self, expression, callback=None):
    """Simplify @expression through its visit method
    @expression: Expr instance
    @callback: (optional) function given to the visit instead of
               self.expr_simp
    Return an Expr instance

    An expression already present in self.simplified_exprs is returned as is.
    """

    already_simplified = expression in self.simplified_exprs
    if already_simplified:
        return expression

    def needs_visit(sub_expr):
        # Test given to the visit: true for not yet simplified expressions
        return sub_expr not in self.simplified_exprs

    visitor = self.expr_simp if callback is None else callback
    return expression.visit(visitor, needs_visit)
+ """This passe includes: + - Constant folding + - Common logical identities + - Common binary identities + """ + + # merge associatif op + args = list(expr.args) + op_name = expr.op + # simpl integer manip + # int OP int => int + # TODO: <<< >>> << >> are architecture dependent + if op_name in op_propag_cst: + while (len(args) >= 2 and + args[-1].is_int() and + args[-2].is_int()): + int2 = args.pop() + int1 = args.pop() + if op_name == '+': + out = int1.arg + int2.arg + elif op_name == '*': + out = int1.arg * int2.arg + elif op_name == '**': + out =int1.arg ** int2.arg + elif op_name == '^': + out = int1.arg ^ int2.arg + elif op_name == '&': + out = int1.arg & int2.arg + elif op_name == '|': + out = int1.arg | int2.arg + elif op_name == '>>': + if int(int2) > int1.size: + out = 0 + else: + out = int1.arg >> int2.arg + elif op_name == '<<': + if int(int2) > int1.size: + out = 0 + else: + out = int1.arg << int2.arg + elif op_name == 'a>>': + tmp1 = mod_size2int[int1.arg.size](int1.arg) + tmp2 = mod_size2uint[int2.arg.size](int2.arg) + if tmp2 > int1.size: + is_signed = int(int1) & (1 << (int1.size - 1)) + if is_signed: + out = -1 + else: + out = 0 + else: + out = mod_size2uint[int1.arg.size](tmp1 >> tmp2) + elif op_name == '>>>': + shifter = int2.arg % int2.size + out = (int1.arg >> shifter) | (int1.arg << (int2.size - shifter)) + elif op_name == '<<<': + shifter = int2.arg % int2.size + out = (int1.arg << shifter) | (int1.arg >> (int2.size - shifter)) + elif op_name == '/': + out = int1.arg // int2.arg + elif op_name == '%': + out = int1.arg % int2.arg + elif op_name == 'sdiv': + assert int2.arg.arg + tmp1 = mod_size2int[int1.arg.size](int1.arg) + tmp2 = mod_size2int[int2.arg.size](int2.arg) + out = mod_size2uint[int1.arg.size](tmp1 // tmp2) + elif op_name == 'smod': + assert int2.arg.arg + tmp1 = mod_size2int[int1.arg.size](int1.arg) + tmp2 = mod_size2int[int2.arg.size](int2.arg) + out = mod_size2uint[int1.arg.size](tmp1 % tmp2) + elif op_name == 'umod': + assert 
int2.arg.arg + tmp1 = mod_size2uint[int1.arg.size](int1.arg) + tmp2 = mod_size2uint[int2.arg.size](int2.arg) + out = mod_size2uint[int1.arg.size](tmp1 % tmp2) + elif op_name == 'udiv': + assert int2.arg.arg + tmp1 = mod_size2uint[int1.arg.size](int1.arg) + tmp2 = mod_size2uint[int2.arg.size](int2.arg) + out = mod_size2uint[int1.arg.size](tmp1 // tmp2) + + + + args.append(ExprInt(out, int1.size)) + + # cnttrailzeros(int) => int + if op_name == "cnttrailzeros" and args[0].is_int(): + i = 0 + while args[0].arg & (1 << i) == 0 and i < args[0].size: + i += 1 + return ExprInt(i, args[0].size) + + # cntleadzeros(int) => int + if op_name == "cntleadzeros" and args[0].is_int(): + if args[0].arg == 0: + return ExprInt(args[0].size, args[0].size) + i = args[0].size - 1 + while args[0].arg & (1 << i) == 0: + i -= 1 + return ExprInt(expr.size - (i + 1), args[0].size) + + # -(-(A)) => A + if (op_name == '-' and len(args) == 1 and args[0].is_op('-') and + len(args[0].args) == 1): + return args[0].args[0] + + # -(int) => -int + if op_name == '-' and len(args) == 1 and args[0].is_int(): + return ExprInt(-int(args[0]), expr.size) + # A op 0 =>A + if op_name in ['+', '|', "^", "<<", ">>", "<<<", ">>>"] and len(args) > 1: + if args[-1].is_int(0): + args.pop() + # A - 0 =>A + if op_name == '-' and len(args) > 1 and args[-1].is_int(0): + assert len(args) == 2 # Op '-' with more than 2 args: SantityCheckError + return args[0] + + # A * 1 =>A + if op_name == "*" and len(args) > 1 and args[-1].is_int(1): + args.pop() + + # for cannon form + # A * -1 => - A + if op_name == "*" and len(args) > 1 and args[-1] == args[-1].mask: + args.pop() + args[-1] = - args[-1] + + # op A => A + if op_name in ['+', '*', '^', '&', '|', '>>', '<<', + 'a>>', '<<<', '>>>', 'sdiv', 'smod', 'umod', 'udiv'] and len(args) == 1: + return args[0] + + # A-B => A + (-B) + if op_name == '-' and len(args) > 1: + if len(args) > 2: + raise ValueError( + 'sanity check fail on expr -: should have one or 2 args ' + + '%r %s' 
% (expr, expr) + ) + return ExprOp('+', args[0], -args[1]) + + # A op 0 => 0 + if op_name in ['&', "*"] and args[-1].is_int(0): + return ExprInt(0, expr.size) + + # - (A + B +...) => -A + -B + -C + if op_name == '-' and len(args) == 1 and args[0].is_op('+'): + args = [-a for a in args[0].args] + return ExprOp('+', *args) + + # -(a?int1:int2) => (a?-int1:-int2) + if (op_name == '-' and len(args) == 1 and + args[0].is_cond() and + args[0].src1.is_int() and args[0].src2.is_int()): + int1 = args[0].src1 + int2 = args[0].src2 + int1 = ExprInt(-int1.arg, int1.size) + int2 = ExprInt(-int2.arg, int2.size) + return ExprCond(args[0].cond, int1, int2) + + i = 0 + while i < len(args) - 1: + j = i + 1 + while j < len(args): + # A ^ A => 0 + if op_name == '^' and args[i] == args[j]: + args[i] = ExprInt(0, args[i].size) + del args[j] + continue + # A + (- A) => 0 + if op_name == '+' and args[j].is_op("-"): + if len(args[j].args) == 1 and args[i] == args[j].args[0]: + args[i] = ExprInt(0, args[i].size) + del args[j] + continue + # (- A) + A => 0 + if op_name == '+' and args[i].is_op("-"): + if len(args[i].args) == 1 and args[j] == args[i].args[0]: + args[i] = ExprInt(0, args[i].size) + del args[j] + continue + # A | A => A + if op_name == '|' and args[i] == args[j]: + del args[j] + continue + # A & A => A + if op_name == '&' and args[i] == args[j]: + del args[j] + continue + j += 1 + i += 1 + + if op_name in ['|', '&', '%', '/', '**'] and len(args) == 1: + return args[0] + + # A <<< A.size => A + if (op_name in ['<<<', '>>>'] and + args[1].is_int() and + args[1].arg == args[0].size): + return args[0] + + # (A <<< X) <<< Y => A <<< (X+Y) (or <<< >>>) if X + Y does not overflow + if (op_name in ['<<<', '>>>'] and + args[0].is_op() and + args[0].op in ['<<<', '>>>']): + A = args[0].args[0] + X = args[0].args[1] + Y = args[1] + if op_name != args[0].op and e_s(X - Y) == ExprInt(0, X.size): + return args[0].args[0] + elif X.is_int() and Y.is_int(): + new_X = int(X) % expr.size + new_Y 
= int(Y) % expr.size + if op_name == args[0].op: + rot = (new_X + new_Y) % expr.size + op = op_name + else: + rot = new_Y - new_X + op = op_name + if rot < 0: + rot = - rot + op = {">>>": "<<<", "<<<": ">>>"}[op_name] + args = [A, ExprInt(rot, expr.size)] + op_name = op + + else: + # Do not consider this case, too tricky (overflow on addition / + # subtraction) + pass + + # A >> X >> Y => A >> (X+Y) if X + Y does not overflow + # To be sure, only consider the simplification when X.msb and Y.msb are 0 + if (op_name in ['<<', '>>'] and + args[0].is_op(op_name)): + X = args[0].args[1] + Y = args[1] + if (e_s(X.msb()) == ExprInt(0, 1) and + e_s(Y.msb()) == ExprInt(0, 1)): + args = [args[0].args[0], X + Y] + + # ((var >> int1) << int1) => var & mask + # ((var << int1) >> int1) => var & mask + if (op_name in ['<<', '>>'] and + args[0].is_op() and + args[0].op in ['<<', '>>'] and + op_name != args[0]): + var = args[0].args[0] + int1 = args[0].args[1] + int2 = args[1] + if int1 == int2 and int1.is_int() and int(int1) < expr.size: + if op_name == '>>': + mask = ExprInt((1 << (expr.size - int(int1))) - 1, expr.size) + else: + mask = ExprInt( + ((1 << int(int1)) - 1) ^ ((1 << expr.size) - 1), + expr.size + ) + ret = var & mask + return ret + + # ((A & A.mask) + if op_name == "&" and args[-1] == expr.mask: + return ExprOp('&', *args[:-1]) + + # ((A | A.mask) + if op_name == "|" and args[-1] == expr.mask: + return args[-1] + + # ! 
(!X + int) => X - int + # TODO + + # ((A & mask) >> shift) with mask < 2**shift => 0 + if op_name == ">>" and args[1].is_int() and args[0].is_op("&"): + if (args[0].args[1].is_int() and + 2 ** args[1].arg > args[0].args[1].arg): + return ExprInt(0, args[0].size) + + # parity(int) => int + if op_name == 'parity' and args[0].is_int(): + return ExprInt(parity(int(args[0])), 1) + + # (-a) * b * (-c) * (-d) => (-a) * b * c * d + if op_name == "*" and len(args) > 1: + new_args = [] + counter = 0 + for arg in args: + if arg.is_op('-') and len(arg.args) == 1: + new_args.append(arg.args[0]) + counter += 1 + else: + new_args.append(arg) + if counter % 2: + return -ExprOp(op_name, *new_args) + args = new_args + + # -(a * b * int) => a * b * (-int) + if op_name == "-" and args[0].is_op('*') and args[0].args[-1].is_int(): + args = args[0].args + return ExprOp('*', *(list(args[:-1]) + [ExprInt(-int(args[-1]), expr.size)])) + + + # A << int with A ExprCompose => move index + if (op_name == "<<" and args[0].is_compose() and + args[1].is_int() and int(args[1]) != 0): + final_size = args[0].size + shift = int(args[1]) + new_args = [] + # shift indexes + for index, arg in args[0].iter_args(): + new_args.append((arg, index+shift, index+shift+arg.size)) + # filter out expression + filter_args = [] + min_index = final_size + for tmp, start, stop in new_args: + if start >= final_size: + continue + if stop > final_size: + tmp = tmp[:tmp.size - (stop - final_size)] + filter_args.append(tmp) + min_index = min(start, min_index) + # create entry 0 + assert min_index != 0 + tmp = ExprInt(0, min_index) + args = [tmp] + filter_args + return ExprCompose(*args) + + # A >> int with A ExprCompose => move index + if op_name == ">>" and args[0].is_compose() and args[1].is_int(): + final_size = args[0].size + shift = int(args[1]) + new_args = [] + # shift indexes + for index, arg in args[0].iter_args(): + new_args.append((arg, index-shift, index+arg.size-shift)) + # filter out expression + filter_args 
= [] + max_index = 0 + for tmp, start, stop in new_args: + if stop <= 0: + continue + if start < 0: + tmp = tmp[-start:] + filter_args.append(tmp) + max_index = max(stop, max_index) + # create entry 0 + tmp = ExprInt(0, final_size - max_index) + args = filter_args + [tmp] + return ExprCompose(*args) + + + # Compose(a) OP Compose(b) with a/b same bounds => Compose(a OP b) + if op_name in ['|', '&', '^'] and all([arg.is_compose() for arg in args]): + bounds = set() + for arg in args: + bound = tuple([tmp.size for tmp in arg.args]) + bounds.add(bound) + if len(bounds) == 1: + new_args = [[tmp] for tmp in args[0].args] + for sub_arg in args[1:]: + for i, tmp in enumerate(sub_arg.args): + new_args[i].append(tmp) + args = [] + for i, arg in enumerate(new_args): + args.append(ExprOp(op_name, *arg)) + return ExprCompose(*args) + + return ExprOp(op_name, *args) + + +def simp_cond_op_int(_, expr): + "Extract conditions from operations" + + + # x?a:b + x?c:d + e => x?(a+c+e:b+d+e) + if not expr.op in ["+", "|", "^", "&", "*", '<<', '>>', 'a>>']: + return expr + if len(expr.args) < 2: + return expr + conds = set() + for arg in expr.args: + if arg.is_cond(): + conds.add(arg) + if len(conds) != 1: + return expr + cond = list(conds).pop() + + args1, args2 = [], [] + for arg in expr.args: + if arg.is_cond(): + args1.append(arg.src1) + args2.append(arg.src2) + else: + args1.append(arg) + args2.append(arg) + + return ExprCond(cond.cond, + ExprOp(expr.op, *args1), + ExprOp(expr.op, *args2)) + + +def simp_cond_factor(e_s, expr): + "Merge similar conditions" + if not expr.op in ["+", "|", "^", "&", "*", '<<', '>>', 'a>>']: + return expr + if len(expr.args) < 2: + return expr + + if expr.op in ['>>', '<<', 'a>>']: + assert len(expr.args) == 2 + + # Note: the following code is correct for non-commutative operation only if + # there is 2 arguments. 
Otherwise, the order is not conserved + + # Regroup sub-expression by similar conditions + conds = {} + not_conds = [] + multi_cond = False + for arg in expr.args: + if not arg.is_cond(): + not_conds.append(arg) + continue + cond = arg.cond + if not cond in conds: + conds[cond] = [] + else: + multi_cond = True + conds[cond].append(arg) + if not multi_cond: + return expr + + # Rebuild the new expression + c_out = not_conds + for cond, vals in viewitems(conds): + new_src1 = [x.src1 for x in vals] + new_src2 = [x.src2 for x in vals] + src1 = e_s.expr_simp_wrapper(ExprOp(expr.op, *new_src1)) + src2 = e_s.expr_simp_wrapper(ExprOp(expr.op, *new_src2)) + c_out.append(ExprCond(cond, src1, src2)) + + if len(c_out) == 1: + new_e = c_out[0] + else: + new_e = ExprOp(expr.op, *c_out) + return new_e + + +def simp_slice(e_s, expr): + "Slice optimization" + + # slice(A, 0, a.size) => A + if expr.start == 0 and expr.stop == expr.arg.size: + return expr.arg + # Slice(int) => int + if expr.arg.is_int(): + total_bit = expr.stop - expr.start + mask = (1 << (expr.stop - expr.start)) - 1 + return ExprInt(int((expr.arg.arg >> expr.start) & mask), total_bit) + # Slice(Slice(A, x), y) => Slice(A, z) + if expr.arg.is_slice(): + if expr.stop - expr.start > expr.arg.stop - expr.arg.start: + raise ValueError('slice in slice: getting more val', str(expr)) + + return ExprSlice(expr.arg.arg, expr.start + expr.arg.start, + expr.start + expr.arg.start + (expr.stop - expr.start)) + if expr.arg.is_compose(): + # Slice(Compose(A), x) => Slice(A, y) + for index, arg in expr.arg.iter_args(): + if index <= expr.start and index+arg.size >= expr.stop: + return arg[expr.start - index:expr.stop - index] + # Slice(Compose(A, B, C), x) => Compose(A, B, C) with truncated A/B/C + out = [] + for index, arg in expr.arg.iter_args(): + # arg is before slice start + if expr.start >= index + arg.size: + continue + # arg is after slice stop + elif expr.stop <= index: + continue + # arg is fully included in slice + elif 
expr.start <= index and index + arg.size <= expr.stop: + out.append(arg) + continue + # arg is truncated at start + if expr.start > index: + slice_start = expr.start - index + else: + # arg is not truncated at start + slice_start = 0 + # a is truncated at stop + if expr.stop < index + arg.size: + slice_stop = arg.size + expr.stop - (index + arg.size) - slice_start + else: + slice_stop = arg.size + out.append(arg[slice_start:slice_stop]) + + return ExprCompose(*out) + + # ExprMem(x, size)[:A] => ExprMem(x, a) + # XXXX todo hum, is it safe? + if (expr.arg.is_mem() and + expr.start == 0 and + expr.arg.size > expr.stop and expr.stop % 8 == 0): + return ExprMem(expr.arg.ptr, size=expr.stop) + # distributivity of slice and & + # (a & int)[x:y] => 0 if int[x:y] == 0 + if expr.arg.is_op("&") and expr.arg.args[-1].is_int(): + tmp = e_s.expr_simp_wrapper(expr.arg.args[-1][expr.start:expr.stop]) + if tmp.is_int(0): + return tmp + # distributivity of slice and exprcond + # (a?int1:int2)[x:y] => (a?int1[x:y]:int2[x:y]) + # (a?compose1:compose2)[x:y] => (a?compose1[x:y]:compose2[x:y]) + if (expr.arg.is_cond() and + (expr.arg.src1.is_int() or expr.arg.src1.is_compose()) and + (expr.arg.src2.is_int() or expr.arg.src2.is_compose())): + src1 = expr.arg.src1[expr.start:expr.stop] + src2 = expr.arg.src2[expr.start:expr.stop] + return ExprCond(expr.arg.cond, src1, src2) + + # (a * int)[0:y] => (a[0:y] * int[0:y]) + if expr.start == 0 and expr.arg.is_op("*") and expr.arg.args[-1].is_int(): + args = [e_s.expr_simp_wrapper(a[expr.start:expr.stop]) for a in expr.arg.args] + return ExprOp(expr.arg.op, *args) + + # (a >> int)[x:y] => a[x+int:y+int] with int+y <= a.size + # (a << int)[x:y] => a[x-int:y-int] with x-int >= 0 + if (expr.arg.is_op() and expr.arg.op in [">>", "<<"] and + expr.arg.args[1].is_int()): + arg, shift = expr.arg.args + shift = int(shift) + if expr.arg.op == ">>": + if shift + expr.stop <= arg.size: + return arg[expr.start + shift:expr.stop + shift] + elif expr.arg.op == 
"<<": + if expr.start - shift >= 0: + return arg[expr.start - shift:expr.stop - shift] + else: + raise ValueError('Bad case') + + return expr + + +def simp_compose(e_s, expr): + "Commons simplification on ExprCompose" + args = merge_sliceto_slice(expr) + out = [] + # compose of compose + for arg in args: + if arg.is_compose(): + out += arg.args + else: + out.append(arg) + args = out + # Compose(a) with a.size = compose.size => a + if len(args) == 1 and args[0].size == expr.size: + return args[0] + + # {(X[z:], 0, X.size-z), (0, X.size-z, X.size)} => (X >> z) + if len(args) == 2 and args[1].is_int(0): + if (args[0].is_slice() and + args[0].stop == args[0].arg.size and + args[0].size + args[1].size == args[0].arg.size): + new_expr = args[0].arg >> ExprInt(args[0].start, args[0].arg.size) + return new_expr + + # {@X[base + i] 0 X, @Y[base + i + X] X (X + Y)} => @(X+Y)[base + i] + for i, arg in enumerate(args[:-1]): + nxt = args[i + 1] + if arg.is_mem() and nxt.is_mem(): + gap = e_s(nxt.ptr - arg.ptr) + if gap.is_int() and arg.size % 8 == 0 and int(gap) == arg.size // 8: + args = args[:i] + [ExprMem(arg.ptr, + arg.size + nxt.size)] + args[i + 2:] + return ExprCompose(*args) + + # {a, x?b:d, x?c:e, f} => x?{a, b, c, f}:{a, d, e, f} + conds = set(arg.cond for arg in expr.args if arg.is_cond()) + if len(conds) == 1: + cond = list(conds)[0] + args1, args2 = [], [] + for arg in expr.args: + if arg.is_cond(): + args1.append(arg.src1) + args2.append(arg.src2) + else: + args1.append(arg) + args2.append(arg) + arg1 = e_s(ExprCompose(*args1)) + arg2 = e_s(ExprCompose(*args2)) + return ExprCond(cond, arg1, arg2) + return ExprCompose(*args) + + +def simp_cond(_, expr): + """ + Common simplifications on ExprCond. 
+ Eval exprcond src1/src2 with satifiable/unsatisfiable condition propagation + """ + if (not expr.cond.is_int()) and expr.cond.size == 1: + src1 = expr.src1.replace_expr({expr.cond: ExprInt(1, 1)}) + src2 = expr.src2.replace_expr({expr.cond: ExprInt(0, 1)}) + if src1 != expr.src1 or src2 != expr.src2: + return ExprCond(expr.cond, src1, src2) + + # -A ? B:C => A ? B:C + if expr.cond.is_op('-') and len(expr.cond.args) == 1: + expr = ExprCond(expr.cond.args[0], expr.src1, expr.src2) + # a?x:x + elif expr.src1 == expr.src2: + expr = expr.src1 + # int ? A:B => A or B + elif expr.cond.is_int(): + if expr.cond.arg == 0: + expr = expr.src2 + else: + expr = expr.src1 + # a?(a?b:c):x => a?b:x + elif expr.src1.is_cond() and expr.cond == expr.src1.cond: + expr = ExprCond(expr.cond, expr.src1.src1, expr.src2) + # a?x:(a?b:c) => a?x:c + elif expr.src2.is_cond() and expr.cond == expr.src2.cond: + expr = ExprCond(expr.cond, expr.src1, expr.src2.src2) + # a|int ? b:c => b with int != 0 + elif (expr.cond.is_op('|') and + expr.cond.args[1].is_int() and + expr.cond.args[1].arg != 0): + return expr.src1 + + # (C?int1:int2)?(A:B) => + elif (expr.cond.is_cond() and + expr.cond.src1.is_int() and + expr.cond.src2.is_int()): + int1 = expr.cond.src1.arg.arg + int2 = expr.cond.src2.arg.arg + if int1 and int2: + expr = expr.src1 + elif int1 == 0 and int2 == 0: + expr = expr.src2 + elif int1 == 0 and int2: + expr = ExprCond(expr.cond.cond, expr.src2, expr.src1) + elif int1 and int2 == 0: + expr = ExprCond(expr.cond.cond, expr.src1, expr.src2) + + elif expr.cond.is_compose(): + # {0, X, 0}?(A:B) => X?(A:B) + args = [arg for arg in expr.cond.args if not arg.is_int(0)] + if len(args) == 1: + arg = args.pop() + return ExprCond(arg, expr.src1, expr.src2) + elif len(args) < len(expr.cond.args): + return ExprCond(ExprCompose(*args), expr.src1, expr.src2) + return expr + + +def simp_mem(_, expr): + """ + Common simplifications on ExprMem: + @32[x?a:b] => x?@32[a]:@32[b] + """ + if expr.ptr.is_cond(): 
+ cond = expr.ptr + ret = ExprCond(cond.cond, + ExprMem(cond.src1, expr.size), + ExprMem(cond.src2, expr.size)) + return ret + return expr + + + + +def test_cc_eq_args(expr, *sons_op): + """ + Return True if expression's arguments match the list in sons_op, and their + sub arguments are identical. Ex: + CC_S<=( + FLAG_SIGN_SUB(A, B), + FLAG_SUB_OF(A, B), + FLAG_EQ_CMP(A, B) + ) + """ + if not expr.is_op(): + return False + if len(expr.args) != len(sons_op): + return False + all_args = set() + for i, arg in enumerate(expr.args): + if not arg.is_op(sons_op[i]): + return False + all_args.add(arg.args) + return len(all_args) == 1 + + +def simp_cc_conds(_, expr): + """ + High level simplifications. Example: + CC_U<(FLAG_SUB_CF(A, B) => A =") and + test_cc_eq_args( + expr, + "FLAG_SUB_CF" + )): + expr = ExprCond( + ExprOp(TOK_INF_UNSIGNED, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size)) + + elif (expr.is_op("CC_U<") and + test_cc_eq_args( + expr, + "FLAG_SUB_CF" + )): + expr = ExprOp(TOK_INF_UNSIGNED, *expr.args[0].args) + + elif (expr.is_op("CC_NEG") and + test_cc_eq_args( + expr, + "FLAG_SIGN_SUB" + )): + expr = ExprOp(TOK_INF_SIGNED, *expr.args[0].args) + + elif (expr.is_op("CC_POS") and + test_cc_eq_args( + expr, + "FLAG_SIGN_SUB" + )): + expr = ExprCond( + ExprOp(TOK_INF_SIGNED, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + elif (expr.is_op("CC_EQ") and + test_cc_eq_args( + expr, + "FLAG_EQ" + )): + arg = expr.args[0].args[0] + expr = ExprOp(TOK_EQUAL, arg, ExprInt(0, arg.size)) + + elif (expr.is_op("CC_NE") and + test_cc_eq_args( + expr, + "FLAG_EQ" + )): + arg = expr.args[0].args[0] + expr = ExprCond( + ExprOp(TOK_EQUAL,arg, ExprInt(0, arg.size)), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + elif (expr.is_op("CC_NE") and + test_cc_eq_args( + expr, + "FLAG_EQ_CMP" + )): + expr = ExprCond( + ExprOp(TOK_EQUAL, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + elif 
(expr.is_op("CC_EQ") and + test_cc_eq_args( + expr, + "FLAG_EQ_CMP" + )): + expr = ExprOp(TOK_EQUAL, *expr.args[0].args) + + elif (expr.is_op("CC_NE") and + test_cc_eq_args( + expr, + "FLAG_EQ_AND" + )): + expr = ExprOp("&", *expr.args[0].args) + + elif (expr.is_op("CC_EQ") and + test_cc_eq_args( + expr, + "FLAG_EQ_AND" + )): + expr = ExprCond( + ExprOp("&", *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + elif (expr.is_op("CC_S>") and + test_cc_eq_args( + expr, + "FLAG_SIGN_SUB", + "FLAG_SUB_OF", + "FLAG_EQ_CMP", + )): + expr = ExprCond( + ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + elif (expr.is_op("CC_S>") and + len(expr.args) == 3 and + expr.args[0].is_op("FLAG_SIGN_SUB") and + expr.args[2].is_op("FLAG_EQ_CMP") and + expr.args[0].args == expr.args[2].args and + expr.args[1].is_int(0)): + expr = ExprCond( + ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + + + elif (expr.is_op("CC_S>=") and + test_cc_eq_args( + expr, + "FLAG_SIGN_SUB", + "FLAG_SUB_OF" + )): + expr = ExprCond( + ExprOp(TOK_INF_SIGNED, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + elif (expr.is_op("CC_S<") and + test_cc_eq_args( + expr, + "FLAG_SIGN_SUB", + "FLAG_SUB_OF" + )): + expr = ExprOp(TOK_INF_SIGNED, *expr.args[0].args) + + elif (expr.is_op("CC_S<=") and + test_cc_eq_args( + expr, + "FLAG_SIGN_SUB", + "FLAG_SUB_OF", + "FLAG_EQ_CMP", + )): + expr = ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args) + + elif (expr.is_op("CC_S<=") and + len(expr.args) == 3 and + expr.args[0].is_op("FLAG_SIGN_SUB") and + expr.args[2].is_op("FLAG_EQ_CMP") and + expr.args[0].args == expr.args[2].args and + expr.args[1].is_int(0)): + expr = ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args) + + elif (expr.is_op("CC_U<=") and + test_cc_eq_args( + expr, + "FLAG_SUB_CF", + "FLAG_EQ_CMP", + )): + expr = ExprOp(TOK_INF_EQUAL_UNSIGNED, 
*expr.args[0].args) + + elif (expr.is_op("CC_U>") and + test_cc_eq_args( + expr, + "FLAG_SUB_CF", + "FLAG_EQ_CMP", + )): + expr = ExprCond( + ExprOp(TOK_INF_EQUAL_UNSIGNED, *expr.args[0].args), + ExprInt(0, expr.size), + ExprInt(1, expr.size) + ) + + elif (expr.is_op("CC_S<") and + test_cc_eq_args( + expr, + "FLAG_SIGN_ADD", + "FLAG_ADD_OF" + )): + arg0, arg1 = expr.args[0].args + expr = ExprOp(TOK_INF_SIGNED, arg0, -arg1) + + return expr + + + +def simp_cond_flag(_, expr): + """FLAG_EQ_CMP(X, Y)?A:B => (X == Y)?A:B""" + cond = expr.cond + if cond.is_op("FLAG_EQ_CMP"): + return ExprCond(ExprOp(TOK_EQUAL, *cond.args), expr.src1, expr.src2) + return expr + + +def simp_cmp_int(expr_simp, expr): + """ + ({X, 0} == int) => X == int[:] + X + int1 == int2 => X == int2-int1 + X ^ int1 == int2 => X == int1^int2 + """ + if (expr.is_op(TOK_EQUAL) and + expr.args[1].is_int() and + expr.args[0].is_compose() and + len(expr.args[0].args) == 2 and + expr.args[0].args[1].is_int(0)): + # ({X, 0} == int) => X == int[:] + src = expr.args[0].args[0] + int_val = int(expr.args[1]) + new_int = ExprInt(int_val, src.size) + expr = expr_simp( + ExprOp(TOK_EQUAL, src, new_int) + ) + elif not expr.is_op(TOK_EQUAL): + return expr + assert len(expr.args) == 2 + + left, right = expr.args + if left.is_int() and not right.is_int(): + left, right = right, left + if not right.is_int(): + return expr + if not (left.is_op() and left.op in ['+', '^']): + return expr + if not left.args[-1].is_int(): + return expr + # X + int1 == int2 => X == int2-int1 + # WARNING: + # X - 0x10 <=u 0x20 gives X in [0x10 0x30] + # which is not equivalet to A <=u 0x10 + + left_orig = left + left, last_int = left.args[:-1], left.args[-1] + + if len(left) == 1: + left = left[0] + else: + left = ExprOp(left.op, *left) + + if left_orig.op == "+": + new_int = expr_simp(right - last_int) + elif left_orig.op == '^': + new_int = expr_simp(right ^ last_int) + else: + raise RuntimeError("Unsupported operator") + + expr = expr_simp( + 
ExprOp(TOK_EQUAL, left, new_int), + ) + return expr + + + +def simp_cmp_int_arg(_, expr): + """ + (0x10 <= R0) ? A:B + => + (R0 < 0x10) ? B:A + """ + cond = expr.cond + if not cond.is_op(): + return expr + op = cond.op + if op not in [ + TOK_EQUAL, + TOK_INF_SIGNED, + TOK_INF_EQUAL_SIGNED, + TOK_INF_UNSIGNED, + TOK_INF_EQUAL_UNSIGNED + ]: + return expr + arg1, arg2 = cond.args + if arg2.is_int(): + return expr + if not arg1.is_int(): + return expr + src1, src2 = expr.src1, expr.src2 + if op == TOK_EQUAL: + return ExprCond(ExprOp(TOK_EQUAL, arg2, arg1), src1, src2) + + arg1, arg2 = arg2, arg1 + src1, src2 = src2, src1 + if op == TOK_INF_SIGNED: + op = TOK_INF_EQUAL_SIGNED + elif op == TOK_INF_EQUAL_SIGNED: + op = TOK_INF_SIGNED + elif op == TOK_INF_UNSIGNED: + op = TOK_INF_EQUAL_UNSIGNED + elif op == TOK_INF_EQUAL_UNSIGNED: + op = TOK_INF_UNSIGNED + return ExprCond(ExprOp(op, arg1, arg2), src1, src2) + + +def simp_subwc_cf(_, expr): + """SUBWC_CF(A, B, SUB_CF(C, D)) => SUB_CF({A, C}, {B, D})""" + if not expr.is_op('FLAG_SUBWC_CF'): + return expr + op3 = expr.args[2] + if not op3.is_op("FLAG_SUB_CF"): + return expr + + op1 = ExprCompose(expr.args[0], op3.args[0]) + op2 = ExprCompose(expr.args[1], op3.args[1]) + + return ExprOp("FLAG_SUB_CF", op1, op2) + + +def simp_subwc_of(_, expr): + """SUBWC_OF(A, B, SUB_CF(C, D)) => SUB_OF({A, C}, {B, D})""" + if not expr.is_op('FLAG_SUBWC_OF'): + return expr + op3 = expr.args[2] + if not op3.is_op("FLAG_SUB_CF"): + return expr + + op1 = ExprCompose(expr.args[0], op3.args[0]) + op2 = ExprCompose(expr.args[1], op3.args[1]) + + return ExprOp("FLAG_SUB_OF", op1, op2) + + +def simp_sign_subwc_cf(_, expr): + """SIGN_SUBWC(A, B, SUB_CF(C, D)) => SIGN_SUB({A, C}, {B, D})""" + if not expr.is_op('FLAG_SIGN_SUBWC'): + return expr + op3 = expr.args[2] + if not op3.is_op("FLAG_SUB_CF"): + return expr + + op1 = ExprCompose(expr.args[0], op3.args[0]) + op2 = ExprCompose(expr.args[1], op3.args[1]) + + return ExprOp("FLAG_SIGN_SUB", op1, op2) + 
+def simp_double_zeroext(_, expr): + """A.zeroExt(X).zeroExt(Y) => A.zeroExt(Y)""" + if not (expr.is_op() and expr.op.startswith("zeroExt")): + return expr + arg1 = expr.args[0] + if not (arg1.is_op() and arg1.op.startswith("zeroExt")): + return expr + arg2 = arg1.args[0] + return ExprOp(expr.op, arg2) + +def simp_double_signext(_, expr): + """A.signExt(X).signExt(Y) => A.signExt(Y)""" + if not (expr.is_op() and expr.op.startswith("signExt")): + return expr + arg1 = expr.args[0] + if not (arg1.is_op() and arg1.op.startswith("signExt")): + return expr + arg2 = arg1.args[0] + return ExprOp(expr.op, arg2) + +def simp_zeroext_eq_cst(_, expr): + """A.zeroExt(X) == int => A == int[:A.size]""" + if not expr.is_op(TOK_EQUAL): + return expr + arg1, arg2 = expr.args + if not arg2.is_int(): + return expr + if not (arg1.is_op() and arg1.op.startswith("zeroExt")): + return expr + src = arg1.args[0] + if int(arg2) > (1 << src.size): + # Always false + return ExprInt(0, expr.size) + return ExprOp(TOK_EQUAL, src, ExprInt(int(arg2), src.size)) + +def simp_cond_zeroext(_, expr): + """ + X.zeroExt()?(A:B) => X ? A:B + X.signExt()?(A:B) => X ? 
A:B + """ + if not ( + expr.cond.is_op() and + ( + expr.cond.op.startswith("zeroExt") or + expr.cond.op.startswith("signExt") + ) + ): + return expr + + ret = ExprCond(expr.cond.args[0], expr.src1, expr.src2) + return ret + +def simp_ext_eq_ext(_, expr): + """ + A.zeroExt(X) == B.zeroExt(X) => A == B + A.signExt(X) == B.signExt(X) => A == B + """ + if not expr.is_op(TOK_EQUAL): + return expr + arg1, arg2 = expr.args + if (not ((arg1.is_op() and arg1.op.startswith("zeroExt") and + arg2.is_op() and arg2.op.startswith("zeroExt")) or + (arg1.is_op() and arg1.op.startswith("signExt") and + arg2.is_op() and arg2.op.startswith("signExt")))): + return expr + if arg1.args[0].size != arg2.args[0].size: + return expr + return ExprOp(TOK_EQUAL, arg1.args[0], arg2.args[0]) + +def simp_cond_eq_zero(_, expr): + """(X == 0)?(A:B) => X?(B:A)""" + cond = expr.cond + if not cond.is_op(TOK_EQUAL): + return expr + arg1, arg2 = cond.args + if not arg2.is_int(0): + return expr + new_expr = ExprCond(arg1, expr.src2, expr.src1) + return new_expr + +def simp_sign_inf_zeroext(expr_s, expr): + """ + /!\ Ensure before: X.zeroExt(X.size) => X + + X.zeroExt() 0 + X.zeroExt() <=s 0 => X == 0 + + X.zeroExt() X.zeroExt() X.zeroExt() <=u cst (cst positive) + + X.zeroExt() 0 (cst negative) + X.zeroExt() <=s cst => 0 (cst negative) + + """ + if not (expr.is_op(TOK_INF_SIGNED) or expr.is_op(TOK_INF_EQUAL_SIGNED)): + return expr + arg1, arg2 = expr.args + if not arg2.is_int(): + return expr + if not (arg1.is_op() and arg1.op.startswith("zeroExt")): + return expr + src = arg1.args[0] + assert src.size < arg1.size + + # If cst is zero + if arg2.is_int(0): + if expr.is_op(TOK_INF_SIGNED): + # X.zeroExt() 0 + return ExprInt(0, expr.size) + else: + # X.zeroExt() <=s 0 => X == 0 + return ExprOp(TOK_EQUAL, src, ExprInt(0, src.size)) + + # cst is not zero + cst = int(arg2) + if cst & (1 << (arg2.size - 1)): + # cst is negative + return ExprInt(0, expr.size) + # cst is positive + if expr.is_op(TOK_INF_SIGNED): + 
# X.zeroExt() X.zeroExt() X.zeroExt() <=u cst (cst positive) + return ExprOp(TOK_INF_EQUAL_UNSIGNED, src, expr_s(arg2[:src.size])) + + +def simp_zeroext_and_cst_eq_cst(expr_s, expr): + """ + A.zeroExt(X) & ... & int == int => A & ... & int[:A.size] == int[:A.size] + """ + if not expr.is_op(TOK_EQUAL): + return expr + arg1, arg2 = expr.args + if not arg2.is_int(): + return expr + if not arg1.is_op('&'): + return expr + is_ok = True + sizes = set() + for arg in arg1.args: + if arg.is_int(): + continue + if (arg.is_op() and + arg.op.startswith("zeroExt")): + sizes.add(arg.args[0].size) + continue + is_ok = False + break + if not is_ok: + return expr + if len(sizes) != 1: + return expr + size = list(sizes)[0] + if int(arg2) > ((1 << size) - 1): + return expr + args = [expr_s(arg[:size]) for arg in arg1.args] + left = ExprOp('&', *args) + right = expr_s(arg2[:size]) + ret = ExprOp(TOK_EQUAL, left, right) + return ret + + +def test_one_bit_set(arg): + """ + Return True if arg has form 1 << X + """ + return arg != 0 and ((arg & (arg - 1)) == 0) + +def simp_x_and_cst_eq_cst(_, expr): + """ + (x & ... & onebitmask == onebitmask) ? A:B => (x & ... & onebitmask) ? 
A:B + """ + cond = expr.cond + if not cond.is_op(TOK_EQUAL): + return expr + arg1, mask2 = cond.args + if not mask2.is_int(): + return expr + if not test_one_bit_set(int(mask2)): + return expr + if not arg1.is_op('&'): + return expr + mask1 = arg1.args[-1] + if mask1 != mask2: + return expr + cond = ExprOp('&', *arg1.args) + return ExprCond(cond, expr.src1, expr.src2) + +def simp_cmp_int_int(_, expr): + """ + IntA int + IntA int + IntA <=s IntB => int + IntA <=u IntB => int + IntA == IntB => int + """ + if expr.op not in [ + TOK_EQUAL, + TOK_INF_SIGNED, TOK_INF_UNSIGNED, + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, + ]: + return expr + if not all(arg.is_int() for arg in expr.args): + return expr + int_a, int_b = expr.args + if expr.is_op(TOK_EQUAL): + if int_a == int_b: + return ExprInt(1, 1) + return ExprInt(0, expr.size) + + if expr.op in [TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED]: + int_a = int(mod_size2int[int_a.size](int(int_a))) + int_b = int(mod_size2int[int_b.size](int(int_b))) + else: + int_a = int(mod_size2uint[int_a.size](int(int_a))) + int_b = int(mod_size2uint[int_b.size](int(int_b))) + + if expr.op in [TOK_INF_SIGNED, TOK_INF_UNSIGNED]: + ret = int_a < int_b + else: + ret = int_a <= int_b + + if ret: + ret = 1 + else: + ret = 0 + return ExprInt(ret, 1) + + +def simp_ext_cst(_, expr): + """ + Int.zeroExt(X) => Int + Int.signExt(X) => Int + """ + if not (expr.op.startswith("zeroExt") or expr.op.startswith("signExt")): + return expr + arg = expr.args[0] + if not arg.is_int(): + return expr + if expr.op.startswith("zeroExt"): + ret = int(arg) + else: + ret = int(mod_size2int[arg.size](int(arg))) + ret = ExprInt(ret, expr.size) + return ret + + +def simp_slice_of_ext(_, expr): + """ + C.zeroExt(X)[A:B] => 0 if A >= size(C) + C.zeroExt(X)[A:B] => C[A:B] if B <= size(C) + A.zeroExt(X)[0:Y] => A.zeroExt(Y) + """ + if not expr.arg.is_op(): + return expr + if not expr.arg.op.startswith("zeroExt"): + return expr + arg = expr.arg.args[0] + + if expr.start >= 
arg.size: + # C.zeroExt(X)[A:B] => 0 if A >= size(C) + return ExprInt(0, expr.size) + if expr.stop <= arg.size: + # C.zeroExt(X)[A:B] => C[A:B] if B <= size(C) + return arg[expr.start:expr.stop] + if expr.start == 0: + # A.zeroExt(X)[0:Y] => A.zeroExt(Y) + return arg.zeroExtend(expr.stop) + return expr + +def simp_slice_of_op_ext(expr_s, expr): + """ + (X.zeroExt() + {Z, } + ... + Int)[0:8] => X + ... + int[:] + (X.zeroExt() | ... | Int)[0:8] => X | ... | int[:] + ... + """ + if expr.start != 0: + return expr + src = expr.arg + if not src.is_op(): + return expr + if src.op not in ['+', '|', '^', '&']: + return expr + is_ok = True + for arg in src.args: + if arg.is_int(): + continue + if (arg.is_op() and + arg.op.startswith("zeroExt") and + arg.args[0].size == expr.stop): + continue + if arg.is_compose(): + continue + is_ok = False + break + if not is_ok: + return expr + args = [expr_s(arg[:expr.stop]) for arg in src.args] + return ExprOp(src.op, *args) + + +def simp_cond_logic_ext(expr_s, expr): + """(X.zeroExt() + ... + Int) ? A:B => X + ... + int[:] ? A:B""" + cond = expr.cond + if not cond.is_op(): + return expr + if cond.op not in ["&", "^", "|"]: + return expr + is_ok = True + sizes = set() + for arg in cond.args: + if arg.is_int(): + continue + if (arg.is_op() and + arg.op.startswith("zeroExt")): + sizes.add(arg.args[0].size) + continue + is_ok = False + break + if not is_ok: + return expr + if len(sizes) != 1: + return expr + size = list(sizes)[0] + args = [expr_s(arg[:size]) for arg in cond.args] + cond = ExprOp(cond.op, *args) + return ExprCond(cond, expr.src1, expr.src2) + + +def simp_cond_sign_bit(_, expr): + """(a & .. & 0x80000000) ? A:B => (a & ...) 
(a == b)?Y:X + (a^b)?X:Y => (a == b)?Y:X + """ + cond = expr.cond + if not cond.is_op(): + return expr + if cond.op not in ['+', '^']: + return expr + if len(cond.args) != 2: + return expr + arg1, arg2 = cond.args + if cond.is_op('+'): + new_cond = ExprOp('==', arg1, expr_s(-arg2)) + elif cond.is_op('^'): + new_cond = ExprOp('==', arg1, arg2) + else: + raise ValueError('Bad case') + return ExprCond(new_cond, expr.src2, expr.src1) + + +def simp_cond_eq_1_0(expr_s, expr): + """ + (a == b)?ExprInt(1, 1):ExprInt(0, 1) => a == b + (a a == b + ... + """ + cond = expr.cond + if not cond.is_op(): + return expr + if cond.op not in [ + TOK_EQUAL, + TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED, + TOK_INF_UNSIGNED, TOK_INF_EQUAL_UNSIGNED + ]: + return expr + if expr.src1 != ExprInt(1, 1) or expr.src2 != ExprInt(0, 1): + return expr + return cond + + +def simp_cond_inf_eq_unsigned_zero(expr_s, expr): + """ + (a <=u 0) => a == 0 + """ + if not expr.is_op(TOK_INF_EQUAL_UNSIGNED): + return expr + if not expr.args[1].is_int(0): + return expr + return ExprOp(TOK_EQUAL, expr.args[0], expr.args[1]) + + +def simp_test_signext_inf(expr_s, expr): + """A.signExt() A = (1 << (base.size - 1)) or + tmp < -(1 << (base.size - 1)) ): + return ExprInt(1, 1) + return expr + + +def simp_test_zeroext_inf(expr_s, expr): + """A.zeroExt() A = (1 << base.size): + return ExprInt(1, 1) + return expr + + +def simp_add_multiple(_, expr): + """ + X + X => 2 * X + X + X * int1 => X * (1 + int1) + X * int1 + (- X) => X * (int1 - 1) + X + (X << int1) => X * (1 + 2 ** int1) + Correct even if addition overflow/underflow + """ + if not expr.is_op('+'): + return expr + + # Extract each argument and its counter + operands = {} + for arg in expr.args: + if arg.is_op('*') and arg.args[1].is_int(): + base_expr, factor = arg.args + operands[base_expr] = operands.get(base_expr, 0) + int(factor) + elif arg.is_op('<<') and arg.args[1].is_int(): + base_expr, factor = arg.args + operands[base_expr] = operands.get(base_expr, 0) + 2 
** int(factor) + elif arg.is_op("-"): + arg = arg.args[0] + if arg.is_op('<<') and arg.args[1].is_int(): + base_expr, factor = arg.args + operands[base_expr] = operands.get(base_expr, 0) - (2 ** int(factor)) + else: + operands[arg] = operands.get(arg, 0) - 1 + else: + operands[arg] = operands.get(arg, 0) + 1 + out = [] + + # Best effort to factor common args: + # (a + b) * 3 + a + b => (a + b) * 4 + # Does not factor: + # (a + b) * 3 + 2 * a + b => (a + b) * 4 + a + modified = True + while modified: + modified = False + for arg, count in list(viewitems(operands)): + if not arg.is_op('+'): + continue + components = arg.args + if not all(component in operands for component in components): + continue + counters = set(operands[component] for component in components) + if len(counters) != 1: + continue + counter = counters.pop() + for component in components: + del operands[component] + operands[arg] += counter + modified = True + break + + for arg, count in viewitems(operands): + if count == 0: + continue + if count == 1: + out.append(arg) + continue + out.append(arg * ExprInt(count, expr.size)) + + if len(out) == len(expr.args): + # No reductions + return expr + if not out: + return ExprInt(0, expr.size) + if len(out) == 1: + return out[0] + return ExprOp('+', *out) diff --git a/miasm/expression/simplifications_cond.py b/miasm/expression/simplifications_cond.py new file mode 100644 index 00000000..6167cb4d --- /dev/null +++ b/miasm/expression/simplifications_cond.py @@ -0,0 +1,178 @@ +################################################################################ +# +# By choice, Miasm2 does not handle comparison as a single operation, but with +# operations corresponding to comparison computation. 
+# One may want to detect those comparison; this library is designed to add them +# in Miasm2 engine thanks to : +# - Conditions computation in ExprOp +# - Simplifications to catch known condition forms +# +# Conditions currently supported : +# ="): + op_cf, = args + return ~op_cf + + elif expr.is_op("CC_S<"): + op_nf, op_of = args + return op_nf ^ op_of + + elif expr.is_op("CC_S>"): + op_nf, op_of, op_zf = args + return ~(op_zf | (op_nf ^ op_of)) + + elif expr.is_op("CC_S<="): + op_nf, op_of, op_zf = args + return op_zf | (op_nf ^ op_of) + + elif expr.is_op("CC_S>="): + op_nf, op_of = args + return ~(op_nf ^ op_of) + + elif expr.is_op("CC_U>"): + op_cf, op_zf = args + return ~(op_cf | op_zf) + + elif expr.is_op("CC_U<"): + op_cf, = args + return op_cf + + elif expr.is_op("CC_NEG"): + op_nf, = args + return op_nf + + elif expr.is_op("CC_EQ"): + op_zf, = args + return op_zf + + elif expr.is_op("CC_NE"): + op_zf, = args + return ~op_zf + + elif expr.is_op("CC_POS"): + op_nf, = args + return ~op_nf + + return expr + diff --git a/miasm/expression/smt2_helper.py b/miasm/expression/smt2_helper.py new file mode 100644 index 00000000..53d323e8 --- /dev/null +++ b/miasm/expression/smt2_helper.py @@ -0,0 +1,296 @@ +# Helper functions for the generation of SMT2 expressions +# The SMT2 expressions will be returned as a string. +# The expressions are divided as follows +# +# - generic SMT2 operations +# - definitions of SMT2 structures +# - bit vector operations +# - array operations + +# generic SMT2 operations + +def smt2_eq(a, b): + """ + Assignment: a = b + """ + return "(= {} {})".format(a, b) + + +def smt2_implies(a, b): + """ + Implication: a => b + """ + return "(=> {} {})".format(a, b) + + +def smt2_and(*args): + """ + Conjunction: a and b and c ... + """ + # transform args into strings + args = [str(arg) for arg in args] + return "(and {})".format(' '.join(args)) + + +def smt2_or(*args): + """ + Disjunction: a or b or c ... 
+ """ + # transform args into strings + args = [str(arg) for arg in args] + return "(or {})".format(' '.join(args)) + + +def smt2_ite(cond, a, b): + """ + If-then-else: cond ? a : b + """ + return "(ite {} {} {})".format(cond, a, b) + + +def smt2_distinct(*args): + """ + Distinction: a != b != c != ... + """ + # transform args into strings + args = [str(arg) for arg in args] + return "(distinct {})".format(' '.join(args)) + + +def smt2_assert(expr): + """ + Assertion that @expr holds + """ + return "(assert {})".format(expr) + + +# definitions + +def declare_bv(bv, size): + """ + Declares an bit vector @bv of size @size + """ + return "(declare-fun {} () {})".format(bv, bit_vec(size)) + + +def declare_array(a, bv1, bv2): + """ + Declares an SMT2 array represented as a map + from a bit vector to another bit vector. + :param a: array name + :param bv1: SMT2 bit vector + :param bv2: SMT2 bit vector + """ + return "(declare-fun {} () (Array {} {}))".format(a, bv1, bv2) + + +def bit_vec_val(v, size): + """ + Declares a bit vector value + :param v: int, value of the bit vector + :param size: size of the bit vector + """ + return "(_ bv{} {})".format(v, size) + + +def bit_vec(size): + """ + Returns a bit vector of size @size + """ + return "(_ BitVec {})".format(size) + + +# bit vector operations + +def bvadd(a, b): + """ + Addition: a + b + """ + return "(bvadd {} {})".format(a, b) + + +def bvsub(a, b): + """ + Subtraction: a - b + """ + return "(bvsub {} {})".format(a, b) + + +def bvmul(a, b): + """ + Multiplication: a * b + """ + return "(bvmul {} {})".format(a, b) + + +def bvand(a, b): + """ + Bitwise AND: a & b + """ + return "(bvand {} {})".format(a, b) + + +def bvor(a, b): + """ + Bitwise OR: a | b + """ + return "(bvor {} {})".format(a, b) + + +def bvxor(a, b): + """ + Bitwise XOR: a ^ b + """ + return "(bvxor {} {})".format(a, b) + + +def bvneg(bv): + """ + Unary minus: - bv + """ + return "(bvneg {})".format(bv) + + +def bvsdiv(a, b): + """ + Signed division: a 
/ b + """ + return "(bvsdiv {} {})".format(a, b) + + +def bvudiv(a, b): + """ + Unsigned division: a / b + """ + return "(bvudiv {} {})".format(a, b) + + +def bvsmod(a, b): + """ + Signed modulo: a mod b + """ + return "(bvsmod {} {})".format(a, b) + + +def bvurem(a, b): + """ + Unsigned modulo: a mod b + """ + return "(bvurem {} {})".format(a, b) + + +def bvshl(a, b): + """ + Shift left: a << b + """ + return "(bvshl {} {})".format(a, b) + + +def bvlshr(a, b): + """ + Logical shift right: a >> b + """ + return "(bvlshr {} {})".format(a, b) + + +def bvashr(a, b): + """ + Arithmetic shift right: a a>> b + """ + return "(bvashr {} {})".format(a, b) + + +def bv_rotate_left(a, b, size): + """ + Rotates bits of a to the left b times: a <<< b + + Since ((_ rotate_left b) a) does not support + symbolic values for b, the implementation is + based on a C implementation. + + Therefore, the rotation will be computed as + a << (b & (size - 1))) | (a >> (size - (b & (size - 1)))) + + :param a: bit vector + :param b: bit vector + :param size: size of a + """ + + # define constant + s = bit_vec_val(size, size) + + # shift = b & (size - 1) + shift = bvand(b, bvsub(s, bit_vec_val(1, size))) + + # (a << shift) | (a >> size - shift) + rotate = bvor(bvshl(a, shift), + bvlshr(a, bvsub(s, shift))) + + return rotate + + +def bv_rotate_right(a, b, size): + """ + Rotates bits of a to the right b times: a >>> b + + Since ((_ rotate_right b) a) does not support + symbolic values for b, the implementation is + based on a C implementation. 
+ + Therefore, the rotation will be computed as + a >> (b & (size - 1))) | (a << (size - (b & (size - 1)))) + + :param a: bit vector + :param b: bit vector + :param size: size of a + """ + + # define constant + s = bit_vec_val(size, size) + + # shift = b & (size - 1) + shift = bvand(b, bvsub(s, bit_vec_val(1, size))) + + # (a >> shift) | (a << size - shift) + rotate = bvor(bvlshr(a, shift), + bvshl(a, bvsub(s, shift))) + + return rotate + + +def bv_extract(high, low, bv): + """ + Extracts bits from a bit vector + :param high: end bit + :param low: start bit + :param bv: bit vector + """ + return "((_ extract {} {}) {})".format(high, low, bv) + + +def bv_concat(a, b): + """ + Concatenation of two SMT2 expressions + """ + return "(concat {} {})".format(a, b) + + +# array operations + +def array_select(array, index): + """ + Reads from an SMT2 array at index @index + :param array: SMT2 array + :param index: SMT2 expression, index of the array + """ + return "(select {} {})".format(array, index) + + +def array_store(array, index, value): + """ + Writes an value into an SMT2 array at address @index + :param array: SMT array + :param index: SMT2 expression, index of the array + :param value: SMT2 expression, value to write + """ + return "(store {} {} {})".format(array, index, value) diff --git a/miasm/ir/__init__.py b/miasm/ir/__init__.py new file mode 100644 index 00000000..0627b488 --- /dev/null +++ b/miasm/ir/__init__.py @@ -0,0 +1 @@ +"Intermediate representation methods" diff --git a/miasm/ir/analysis.py b/miasm/ir/analysis.py new file mode 100644 index 00000000..67df793e --- /dev/null +++ b/miasm/ir/analysis.py @@ -0,0 +1,113 @@ +#-*- coding:utf-8 -*- + +import warnings +import logging + +from miasm.ir.ir import IntermediateRepresentation, AssignBlock +from miasm.expression.expression import ExprOp, ExprAssign +from miasm.analysis.data_flow import dead_simp as new_dead_simp_imp + + +log = logging.getLogger("analysis") +console_handler = logging.StreamHandler() 
+console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARNING) + + +class ira(IntermediateRepresentation): + """IR Analysis + This class provides higher level manipulations on IR, such as dead + instruction removals. + + This class can be used as a common parent with + `miasm.ir.ir::IntermediateRepresentation` class. + + For instance: + class ira_x86_16(ir_x86_16, ira) + + """ + ret_reg = None + + def call_effects(self, addr, instr): + """Default modelisation of a function call to @addr. This may be used to: + + * insert dependencies to arguments (stack base, registers, ...) + * add some side effects (stack clean, return value, ...) + + Return a couple: + * list of assignments to add to the current irblock + * list of additional irblocks + + @addr: (Expr) address of the called function + @instr: native instruction which is responsible of the call + """ + + call_assignblk = AssignBlock( + [ + ExprAssign(self.ret_reg, ExprOp('call_func_ret', addr, self.sp)), + ExprAssign(self.sp, ExprOp('call_func_stack', addr, self.sp)) + ], + instr + ) + return [call_assignblk], [] + + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + If the instruction is a function call, replace the original IR by a + model of the sub function + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. 
+ + @instr: native instruction + @block: native block source + @assignments: current irbloc + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if instr.is_subcall(): + call_assignblks, extra_irblocks = self.call_effects( + instr.args[0], + instr + ) + assignments += call_assignblks + ir_blocks_all += extra_irblocks + return True + + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False + + def sizeof_char(self): + "Return the size of a char in bits" + raise NotImplementedError("Abstract method") + + def sizeof_short(self): + "Return the size of a short in bits" + raise NotImplementedError("Abstract method") + + def sizeof_int(self): + "Return the size of an int in bits" + raise NotImplementedError("Abstract method") + + def sizeof_long(self): + "Return the size of a long in bits" + raise NotImplementedError("Abstract method") + + def sizeof_pointer(self): + "Return the size of a void* in bits" + raise NotImplementedError("Abstract method") + + def dead_simp(self, ircfg): + """Deprecated: See miasm.analysis.data_flow.dead_simp()""" + warnings.warn('DEPRECATION WARNING: Please use miasm.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') + new_dead_simp_imp(self, ircfg) diff --git a/miasm/ir/ir.py b/miasm/ir/ir.py new file mode 100644 index 00000000..eb9857b1 --- /dev/null +++ b/miasm/ir/ir.py @@ -0,0 +1,929 @@ +#-*- coding:utf-8 -*- + +# +# Copyright (C) 2013 Fabrice Desclaux +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +from builtins import zip +import warnings + +from itertools import chain +from future.utils import viewvalues, viewitems + +import miasm.expression.expression as m2_expr +from miasm.expression.expression_helper import get_missing_interval +from miasm.core.asmblock import AsmBlock, AsmConstraint +from miasm.core.graph import DiGraph +from functools import reduce + + +def _expr_loc_to_symb(expr, loc_db): + if not expr.is_loc(): + return expr + if loc_db is None: + name = str(expr) + else: + names = loc_db.get_location_names(expr.loc_key) + if not names: + name = loc_db.pretty_str(expr.loc_key) + else: + # Use only one name for readability + name = sorted(names)[0] + return m2_expr.ExprId(name, expr.size) + +class AssignBlock(object): + """Represent parallel IR assignment, such as: + EAX = EBX + EBX = EAX + + -> Exchange between EBX and EAX + + AssignBlock can be seen as a dictionary where keys are the destinations + (ExprId or ExprMem), and values their corresponding sources. + + Also provides common manipulation on this assignments. 
+ + """ + __slots__ = ["_assigns", "_instr"] + + def __init__(self, irs=None, instr=None): + """Create a new AssignBlock + @irs: (optional) sequence of ExprAssign, or dictionary dst (Expr) -> src + (Expr) + @instr: (optional) associate an instruction with this AssignBlock + + """ + if irs is None: + irs = [] + self._instr = instr + self._assigns = {} # ExprAssign.dst -> ExprAssign.src + + # Concurrent assignments are handled in _set + if hasattr(irs, "items"): + for dst, src in viewitems(irs): + self._set(dst, src) + else: + for expraff in irs: + self._set(expraff.dst, expraff.src) + + @property + def instr(self): + """Return the associated instruction, if any""" + return self._instr + + def _set(self, dst, src): + """ + Special cases: + * if dst is an ExprSlice, expand it to assign the full Expression + * if dst already known, sources are merged + """ + if dst.size != src.size: + raise RuntimeError( + "sanitycheck: args must have same size! %s" % + ([(str(arg), arg.size) for arg in [dst, src]])) + + if isinstance(dst, m2_expr.ExprSlice): + # Complete the source with missing slice parts + new_dst = dst.arg + rest = [(m2_expr.ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) + for r in dst.slice_rest()] + all_a = [(src, dst.start, dst.stop)] + rest + all_a.sort(key=lambda x: x[1]) + args = [expr for (expr, _, _) in all_a] + new_src = m2_expr.ExprCompose(*args) + else: + new_dst, new_src = dst, src + + if new_dst in self._assigns and isinstance(new_src, m2_expr.ExprCompose): + if not isinstance(self[new_dst], m2_expr.ExprCompose): + # prev_RAX = 0x1122334455667788 + # input_RAX[0:8] = 0x89 + # final_RAX -> ? 
(assignment are in parallel) + raise RuntimeError("Concurrent access on same bit not allowed") + + # Consider slice grouping + expr_list = [(new_dst, new_src), + (new_dst, self[new_dst])] + # Find collision + e_colision = reduce(lambda x, y: x.union(y), + (self.get_modified_slice(dst, src) + for (dst, src) in expr_list), + set()) + + # Sort interval collision + known_intervals = sorted([(x[1], x[2]) for x in e_colision]) + + for i, (_, stop) in enumerate(known_intervals[:-1]): + if stop > known_intervals[i + 1][0]: + raise RuntimeError( + "Concurrent access on same bit not allowed") + + # Fill with missing data + missing_i = get_missing_interval(known_intervals, 0, new_dst.size) + remaining = ((m2_expr.ExprSlice(new_dst, *interval), + interval[0], + interval[1]) + for interval in missing_i) + + # Build the merging expression + args = list(e_colision.union(remaining)) + args.sort(key=lambda x: x[1]) + starts = [start for (_, start, _) in args] + assert len(set(starts)) == len(starts) + args = [expr for (expr, _, _) in args] + new_src = m2_expr.ExprCompose(*args) + + # Sanity check + if not isinstance(new_dst, (m2_expr.ExprId, m2_expr.ExprMem)): + raise TypeError("Destination cannot be a %s" % type(new_dst)) + + self._assigns[new_dst] = new_src + + def __setitem__(self, dst, src): + raise RuntimeError('AssignBlock is immutable') + + def __getitem__(self, key): + return self._assigns[key] + + def __contains__(self, key): + return key in self._assigns + + def iteritems(self): + for dst, src in viewitems(self._assigns): + yield dst, src + + def items(self): + return [(dst, src) for dst, src in viewitems(self._assigns)] + + def itervalues(self): + for src in viewvalues(self._assigns): + yield src + + def keys(self): + return list(self._assigns) + + def values(self): + return list(viewvalues(self._assigns)) + + def __iter__(self): + for dst in self._assigns: + yield dst + + def __delitem__(self, _): + raise RuntimeError('AssignBlock is immutable') + + def update(self, _): 
+ raise RuntimeError('AssignBlock is immutable') + + def __eq__(self, other): + if set(self.keys()) != set(other.keys()): + return False + return all(other[dst] == src for dst, src in viewitems(self)) + + def __ne__(self, other): + return not self == other + + def __len__(self): + return len(self._assigns) + + def get(self, key, default): + return self._assigns.get(key, default) + + @staticmethod + def get_modified_slice(dst, src): + """Return an Expr list of extra expressions needed during the + object instantiation""" + if not isinstance(src, m2_expr.ExprCompose): + raise ValueError("Get mod slice not on expraff slice", str(src)) + modified_s = [] + for index, arg in src.iter_args(): + if not (isinstance(arg, m2_expr.ExprSlice) and + arg.arg == dst and + index == arg.start and + index+arg.size == arg.stop): + # If x is not the initial expression + modified_s.append((arg, index, index+arg.size)) + return modified_s + + def get_w(self): + """Return a set of elements written""" + return set(self.keys()) + + def get_rw(self, mem_read=False, cst_read=False): + """Return a dictionary associating written expressions to a set of + their read requirements + @mem_read: (optional) mem_read argument of `get_r` + @cst_read: (optional) cst_read argument of `get_r` + """ + out = {} + for dst, src in viewitems(self): + src_read = src.get_r(mem_read=mem_read, cst_read=cst_read) + if isinstance(dst, m2_expr.ExprMem) and mem_read: + # Read on destination happens only with ExprMem + src_read.update(dst.ptr.get_r(mem_read=mem_read, + cst_read=cst_read)) + out[dst] = src_read + return out + + def get_r(self, mem_read=False, cst_read=False): + """Return a set of elements reads + @mem_read: (optional) mem_read argument of `get_r` + @cst_read: (optional) cst_read argument of `get_r` + """ + return set( + chain.from_iterable( + viewvalues( + self.get_rw( + mem_read=mem_read, + cst_read=cst_read + ) + ) + ) + ) + + def __str__(self): + out = [] + for dst, src in 
sorted(viewitems(self._assigns)): + out.append("%s = %s" % (dst, src)) + return "\n".join(out) + + def dst2ExprAssign(self, dst): + """Return an ExprAssign corresponding to @dst equation + @dst: Expr instance""" + return m2_expr.ExprAssign(dst, self[dst]) + + def simplify(self, simplifier): + """ + Return a new AssignBlock with expression simplified + + @simplifier: ExpressionSimplifier instance + """ + new_assignblk = {} + for dst, src in viewitems(self): + if dst == src: + continue + new_src = simplifier(src) + new_dst = simplifier(dst) + new_assignblk[new_dst] = new_src + return AssignBlock(irs=new_assignblk, instr=self.instr) + + def to_string(self, loc_db=None): + out = [] + for dst, src in viewitems(self): + new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) + new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) + line = "%s = %s" % (new_dst, new_src) + out.append(line) + out.append("") + return "\n".join(out) + +class IRBlock(object): + """Intermediate representation block object. + + Stand for an intermediate representation basic block. 
+ """ + + __slots__ = ["_loc_key", "_assignblks", "_dst", "_dst_linenb"] + + def __init__(self, loc_key, assignblks): + """ + @loc_key: LocKey of the IR basic block + @assignblks: list of AssignBlock + """ + + assert isinstance(loc_key, m2_expr.LocKey) + self._loc_key = loc_key + for assignblk in assignblks: + assert isinstance(assignblk, AssignBlock) + self._assignblks = tuple(assignblks) + self._dst = None + self._dst_linenb = None + + def __eq__(self, other): + if self.__class__ is not other.__class__: + return False + if self.loc_key != other.loc_key: + return False + if len(self.assignblks) != len(other.assignblks): + return False + for assignblk1, assignblk2 in zip(self.assignblks, other.assignblks): + if assignblk1 != assignblk2: + return False + return True + + def __ne__(self, other): + return not self == other + + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + loc_key = property(lambda self:self._loc_key) + label = property(get_label) + + @property + def assignblks(self): + return self._assignblks + + @property + def irs(self): + warnings.warn('DEPRECATION WARNING: use "irblock.assignblks" instead of "irblock.irs"') + return self._assignblks + + def __iter__(self): + """Iterate on assignblks""" + return self._assignblks.__iter__() + + def __getitem__(self, index): + """Getitem on assignblks""" + return self._assignblks.__getitem__(index) + + def __len__(self): + """Length of assignblks""" + return self._assignblks.__len__() + + def is_dst_set(self): + return self._dst is not None + + def cache_dst(self): + final_dst = None + final_linenb = None + for linenb, assignblk in enumerate(self): + for dst, src in viewitems(assignblk): + if dst.is_id("IRDst"): + if final_dst is not None: + raise ValueError('Multiple destinations!') + final_dst = src + final_linenb = linenb + self._dst = final_dst + self._dst_linenb = final_linenb + return final_dst + + @property + def dst(self): + """Return 
the value of IRDst for the IRBlock""" + if self.is_dst_set(): + return self._dst + return self.cache_dst() + + def set_dst(self, value): + """Generate a new IRBlock with a dst (IRBlock) fixed to @value""" + irs = [] + dst_found = False + for assignblk in self: + new_assignblk = {} + for dst, src in viewitems(assignblk): + if dst.is_id("IRDst"): + assert dst_found is False + dst_found = True + new_assignblk[dst] = value + else: + new_assignblk[dst] = src + irs.append(AssignBlock(new_assignblk, assignblk.instr)) + return IRBlock(self.loc_key, irs) + + @property + def dst_linenb(self): + """Line number of the IRDst setting statement in the current irs""" + if not self.is_dst_set(): + self.cache_dst() + return self._dst_linenb + + def __str__(self): + out = [] + out.append(str(self.loc_key)) + for assignblk in self: + for dst, src in viewitems(assignblk): + out.append('\t%s = %s' % (dst, src)) + out.append("") + return "\n".join(out) + + + def modify_exprs(self, mod_dst=None, mod_src=None): + """ + Generate a new IRBlock with its AssignBlock expressions modified + according to @mod_dst and @mod_src + @mod_dst: function called to modify Expression destination + @mod_src: function called to modify Expression source + """ + + if mod_dst is None: + mod_dst = lambda expr:expr + if mod_src is None: + mod_src = lambda expr:expr + + assignblks = [] + for assignblk in self: + new_assignblk = {} + for dst, src in viewitems(assignblk): + new_assignblk[mod_dst(dst)] = mod_src(src) + assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) + return IRBlock(self.loc_key, assignblks) + + def to_string(self, loc_db=None): + out = [] + if loc_db is None: + node_name = "%s:" % self.loc_key + else: + names = loc_db.get_location_names(self.loc_key) + if not names: + node_name = "%s:" % loc_db.pretty_str(self.loc_key) + else: + node_name = "".join("%s:\n" % name for name in names) + out.append(node_name) + + for assignblk in self: + out.append(assignblk.to_string(loc_db)) + return 
'\n'.join(out) + + + def simplify(self, simplifier): + """ + Simplify expressions in each assignblock + @simplifier: ExpressionSimplifier instance + """ + modified = False + assignblks = [] + for assignblk in self: + new_assignblk = assignblk.simplify(simplifier) + if assignblk != new_assignblk: + modified = True + assignblks.append(new_assignblk) + return modified, IRBlock(self.loc_key, assignblks) + + +class irbloc(IRBlock): + """ + DEPRECATED object + Use IRBlock instead of irbloc + """ + + def __init__(self, loc_key, irs, lines=None): + warnings.warn('DEPRECATION WARNING: use "IRBlock" instead of "irblock"') + super(irbloc, self).__init__(loc_key, irs) + + +class IRCFG(DiGraph): + + """DiGraph for IR instances""" + + def __init__(self, irdst, loc_db, blocks=None, *args, **kwargs): + """Instantiate a IRCFG + @loc_db: LocationDB instance + @blocks: IR blocks + """ + self.loc_db = loc_db + if blocks is None: + blocks = {} + self._blocks = blocks + self._irdst = irdst + super(IRCFG, self).__init__(*args, **kwargs) + + @property + def IRDst(self): + return self._irdst + + @property + def blocks(self): + return self._blocks + + def add_irblock(self, irblock): + """ + Add the @irblock to the current IRCFG + @irblock: IRBlock instance + """ + self.blocks[irblock.loc_key] = irblock + self.add_node(irblock.loc_key) + + for dst in self.dst_trackback(irblock): + if dst.is_int(): + dst_loc_key = self.loc_db.get_or_create_offset_location(int(dst)) + dst = m2_expr.ExprLoc(dst_loc_key, irblock.dst.size) + if dst.is_loc(): + self.add_uniq_edge(irblock.loc_key, dst.loc_key) + + def node2lines(self, node): + if self.loc_db is None: + node_name = str(node) + else: + node_name = self.loc_db.pretty_str(node) + yield self.DotCellDescription( + text="%s" % node_name, + attr={ + 'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey', + } + ) + if node not in self._blocks: + yield [self.DotCellDescription(text="NOT PRESENT", attr={})] + return + for i, assignblk in 
enumerate(self._blocks[node]): + for dst, src in viewitems(assignblk): + + new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) + new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) + line = "%s = %s" % (new_dst, new_src) + if self._dot_offset: + yield [self.DotCellDescription(text="%-4d" % i, attr={}), + self.DotCellDescription(text=line, attr={})] + else: + yield self.DotCellDescription(text=line, attr={}) + yield self.DotCellDescription(text="", attr={}) + + def edge_attr(self, src, dst): + if src not in self._blocks or dst not in self._blocks: + return {} + src_irdst = self._blocks[src].dst + edge_color = "blue" + if isinstance(src_irdst, m2_expr.ExprCond): + src1, src2 = src_irdst.src1, src_irdst.src2 + if src1.is_loc(dst): + edge_color = "limegreen" + elif src2.is_loc(dst): + edge_color = "red" + return {"color": edge_color} + + def node_attr(self, node): + if node not in self._blocks: + return {'style': 'filled', 'fillcolor': 'red'} + return {} + + def dot(self, offset=False): + """ + @offset: (optional) if set, add the corresponding line number in each + node + """ + self._dot_offset = offset + return super(IRCFG, self).dot() + + def get_loc_key(self, addr): + """Transforms an ExprId/ExprInt/loc_key/int into a loc_key + @addr: an ExprId/ExprInt/loc_key/int""" + + if isinstance(addr, m2_expr.LocKey): + return addr + elif isinstance(addr, m2_expr.ExprLoc): + return addr.loc_key + + try: + addr = int(addr) + except (ValueError, TypeError): + return None + + return self.loc_db.get_offset_location(addr) + + + def get_or_create_loc_key(self, addr): + """Transforms an ExprId/ExprInt/loc_key/int into a loc_key + If the offset @addr is not in the LocationDB, create it + @addr: an ExprId/ExprInt/loc_key/int""" + + loc_key = self.get_loc_key(addr) + if loc_key is not None: + return loc_key + + return self.loc_db.add_location(offset=int(addr)) + + def get_block(self, addr): + """Returns the irbloc associated to an 
ExprId/ExprInt/loc_key/int + @addr: an ExprId/ExprInt/loc_key/int""" + + loc_key = self.get_loc_key(addr) + if loc_key is None: + return None + return self.blocks.get(loc_key, None) + + def getby_offset(self, offset): + """ + Return the set of loc_keys of irblocks containing @offset + @offset: address + """ + out = set() + for irb in viewvalues(self.blocks): + for assignblk in irb: + instr = assignblk.instr + if instr is None: + continue + if instr.offset <= offset < instr.offset + instr.l: + out.add(irb.loc_key) + return out + + + def simplify(self, simplifier): + """ + Simplify expressions in each irblocks + @simplifier: ExpressionSimplifier instance + """ + modified = False + for loc_key, block in list(viewitems(self.blocks)): + assignblks = [] + for assignblk in block: + new_assignblk = assignblk.simplify(simplifier) + if assignblk != new_assignblk: + modified = True + assignblks.append(new_assignblk) + self.blocks[loc_key] = IRBlock(loc_key, assignblks) + return modified + + def get_rw(self, regs_ids=None): + """ + Calls get_rw(irb) for each bloc + @regs_ids : ids of registers used in IR + """ + if regs_ids is None: + regs_ids = [] + for irblock in viewvalues(self.blocks): + irblock.get_rw(regs_ids) + + def _extract_dst(self, todo, done): + """ + Naive extraction of @todo destinations + WARNING: @todo and @done are modified + """ + out = set() + while todo: + dst = todo.pop() + if dst.is_loc(): + done.add(dst) + elif dst.is_mem() or dst.is_int(): + done.add(dst) + elif dst.is_cond(): + todo.add(dst.src1) + todo.add(dst.src2) + elif dst.is_id(): + out.add(dst) + else: + done.add(dst) + return out + + def dst_trackback(self, irb): + """ + Naive backtracking of IRDst + @irb: irbloc instance + """ + todo = set([irb.dst]) + done = set() + + for assignblk in reversed(irb): + if not todo: + break + out = self._extract_dst(todo, done) + found = set() + follow = set() + for dst in out: + if dst in assignblk: + follow.add(assignblk[dst]) + found.add(dst) + + 
follow.update(out.difference(found)) + todo = follow + + return done + + +class DiGraphIR(IRCFG): + """ + DEPRECATED object + Use IRCFG instead of DiGraphIR + """ + + def __init__(self, *args, **kwargs): + warnings.warn('DEPRECATION WARNING: use "IRCFG" instead of "DiGraphIR"') + raise NotImplementedError("Depreceated") + + +class IntermediateRepresentation(object): + """ + Intermediate representation object + + Allow native assembly to intermediate representation traduction + """ + + def __init__(self, arch, attrib, loc_db): + self.pc = arch.getpc(attrib) + self.sp = arch.getsp(attrib) + self.arch = arch + self.attrib = attrib + self.loc_db = loc_db + self.IRDst = None + + def get_ir(self, instr): + raise NotImplementedError("Abstract Method") + + def new_ircfg(self, *args, **kwargs): + """ + Return a new instance of IRCFG + """ + return IRCFG(self.IRDst, self.loc_db, *args, **kwargs) + + def new_ircfg_from_asmcfg(self, asmcfg, *args, **kwargs): + """ + Return a new instance of IRCFG from an @asmcfg + @asmcfg: AsmCFG instance + """ + + ircfg = IRCFG(self.IRDst, self.loc_db, *args, **kwargs) + for block in asmcfg.blocks: + self.add_asmblock_to_ircfg(block, ircfg) + return ircfg + + def instr2ir(self, instr): + ir_bloc_cur, extra_irblocks = self.get_ir(instr) + for index, irb in enumerate(extra_irblocks): + irs = [] + for assignblk in irb: + irs.append(AssignBlock(assignblk, instr)) + extra_irblocks[index] = IRBlock(irb.loc_key, irs) + assignblk = AssignBlock(ir_bloc_cur, instr) + return assignblk, extra_irblocks + + def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False): + """ + Add the native instruction @instr to the @ircfg + @instr: instruction instance + @ircfg: IRCFG instance + @loc_key: loc_key instance of the instruction destination + @gen_pc_updt: insert PC update effects between instructions + """ + + if loc_key is None: + offset = getattr(instr, "offset", None) + loc_key = self.loc_db.add_location(offset=offset) + block = 
AsmBlock(loc_key) + block.lines = [instr] + self.add_asmblock_to_ircfg(block, ircfg, gen_pc_updt) + return loc_key + + def gen_pc_update(self, assignments, instr): + offset = m2_expr.ExprInt(instr.offset, self.pc.size) + assignments.append(AssignBlock({self.pc:offset}, instr)) + + def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): + """ + Add the IR effects of an instruction to the current state. + + Returns a bool: + * True if the current assignments list must be split + * False in other cases. + + @instr: native instruction + @block: native block source + @assignments: list of current AssignBlocks + @ir_blocks_all: list of additional effects + @gen_pc_updt: insert PC update effects between instructions + """ + if gen_pc_updt is not False: + self.gen_pc_update(assignments, instr) + + assignblk, ir_blocks_extra = self.instr2ir(instr) + assignments.append(assignblk) + ir_blocks_all += ir_blocks_extra + if ir_blocks_extra: + return True + return False + + def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): + """ + Add a native block to the current IR + @block: native assembly block + @ircfg: IRCFG instance + @gen_pc_updt: insert PC update effects between instructions + """ + + loc_key = block.loc_key + ir_blocks_all = [] + + assignments = [] + for instr in block.lines: + if loc_key is None: + assignments = [] + loc_key = self.get_loc_key_for_instr(instr) + split = self.add_instr_to_current_state( + instr, block, assignments, + ir_blocks_all, gen_pc_updt + ) + if split: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + loc_key = None + assignments = [] + if loc_key is not None: + ir_blocks_all.append(IRBlock(loc_key, assignments)) + + new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) + for irblock in new_ir_blocks_all: + ircfg.add_irblock(irblock) + return new_ir_blocks_all + + def add_block(self, block, gen_pc_updt=False): + """ + DEPRECATED function + Use add_block instead 
of add_block + """ + warnings.warn("""DEPRECATION WARNING + ircfg is now out of IntermediateRepresentation + Use: + ircfg = ir_arch.new_ircfg() + ir_arch.add_asmblock_to_ircfg(block, ircfg) + """) + raise RuntimeError("API Deprecated") + + def add_bloc(self, block, gen_pc_updt=False): + """ + DEPRECATED function + Use add_block instead of add_block + """ + self.add_block(block, gen_pc_updt) + + def get_next_loc_key(self, instr): + loc_key = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) + return loc_key + + def get_loc_key_for_instr(self, instr): + """Returns the loc_key associated to an instruction + @instr: current instruction""" + return self.loc_db.get_or_create_offset_location(instr.offset) + + def gen_loc_key_and_expr(self, size): + """ + Return a loc_key and it's corresponding ExprLoc + @size: size of expression + """ + loc_key = self.loc_db.add_location() + return loc_key, m2_expr.ExprLoc(loc_key, size) + + def expr_fix_regs_for_mode(self, expr, *args, **kwargs): + return expr + + def expraff_fix_regs_for_mode(self, expr, *args, **kwargs): + return expr + + def irbloc_fix_regs_for_mode(self, irblock, *args, **kwargs): + return irblock + + def is_pc_written(self, block): + """Return the first Assignblk of the @blockin which PC is written + @block: IRBlock instance""" + all_pc = viewvalues(self.arch.pc) + for assignblk in block: + if assignblk.dst in all_pc: + return assignblk + return None + + def set_empty_dst_to_next(self, block, ir_blocks): + for index, irblock in enumerate(ir_blocks): + if irblock.dst is not None: + continue + next_loc_key = block.get_next() + if next_loc_key is None: + loc_key = None + if block.lines: + line = block.lines[-1] + if line.offset is not None: + loc_key = self.loc_db.get_or_create_offset_location(line.offset + line.l) + if loc_key is None: + loc_key = self.loc_db.add_location() + block.add_cst(loc_key, AsmConstraint.c_next) + else: + loc_key = next_loc_key + dst = m2_expr.ExprLoc(loc_key, self.pc.size) + 
if irblock.assignblks: + instr = irblock.assignblks[-1].instr + else: + instr = None + assignblk = AssignBlock({self.IRDst: dst}, instr) + ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) + + def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): + self.set_empty_dst_to_next(block, ir_blocks) + + new_irblocks = [] + for irblock in ir_blocks: + new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) + ircfg.add_irblock(new_irblock) + new_irblocks.append(new_irblock) + return new_irblocks + + +class ir(IntermediateRepresentation): + """ + DEPRECATED object + Use IntermediateRepresentation instead of ir + """ + + def __init__(self, loc_key, irs, lines=None): + warnings.warn('DEPRECATION WARNING: use "IntermediateRepresentation" instead of "ir"') + super(ir, self).__init__(loc_key, irs, lines) diff --git a/miasm/ir/symbexec.py b/miasm/ir/symbexec.py new file mode 100644 index 00000000..23df0ffb --- /dev/null +++ b/miasm/ir/symbexec.py @@ -0,0 +1,1124 @@ +from __future__ import print_function +from builtins import range +import logging +from collections import MutableMapping + +from future.utils import viewitems + +from miasm.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCompose, ExprSlice, ExprCond +from miasm.expression.simplifications import expr_simp_explicit +from miasm.ir.ir import AssignBlock + +log = logging.getLogger("symbexec") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.INFO) + + +def get_block(ir_arch, ircfg, mdis, addr): + """Get IRBlock at address @addr""" + loc_key = ircfg.get_or_create_loc_key(addr) + if not loc_key in ircfg.blocks: + offset = mdis.loc_db.get_location_offset(loc_key) + block = mdis.dis_block(offset) + ir_arch.add_asmblock_to_ircfg(block, ircfg) + irblock = ircfg.get_block(loc_key) + if irblock is None: + raise 
LookupError('No block found at that address: %s' % ir_arch.loc_db.pretty_str(loc_key)) + return irblock + + +class StateEngine(object): + """Stores an Engine state""" + + def merge(self, other): + """Generate a new state, representing the merge of self and @other + @other: a StateEngine instance""" + + raise NotImplementedError("Abstract method") + + +class SymbolicState(StateEngine): + """Stores a SymbolicExecutionEngine state""" + + def __init__(self, dct): + self._symbols = frozenset(viewitems(dct)) + + def __hash__(self): + return hash((self.__class__, self._symbols)) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return self.symbols == other.symbols + + def __ne__(self, other): + return not self == other + + def __iter__(self): + for dst, src in self._symbols: + yield dst, src + + def iteritems(self): + """Iterate on stored memory/values""" + return self.__iter__() + + def merge(self, other): + """Merge two symbolic states + Only equal expressions are kept in both states + @other: second symbolic state + """ + + symb_a = self.symbols + symb_b = other.symbols + intersection = set(symb_a).intersection(set(symb_b)) + out = {} + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + out[dst] = symb_a[dst] + return self.__class__(out) + + @property + def symbols(self): + """Return the dictionary of known symbols""" + return dict(self._symbols) + + +INTERNAL_INTBASE_NAME = "__INTERNAL_INTBASE__" + + +def get_expr_base_offset(expr): + """Return a couple representing the symbolic/concrete part of an addition + expression. 
+ + If there is no symbolic part, ExprId(INTERNAL_INTBASE_NAME) is used + If there is not concrete part, 0 is used + @expr: Expression instance + + """ + if expr.is_int(): + internal_intbase = ExprId(INTERNAL_INTBASE_NAME, expr.size) + return internal_intbase, int(expr) + + if not expr.is_op('+'): + return expr, 0 + if expr.args[-1].is_int(): + args, offset = expr.args[:-1], int(expr.args[-1]) + if len(args) == 1: + return args[0], offset + return ExprOp('+', *args), offset + return expr, 0 + + +class MemArray(MutableMapping): + """Link between base and its (offset, Expr) + + Given an expression (say *base*), this structure will store every memory + content relatively to an integer offset from *base*. + + The value associated to a given offset is a description of the slice of a + stored expression. The slice size depends on the configutation of the + MemArray. For example, for a slice size of 8 bits, the assignment: + - @32[EAX+0x10] = EBX + + will store for the base EAX: + - 0x10: (EBX, 0) + - 0x11: (EBX, 1) + - 0x12: (EBX, 2) + - 0x13: (EBX, 3) + + If the *base* is EAX+EBX, this structure can store the following contents: + - @32[EAX+EBX] + - @8[EAX+EBX+0x100] + But not: + - @32[EAX+0x10] (which is stored in another MemArray based on EAX) + - @32[EAX+EBX+ECX] (which is stored in another MemArray based on + EAX+EBX+ECX) + + """ + + def __init__(self, base, expr_simp=expr_simp_explicit): + self._base = base + self.expr_simp = expr_simp + self._mask = int(base.mask) + self._offset_to_expr = {} + + @property + def base(self): + """Expression representing the symbolic base address""" + return self._base + + @property + def mask(self): + """Mask offset""" + return self._mask + + def __contains__(self, offset): + return offset in self._offset_to_expr + + def __getitem__(self, offset): + assert 0 <= offset <= self._mask + return self._offset_to_expr.__getitem__(offset) + + def __setitem__(self, offset, value): + raise RuntimeError("Use write api to update keys") + + def 
__delitem__(self, offset): + assert 0 <= offset <= self._mask + return self._offset_to_expr.__delitem__(offset) + + def __iter__(self): + for offset, _ in viewitems(self._offset_to_expr): + yield offset + + def __len__(self): + return len(self._offset_to_expr) + + def __repr__(self): + out = [] + out.append("Base: %s" % self.base) + for offset, (index, value) in sorted(viewitems(self._offset_to_expr)): + out.append("%16X %d %s" % (offset, index, value)) + return '\n'.join(out) + + def copy(self): + """Copy object instance""" + obj = MemArray(self.base, self.expr_simp) + obj._offset_to_expr = self._offset_to_expr.copy() + return obj + + @staticmethod + def offset_to_ptr(base, offset): + """ + Return an expression representing the @base + @offset + @base: symbolic base address + @offset: relative offset integer to the @base address + """ + if base.is_id(INTERNAL_INTBASE_NAME): + ptr = ExprInt(offset, base.size) + elif offset == 0: + ptr = base + else: + ptr = base + ExprInt(offset, base.size) + return ptr.canonize() + + def read(self, offset, size): + """ + Return memory at @offset with @size as an Expr list + @offset: integer (in bytes) + @size: integer (in bits), byte aligned + + Consider the following state: + - 0x10: (EBX, 0) + - 0x11: (EBX, 1) + - 0x12: (EBX, 2) + - 0x13: (EBX, 3) + + A read at 0x10 of 32 bits should return: EBX + """ + + assert size % 8 == 0 + # Parts is (Expr's offset, size, Expr) + parts = [] + for index in range(size // 8): + # Wrap read: + # @32[EAX+0xFFFFFFFF] is ok and will read at 0xFFFFFFFF, 0, 1, 2 + request_offset = (offset + index) & self._mask + if request_offset in self._offset_to_expr: + # Known memory portion + off, data = self._offset_to_expr[request_offset] + parts.append((off, 1, data)) + continue + + # Unknown memory portion + ptr = self.offset_to_ptr(self.base, request_offset) + data = ExprMem(ptr, 8) + parts.append((0, 1, data)) + + # Group similar datas + # XXX TODO: only little endian here + index = 0 + while index + 1 < 
len(parts): + off_a, size_a, data_a = parts[index] + off_b, size_b, data_b = parts[index+1] + if data_a == data_b and off_a + size_a == off_b: + # Read consecutive bytes of a variable + # [(0, 8, x), (1, 8, x)] => (0, 16, x) + parts[index:index+2] = [(off_a, size_a + size_b, data_a)] + continue + if data_a.is_int() and data_b.is_int(): + # Read integer parts + # [(0, 8, 0x11223344), (1, 8, 0x55667788)] => (0, 16, 0x7744) + int1 = self.expr_simp(data_a[off_a*8:(off_a+size_a)*8]) + int2 = self.expr_simp(data_b[off_b*8:(off_b+size_b)*8]) + assert int1.is_int() and int2.is_int() + int1, int2 = int(int1), int(int2) + result = ExprInt((int2 << (size_a * 8)) | int1, (size_a + size_b) * 8) + parts[index:index+2] = [(0, size_a + size_b, result)] + continue + if data_a.is_mem() and data_b.is_mem(): + # Read consecutive bytes of a memory variable + ptr_base_a, ptr_offset_a = get_expr_base_offset(data_a.ptr) + ptr_base_b, ptr_offset_b = get_expr_base_offset(data_b.ptr) + if ptr_base_a != ptr_base_b: + index += 1 + continue + if (ptr_offset_a + off_a + size_a) & self._mask == (ptr_offset_b + off_b) & self._mask: + assert size_a <= data_a.size // 8 - off_a + assert size_b <= data_b.size // 8 - off_b + # Successive comparable symbolic pointers + # [(0, 8, @8[ptr]), (0, 8, @8[ptr+1])] => (0, 16, @16[ptr]) + ptr = self.offset_to_ptr(ptr_base_a, (ptr_offset_a + off_a) & self._mask) + + data = ExprMem(ptr, (size_a + size_b) * 8) + parts[index:index+2] = [(0, size_a + size_b, data)] + + continue + + index += 1 + + # Slice datas + read_mem = [] + for off, bytesize, data in parts: + if data.size // 8 != bytesize: + data = data[off * 8: (off + bytesize) * 8] + read_mem.append(data) + + return read_mem + + def write(self, offset, expr): + """ + Write @expr at @offset + @offset: integer (in bytes) + @expr: Expr instance value + """ + assert expr.size % 8 == 0 + assert offset <= self._mask + for index in range(expr.size // 8): + # Wrap write: + # @32[EAX+0xFFFFFFFF] is ok and will write at 
0xFFFFFFFF, 0, 1, 2 + request_offset = (offset + index) & self._mask + # XXX TODO: only little endian here + self._offset_to_expr[request_offset] = (index, expr) + + tmp = self.expr_simp(expr[index * 8: (index + 1) * 8]) + # Special case: Simplify slice of pointer (simplification is ok + # here, as we won't store the simplified expression) + if tmp.is_slice() and tmp.arg.is_mem() and tmp.start % 8 == 0: + new_ptr = self.expr_simp( + tmp.arg.ptr + ExprInt(tmp.start // 8, tmp.arg.ptr.size) + ) + tmp = ExprMem(new_ptr, tmp.stop - tmp.start) + # Test if write to original value + if tmp.is_mem(): + src_ptr, src_off = get_expr_base_offset(tmp.ptr) + if src_ptr == self.base and src_off == request_offset: + del self._offset_to_expr[request_offset] + + + def _get_variable_parts(self, index, known_offsets, forward=True): + """ + Find consecutive memory parts representing the same variable. The part + starts at offset known_offsets[@index] and search is in offset direction + determined by @forward + Return the number of consecutive parts of the same variable. 
+ + @index: index of the memory offset in known_offsets + @known_offsets: sorted offsets + @forward: Search in offset growing direction if True, else in reverse + order + """ + + offset = known_offsets[index] + value_byte_index, value = self._offset_to_expr[offset] + assert value.size % 8 == 0 + + if forward: + start, end, step = value_byte_index + 1, value.size // 8, 1 + else: + start, end, step = value_byte_index - 1, -1, -1 + + partnum = 1 + for value_offset in range(start, end, step): + offset += step + # Check if next part is in known_offsets + next_index = index + step * partnum + if not 0 <= next_index < len(known_offsets): + break + + offset_next = known_offsets[next_index] + if offset_next != offset: + break + + # Check if next part is a part of the searched value + byte_index, value_next = self._offset_to_expr[offset_next] + if byte_index != value_offset: + break + if value != value_next: + break + partnum += 1 + + return partnum + + + def _build_value_at_offset(self, value, offset, start, length): + """ + Return a couple. The first element is the memory Expression representing + the value at @offset, the second is its value. The value is truncated + at byte @start with @length + + @value: Expression to truncate + @offset: offset in bytes of the variable (integer) + @start: value's byte offset (integer) + @length: length in bytes (integer) + """ + + ptr = self.offset_to_ptr(self.base, offset) + size = length * 8 + if start == 0 and size == value.size: + result = value + else: + result = self.expr_simp(value[start * 8: start * 8 + size]) + + return ExprMem(ptr, size), result + + + def memory(self): + """ + Iterate on stored memory/values + + The goal here is to group entities. 
+ + Consider the following state: + EAX + 0x10 = (0, EDX) + EAX + 0x11 = (1, EDX) + EAX + 0x12 = (2, EDX) + EAX + 0x13 = (3, EDX) + + The function should return: + @32[EAX + 0x10] = EDX + """ + + if not self._offset_to_expr: + return + known_offsets = sorted(self._offset_to_expr) + index = 0 + # Test if the first element is the continuation of the last byte. If + # yes, merge and output it first. + min_int = 0 + max_int = (1 << self.base.size) - 1 + limit_index = len(known_offsets) + + first_element = None + # Special case where a variable spreads on max_int/min_int + if known_offsets[0] == min_int and known_offsets[-1] == max_int: + min_offset, max_offset = known_offsets[0], known_offsets[-1] + min_byte_index, min_value = self._offset_to_expr[min_offset] + max_byte_index, max_value = self._offset_to_expr[max_offset] + if min_value == max_value and max_byte_index + 1 == min_byte_index: + # Look for current variable start + partnum_before = self._get_variable_parts(len(known_offsets) - 1, known_offsets, False) + # Look for current variable end + partnum_after = self._get_variable_parts(0, known_offsets) + + partnum = partnum_before + partnum_after + offset = known_offsets[-partnum_before] + index_value, value = self._offset_to_expr[offset] + + mem, result = self._build_value_at_offset(value, offset, index_value, partnum) + first_element = mem, result + index = partnum_after + limit_index = len(known_offsets) - partnum_before + + # Special cases are done, walk and merge variables + while index < limit_index: + offset = known_offsets[index] + index_value, value = self._offset_to_expr[offset] + partnum = self._get_variable_parts(index, known_offsets) + mem, result = self._build_value_at_offset(value, offset, index_value, partnum) + yield mem, result + index += partnum + + if first_element is not None: + yield first_element + + def dump(self): + """Display MemArray content""" + for mem, value in self.memory(): + print("%s = %s" % (mem, value)) + + +class 
MemSparse(object): + """Link a symbolic memory pointer to its MemArray. + + For each symbolic memory object, this object will extract the memory pointer + *ptr*. It then splits *ptr* into a symbolic and an integer part. For + example, the memory @[ESP+4] will give ESP+4 for *ptr*. *ptr* is then split + into its base ESP and its offset 4. Each symbolic base address uses a + different MemArray. + + Example: + - @32[EAX+EBX] + - @8[EAX+EBX+0x100] + Will be stored in the same MemArray with a EAX+EBX base + + """ + + def __init__(self, addrsize, expr_simp=expr_simp_explicit): + """ + @addrsize: size (in bits) of the addresses manipulated by the MemSparse + @expr_simp: an ExpressionSimplifier instance + """ + self.addrsize = addrsize + self.expr_simp = expr_simp + self.base_to_memarray = {} + + def __contains__(self, expr): + """ + Return True if the whole @expr is present + For partial check, use 'contains_partial' + """ + if not expr.is_mem(): + return False + ptr = expr.ptr + base, offset = get_expr_base_offset(ptr) + memarray = self.base_to_memarray.get(base, None) + if memarray is None: + return False + for i in range(expr.size // 8): + if offset + i not in memarray: + return False + return True + + def contains_partial(self, expr): + """ + Return True if a part of @expr is present in memory + """ + if not expr.is_mem(): + return False + ptr = expr.ptr + base, offset = get_expr_base_offset(ptr) + memarray = self.base_to_memarray.get(base, None) + if memarray is None: + return False + for i in range(expr.size // 8): + if offset + i in memarray: + return True + return False + + def clear(self): + """Reset the current object content""" + self.base_to_memarray.clear() + + def copy(self): + """Copy the current object instance""" + base_to_memarray = {} + for base, memarray in viewitems(self.base_to_memarray): + base_to_memarray[base] = memarray.copy() + obj = MemSparse(self.addrsize, self.expr_simp) + obj.base_to_memarray = base_to_memarray + return obj + + def 
__delitem__(self, expr): + """ + Delete a value @expr *fully* present in memory + For partial delete, use delete_partial + """ + ptr = expr.ptr + base, offset = get_expr_base_offset(ptr) + memarray = self.base_to_memarray.get(base, None) + if memarray is None: + raise KeyError + # Check if whole entity is in the MemArray before deleting it + for i in range(expr.size // 8): + if (offset + i) & memarray.mask not in memarray: + raise KeyError + for i in range(expr.size // 8): + del memarray[(offset + i) & memarray.mask] + + def delete_partial(self, expr): + """ + Delete @expr from memory. Skip parts of @expr which are not present in + memory. + """ + ptr = expr.ptr + base, offset = get_expr_base_offset(ptr) + memarray = self.base_to_memarray.get(base, None) + if memarray is None: + raise KeyError + # Check if whole entity is in the MemArray before deleting it + for i in range(expr.size // 8): + real_offset = (offset + i) & memarray.mask + if real_offset in memarray: + del memarray[real_offset] + + def read(self, ptr, size): + """ + Return the value associated with the Expr at address @ptr + @ptr: Expr representing the memory address + @size: memory size (in bits), byte aligned + """ + assert size % 8 == 0 + base, offset = get_expr_base_offset(ptr) + memarray = self.base_to_memarray.get(base, None) + if memarray is not None: + mems = memarray.read(offset, size) + ret = ExprCompose(*mems) + else: + ret = ExprMem(ptr, size) + return ret + + def write(self, ptr, expr): + """ + Update the corresponding Expr @expr at address @ptr + @ptr: Expr representing the memory address + @expr: Expr instance + """ + assert ptr.size == self.addrsize + base, offset = get_expr_base_offset(ptr) + memarray = self.base_to_memarray.get(base, None) + if memarray is None: + memarray = MemArray(base, self.expr_simp) + self.base_to_memarray[base] = memarray + memarray.write(offset, expr) + + def iteritems(self): + """Iterate on stored memory variables and their values.""" + for _, memarray in 
viewitems(self.base_to_memarray): + for mem, value in memarray.memory(): + yield mem, value + + def items(self): + """Return stored memory variables and their values.""" + return list(self.iteritems()) + + def dump(self): + """Display MemSparse content""" + for mem, value in viewitems(self): + print("%s = %s" % (mem, value)) + + def __repr__(self): + out = [] + for _, memarray in sorted(viewitems(self.base_to_memarray)): + out.append(repr(memarray)) + return '\n'.join(out) + + +class SymbolMngr(object): + """Symbolic store manager (IDs and MEMs)""" + + def __init__(self, init=None, addrsize=None, expr_simp=expr_simp_explicit): + assert addrsize is not None + if init is None: + init = {} + self.addrsize = addrsize + self.expr_simp = expr_simp + self.symbols_id = {} + self.symbols_mem = MemSparse(addrsize, expr_simp) + self.mask = (1 << addrsize) - 1 + for expr, value in viewitems(init): + self.write(expr, value) + + def __contains__(self, expr): + if expr.is_id(): + return self.symbols_id.__contains__(expr) + if expr.is_mem(): + return self.symbols_mem.__contains__(expr) + return False + + def __getitem__(self, expr): + return self.read(expr) + + def __setitem__(self, expr, value): + self.write(expr, value) + + def __delitem__(self, expr): + if expr.is_id(): + del self.symbols_id[expr] + elif expr.is_mem(): + del self.symbols_mem[expr] + else: + raise TypeError("Bad source expr") + + def copy(self): + """Copy object instance""" + obj = SymbolMngr(self, addrsize=self.addrsize, expr_simp=self.expr_simp) + return obj + + def clear(self): + """Forget every variables values""" + self.symbols_id.clear() + self.symbols_mem.clear() + + def read(self, src): + """ + Return the value corresponding to Expr @src + @src: ExprId or ExprMem instance + """ + if src.is_id(): + return self.symbols_id.get(src, src) + elif src.is_mem(): + # Only byte aligned accesses are supported for now + assert src.size % 8 == 0 + return self.symbols_mem.read(src.ptr, src.size) + else: + raise 
TypeError("Bad source expr") + + def write(self, dst, src): + """ + Update @dst with @src expression + @dst: ExprId or ExprMem instance + @src: Expression instance + """ + assert dst.size == src.size + if dst.is_id(): + if dst == src: + if dst in self.symbols_id: + del self.symbols_id[dst] + else: + self.symbols_id[dst] = src + elif dst.is_mem(): + # Only byte aligned accesses are supported for now + assert dst.size % 8 == 0 + self.symbols_mem.write(dst.ptr, src) + else: + raise TypeError("Bad destination expr") + + def dump(self, ids=True, mems=True): + """Display memory content""" + if ids: + for variable, value in self.ids(): + print('%s = %s' % (variable, value)) + if mems: + for mem, value in self.memory(): + print('%s = %s' % (mem, value)) + + def __repr__(self): + out = [] + for variable, value in viewitems(self): + out.append('%s = %s' % (variable, value)) + return "\n".join(out) + + def iteritems(self): + """ExprId/ExprMem iteritems of the current state""" + for variable, value in self.ids(): + yield variable, value + for variable, value in self.memory(): + yield variable, value + + def items(self): + """Return variables/values of the current state""" + return list(self.iteritems()) + + def __iter__(self): + for expr, _ in self.iteritems(): + yield expr + + def ids(self): + """Iterate on variables and their values.""" + for expr, value in viewitems(self.symbols_id): + yield expr, value + + def memory(self): + """Iterate on memory variables and their values.""" + for mem, value in viewitems(self.symbols_mem): + yield mem, value + + def keys(self): + """Variables of the current state""" + return list(self) + + +def merge_ptr_read(known, ptrs): + """ + Merge common memory parts in a multiple byte memory. 
+ @ptrs: memory bytes list + @known: ptrs' associated boolean for present/unpresent memory part in the + store + """ + assert known + out = [] + known.append(None) + ptrs.append(None) + last, value, size = known[0], ptrs[0], 8 + for index, part in enumerate(known[1:], 1): + if part == last: + size += 8 + else: + out.append((last, value, size)) + last, value, size = part, ptrs[index], 8 + return out + + +class SymbolicExecutionEngine(object): + """ + Symbolic execution engine + Allow IR code emulation in symbolic domain + + + Examples: + from miasm.ir.symbexec import SymbolicExecutionEngine + from miasm.ir.ir import AssignBlock + + ir_arch = ir_x86_32() + + init_state = { + ir_arch.arch.regs.EAX: ir_arch.arch.regs.EBX, + ExprMem(id_x+ExprInt(0x10, 32), 32): id_a, + } + + sb_exec = SymbolicExecutionEngine(ir_arch, init_state) + + >>> sb_exec.dump() + EAX = a + @32[x + 0x10] = a + >>> sb_exec.dump(mems=False) + EAX = a + + >>> print sb_exec.eval_expr(ir_arch.arch.regs.EAX + ir_arch.arch.regs.ECX) + EBX + ECX + + Inspecting state: + - dump + - modified + State manipulation: + - '.state' (rw) + + Evaluation (read only): + - eval_expr + - eval_assignblk + Evaluation with state update: + - eval_updt_expr + - eval_updt_assignblk + - eval_updt_irblock + + Start a symbolic execution based on provisioned '.ir_arch' blocks: + - run_block_at + - run_at + """ + + StateEngine = SymbolicState + + def __init__(self, ir_arch, state=None, + sb_expr_simp=expr_simp_explicit): + + self.expr_to_visitor = { + ExprInt: self.eval_exprint, + ExprId: self.eval_exprid, + ExprLoc: self.eval_exprloc, + ExprMem: self.eval_exprmem, + ExprSlice: self.eval_exprslice, + ExprCond: self.eval_exprcond, + ExprOp: self.eval_exprop, + ExprCompose: self.eval_exprcompose, + } + + if state is None: + state = {} + + self.symbols = SymbolMngr(addrsize=ir_arch.addrsize, expr_simp=sb_expr_simp) + + for dst, src in viewitems(state): + self.symbols.write(dst, src) + + self.ir_arch = ir_arch + self.expr_simp = 
sb_expr_simp + + def get_state(self): + """Return the current state of the SymbolicEngine""" + state = self.StateEngine(dict(self.symbols)) + return state + + def set_state(self, state): + """Restaure the @state of the engine + @state: StateEngine instance + """ + self.symbols = SymbolMngr(addrsize=self.ir_arch.addrsize, expr_simp=self.expr_simp) + for dst, src in viewitems(dict(state)): + self.symbols[dst] = src + + state = property(get_state, set_state) + + def eval_expr_visitor(self, expr, cache=None): + """ + [DEV]: Override to change the behavior of an Expr evaluation. + This function recursively applies 'eval_expr*' to @expr. + This function uses @cache to speedup re-evaluation of expression. + """ + if cache is None: + cache = {} + + ret = cache.get(expr, None) + if ret is not None: + return ret + + new_expr = self.expr_simp(expr) + ret = cache.get(new_expr, None) + if ret is not None: + return ret + + func = self.expr_to_visitor.get(new_expr.__class__, None) + if func is None: + raise TypeError("Unknown expr type") + + ret = func(new_expr, cache=cache) + ret = self.expr_simp(ret) + assert ret is not None + + cache[expr] = ret + cache[new_expr] = ret + return ret + + def eval_exprint(self, expr, **kwargs): + """[DEV]: Evaluate an ExprInt using the current state""" + return expr + + def eval_exprid(self, expr, **kwargs): + """[DEV]: Evaluate an ExprId using the current state""" + ret = self.symbols.read(expr) + return ret + + def eval_exprloc(self, expr, **kwargs): + """[DEV]: Evaluate an ExprLoc using the current state""" + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) + if offset is not None: + ret = ExprInt(offset, expr.size) + else: + ret = expr + return ret + + def eval_exprmem(self, expr, **kwargs): + """[DEV]: Evaluate an ExprMem using the current state + This function first evaluate the memory pointer value. 
+ Override 'mem_read' to modify the effective memory accesses + """ + ptr = self.eval_expr_visitor(expr.ptr, **kwargs) + mem = ExprMem(ptr, expr.size) + ret = self.mem_read(mem) + return ret + + def eval_exprcond(self, expr, **kwargs): + """[DEV]: Evaluate an ExprCond using the current state""" + cond = self.eval_expr_visitor(expr.cond, **kwargs) + src1 = self.eval_expr_visitor(expr.src1, **kwargs) + src2 = self.eval_expr_visitor(expr.src2, **kwargs) + ret = ExprCond(cond, src1, src2) + return ret + + def eval_exprslice(self, expr, **kwargs): + """[DEV]: Evaluate an ExprSlice using the current state""" + arg = self.eval_expr_visitor(expr.arg, **kwargs) + ret = ExprSlice(arg, expr.start, expr.stop) + return ret + + def eval_exprop(self, expr, **kwargs): + """[DEV]: Evaluate an ExprOp using the current state""" + args = [] + for oarg in expr.args: + arg = self.eval_expr_visitor(oarg, **kwargs) + args.append(arg) + ret = ExprOp(expr.op, *args) + return ret + + def eval_exprcompose(self, expr, **kwargs): + """[DEV]: Evaluate an ExprCompose using the current state""" + args = [] + for arg in expr.args: + args.append(self.eval_expr_visitor(arg, **kwargs)) + ret = ExprCompose(*args) + return ret + + def eval_expr(self, expr, eval_cache=None): + """ + Evaluate @expr + @expr: Expression instance to evaluate + @cache: None or dictionary linking variables to their values + """ + if eval_cache is None: + eval_cache = {} + ret = self.eval_expr_visitor(expr, cache=eval_cache) + assert ret is not None + return ret + + def modified(self, init_state=None, ids=True, mems=True): + """ + Return the modified variables. + @init_state: a base dictionary linking variables to their initial values + to diff. Can be None. 
+ @ids: track ids only + @mems: track mems only + """ + if init_state is None: + init_state = {} + if ids: + for variable, value in viewitems(self.symbols.symbols_id): + if variable in init_state and init_state[variable] == value: + continue + yield variable, value + if mems: + for mem, value in self.symbols.memory(): + if mem in init_state and init_state[mem] == value: + continue + yield mem, value + + def dump(self, ids=True, mems=True): + """ + Display modififed variables + @ids: display modified ids + @mems: display modified memory + """ + + for variable, value in self.modified(None, ids, mems): + print("%-18s" % variable, "=", "%s" % value) + + def eval_assignblk(self, assignblk): + """ + Evaluate AssignBlock using the current state + + Returns a dictionary containing modified keys associated to their values + + @assignblk: AssignBlock instance + """ + pool_out = {} + eval_cache = {} + for dst, src in viewitems(assignblk): + src = self.eval_expr(src, eval_cache) + if dst.is_mem(): + ptr = self.eval_expr(dst.ptr, eval_cache) + # Test if mem lookup is known + tmp = ExprMem(ptr, dst.size) + pool_out[tmp] = src + elif dst.is_id(): + pool_out[dst] = src + else: + raise ValueError("Unknown destination type", str(dst)) + + return pool_out + + def apply_change(self, dst, src): + """ + Apply @dst = @src on the current state WITHOUT evaluating both side + @dst: Expr, destination + @src: Expr, source + """ + if dst.is_mem(): + self.mem_write(dst, src) + else: + self.symbols.write(dst, src) + + def eval_updt_assignblk(self, assignblk): + """ + Apply an AssignBlock on the current state + @assignblk: AssignBlock instance + """ + mem_dst = [] + dst_src = self.eval_assignblk(assignblk) + for dst, src in viewitems(dst_src): + self.apply_change(dst, src) + if dst.is_mem(): + mem_dst.append(dst) + return mem_dst + + def eval_updt_irblock(self, irb, step=False): + """ + Symbolic execution of the @irb on the current state + @irb: irbloc instance + @step: display intermediate steps 
+ """ + for assignblk in irb: + if step: + print('Instr', assignblk.instr) + print('Assignblk:') + print(assignblk) + print('_' * 80) + self.eval_updt_assignblk(assignblk) + if step: + self.dump(mems=False) + self.dump(ids=False) + print('_' * 80) + dst = self.eval_expr(self.ir_arch.IRDst) + + return dst + + def run_block_at(self, ircfg, addr, step=False): + """ + Symbolic execution of the block at @addr + @addr: address to execute (int or ExprInt or label) + @step: display intermediate steps + """ + irblock = ircfg.get_block(addr) + if irblock is not None: + addr = self.eval_updt_irblock(irblock, step=step) + return addr + + def run_at(self, ircfg, addr, lbl_stop=None, step=False): + """ + Symbolic execution starting at @addr + @addr: address to execute (int or ExprInt or label) + @lbl_stop: LocKey to stop execution on + @step: display intermediate steps + """ + while True: + irblock = ircfg.get_block(addr) + if irblock is None: + break + if irblock.loc_key == lbl_stop: + break + addr = self.eval_updt_irblock(irblock, step=step) + return addr + + def del_mem_above_stack(self, stack_ptr): + """ + Remove all stored memory values with following properties: + * pointer based on initial stack value + * pointer below current stack pointer + """ + stack_ptr = self.eval_expr(stack_ptr) + base, stk_offset = get_expr_base_offset(stack_ptr) + memarray = self.symbols.symbols_mem.base_to_memarray.get(base, None) + if memarray: + to_del = set() + for offset in memarray: + if ((offset - stk_offset) & int(stack_ptr.mask)) >> (stack_ptr.size - 1) != 0: + to_del.add(offset) + + for offset in to_del: + del memarray[offset] + + def eval_updt_expr(self, expr): + """ + Evaluate @expr and apply side effect if needed (ie. if expr is an + assignment). 
Return the evaluated value + """ + + # Update value if needed + if expr.is_aff(): + ret = self.eval_expr(expr.src) + self.eval_updt_assignblk(AssignBlock([expr])) + else: + ret = self.eval_expr(expr) + + return ret + + def mem_read(self, expr): + """ + [DEV]: Override to modify the effective memory reads + + Read symbolic value at ExprMem @expr + @expr: ExprMem + """ + return self.symbols.read(expr) + + def mem_write(self, dst, src): + """ + [DEV]: Override to modify the effective memory writes + + Write symbolic value @src at ExprMem @dst + @dst: destination ExprMem + @src: source Expression + """ + self.symbols.write(dst, src) diff --git a/miasm/ir/symbexec_top.py b/miasm/ir/symbexec_top.py new file mode 100644 index 00000000..d37293b5 --- /dev/null +++ b/miasm/ir/symbexec_top.py @@ -0,0 +1,221 @@ +from future.utils import viewitems + +from miasm.ir.symbexec import SymbolicExecutionEngine, StateEngine +from miasm.expression.simplifications import expr_simp +from miasm.expression.expression import ExprId, ExprInt, ExprSlice,\ + ExprMem, ExprCond, ExprCompose, ExprOp + + +TOPSTR = "TOP" + +def exprid_top(expr): + """Return a TOP expression (ExprId("TOP") of size @expr.size + @expr: expression to replace with TOP + """ + return ExprId(TOPSTR, expr.size) + + +class SymbolicStateTop(StateEngine): + + def __init__(self, dct, regstop): + self._symbols = frozenset(viewitems(dct)) + self._regstop = frozenset(regstop) + + def __hash__(self): + return hash((self.__class__, self._symbols, self._regstop)) + + def __str__(self): + out = [] + for dst, src in sorted(self._symbols): + out.append("%s = %s" % (dst, src)) + for dst in self._regstop: + out.append('TOP %s' %dst) + return "\n".join(out) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return (self.symbols == other.symbols and + self.regstop == other.regstop) + + def __ne__(self, other): + return not self.__eq__(other) + + def __iter__(self): + for 
dst, src in self._symbols: + yield dst, src + + def merge(self, other): + """Merge two symbolic states + Only equal expressions are kept in both states + @other: second symbolic state + """ + symb_a = self.symbols + symb_b = other.symbols + intersection = set(symb_a).intersection(symb_b) + diff = set(symb_a).union(symb_b).difference(intersection) + symbols = {} + regstop = set() + for dst in diff: + if dst.is_id(): + regstop.add(dst) + for dst in intersection: + if symb_a[dst] == symb_b[dst]: + symbols[dst] = symb_a[dst] + else: + regstop.add(dst) + return self.__class__(symbols, regstop) + + @property + def symbols(self): + """Return the dictionary of known symbols""" + return dict(self._symbols) + + @property + def regstop(self): + """Return the set of expression with TOP values""" + return self._regstop + +class SymbExecTopNoMem(SymbolicExecutionEngine): + """ + Symbolic execution, include TOP value. + ExprMem are not propagated. + Any computation involving a TOP will generate TOP. + """ + + StateEngine = SymbolicStateTop + + def __init__(self, ir_arch, state, regstop, + sb_expr_simp=expr_simp): + known_symbols = dict(state) + super(SymbExecTopNoMem, self).__init__(ir_arch, known_symbols, + sb_expr_simp) + self.regstop = set(regstop) + + def get_state(self): + """Return the current state of the SymbolicEngine""" + return self.StateEngine(self.symbols, self.regstop) + + def eval_expr(self, expr, eval_cache=None): + if expr in self.regstop: + return exprid_top(expr) + if eval_cache is None: + eval_cache = {} + ret = self.apply_expr_on_state_visit_cache(expr, self.symbols, eval_cache) + return ret + + def manage_mem(self, expr, state, cache, level): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + if ret.is_mem() and not ret.arg.is_int() and ret.arg == ptr: + ret = exprid_top(expr) + assert expr.size == ret.size + return ret + + + def eval_exprid(self, expr, **kwargs): + 
"""[DEV]: Evaluate an ExprId using the current state""" + if expr in self.regstop: + ret = exprid_top(expr) + else: + ret = self.symbols.read(expr) + return ret + + def eval_exprloc(self, expr, **kwargs): + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) + if offset is not None: + ret = ExprInt(offset, expr.size) + else: + ret = expr + return ret + + def eval_exprcond(self, expr, **kwargs): + """[DEV]: Evaluate an ExprCond using the current state""" + cond = self.eval_expr_visitor(expr.cond, **kwargs) + src1 = self.eval_expr_visitor(expr.src1, **kwargs) + src2 = self.eval_expr_visitor(expr.src2, **kwargs) + if cond.is_id(TOPSTR) or src1.is_id(TOPSTR) or src2.is_id(TOPSTR): + ret = exprid_top(expr) + else: + ret = ExprCond(cond, src1, src2) + return ret + + def eval_exprslice(self, expr, **kwargs): + """[DEV]: Evaluate an ExprSlice using the current state""" + arg = self.eval_expr_visitor(expr.arg, **kwargs) + if arg.is_id(TOPSTR): + ret = exprid_top(expr) + else: + ret = ExprSlice(arg, expr.start, expr.stop) + return ret + + def eval_exprop(self, expr, **kwargs): + """[DEV]: Evaluate an ExprOp using the current state""" + args = [] + for oarg in expr.args: + arg = self.eval_expr_visitor(oarg, **kwargs) + if arg.is_id(TOPSTR): + return exprid_top(expr) + args.append(arg) + ret = ExprOp(expr.op, *args) + return ret + + def eval_exprcompose(self, expr, **kwargs): + """[DEV]: Evaluate an ExprCompose using the current state""" + args = [] + for arg in expr.args: + arg = self.eval_expr_visitor(arg, **kwargs) + if arg.is_id(TOPSTR): + return exprid_top(expr) + args.append(arg) + ret = ExprCompose(*args) + return ret + + def apply_change(self, dst, src): + eval_cache = {} + if dst.is_mem(): + # If Write to TOP, forget all memory information + ret = self.eval_expr(dst.arg, eval_cache) + if ret.is_id(TOPSTR): + to_del = set() + for dst_tmp in self.symbols: + if dst_tmp.is_mem(): + to_del.add(dst_tmp) + for dst_to_del in to_del: + del self.symbols[dst_to_del] + 
return + src_o = self.expr_simp(src) + + # Force update. Ex: + # EBX += 1 (state: EBX = EBX+1) + # EBX -= 1 (state: EBX = EBX, must be updated) + if dst in self.regstop: + self.regstop.discard(dst) + self.symbols[dst] = src_o + + if dst == src_o: + # Avoid useless X = X information + del self.symbols[dst] + + if src_o.is_id(TOPSTR): + if dst in self.symbols: + del self.symbols[dst] + self.regstop.add(dst) + +class SymbExecTop(SymbExecTopNoMem): + """ + Symbolic execution, include TOP value. + ExprMem are propagated. + Any computation involving a TOP will generate TOP. + WARNING: avoid memory aliases here! + """ + + def manage_mem(self, expr, state, cache, level): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + assert expr.size == ret.size + return ret diff --git a/miasm/ir/symbexec_types.py b/miasm/ir/symbexec_types.py new file mode 100644 index 00000000..c969a2f5 --- /dev/null +++ b/miasm/ir/symbexec_types.py @@ -0,0 +1,131 @@ +from __future__ import print_function + +from future.utils import viewitems + +from miasm.ir.symbexec import SymbolicExecutionEngine, StateEngine +from miasm.expression.simplifications import expr_simp +from miasm.expression.expression import ExprId, ExprMem + + +class SymbolicStateCTypes(StateEngine): + """Store C types of symbols""" + + def __init__(self, symbols): + tmp = {} + for expr, types in viewitems(symbols): + tmp[expr] = frozenset(types) + self._symbols = frozenset(viewitems(tmp)) + + def __hash__(self): + return hash((self.__class__, self._symbols)) + + def __str__(self): + out = [] + for dst, src in sorted(self._symbols): + out.append("%s = %s" % (dst, src)) + return "\n".join(out) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ != other.__class__: + return False + return self.symbols == other.symbols + + def __ne__(self, other): + return not self.__eq__(other) + + def __iter__(self): + for dst, src 
in self._symbols: + yield dst, src + + def merge(self, other): + """Merge two symbolic states + The resulting types are the union of types of both states. + @other: second symbolic state + """ + symb_a = self.symbols + symb_b = other.symbols + symbols = {} + for expr in set(symb_a).union(set(symb_b)): + ctypes = symb_a.get(expr, set()).union(symb_b.get(expr, set())) + if ctypes: + symbols[expr] = ctypes + return self.__class__(symbols) + + @property + def symbols(self): + """Return the dictionary of known symbols'types""" + return dict(self._symbols) + + +class SymbExecCType(SymbolicExecutionEngine): + """Engine of C types propagation + WARNING: avoid memory aliases here! + """ + + StateEngine = SymbolicStateCTypes + OBJC_INTERNAL = "___OBJC___" + + def __init__(self, ir_arch, + symbols, + chandler, + sb_expr_simp=expr_simp): + self.chandler = chandler + + super(SymbExecCType, self).__init__(ir_arch, + {}, + sb_expr_simp) + self.symbols = dict(symbols) + + def get_state(self): + """Return the current state of the SymbolicEngine""" + return self.StateEngine(self.symbols) + + def eval_assignblk(self, assignblk): + """ + Evaluate AssignBlock on the current state + @assignblk: AssignBlock instance + """ + pool_out = {} + for dst, src in viewitems(assignblk): + objcs = self.chandler.expr_to_types(src, self.symbols) + if isinstance(dst, ExprMem): + continue + elif isinstance(dst, ExprId): + pool_out[dst] = frozenset(objcs) + else: + raise ValueError("Unsupported assignment", str(dst)) + return pool_out + + def eval_expr(self, expr, eval_cache=None): + return frozenset(self.chandler.expr_to_types(expr, self.symbols)) + + def apply_change(self, dst, src): + if src is None: + if dst in self.symbols: + del self.symbols[dst] + else: + self.symbols[dst] = src + + def del_mem_above_stack(self, stack_ptr): + """No stack deletion""" + return + + def dump_id(self): + """ + Dump modififed registers symbols only + """ + for expr, expr_types in sorted(viewitems(self.symbols)): + if 
not expr.is_mem(): + print(expr) + for expr_type in expr_types: + print('\t', expr_type) + + def dump_mem(self): + """ + Dump modififed memory symbols + """ + for expr, value in sorted(viewitems(self.symbols)): + if expr.is_mem(): + print(expr, value) diff --git a/miasm/ir/translators/C.py b/miasm/ir/translators/C.py new file mode 100644 index 00000000..9a96487a --- /dev/null +++ b/miasm/ir/translators/C.py @@ -0,0 +1,528 @@ +from miasm.ir.translators.translator import Translator +from miasm.expression.modint import size2mask +from miasm.expression.expression import ExprInt, ExprCond, ExprCompose, \ + TOK_EQUAL, \ + TOK_INF_SIGNED, TOK_INF_UNSIGNED, \ + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED + +def int_size_to_bn(value, size): + if size < 32: + int_str = "%.8x" % value + size_nibble = 8 + else: + # size must be multiple of 4 + size = ((size + 31) // 32) * 32 + size_nibble = size // 4 + fmt_str = "%%.%dx" % size_nibble + int_str = fmt_str % value + assert len(int_str) == size_nibble + return int_str, size_nibble + + +TOK_CMP_TO_NATIVE_C = { + TOK_EQUAL: "==", + TOK_INF_SIGNED: "<", + TOK_INF_UNSIGNED: "<", + TOK_INF_EQUAL_SIGNED: "<=", + TOK_INF_EQUAL_UNSIGNED: "<=", +} + +TOK_CMP_TO_BIGNUM_C = { + TOK_EQUAL: "equal", + TOK_INF_SIGNED: "inf_signed", + TOK_INF_UNSIGNED: "inf_unsigned", + TOK_INF_EQUAL_SIGNED: "inf_equal_signed", + TOK_INF_EQUAL_UNSIGNED: "inf_equal_unsigned", +} + + +class TranslatorC(Translator): + "Translate a Miasm expression to an equivalent C code" + + # Implemented language + __LANG__ = "C" + + # Operations translation + dct_shift = {'a>>': "right_arith", + '>>': "right_logic", + '<<': "left_logic", + } + dct_rot = {'<<<': 'rot_left', + '>>>': 'rot_right', + } + + NATIVE_INT_MAX_SIZE = 64 + + def __init__(self, loc_db=None, **kwargs): + """Instance a C translator + @loc_db: LocationDB instance + """ + super(TranslatorC, self).__init__(**kwargs) + # symbol pool + self.loc_db = loc_db + + def _size2mask(self, size): + """Return a C string 
corresponding to the size2mask operation, with support for + @size <= 64""" + assert size <= 64 + mask = size2mask(size) + return "0x%x" % mask + + def from_ExprId(self, expr): + return str(expr) + + def from_ExprInt(self, expr): + if expr.size <= self.NATIVE_INT_MAX_SIZE: + assert expr.size <= 64 + out = "0x%x" % int(expr) + if expr.size == 64: + out += "ULL" + return out + value, int_size = int_size_to_bn(int(expr), expr.size) + return 'bignum_from_string("%s", %d)' % (value, int_size) + + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.loc_db is None: + return str(loc_key) + offset = self.loc_db.get_location_offset(loc_key) + if offset is None: + return str(loc_key) + + if expr.size <= self.NATIVE_INT_MAX_SIZE: + return "0x%x" % offset + + value, int_size = int_size_to_bn(offset, 64) + return 'bignum_from_string("%s", %d)' % (value, int_size) + + def from_ExprAssign(self, expr): + new_dst = self.from_expr(expr.dst) + new_src = self.from_expr(expr.src) + return "%s = %s" % (new_dst, new_src) + + def from_ExprCond(self, expr): + cond = self.from_expr(expr.cond) + src1 = self.from_expr(expr.src1) + src2 = self.from_expr(expr.src2) + if not expr.cond.size <= self.NATIVE_INT_MAX_SIZE: + cond = "(!bignum_is_zero(%s))" % cond + out = "(%s?%s:%s)" % (cond, src1, src2) + return out + + def from_ExprMem(self, expr): + ptr = expr.ptr + if ptr.size <= self.NATIVE_INT_MAX_SIZE: + new_ptr = self.from_expr(ptr) + if expr.size <= self.NATIVE_INT_MAX_SIZE: + # Native ptr, Native Mem + return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, new_ptr) + else: + # Native ptr, BN mem + return "MEM_LOOKUP_INT_BN(jitcpu, %d, %s)" % (expr.size, new_ptr) + # BN ptr + new_ptr = self.from_expr(ptr) + + if expr.size <= self.NATIVE_INT_MAX_SIZE: + # BN ptr, Native Mem + return "MEM_LOOKUP_BN_INT(jitcpu, %d, %s)" % (expr.size, new_ptr) + else: + # BN ptr, BN mem + return "MEM_LOOKUP_BN_BN(jitcpu, %d, %s)" % (expr.size, new_ptr) + + def from_ExprOp(self, expr): + if len(expr.args) 
== 1: + if expr.op == 'parity': + arg = expr.args[0] + out = self.from_expr(arg) + if arg.size <= self.NATIVE_INT_MAX_SIZE: + out = "(%s&%s)" % (out, self._size2mask(arg.size)) + else: + out = 'bignum_mask(%s, 8)' % (out, 8) + out = 'bignum_to_uint64(%s)' % out + out = 'parity(%s)' % out + return out + + elif expr.op.startswith("zeroExt_"): + arg = expr.args[0] + if expr.size == arg.size: + return arg + return self.from_expr(ExprCompose(arg, ExprInt(0, expr.size - arg.size))) + + elif expr.op.startswith("signExt_"): + arg = expr.args[0] + if expr.size == arg.size: + return arg + add_size = expr.size - arg.size + new_expr = ExprCompose( + arg, + ExprCond( + arg.msb(), + ExprInt(size2mask(add_size), add_size), + ExprInt(0, add_size) + ) + ) + return self.from_expr(new_expr) + + + elif expr.op in ['cntleadzeros', 'cnttrailzeros']: + arg = expr.args[0] + out = self.from_expr(arg) + if arg.size <= self.NATIVE_INT_MAX_SIZE: + out = "%s(0x%x, %s)" % (expr.op, expr.args[0].size, out) + else: + out = "bignum_%s(%s, %d)" % (expr.op, out, arg.size) + return out + + elif expr.op == '!': + arg = expr.args[0] + out = self.from_expr(arg) + if expr.size <= self.NATIVE_INT_MAX_SIZE: + out = "(~ %s)&%s" % (out, self._size2mask(arg.size)) + else: + out = "bignum_not(%s)" % out + out = "bignum_mask(%s, expr.size)" % out + return out + + elif expr.op in [ + "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", + "fchs", + ]: + return "fpu_%s%d(%s)" % ( + expr.op, + expr.size, + self.from_expr(expr.args[0]), + ) + elif (expr.op.startswith("access_") or + expr.op.startswith("load_") or + expr.op.startswith("fxam_c")): + arg = expr.args[0] + out = self.from_expr(arg) + out = "%s(%s)" % (expr.op, out) + return out + + elif expr.op == "-": + arg = expr.args[0] + out = self.from_expr(arg) + if arg.size <= self.NATIVE_INT_MAX_SIZE: + out = "(%s(%s))" % (expr.op, out) + out = "(%s&%s)" % (out, self._size2mask(arg.size)) + else: + out = "bignum_sub(bignum_from_uint64(0), %s)" % out + out 
= "bignum_mask(%s, %d)"% (out, expr.size) + return out + + elif expr.op.startswith("fpround_"): + return "%s_fp%d(%s)" % ( + expr.op, + expr.size, + self.from_expr(expr.args[0]), + ) + elif expr.op == "sint_to_fp": + size = expr.size + arg = expr.args[0] + if size not in [32, 64]: + raise RuntimeError( + "Unsupported size for sint_to_fp: %r" % size + ) + return "%s_%d(%s)" % (expr.op, size, self.from_expr(arg)) + elif expr.op.startswith("fp_to_sint"): + dest_size = expr.size + arg_size = expr.args[0].size + if (arg_size, dest_size) in [ + (32, 32), (64, 64), (64, 32), + ]: + func = "fp%d_to_sint%d" % (arg_size, dest_size) + else: + raise RuntimeError( + "Unsupported size for fp_to_sint: %r to %r" % ( + arg_size, + dest_size + )) + return "%s(%s)" % (func, self.from_expr(expr.args[0])) + elif expr.op.startswith("fpconvert_fp"): + dest_size = expr.size + arg_size = expr.args[0].size + if (arg_size, dest_size) in [ + (32, 64), (64, 32) + ]: + func = "fp%d_to_fp%d" % (arg_size, dest_size) + else: + raise RuntimeError( + "Unsupported size for fpconvert: %r to %r" % (arg_size, + dest_size) + ) + return "%s(%s)" % (func, self.from_expr(expr.args[0])) + else: + raise NotImplementedError('Unknown op: %r' % expr.op) + + elif len(expr.args) == 2: + if expr.op in self.dct_shift: + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + if expr.size <= self.NATIVE_INT_MAX_SIZE: + out = 'SHIFT_%s(%d, %s, %s)' % ( + self.dct_shift[expr.op].upper(), + expr.args[0].size, + arg0, + arg1 + ) + else: + op = { + "<<": "lshift", + ">>": "rshift", + "a>>": "a_rshift" + } + out = "bignum_%s(%s, bignum_to_uint64(%s))" % ( + op[expr.op], arg0, arg1 + ) + out = "bignum_mask(%s, %d)"% (out, expr.size) + return out + + elif expr.is_associative(): + args = [self.from_expr(arg) + for arg in expr.args] + if expr.size <= self.NATIVE_INT_MAX_SIZE: + out = (" %s " % expr.op).join(args) + out = "((%s)&%s)" % (out, self._size2mask(expr.size)) + else: + op_to_bn_func = { + "+": 
"add", + "*": "mul", + "|": "or", + "^": "xor", + "&": "and", + } + args = list(expr.args) + out = self.from_expr(args.pop()) + while args: + out = 'bignum_mask(bignum_%s(%s, %s), %d)' % ( + op_to_bn_func[expr.op], + out, + self.from_expr(args.pop()), + expr.size + ) + return out + + elif expr.op in ['-']: + return '(((%s&%s) %s (%s&%s))&%s)' % ( + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), + str(expr.op), + self.from_expr(expr.args[1]), + self._size2mask(expr.args[1].size), + self._size2mask(expr.args[0].size) + ) + elif expr.op in self.dct_rot: + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + if expr.size <= self.NATIVE_INT_MAX_SIZE: + out = '(%s(%s, %s, %s) &%s)' % ( + self.dct_rot[expr.op], + expr.args[0].size, + arg0, + arg1, + self._size2mask(expr.args[0].size), + ) + else: + op = { + ">>>": "ror", + "<<<": "rol" + } + out = "bignum_%s(%s, %d, bignum_to_uint64(%s))" % ( + op[expr.op], arg0, expr.size, arg1 + ) + out = "bignum_mask(%s, %d)"% (out, expr.size) + return out + + elif expr.op == 'x86_cpuid': + return "%s(%s, %s)" % (expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1])) + elif expr.op.startswith("fcom"): + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + if not expr.args[0].size <= self.NATIVE_INT_MAX_SIZE: + raise ValueError("Bad semantic: fpu do operations do not support such size") + out = "fpu_%s(%s, %s)" % (expr.op, arg0, arg1) + return out + + elif expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", + "fprem", "fyl2x", "fpatan"]: + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + if not expr.args[0].size <= self.NATIVE_INT_MAX_SIZE: + raise ValueError("Bad semantic: fpu do operations do not support such size") + out = "fpu_%s%d(%s, %s)" % (expr.op, expr.size, arg0, arg1) + return out + + elif expr.op == "segm": + return "segm2addr(jitcpu, %s, %s)" % ( + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) 
+ + elif expr.op in ['udiv', 'umod']: + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + + if expr.size <= self.NATIVE_INT_MAX_SIZE: + out = '%s%d(%s, %s)' % ( + expr.op, + expr.args[0].size, + arg0, + arg1 + ) + else: + out = "bignum_%s(%s, %s)" % ( + expr.op, + arg0, + arg1 + ) + out = "bignum_mask(%s, %d)"% (out, expr.size) + return out + + + + elif expr.op in ['sdiv', 'smod']: + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + + if expr.size <= self.NATIVE_INT_MAX_SIZE: + out = '%s%d(%s, %s)' % ( + expr.op, + expr.args[0].size, + arg0, + arg1 + ) + else: + out = "bignum_%s(%s, %s, %d)" % ( + expr.op, + arg0, + arg1, + expr.size + ) + out = "bignum_mask(%s, %d)"% (out, expr.size) + return out + + elif expr.op in ["bcdadd", "bcdadd_cf"]: + return "%s_%d(%s, %s)" % ( + expr.op, expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) + + + elif expr.op in [ + TOK_EQUAL, + TOK_INF_SIGNED, + TOK_INF_UNSIGNED, + TOK_INF_EQUAL_SIGNED, + TOK_INF_EQUAL_UNSIGNED, + ]: + arg0 = self.from_expr(expr.args[0]) + arg1 = self.from_expr(expr.args[1]) + + if expr.size <= self.NATIVE_INT_MAX_SIZE: + op = TOK_CMP_TO_NATIVE_C[expr.op] + if expr.op in [TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED]: + cast = "(int%d_t)" % expr.args[0].size + else: + cast = "(uint%d_t)" % expr.args[0].size + out = '((%s%s %s %s%s)?1:0)' % ( + cast, + arg0, + op, + cast, + arg1 + ) + else: + op = TOK_CMP_TO_BIGNUM_C[expr.op] + out = "bignum_is_%s(%s, %s)" % ( + op, + arg0, + arg1 + ) + out = "bignum_mask(%s, %d)"% (out, expr.size) + return out + + + else: + raise NotImplementedError('Unknown op: %r' % expr.op) + + elif len(expr.args) >= 3 and expr.is_associative(): # ????? 
+ oper = ['(%s&%s)' % ( + self.from_expr(arg), + self._size2mask(arg.size), + ) + for arg in expr.args] + oper = str(expr.op).join(oper) + return "((%s)&%s)" % ( + oper, + self._size2mask(expr.args[0].size) + ) + else: + raise NotImplementedError('Unknown op: %s' % expr.op) + + def from_ExprSlice(self, expr): + out = self.from_expr(expr.arg) + if expr.arg.size <= self.NATIVE_INT_MAX_SIZE: + # XXX check mask for 64 bit & 32 bit compat + out = "((%s>>%d) &%s)" % ( + out, expr.start, + self._size2mask(expr.stop - expr.start) + ) + else: + out = "bignum_rshift(%s, %d)" % (out, expr.start) + out = "bignum_mask(%s, %d)" % (out, expr.stop - expr.start) + + if expr.size <= self.NATIVE_INT_MAX_SIZE: + # Convert bignum to int + out = "bignum_to_uint64(%s)" % out + return out + + def from_ExprCompose(self, expr): + if expr.size <= self.NATIVE_INT_MAX_SIZE: + + out = [] + # XXX check mask for 64 bit & 32 bit compat + if expr.size in [8, 16, 32, 64, 128]: + size = expr.size + else: + # Uncommon expression size, use at least uint8 + size = max(expr.size, 8) + next_power = 1 + while next_power <= size: + next_power <<= 1 + size = next_power + + dst_cast = "uint%d_t" % size + for index, arg in expr.iter_args(): + out.append("(((%s)(%s & %s)) << %d)" % ( + dst_cast, + self.from_expr(arg), + self._size2mask(arg.size), + index) + ) + out = ' | '.join(out) + return '(' + out + ')' + else: + # Convert all parts to bignum + args = [] + for index, arg in expr.iter_args(): + arg_str = self.from_expr(arg) + if arg.size <= self.NATIVE_INT_MAX_SIZE: + arg_str = '((%s) & %s)' % (arg_str, self._size2mask(arg.size)) + arg_str = 'bignum_from_uint64(%s)' % arg_str + else: + arg_str = 'bignum_mask(%s, %d)' % (arg_str, arg.size) + arg_str = 'bignum_lshift(%s, %d)' % (arg_str, index) + args.append(arg_str) + out = args.pop() + while args: + arg = args.pop() + out = "bignum_or(%s, %s)" % (out, arg) + return out + + +# Register the class +Translator.register(TranslatorC) diff --git 
a/miasm/ir/translators/__init__.py b/miasm/ir/translators/__init__.py new file mode 100644 index 00000000..45e19803 --- /dev/null +++ b/miasm/ir/translators/__init__.py @@ -0,0 +1,13 @@ +"""IR Translators""" +from miasm.ir.translators.translator import Translator +import miasm.ir.translators.C +import miasm.ir.translators.python +import miasm.ir.translators.miasm_ir +import miasm.ir.translators.smt2 +try: + import miasm.ir.translators.z3_ir +except ImportError: + # Nothing to do, z3 not available + pass + +__all__ = ["Translator"] diff --git a/miasm/ir/translators/miasm_ir.py b/miasm/ir/translators/miasm_ir.py new file mode 100644 index 00000000..a460d446 --- /dev/null +++ b/miasm/ir/translators/miasm_ir.py @@ -0,0 +1,45 @@ +from builtins import map +from miasm.ir.translators.translator import Translator + + +class TranslatorMiasm(Translator): + "Translate a Miasm expression to its Python building form" + + __LANG__ = "Miasm" + + def from_ExprId(self, expr): + return "ExprId(%s, size=%d)" % (repr(expr.name), expr.size) + + def from_ExprInt(self, expr): + return "ExprInt(0x%x, %d)" % (int(expr), expr.size) + + def from_ExprCond(self, expr): + return "ExprCond(%s, %s, %s)" % (self.from_expr(expr.cond), + self.from_expr(expr.src1), + self.from_expr(expr.src2)) + + def from_ExprSlice(self, expr): + return "ExprSlice(%s, %d, %d)" % (self.from_expr(expr.arg), + expr.start, + expr.stop) + + def from_ExprOp(self, expr): + return "ExprOp(%s, %s)" % ( + repr(expr.op), + ", ".join(map(self.from_expr, expr.args)) + ) + + def from_ExprCompose(self, expr): + args = ["%s" % self.from_expr(arg) for arg in expr.args] + return "ExprCompose(%s)" % ", ".join(args) + + def from_ExprAssign(self, expr): + return "ExprAssign(%s, %s)" % (self.from_expr(expr.dst), + self.from_expr(expr.src)) + + def from_ExprMem(self, expr): + return "ExprMem(%s, size=%d)" % (self.from_expr(expr.ptr), expr.size) + + +# Register the class +Translator.register(TranslatorMiasm) diff --git 
a/miasm/ir/translators/python.py b/miasm/ir/translators/python.py new file mode 100644 index 00000000..0da2318d --- /dev/null +++ b/miasm/ir/translators/python.py @@ -0,0 +1,98 @@ +from builtins import map +from miasm.expression.expression import ExprInt +from miasm.ir.translators.translator import Translator + + +class TranslatorPython(Translator): + """Translate a Miasm expression to an equivalent Python code + + Memory is abstracted using the unimplemented function: + int memory(int address, int size) + """ + + # Implemented language + __LANG__ = "Python" + # Operations translation + op_no_translate = ["+", "-", "/", "%", ">>", "<<", "&", "^", "|", "*"] + + def from_ExprInt(self, expr): + return str(expr) + + def from_ExprId(self, expr): + return str(expr) + + def from_ExprLoc(self, expr): + return str(expr) + + def from_ExprMem(self, expr): + return "memory(%s, 0x%x)" % ( + self.from_expr(expr.ptr), + expr.size // 8 + ) + + def from_ExprSlice(self, expr): + out = self.from_expr(expr.arg) + if expr.start != 0: + out = "(%s >> %d)" % (out, expr.start) + return "(%s & 0x%x)" % (out, (1 << (expr.stop - expr.start)) - 1) + + def from_ExprCompose(self, expr): + out = [] + for index, arg in expr.iter_args(): + out.append( + "((%s & 0x%x) << %d)" % ( + self.from_expr(arg), + (1 << arg.size) - 1, + index + ) + ) + return "(%s)" % ' | '.join(out) + + def from_ExprCond(self, expr): + return "(%s if (%s) else %s)" % ( + self.from_expr(expr.src1), + self.from_expr(expr.cond), + self.from_expr(expr.src2) + ) + + def from_ExprOp(self, expr): + if expr.op in self.op_no_translate: + args = list(map(self.from_expr, expr.args)) + if len(expr.args) == 1: + return "((%s %s) & 0x%x)" % ( + expr.op, + args[0], + (1 << expr.size) - 1 + ) + else: + return "((%s) & 0x%x)" % ( + (" %s " % expr.op).join(args), + (1 << expr.size) - 1 + ) + elif expr.op == "parity": + return "(%s & 0x1)" % self.from_expr(expr.args[0]) + + elif expr.op in ["<<<", ">>>"]: + amount_raw = expr.args[1] + amount 
= expr.args[1] % ExprInt(amount_raw.size, expr.size) + amount_inv = ExprInt(expr.size, expr.size) - amount + if expr.op == "<<<": + amount, amount_inv = amount_inv, amount + part1 = "(%s >> %s)"% (self.from_expr(expr.args[0]), + self.from_expr(amount)) + part2 = "(%s << %s)"% (self.from_expr(expr.args[0]), + self.from_expr(amount_inv)) + + return "((%s | %s) &0x%x)" % (part1, part2, int(expr.mask)) + + raise NotImplementedError("Unknown operator: %s" % expr.op) + + def from_ExprAssign(self, expr): + return "%s = %s" % ( + self.from_expr(expr.dst), + self.from_expr(expr.src) + ) + + +# Register the class +Translator.register(TranslatorPython) diff --git a/miasm/ir/translators/smt2.py b/miasm/ir/translators/smt2.py new file mode 100644 index 00000000..61a4962f --- /dev/null +++ b/miasm/ir/translators/smt2.py @@ -0,0 +1,326 @@ +from builtins import map +from builtins import range +import logging + +from miasm.ir.translators.translator import Translator +from miasm.expression.smt2_helper import * + +log = logging.getLogger("translator_smt2") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARNING) + +class SMT2Mem(object): + """ + Memory abstraction for TranslatorSMT2. Memory elements are only accessed, + never written. To give a concrete value for a given memory cell in a solver, + add "mem32.get(address, size) == " constraints to your equation. + The endianness of memory accesses is handled accordingly to the "endianness" + attribute. + Note: Will have one memory space for each addressing size used. + For example, if memory is accessed via 32 bits values and 16 bits values, + these access will not occur in the same address space. + + Adapted from Z3Mem + """ + + def __init__(self, endianness="<", name="mem"): + """Initializes an SMT2Mem object with a given @name and @endianness. + @endianness: Endianness of memory representation. 
'<' for little endian, + '>' for big endian. + @name: name of memory Arrays generated. They will be named + name+str(address size) (for example mem32, mem16...). + """ + if endianness not in ['<', '>']: + raise ValueError("Endianness should be '>' (big) or '<' (little)") + self.endianness = endianness + self.mems = {} # Address size -> SMT2 memory array + self.name = name + # initialise address size + self.addr_size = 0 + + def get_mem_array(self, size): + """Returns an SMT Array used internally to represent memory for addresses + of size @size. + @size: integer, size in bit of addresses in the memory to get. + Return an string with the name of the SMT array.. + """ + try: + mem = self.mems[size] + except KeyError: + # Lazy instantiation + self.mems[size] = self.name + str(size) + mem = self.mems[size] + return mem + + def __getitem__(self, addr): + """One byte memory access. Different address sizes with the same value + will result in different memory accesses. + @addr: an SMT2 expression, the address to read. + Return an SMT2 expression of size 8 bits representing a memory access. + """ + size = self.addr_size + mem = self.get_mem_array(size) + return array_select(mem, addr) + + def get(self, addr, size, addr_size): + """ Memory access at address @addr of size @size with + address size @addr_size. + @addr: an SMT2 expression, the address to read. + @size: int, size of the read in bits. + @addr_size: int, size of the address + Return a SMT2 expression representing a memory access. 
+ """ + # set address size per read access + self.addr_size = addr_size + + original_size = size + if original_size % 8 != 0: + # Size not aligned on 8bits -> read more than size and extract after + size = ((original_size // 8) + 1) * 8 + res = self[addr] + if self.is_little_endian(): + for i in range(1, size // 8): + index = bvadd(addr, bit_vec_val(i, addr_size)) + res = bv_concat(self[index], res) + else: + for i in range(1, size // 8): + res = bv_concat(res, self[index]) + if size == original_size: + return res + else: + # Size not aligned, extract right sized result + return bv_extract(original_size-1, 0, res) + + def is_little_endian(self): + """True if this memory is little endian.""" + return self.endianness == "<" + + def is_big_endian(self): + """True if this memory is big endian.""" + return not self.is_little_endian() + + +class TranslatorSMT2(Translator): + """Translate a Miasm expression into an equivalent SMT2 + expression. Memory is abstracted via SMT2Mem. + The result of from_expr will be an SMT2 expression. + + If you want to interact with the memory abstraction after the translation, + you can instantiate your own SMT2Mem that will be equivalent to the one + used by TranslatorSMT2. + + TranslatorSMT2 provides the creation of a valid SMT2 file. For this, + it keeps track of the translated bit vectors. 
+ + Adapted from TranslatorZ3 + """ + + # Implemented language + __LANG__ = "smt2" + + def __init__(self, endianness="<", loc_db=None, **kwargs): + """Instance a SMT2 translator + @endianness: (optional) memory endianness + """ + super(TranslatorSMT2, self).__init__(**kwargs) + # memory abstraction + self._mem = SMT2Mem(endianness) + # map of translated bit vectors + self._bitvectors = dict() + # symbol pool + self.loc_db = loc_db + + def from_ExprInt(self, expr): + return bit_vec_val(expr.arg.arg, expr.size) + + def from_ExprId(self, expr): + if str(expr) not in self._bitvectors: + self._bitvectors[str(expr)] = expr.size + return str(expr) + + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.loc_db is None or self.loc_db.get_location_offset(loc_key) is None: + if str(loc_key) not in self._bitvectors: + self._bitvectors[str(loc_key)] = expr.size + return str(loc_key) + + offset = self.loc_db.get_location_offset(loc_key) + return bit_vec_val(str(offset), expr.size) + + def from_ExprMem(self, expr): + addr = self.from_expr(expr.ptr) + # size to read from memory + size = expr.size + # size of memory address + addr_size = expr.ptr.size + return self._mem.get(addr, size, addr_size) + + def from_ExprSlice(self, expr): + res = self.from_expr(expr.arg) + res = bv_extract(expr.stop-1, expr.start, res) + return res + + def from_ExprCompose(self, expr): + res = None + for arg in expr.args: + e = bv_extract(arg.size-1, 0, self.from_expr(arg)) + if res: + res = bv_concat(e, res) + else: + res = e + return res + + def from_ExprCond(self, expr): + cond = self.from_expr(expr.cond) + src1 = self.from_expr(expr.src1) + src2 = self.from_expr(expr.src2) + + # (and (distinct cond (_ bv0 )) true) + zero = bit_vec_val(0, expr.cond.size) + distinct = smt2_distinct(cond, zero) + distinct_and = smt2_and(distinct, "true") + + # (ite ((and (distinct cond (_ bv0 )) true) src1 src2)) + return smt2_ite(distinct_and, src1, src2) + + def from_ExprOp(self, expr): + args = 
list(map(self.from_expr, expr.args)) + res = args[0] + + if len(args) > 1: + for arg in args[1:]: + if expr.op == "+": + res = bvadd(res, arg) + elif expr.op == "-": + res = bvsub(res, arg) + elif expr.op == "*": + res = bvmul(res, arg) + elif expr.op == "/": + res = bvsdiv(res, arg) + elif expr.op == "sdiv": + res = bvsdiv(res, arg) + elif expr.op == "udiv": + res = bvudiv(res, arg) + elif expr.op == "%": + res = bvsmod(res, arg) + elif expr.op == "smod": + res = bvsmod(res, arg) + elif expr.op == "umod": + res = bvurem(res, arg) + elif expr.op == "&": + res = bvand(res, arg) + elif expr.op == "^": + res = bvxor(res, arg) + elif expr.op == "|": + res = bvor(res, arg) + elif expr.op == "<<": + res = bvshl(res, arg) + elif expr.op == ">>": + res = bvlshr(res, arg) + elif expr.op == "a>>": + res = bvashr(res, arg) + elif expr.op == "<<<": + res = bv_rotate_left(res, arg, expr.size) + elif expr.op == ">>>": + res = bv_rotate_right(res, arg, expr.size) + else: + raise NotImplementedError("Unsupported OP yet: %s" % expr.op) + elif expr.op == 'parity': + arg = bv_extract(7, 0, res) + res = bit_vec_val(1, 1) + for i in range(8): + res = bvxor(res, bv_extract(i, i, arg)) + elif expr.op == '-': + res = bvneg(res) + elif expr.op == "cnttrailzeros": + src = res + size = expr.size + size_smt2 = bit_vec_val(size, size) + one_smt2 = bit_vec_val(1, size) + zero_smt2 = bit_vec_val(0, size) + # src & (1 << (size - 1)) + op = bvand(src, bvshl(one_smt2, bvsub(size_smt2, one_smt2))) + # op != 0 + cond = smt2_distinct(op, zero_smt2) + # ite(cond, size - 1, src) + res = smt2_ite(cond, bvsub(size_smt2, one_smt2), src) + for i in range(size - 2, -1, -1): + # smt2 expression of i + i_smt2 = bit_vec_val(i, size) + # src & (1 << i) + op = bvand(src, bvshl(one_smt2, i_smt2)) + # op != 0 + cond = smt2_distinct(op, zero_smt2) + # ite(cond, i, res) + res = smt2_ite(cond, i_smt2, res) + elif expr.op == "cntleadzeros": + src = res + size = expr.size + one_smt2 = bit_vec_val(1, size) + zero_smt2 = 
bit_vec_val(0, size) + # (src & 1) != 0 + cond = smt2_distinct(bvand(src, one_smt2), zero_smt2) + # ite(cond, 0, src) + res= smt2_ite(cond, zero_smt2, src) + for i in range(size - 1, 0, -1): + index = - i % size + index_smt2 = bit_vec_val(index, size) + # src & (1 << index) + op = bvand(src, bvshl(one_smt2, index_smt2)) + # op != 0 + cond = smt2_distinct(op, zero_smt2) + # ite(cond, index, res) + value_smt2 = bit_vec_val(size - (index + 1), size) + res = smt2_ite(cond, value_smt2, res) + else: + raise NotImplementedError("Unsupported OP yet: %s" % expr.op) + + return res + + def from_ExprAssign(self, expr): + src = self.from_expr(expr.src) + dst = self.from_expr(expr.dst) + return smt2_assert(smt2_eq(src, dst)) + + def to_smt2(self, exprs, logic="QF_ABV", model=False): + """ + Converts a valid SMT2 file for a given list of + SMT2 expressions. + + :param exprs: list of SMT2 expressions + :param logic: SMT2 logic + :param model: model generation flag + :return: String of the SMT2 file + """ + ret = "" + ret += "(set-logic {})\n".format(logic) + + # define bit vectors + for bv in self._bitvectors: + size = self._bitvectors[bv] + ret += "{}\n".format(declare_bv(bv, size)) + + # define memory arrays + for size in self._mem.mems: + mem = self._mem.mems[size] + ret += "{}\n".format(declare_array(mem, bit_vec(size), bit_vec(8))) + + # merge SMT2 expressions + for expr in exprs: + ret += expr + "\n" + + # define action + ret += "(check-sat)\n" + + # enable model generation + if model: + ret += "(get-model)\n" + + return ret + + +# Register the class +Translator.register(TranslatorSMT2) diff --git a/miasm/ir/translators/translator.py b/miasm/ir/translators/translator.py new file mode 100644 index 00000000..c9368f09 --- /dev/null +++ b/miasm/ir/translators/translator.py @@ -0,0 +1,127 @@ +from future.utils import viewitems + +import miasm.expression.expression as m2_expr +from miasm.core.utils import BoundedDict + + +class Translator(object): + "Abstract parent class for 
translators." + + # Registered translators + available_translators = [] + # Implemented language + __LANG__ = "" + + @classmethod + def register(cls, translator): + """Register a translator + @translator: Translator sub-class + """ + cls.available_translators.append(translator) + + @classmethod + def to_language(cls, target_lang, *args, **kwargs): + """Return the corresponding translator instance + @target_lang: str (case insensitive) wanted language + Raise a NotImplementedError in case of unmatched language + """ + target_lang = target_lang.lower() + for translator in cls.available_translators: + if translator.__LANG__.lower() == target_lang: + return translator(*args, **kwargs) + + raise NotImplementedError("Unknown target language: %s" % target_lang) + + @classmethod + def available_languages(cls): + "Return the list of registered languages" + return [translator.__LANG__ for translator in cls.available_translators] + + def __init__(self, cache_size=1000): + """Instance a translator + @cache_size: (optional) Expr cache size + """ + self._cache = BoundedDict(cache_size) + + def from_ExprInt(self, expr): + """Translate an ExprInt + @expr: ExprInt to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprId(self, expr): + """Translate an ExprId + @expr: ExprId to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprLoc(self, expr): + """Translate an ExprLoc + @expr: ExprLoc to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprCompose(self, expr): + """Translate an ExprCompose + @expr: ExprCompose to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprSlice(self, expr): + """Translate an ExprSlice + @expr: ExprSlice to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprOp(self, expr): + """Translate an ExprOp + @expr: ExprOp to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprMem(self, expr): + 
"""Translate an ExprMem + @expr: ExprMem to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprAssign(self, expr): + """Translate an ExprAssign + @expr: ExprAssign to translate + """ + raise NotImplementedError("Abstract method") + + def from_ExprCond(self, expr): + """Translate an ExprCond + @expr: ExprCond to translate + """ + raise NotImplementedError("Abstract method") + + def from_expr(self, expr): + """Translate an expression according to its type + @expr: expression to translate + """ + # Use cache + if expr in self._cache: + return self._cache[expr] + + # Handle Expr type + handlers = { + m2_expr.ExprInt: self.from_ExprInt, + m2_expr.ExprId: self.from_ExprId, + m2_expr.ExprLoc: self.from_ExprLoc, + m2_expr.ExprCompose: self.from_ExprCompose, + m2_expr.ExprSlice: self.from_ExprSlice, + m2_expr.ExprOp: self.from_ExprOp, + m2_expr.ExprMem: self.from_ExprMem, + m2_expr.ExprAssign: self.from_ExprAssign, + m2_expr.ExprCond: self.from_ExprCond + } + for target, handler in viewitems(handlers): + if isinstance(expr, target): + ## Compute value and update the internal cache + ret = handler(expr) + self._cache[expr] = ret + return ret + raise ValueError("Unhandled type for %s" % expr) + diff --git a/miasm/ir/translators/z3_ir.py b/miasm/ir/translators/z3_ir.py new file mode 100644 index 00000000..7dc77cfc --- /dev/null +++ b/miasm/ir/translators/z3_ir.py @@ -0,0 +1,281 @@ +from builtins import map +from builtins import range +import imp +import logging + +# Raise an ImportError if z3 is not available WITHOUT actually importing it +imp.find_module("z3") + +from miasm.ir.translators.translator import Translator + +log = logging.getLogger("translator_z3") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARNING) + +class Z3Mem(object): + """Memory abstration for TranslatorZ3. 
Memory elements are only accessed, + never written. To give a concrete value for a given memory cell in a solver, + add "mem32.get(address, size) == " constraints to your equation. + The endianness of memory accesses is handled accordingly to the "endianness" + attribute. + + Note: Will have one memory space for each addressing size used. + For example, if memory is accessed via 32 bits values and 16 bits values, + these access will not occur in the same address space. + """ + + def __init__(self, endianness="<", name="mem"): + """Initializes a Z3Mem object with a given @name and @endianness. + @endianness: Endianness of memory representation. '<' for little endian, + '>' for big endian. + @name: name of memory Arrays generated. They will be named + name+str(address size) (for example mem32, mem16...). + """ + # Import z3 only on demand + global z3 + import z3 + + if endianness not in ['<', '>']: + raise ValueError("Endianness should be '>' (big) or '<' (little)") + self.endianness = endianness + self.mems = {} # Address size -> memory z3.Array + self.name = name + + def get_mem_array(self, size): + """Returns a z3 Array used internally to represent memory for addresses + of size @size. + @size: integer, size in bit of addresses in the memory to get. + Return a z3 Array: BitVecSort(size) -> BitVecSort(8). + """ + try: + mem = self.mems[size] + except KeyError: + # Lazy instantiation + self.mems[size] = z3.Array(self.name + str(size), + z3.BitVecSort(size), + z3.BitVecSort(8)) + mem = self.mems[size] + return mem + + def __getitem__(self, addr): + """One byte memory access. Different address sizes with the same value + will result in different memory accesses. + @addr: a z3 BitVec, the address to read. + Return a z3 BitVec of size 8 bits representing a memory access. + """ + size = addr.size() + mem = self.get_mem_array(size) + return mem[addr] + + def get(self, addr, size): + """ Memory access at address @addr of size @size. 
+ @addr: a z3 BitVec, the address to read. + @size: int, size of the read in bits. + Return a z3 BitVec of size @size representing a memory access. + """ + original_size = size + if original_size % 8 != 0: + # Size not aligned on 8bits -> read more than size and extract after + size = ((original_size // 8) + 1) * 8 + res = self[addr] + if self.is_little_endian(): + for i in range(1, size // 8): + res = z3.Concat(self[addr+i], res) + else: + for i in range(1, size //8): + res = z3.Concat(res, self[addr+i]) + if size == original_size: + return res + else: + # Size not aligned, extract right sized result + return z3.Extract(original_size-1, 0, res) + + def is_little_endian(self): + """True if this memory is little endian.""" + return self.endianness == "<" + + def is_big_endian(self): + """True if this memory is big endian.""" + return not self.is_little_endian() + + +class TranslatorZ3(Translator): + """Translate a Miasm expression to an equivalent z3 python binding + expression. Memory is abstracted via z3.Array (see Z3Mem). + The result of from_expr will be a z3 Expr. + + If you want to interact with the memory abstraction after the translation, + you can instantiate your own Z3Mem, that will be equivalent to the one + used by TranslatorZ3. 
+ """ + + # Implemented language + __LANG__ = "z3" + # Operations translation + trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] + + def __init__(self, endianness="<", loc_db=None, **kwargs): + """Instance a Z3 translator + @endianness: (optional) memory endianness + """ + # Import z3 only on demand + global z3 + import z3 + + super(TranslatorZ3, self).__init__(**kwargs) + self._mem = Z3Mem(endianness) + self.loc_db = loc_db + + def from_ExprInt(self, expr): + return z3.BitVecVal(expr.arg.arg, expr.size) + + def from_ExprId(self, expr): + return z3.BitVec(str(expr), expr.size) + + def from_ExprLoc(self, expr): + if self.loc_db is None: + # No loc_db, fallback to default name + return z3.BitVec(str(expr), expr.size) + loc_key = expr.loc_key + offset = self.loc_db.get_location_offset(loc_key) + if offset is not None: + return z3.BitVecVal(offset, expr.size) + # fallback to default name + return z3.BitVec(str(loc_key), expr.size) + + def from_ExprMem(self, expr): + addr = self.from_expr(expr.ptr) + return self._mem.get(addr, expr.size) + + def from_ExprSlice(self, expr): + res = self.from_expr(expr.arg) + res = z3.Extract(expr.stop-1, expr.start, res) + return res + + def from_ExprCompose(self, expr): + res = None + for arg in expr.args: + e = z3.Extract(arg.size-1, 0, self.from_expr(arg)) + if res != None: + res = z3.Concat(e, res) + else: + res = e + return res + + def from_ExprCond(self, expr): + cond = self.from_expr(expr.cond) + src1 = self.from_expr(expr.src1) + src2 = self.from_expr(expr.src2) + return z3.If(cond != 0, src1, src2) + + def _abs(self, z3_value): + return z3.If(z3_value >= 0,z3_value,-z3_value) + + def _sdivC(self, num, den): + """Divide (signed) @num by @den (z3 values) as C would + See modint.__div__ for implementation choice + """ + result_sign = z3.If(num * den >= 0, + z3.BitVecVal(1, num.size()), + z3.BitVecVal(-1, num.size()), + ) + return z3.UDiv(self._abs(num), self._abs(den)) * result_sign + + def from_ExprOp(self, expr): + 
args = list(map(self.from_expr, expr.args)) + res = args[0] + + if len(args) > 1: + for arg in args[1:]: + if expr.op in self.trivial_ops: + res = eval("res %s arg" % expr.op) + elif expr.op == ">>": + res = z3.LShR(res, arg) + elif expr.op == "a>>": + res = res >> arg + elif expr.op == "<<<": + res = z3.RotateLeft(res, arg) + elif expr.op == ">>>": + res = z3.RotateRight(res, arg) + elif expr.op == "sdiv": + res = self._sdivC(res, arg) + elif expr.op == "udiv": + res = z3.UDiv(res, arg) + elif expr.op == "smod": + res = res - (arg * (self._sdivC(res, arg))) + elif expr.op == "umod": + res = z3.URem(res, arg) + elif expr.op == "==": + res = z3.If( + args[0] == args[1], + z3.BitVecVal(1, 1), + z3.BitVecVal(0, 1) + ) + elif expr.op == " +#include "structmember.h" +#include +#include +#include "compat_py23.h" +#include "queue.h" +#include "vm_mngr.h" +#include "vm_mngr_py.h" +#include "bn.h" +#include "JitCore.h" + + +void JitCpu_dealloc(JitCpu* self) +{ + Py_TYPE(self)->tp_free((PyObject*)self); +} + + +PyObject * JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + JitCpu *self; + + self = (JitCpu *)type->tp_alloc(type, 0); + return (PyObject *)self; +} + +PyObject * JitCpu_get_vmmngr(JitCpu *self, void *closure) +{ + if (self->pyvm) { + Py_INCREF(self->pyvm); + return (PyObject*)self->pyvm; + } + Py_INCREF(Py_None); + return Py_None; +} + +PyObject * JitCpu_set_vmmngr(JitCpu *self, PyObject *value, void *closure) +{ + self->pyvm = (VmMngr*)value; + return 0; +} + +PyObject * JitCpu_get_jitter(JitCpu *self, void *closure) +{ + if (self->jitter) { + Py_INCREF(self->jitter); + return self->jitter; + } + Py_INCREF(Py_None); + return Py_None; +} + +PyObject * JitCpu_set_jitter(JitCpu *self, PyObject *value, void *closure) +{ + self->jitter = value; + return 0; +} + +uint8_t MEM_LOOKUP_08(JitCpu* jitcpu, uint64_t addr) +{ + return vm_MEM_LOOKUP_08(&(jitcpu->pyvm->vm_mngr), addr); +} + +uint16_t MEM_LOOKUP_16(JitCpu* jitcpu, uint64_t addr) +{ + return 
vm_MEM_LOOKUP_16(&(jitcpu->pyvm->vm_mngr), addr); +} + +uint32_t MEM_LOOKUP_32(JitCpu* jitcpu, uint64_t addr) +{ + return vm_MEM_LOOKUP_32(&(jitcpu->pyvm->vm_mngr), addr); +} + +uint64_t MEM_LOOKUP_64(JitCpu* jitcpu, uint64_t addr) +{ + return vm_MEM_LOOKUP_64(&(jitcpu->pyvm->vm_mngr), addr); +} + +bn_t MEM_LOOKUP_BN_BN(JitCpu* jitcpu, int size, bn_t addr) +{ + uint64_t ptr; + int i; + uint8_t tmp; + bn_t val = bignum_from_int(0); + + ptr = bignum_to_uint64(addr); + + + for (i=0; i < size; i += 8) { + tmp = vm_MEM_LOOKUP_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr); + ptr += 1; + val = bignum_or(val, bignum_lshift(bignum_from_int(tmp), i)); + } + + return val; +} + + +uint64_t MEM_LOOKUP_BN_INT(JitCpu* jitcpu, int size, bn_t addr) +{ + uint64_t ptr; + uint64_t val = 0; + + ptr = bignum_to_uint64(addr); + + switch (size) { + case 8: + val = vm_MEM_LOOKUP_08(&(jitcpu->pyvm->vm_mngr), ptr); + break; + case 16: + val = vm_MEM_LOOKUP_16(&(jitcpu->pyvm->vm_mngr), ptr); + break; + case 32: + val = vm_MEM_LOOKUP_32(&(jitcpu->pyvm->vm_mngr), ptr); + break; + case 64: + val = vm_MEM_LOOKUP_64(&(jitcpu->pyvm->vm_mngr), ptr); + break; + default: + fprintf(stderr, "Error: bad READ size %d\n", size); + exit(-1); + break; + } + + return val; +} + + + +bn_t MEM_LOOKUP_INT_BN(JitCpu* jitcpu, int size, uint64_t addr) +{ + int i; + uint8_t tmp; + bn_t val = bignum_from_int(0); + + for (i=0; i < size; i += 8) { + tmp = vm_MEM_LOOKUP_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr); + addr += 1; + val = bignum_or(val, bignum_lshift(bignum_from_int(tmp), i)); + } + + return val; +} + + +void MEM_LOOKUP_INT_BN_TO_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr) +{ + bn_t ret; + + if (size % 8) { + fprintf(stderr, "Bad size %d\n", size); + exit(-1); + } + + ret = MEM_LOOKUP_INT_BN(jitcpu, size, addr); + memcpy(ptr, (char*)&ret, size / 8); +} + + +void MEM_WRITE_BN_BN(JitCpu* jitcpu, int size, bn_t addr, bn_t src) +{ + uint64_t ptr; + int val; + int i; + + ptr = bignum_to_uint64(addr); + 
for (i=0; i < size; i += 8) { + val = bignum_to_uint64(src) & 0xFF; + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, val); + ptr += 1; + src = bignum_rshift(src, 8); + } +} + + +void MEM_WRITE_BN_INT(JitCpu* jitcpu, int size, bn_t addr, uint64_t src) +{ + uint64_t ptr; + ptr = bignum_to_uint64(addr); + + switch (size) { + case 8: + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, (unsigned char)src); + break; + case 16: + vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, (unsigned short)src); + break; + case 32: + vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, (unsigned int)src); + break; + case 64: + vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, src); + break; + default: + fprintf(stderr, "Error: bad write size %d\n", size); + exit(-1); + break; + } +} + +void MEM_WRITE_INT_BN(JitCpu* jitcpu, int size, uint64_t addr, bn_t src) +{ + int val; + int i; + + for (i=0; i < size; i += 8) { + val = bignum_to_uint64(src) & 0xFF; + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, val); + addr += 1; + src = bignum_rshift(src, 8); + } +} + + +void MEM_WRITE_INT_BN_FROM_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr) +{ + bn_t val; + + if (size % 8) { + fprintf(stderr, "Bad size %d\n", size); + exit(-1); + } + + val = bignum_from_int(0); + memcpy(&val, ptr, size / 8); + MEM_WRITE_INT_BN(jitcpu, size, addr, val); +} + + + +PyObject* vm_get_mem(JitCpu *self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_len; + + uint64_t addr; + uint64_t size; + PyObject *obj_out; + char * buf_out; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_len)) + return NULL; + + PyGetInt(py_addr, addr); + PyGetInt(py_len, size); + + ret = vm_read_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, &buf_out, size); + if (ret < 0) { + PyErr_SetString(PyExc_RuntimeError, "cannot find address"); + return NULL; + } + + obj_out = PyBytes_FromStringAndSize(buf_out, size); + free(buf_out); + return obj_out; +} diff --git 
a/miasm/jitter/JitCore.h b/miasm/jitter/JitCore.h new file mode 100644 index 00000000..15efc7d2 --- /dev/null +++ b/miasm/jitter/JitCore.h @@ -0,0 +1,306 @@ +#ifndef JITCORE_H +#define JITCORE_H + +#if _WIN32 +#define _MIASM_EXPORT __declspec(dllexport) +#else +#define _MIASM_EXPORT +#endif + +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} +#define RAISE_ret0(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return 0;} + + +#if PY_MAJOR_VERSION >= 3 +#define getset_reg_bn(regname, size) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + bn_t bn; \ + int j; \ + PyObject* py_long; \ + PyObject* py_long_new; \ + PyObject* py_tmp; \ + PyObject* cst_32; \ + uint64_t tmp; \ + py_long = PyLong_FromLong(0); \ + cst_32 = PyLong_FromLong(32); \ + bn = ((vm_cpu_t*)(self->cpu))-> regname; \ + bn = bignum_mask(bn, (size)); \ + for (j = BN_BYTE_SIZE - 4; j >= 0 ; j -= 4) { \ + tmp = bignum_to_uint64(bignum_mask(bignum_rshift(bn, 8 * j), 32)); \ + py_tmp = PyLong_FromUnsignedLong(tmp); \ + py_long_new = PyObject_CallMethod(py_long, "__lshift__", "O", cst_32); \ + Py_DECREF(py_long); \ + py_long = PyObject_CallMethod(py_long_new, "__add__", "O", py_tmp); \ + Py_DECREF(py_long_new); \ + Py_DECREF(py_tmp); \ + } \ + Py_DECREF(cst_32); \ + return py_long; \ + } \ + \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + bn_t bn; \ + int j; \ + PyObject* py_long = value; \ + PyObject* py_long_new; \ + PyObject* py_tmp; \ + PyObject* cst_32; \ + PyObject* cst_ffffffff; \ + uint64_t tmp; \ + if (PyLong_Check(py_long)){ \ + Py_INCREF(py_long); \ + } else { \ + RAISE(PyExc_TypeError,"arg must be int"); \ + } \ + \ + cst_ffffffff = PyLong_FromLong(0xffffffff); \ + cst_32 = PyLong_FromLong(32); \ + bn = bignum_from_int(0); \ + \ + for (j = 0; j < BN_BYTE_SIZE; j += 4) { \ + py_tmp = PyObject_CallMethod(py_long, "__and__", "O", cst_ffffffff); \ + py_long_new = 
PyObject_CallMethod(py_long, "__rshift__", "O", cst_32); \ + Py_DECREF(py_long); \ + py_long = py_long_new; \ + tmp = PyLong_AsUnsignedLongMask(py_tmp); \ + Py_DECREF(py_tmp); \ + bn = bignum_or(bn, bignum_lshift(bignum_from_uint64(tmp), 8 * j)); \ + } \ + \ + ((vm_cpu_t*)(self->cpu))-> regname = bignum_mask(bn, (size)); \ + Py_DECREF(py_long); \ + Py_DECREF(cst_32); \ + Py_DECREF(cst_ffffffff); \ + return 0; \ + } + + +#else +#define getset_reg_bn(regname, size) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + bn_t bn; \ + int j; \ + PyObject* py_long; \ + PyObject* py_long_new; \ + PyObject* py_tmp; \ + PyObject* cst_32; \ + uint64_t tmp; \ + py_long = PyLong_FromLong(0); \ + cst_32 = PyLong_FromLong(32); \ + bn = ((vm_cpu_t*)(self->cpu))-> regname; \ + bn = bignum_mask(bn, (size)); \ + for (j = BN_BYTE_SIZE - 4; j >= 0 ; j -= 4) { \ + tmp = bignum_to_uint64(bignum_mask(bignum_rshift(bn, 8 * j), 32)); \ + py_tmp = PyLong_FromUnsignedLong(tmp); \ + py_long_new = PyObject_CallMethod(py_long, "__lshift__", "O", cst_32); \ + Py_DECREF(py_long); \ + py_long = PyObject_CallMethod(py_long_new, "__add__", "O", py_tmp); \ + Py_DECREF(py_long_new); \ + Py_DECREF(py_tmp); \ + } \ + Py_DECREF(cst_32); \ + return py_long; \ + } \ + \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + bn_t bn; \ + int j; \ + PyObject* py_long = value; \ + PyObject* py_long_new; \ + PyObject* py_tmp; \ + PyObject* cst_32; \ + PyObject* cst_ffffffff; \ + uint64_t tmp; \ + \ + if (PyInt_Check(py_long)){ \ + tmp = (uint64_t)PyInt_AsLong(py_long); \ + py_long = PyLong_FromLong((long)tmp); \ + } else if (PyLong_Check(py_long)){ \ + Py_INCREF(py_long); \ + } \ + else{ \ + RAISE(PyExc_TypeError,"arg must be int"); \ + } \ + \ + cst_ffffffff = PyLong_FromLong(0xffffffff); \ + cst_32 = PyLong_FromLong(32); \ + bn = bignum_from_int(0); \ + \ + for (j = 0; j < BN_BYTE_SIZE; j += 4) { \ + py_tmp = PyObject_CallMethod(py_long, 
"__and__", "O", cst_ffffffff); \ + py_long_new = PyObject_CallMethod(py_long, "__rshift__", "O", cst_32); \ + Py_DECREF(py_long); \ + py_long = py_long_new; \ + tmp = PyLong_AsUnsignedLongMask(py_tmp); \ + Py_DECREF(py_tmp); \ + bn = bignum_or(bn, bignum_lshift(bignum_from_uint64(tmp), 8 * j)); \ + } \ + \ + ((vm_cpu_t*)(self->cpu))-> regname = bignum_mask(bn, (size)); \ + Py_DECREF(py_long); \ + Py_DECREF(cst_32); \ + Py_DECREF(cst_ffffffff); \ + return 0; \ + } +#endif + + + + + + + + + + + +#define getset_reg_u64(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)(self->cpu))-> regname )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint64_t val; \ + PyGetInt_retneg(value, val); \ + ((vm_cpu_t*)(self->cpu))-> regname = val; \ + return 0; \ + } + +#define getset_reg_u32(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint32_t)(((vm_cpu_t*)(self->cpu))-> regname )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint32_t val; \ + PyGetInt_retneg(value, val); \ + ((vm_cpu_t*)(self->cpu))-> regname = val; \ + return 0; \ + } + + +#define getset_reg_u16(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint16_t)(((vm_cpu_t*)(self->cpu))-> regname )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint16_t val; \ + PyGetInt_retneg(value, val); \ + ((vm_cpu_t*)(self->cpu))-> regname = val; \ + return 0; \ + } + + +#define get_reg(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)((vm_cpu_t*)(self->cpu))->reg); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + +#define get_reg_bn(reg, size) do { \ + bn_t bn; \ + int j; 
\ + PyObject* py_long; \ + PyObject* py_long_new; \ + PyObject* py_tmp; \ + PyObject* cst_32; \ + uint64_t tmp; \ + py_long = PyLong_FromLong(0); \ + cst_32 = PyLong_FromLong(32); \ + bn = ((vm_cpu_t*)(self->cpu))-> reg; \ + bn = bignum_mask(bn, size); \ + for (j = BN_BYTE_SIZE - 4; j >= 0 ; j -= 4) { \ + tmp = bignum_to_uint64(bignum_mask(bignum_rshift(bn, 8 * j), 32)); \ + py_tmp = PyLong_FromUnsignedLong(tmp); \ + py_long_new = PyObject_CallMethod(py_long, "__lshift__", "O", cst_32); \ + Py_DECREF(py_long); \ + py_long = PyObject_CallMethod(py_long_new, "__add__", "O", py_tmp); \ + Py_DECREF(py_long_new); \ + Py_DECREF(py_tmp); \ + } \ + PyDict_SetItemString(dict, #reg, py_long); \ + Py_DECREF(py_long); \ + Py_DECREF(cst_32); \ + } while(0); + + +#define get_reg_off(reg) do { \ + o = PyLong_FromUnsignedLongLong((uint64_t)offsetof(vm_cpu_t, reg)); \ + PyDict_SetItemString(dict, #reg, o); \ + Py_DECREF(o); \ + } while(0); + + + + +typedef struct { + uint8_t is_local; + uint64_t address; +} block_id; + +typedef struct { + PyObject_HEAD + VmMngr *pyvm; + PyObject *jitter; + void* cpu; +} JitCpu; + + +typedef struct _reg_dict{ + char* name; + size_t offset; + size_t size; +} reg_dict; + + + +void JitCpu_dealloc(JitCpu* self); +PyObject * JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +PyObject * JitCpu_get_vmmngr(JitCpu *self, void *closure); +PyObject * JitCpu_set_vmmngr(JitCpu *self, PyObject *value, void *closure); +PyObject * JitCpu_get_jitter(JitCpu *self, void *closure); +PyObject * JitCpu_set_jitter(JitCpu *self, PyObject *value, void *closure); +void Resolve_dst(block_id* BlockDst, uint64_t addr, uint64_t is_local); + +#define Resolve_dst(b, arg_addr, arg_is_local) do {(b)->address = (arg_addr); (b)->is_local = (arg_is_local);} while(0) + + + +_MIASM_EXPORT uint8_t MEM_LOOKUP_08(JitCpu* jitcpu, uint64_t addr); +_MIASM_EXPORT uint16_t MEM_LOOKUP_16(JitCpu* jitcpu, uint64_t addr); +_MIASM_EXPORT uint32_t MEM_LOOKUP_32(JitCpu* jitcpu, uint64_t 
addr); +_MIASM_EXPORT uint64_t MEM_LOOKUP_64(JitCpu* jitcpu, uint64_t addr); + +_MIASM_EXPORT bn_t MEM_LOOKUP_BN_BN(JitCpu* jitcpu, int size, bn_t addr); +_MIASM_EXPORT bn_t MEM_LOOKUP_INT_BN(JitCpu* jitcpu, int size, uint64_t addr); + +_MIASM_EXPORT uint64_t MEM_LOOKUP_BN_INT(JitCpu* jitcpu, int size, bn_t addr); + +_MIASM_EXPORT void MEM_WRITE_BN_BN(JitCpu* jitcpu, int size, bn_t addr, bn_t src); +_MIASM_EXPORT void MEM_WRITE_BN_INT(JitCpu* jitcpu, int size, bn_t addr, uint64_t src); +_MIASM_EXPORT void MEM_WRITE_INT_BN(JitCpu* jitcpu, int size, uint64_t addr, bn_t src); + + +PyObject* vm_get_mem(JitCpu *self, PyObject* args); + +_MIASM_EXPORT void MEM_LOOKUP_INT_BN_TO_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr); +_MIASM_EXPORT void MEM_WRITE_INT_BN_FROM_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr); + + + +#define VM_exception_flag (jitcpu->pyvm->vm_mngr.exception_flags) +#define CPU_exception_flag (((vm_cpu_t*)jitcpu->cpu)->exception_flags) +#define CPU_exception_flag_at_instr ((CPU_exception_flag) && ((CPU_exception_flag) > EXCEPT_NUM_UPDT_EIP)) +#define JIT_RET_EXCEPTION 1 +#define JIT_RET_NO_EXCEPTION 0 + +#endif diff --git a/miasm/jitter/Jitgcc.c b/miasm/jitter/Jitgcc.c new file mode 100644 index 00000000..0a39c998 --- /dev/null +++ b/miasm/jitter/Jitgcc.c @@ -0,0 +1,106 @@ +#include +#include +#include +#include "compat_py23.h" + +typedef struct { + uint8_t is_local; + uint64_t address; +} block_id; + +typedef int (*jitted_func)(block_id*, PyObject*); + + +PyObject* gcc_exec_block(PyObject* self, PyObject* args) +{ + jitted_func func; + PyObject* jitcpu; + PyObject* func_py; + PyObject* lbl2ptr; + PyObject* stop_offsets; + PyObject* retaddr = NULL; + int status; + block_id BlockDst; + uint64_t max_exec_per_call = 0; + uint64_t cpt; + int do_cpt; + + + if (!PyArg_ParseTuple(args, "OOOO|K", + &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, + &max_exec_per_call)) + return NULL; + + /* The loop will decref retaddr always once */ + 
Py_INCREF(retaddr); + + if (max_exec_per_call == 0) { + do_cpt = 0; + cpt = 1; + } else { + do_cpt = 1; + cpt = max_exec_per_call; + } + + + + for (;;) { + if (cpt == 0) + return retaddr; + if (do_cpt) + cpt --; + // Init + BlockDst.is_local = 0; + BlockDst.address = 0; + + // Get the expected jitted function address + func_py = PyDict_GetItem(lbl2ptr, retaddr); + if (func_py) + func = (jitted_func) PyLong_AsVoidPtr((PyObject*) func_py); + else { + if (BlockDst.is_local == 1) { + fprintf(stderr, "return on local label!\n"); + exit(EXIT_FAILURE); + } + // retaddr is not jitted yet + return retaddr; + } + // Execute it + status = func(&BlockDst, jitcpu); + Py_DECREF(retaddr); + retaddr = PyLong_FromUnsignedLongLong(BlockDst.address); + + // Check exception + if (status) + return retaddr; + + // Check stop offsets + if (PySet_Contains(stop_offsets, retaddr)) + return retaddr; + } +} + + + +static PyObject *GccError; + + +static PyMethodDef GccMethods[] = { + {"gcc_exec_block", gcc_exec_block, METH_VARARGS, + "gcc exec block"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + + +MOD_INIT(Jitgcc) +{ + PyObject *module; + + MOD_DEF(module, "Jitgcc", "gcc module", GccMethods); + + if (module == NULL) + return NULL; + + return module; +} diff --git a/miasm/jitter/Jitllvm.c b/miasm/jitter/Jitllvm.c new file mode 100644 index 00000000..efe5250f --- /dev/null +++ b/miasm/jitter/Jitllvm.c @@ -0,0 +1,99 @@ +#include + +#include + +#include +#include "compat_py23.h" +#include "queue.h" +#include "vm_mngr.h" +#include "vm_mngr_py.h" +#include "bn.h" +#include "JitCore.h" +// Needed to get the JitCpu.cpu offset, arch independent +#include "arch/JitCore_x86.h" + +PyObject* llvm_exec_block(PyObject* self, PyObject* args) +{ + uint64_t (*func)(void*, void*, void*, uint8_t*); + vm_cpu_t* cpu; + vm_mngr_t* vm; + uint64_t ret; + JitCpu* jitcpu; + uint8_t status; + PyObject* func_py; + PyObject* lbl2ptr; + PyObject* stop_offsets; + PyObject* retaddr = NULL; + uint64_t max_exec_per_call = 
0; + uint64_t cpt; + int do_cpt; + + if (!PyArg_ParseTuple(args, "OOOO|K", + &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, + &max_exec_per_call)) + return NULL; + + cpu = jitcpu->cpu; + vm = &(jitcpu->pyvm->vm_mngr); + /* The loop will decref retaddr always once */ + Py_INCREF(retaddr); + + if (max_exec_per_call == 0) { + do_cpt = 0; + cpt = 1; + } else { + do_cpt = 1; + cpt = max_exec_per_call; + } + + for (;;) { + // Handle cpt + if (cpt == 0) + return retaddr; + if (do_cpt) + cpt --; + + // Get the expected jitted function address + func_py = PyDict_GetItem(lbl2ptr, retaddr); + if (func_py) + func = PyLong_AsVoidPtr((PyObject*) func_py); + else + // retaddr is not jitted yet + return retaddr; + + // Execute it + ret = func((void*) jitcpu, (void*)(intptr_t) cpu, (void*)(intptr_t) vm, &status); + Py_DECREF(retaddr); + retaddr = PyLong_FromUnsignedLongLong(ret); + + // Check exception + if (status) + return retaddr; + + // Check stop offsets + if (PySet_Contains(stop_offsets, retaddr)) + return retaddr; + } +} + + +static PyMethodDef LLVMMethods[] = { + {"llvm_exec_block", llvm_exec_block, METH_VARARGS, + "llvm exec block"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + + + +MOD_INIT(Jitllvm) +{ + PyObject *module; + + MOD_DEF(module, "Jitllvm", "llvm module", LLVMMethods); + + if (module == NULL) + return NULL; + + return module; +} diff --git a/miasm/jitter/__init__.py b/miasm/jitter/__init__.py new file mode 100644 index 00000000..460e327d --- /dev/null +++ b/miasm/jitter/__init__.py @@ -0,0 +1 @@ +"JustInTime compilation feature" diff --git a/miasm/jitter/arch/JitCore_aarch64.c b/miasm/jitter/arch/JitCore_aarch64.c new file mode 100644 index 00000000..9e1a870e --- /dev/null +++ b/miasm/jitter/arch/JitCore_aarch64.c @@ -0,0 +1,562 @@ +#include +#include "structmember.h" +#include +#include +#include "../compat_py23.h" +#include "../queue.h" +#include "../vm_mngr.h" +#include "../vm_mngr_py.h" +#include "../bn.h" +#include "../JitCore.h" +#include 
"../op_semantics.h" +#include "JitCore_aarch64.h" + + + +reg_dict gpreg_dict[] = { + {.name = "X0", .offset = offsetof(vm_cpu_t, X0), .size = 64}, + {.name = "X1", .offset = offsetof(vm_cpu_t, X1), .size = 64}, + {.name = "X2", .offset = offsetof(vm_cpu_t, X2), .size = 64}, + {.name = "X3", .offset = offsetof(vm_cpu_t, X3), .size = 64}, + {.name = "X4", .offset = offsetof(vm_cpu_t, X4), .size = 64}, + {.name = "X5", .offset = offsetof(vm_cpu_t, X5), .size = 64}, + {.name = "X6", .offset = offsetof(vm_cpu_t, X6), .size = 64}, + {.name = "X7", .offset = offsetof(vm_cpu_t, X7), .size = 64}, + {.name = "X8", .offset = offsetof(vm_cpu_t, X8), .size = 64}, + {.name = "X9", .offset = offsetof(vm_cpu_t, X9), .size = 64}, + {.name = "X10", .offset = offsetof(vm_cpu_t, X10), .size = 64}, + {.name = "X11", .offset = offsetof(vm_cpu_t, X11), .size = 64}, + {.name = "X12", .offset = offsetof(vm_cpu_t, X12), .size = 64}, + {.name = "X13", .offset = offsetof(vm_cpu_t, X13), .size = 64}, + {.name = "X14", .offset = offsetof(vm_cpu_t, X14), .size = 64}, + {.name = "X15", .offset = offsetof(vm_cpu_t, X15), .size = 64}, + {.name = "X16", .offset = offsetof(vm_cpu_t, X16), .size = 64}, + {.name = "X17", .offset = offsetof(vm_cpu_t, X17), .size = 64}, + {.name = "X18", .offset = offsetof(vm_cpu_t, X18), .size = 64}, + {.name = "X19", .offset = offsetof(vm_cpu_t, X19), .size = 64}, + {.name = "X20", .offset = offsetof(vm_cpu_t, X20), .size = 64}, + {.name = "X21", .offset = offsetof(vm_cpu_t, X21), .size = 64}, + {.name = "X22", .offset = offsetof(vm_cpu_t, X22), .size = 64}, + {.name = "X23", .offset = offsetof(vm_cpu_t, X23), .size = 64}, + {.name = "X24", .offset = offsetof(vm_cpu_t, X24), .size = 64}, + {.name = "X25", .offset = offsetof(vm_cpu_t, X25), .size = 64}, + {.name = "X26", .offset = offsetof(vm_cpu_t, X26), .size = 64}, + {.name = "X27", .offset = offsetof(vm_cpu_t, X27), .size = 64}, + {.name = "X28", .offset = offsetof(vm_cpu_t, X28), .size = 64}, + {.name = "X29", 
.offset = offsetof(vm_cpu_t, X29), .size = 64}, + {.name = "LR", .offset = offsetof(vm_cpu_t, LR), .size = 64}, + + {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 64}, + {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 64}, + + {.name = "zf", .offset = offsetof(vm_cpu_t, zf), .size = 8}, + {.name = "nf", .offset = offsetof(vm_cpu_t, nf), .size = 8}, + {.name = "of", .offset = offsetof(vm_cpu_t, of), .size = 8}, + {.name = "cf", .offset = offsetof(vm_cpu_t, cf), .size = 8}, + + {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, + {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, + +}; + +/************************** JitCpu object **************************/ + + + + +PyObject* cpu_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(X0); + get_reg(X1); + get_reg(X2); + get_reg(X3); + get_reg(X4); + get_reg(X5); + get_reg(X6); + get_reg(X7); + get_reg(X8); + get_reg(X9); + get_reg(X10); + get_reg(X11); + get_reg(X12); + get_reg(X13); + get_reg(X14); + get_reg(X15); + get_reg(X16); + get_reg(X17); + get_reg(X18); + get_reg(X19); + get_reg(X20); + get_reg(X21); + get_reg(X22); + get_reg(X23); + get_reg(X24); + get_reg(X25); + get_reg(X26); + get_reg(X27); + get_reg(X28); + get_reg(X29); + get_reg(LR); + get_reg(SP); + get_reg(PC); + + get_reg(zf); + get_reg(nf); + get_reg(of); + get_reg(cf); + + return dict; +} + + + +PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + PyObject *d_key, *d_value = NULL; + Py_ssize_t pos = 0; + char* d_key_name; + uint64_t val; + unsigned int i, found; + + if (!PyArg_ParseTuple(args, "O", &dict)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + PyGetStr(d_key_name, d_key); + PyGetInt(d_value, val); + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); 
i++){ + if (strcmp(d_key_name, gpreg_dict[i].name)) + continue; + *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unknown key: %s\n", d_key_name); + RAISE(PyExc_ValueError, "unknown reg"); + } + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject * cpu_init_regs(JitCpu* self) +{ + memset(self->cpu, 0, sizeof(vm_cpu_t)); + + Py_INCREF(Py_None); + return Py_None; +} + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + printf("X0 %.16"PRIX64" X1 %.16"PRIX64" X2 %.16"PRIX64" X3 %.16"PRIX64" "\ + "X4 %.16"PRIX64" X5 %.16"PRIX64" X6 %.16"PRIX64" X7 %.16"PRIX64"\n", + vmcpu->X0, vmcpu->X1, vmcpu->X2, vmcpu->X3, vmcpu->X4, vmcpu->X5, vmcpu->X6, vmcpu->X7); + printf("X8 %.16"PRIX64" X9 %.16"PRIX64" X10 %.16"PRIX64" X11 %.16"PRIX64" "\ + "X12 %.16"PRIX64" X13 %.16"PRIX64" X14 %.16"PRIX64" X15 %.16"PRIX64"\n", + vmcpu->X8, vmcpu->X9, vmcpu->X10, vmcpu->X11, + vmcpu->X12, vmcpu->X13, vmcpu->X14, vmcpu->X15); + printf("X16 %.16"PRIX64" X17 %.16"PRIX64" X18 %.16"PRIX64" X19 %.16"PRIX64" "\ + "X20 %.16"PRIX64" X21 %.16"PRIX64" X22 %.16"PRIX64" X23 %.16"PRIX64"\n", + vmcpu->X16, vmcpu->X17, vmcpu->X18, vmcpu->X19, + vmcpu->X20, vmcpu->X21, vmcpu->X22, vmcpu->X23); + printf("X24 %.16"PRIX64" X25 %.16"PRIX64" X26 %.16"PRIX64" X27 %.16"PRIX64" "\ + "X28 %.16"PRIX64" X29 %.16"PRIX64" LR %.16"PRIX64"\n", + vmcpu->X24, vmcpu->X25, vmcpu->X26, vmcpu->X27, + vmcpu->X28, vmcpu->X29, vmcpu->LR); + + + printf("SP %.16"PRIX64" PC %.16"PRIX64" "\ + "zf %"PRIX32" nf %"PRIX32" of %"PRIX32" cf %"PRIX32"\n", + vmcpu->SP, vmcpu->PC, + vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); +} + + +PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = self->cpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) +{ + return cpu_dump_gpregs(self, args); +} + + +PyObject* cpu_set_exception(JitCpu* self, 
PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + ((vm_cpu_t*)self->cpu)->exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_exception(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); +} + + + + + +void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) +{ + PyObject *result; + + if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) + return; + result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); + Py_DECREF(result); + +} + +void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) +{ + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 8); +} + +void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) +{ + vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 16); +} + +void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) +{ + vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 32); +} + +void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) +{ + vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 64); +} + + +PyObject* vm_set_mem(JitCpu *self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_buffer; + Py_ssize_t py_length; + + char * buffer; + uint64_t size; + uint64_t addr; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + if(!PyBytes_Check(py_buffer)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + size = PyBytes_Size(py_buffer); + PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); + + ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); + if 
(ret < 0) + RAISE(PyExc_TypeError,"arg must be str"); + check_automod(self, addr, size*8); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, + "X"}, + {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, + "X"}, + {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, + "X"}, + {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, + "X"}, + {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, + "X"}, + {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, + "X"}, + {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, + "X"}, + {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, + "X"}, + {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + self->cpu = malloc(sizeof(vm_cpu_t)); + if (self->cpu == NULL) { + fprintf(stderr, "cannot alloc vm_cpu_t\n"); + exit(EXIT_FAILURE); + } + return 0; +} + + + +getset_reg_u64(X0); +getset_reg_u64(X1); +getset_reg_u64(X2); +getset_reg_u64(X3); +getset_reg_u64(X4); +getset_reg_u64(X5); +getset_reg_u64(X6); +getset_reg_u64(X7); +getset_reg_u64(X8); +getset_reg_u64(X9); +getset_reg_u64(X10); +getset_reg_u64(X11); +getset_reg_u64(X12); +getset_reg_u64(X13); +getset_reg_u64(X14); +getset_reg_u64(X15); +getset_reg_u64(X16); +getset_reg_u64(X17); +getset_reg_u64(X18); +getset_reg_u64(X19); +getset_reg_u64(X20); +getset_reg_u64(X21); +getset_reg_u64(X22); +getset_reg_u64(X23); +getset_reg_u64(X24); +getset_reg_u64(X25); +getset_reg_u64(X26); +getset_reg_u64(X27); +getset_reg_u64(X28); +getset_reg_u64(X29); +getset_reg_u64(LR); +getset_reg_u64(SP); +getset_reg_u64(PC); + +getset_reg_u32(zf); +getset_reg_u32(nf); +getset_reg_u32(of); +getset_reg_u32(cf); + + +getset_reg_u32(exception_flags); 
+getset_reg_u32(interrupt_num); + + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg_off(exception_flags); + + get_reg_off(X0); + get_reg_off(X1); + get_reg_off(X2); + get_reg_off(X3); + get_reg_off(X4); + get_reg_off(X5); + get_reg_off(X6); + get_reg_off(X7); + get_reg_off(X8); + get_reg_off(X9); + get_reg_off(X10); + get_reg_off(X11); + get_reg_off(X12); + get_reg_off(X13); + get_reg_off(X14); + get_reg_off(X15); + get_reg_off(X16); + get_reg_off(X17); + get_reg_off(X18); + get_reg_off(X19); + get_reg_off(X20); + get_reg_off(X21); + get_reg_off(X22); + get_reg_off(X23); + get_reg_off(X24); + get_reg_off(X25); + get_reg_off(X26); + get_reg_off(X27); + get_reg_off(X28); + get_reg_off(X29); + get_reg_off(LR); + get_reg_off(SP); + get_reg_off(PC); + + /* eflag */ + get_reg_off(zf); + get_reg_off(nf); + get_reg_off(of); + get_reg_off(cf); + + return dict; +} + + +static PyGetSetDef JitCpu_getseters[] = { + {"vmmngr", + (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, + "vmmngr", + NULL}, + + {"jitter", + (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, + "jitter", + NULL}, + + + + {"X0" , (getter)JitCpu_get_X0 , (setter)JitCpu_set_X0 , "X0" , NULL}, + {"X1" , (getter)JitCpu_get_X1 , (setter)JitCpu_set_X1 , "X1" , NULL}, + {"X2" , (getter)JitCpu_get_X2 , (setter)JitCpu_set_X2 , "X2" , NULL}, + {"X3" , (getter)JitCpu_get_X3 , (setter)JitCpu_set_X3 , "X3" , NULL}, + {"X4" , (getter)JitCpu_get_X4 , (setter)JitCpu_set_X4 , "X4" , NULL}, + {"X5" , (getter)JitCpu_get_X5 , (setter)JitCpu_set_X5 , "X5" , NULL}, + {"X6" , (getter)JitCpu_get_X6 , (setter)JitCpu_set_X6 , "X6" , NULL}, + {"X7" , (getter)JitCpu_get_X7 , (setter)JitCpu_set_X7 , "X7" , NULL}, + {"X8" , (getter)JitCpu_get_X8 , (setter)JitCpu_set_X8 , "X8" , NULL}, + {"X9" , (getter)JitCpu_get_X9 , (setter)JitCpu_set_X9 , "X9" , NULL}, + + {"X10" , (getter)JitCpu_get_X10 , (setter)JitCpu_set_X10 , "X10" , NULL}, + {"X11" , (getter)JitCpu_get_X11 , 
(setter)JitCpu_set_X11 , "X11" , NULL}, + {"X12" , (getter)JitCpu_get_X12 , (setter)JitCpu_set_X12 , "X12" , NULL}, + {"X13" , (getter)JitCpu_get_X13 , (setter)JitCpu_set_X13 , "X13" , NULL}, + {"X14" , (getter)JitCpu_get_X14 , (setter)JitCpu_set_X14 , "X14" , NULL}, + {"X15" , (getter)JitCpu_get_X15 , (setter)JitCpu_set_X15 , "X15" , NULL}, + {"X16" , (getter)JitCpu_get_X16 , (setter)JitCpu_set_X16 , "X16" , NULL}, + {"X17" , (getter)JitCpu_get_X17 , (setter)JitCpu_set_X17 , "X17" , NULL}, + {"X18" , (getter)JitCpu_get_X18 , (setter)JitCpu_set_X18 , "X18" , NULL}, + {"X19" , (getter)JitCpu_get_X19 , (setter)JitCpu_set_X19 , "X19" , NULL}, + + {"X20" , (getter)JitCpu_get_X20 , (setter)JitCpu_set_X20 , "X20" , NULL}, + {"X21" , (getter)JitCpu_get_X21 , (setter)JitCpu_set_X21 , "X21" , NULL}, + {"X22" , (getter)JitCpu_get_X22 , (setter)JitCpu_set_X22 , "X22" , NULL}, + {"X23" , (getter)JitCpu_get_X23 , (setter)JitCpu_set_X23 , "X23" , NULL}, + {"X24" , (getter)JitCpu_get_X24 , (setter)JitCpu_set_X24 , "X24" , NULL}, + {"X25" , (getter)JitCpu_get_X25 , (setter)JitCpu_set_X25 , "X25" , NULL}, + {"X26" , (getter)JitCpu_get_X26 , (setter)JitCpu_set_X26 , "X26" , NULL}, + {"X27" , (getter)JitCpu_get_X27 , (setter)JitCpu_set_X27 , "X27" , NULL}, + {"X28" , (getter)JitCpu_get_X28 , (setter)JitCpu_set_X28 , "X28" , NULL}, + {"X29" , (getter)JitCpu_get_X29 , (setter)JitCpu_set_X29 , "X29" , NULL}, + + {"LR" , (getter)JitCpu_get_LR , (setter)JitCpu_set_LR , "LR" , NULL}, + + + + {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, + {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, + + {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, + {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, + {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, + {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, + + {"exception_flags", (getter)JitCpu_get_exception_flags, 
(setter)JitCpu_set_exception_flags, "exception_flags", NULL}, + {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, + + {NULL} /* Sentinel */ +}; + + +static PyTypeObject JitCpuType = { + PyVarObject_HEAD_INIT(NULL, 0) + "JitCore_aarch64.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_aarch64_Methods[] = { + {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + + + +MOD_INIT(JitCore_aarch64) +{ + PyObject *module; + + MOD_DEF(module, "JitCore_aarch64", "JitCore_aarch64 module", JitCore_aarch64_Methods); + + if (module == NULL) + return NULL; + + if (PyType_Ready(&JitCpuType) < 0) + return NULL; + + Py_INCREF(&JitCpuType); + if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) + return NULL; + + return module; +} + diff --git a/miasm/jitter/arch/JitCore_aarch64.h b/miasm/jitter/arch/JitCore_aarch64.h new file mode 100644 index 00000000..fa958244 --- /dev/null +++ b/miasm/jitter/arch/JitCore_aarch64.h @@ -0,0 +1,57 @@ + 
+typedef struct { + uint32_t exception_flags; + uint32_t interrupt_num; + + /* gpregs */ + + uint64_t X0; + uint64_t X1; + uint64_t X2; + uint64_t X3; + uint64_t X4; + uint64_t X5; + uint64_t X6; + uint64_t X7; + uint64_t X8; + uint64_t X9; + uint64_t X10; + uint64_t X11; + uint64_t X12; + uint64_t X13; + uint64_t X14; + uint64_t X15; + uint64_t X16; + uint64_t X17; + uint64_t X18; + uint64_t X19; + uint64_t X20; + uint64_t X21; + uint64_t X22; + uint64_t X23; + uint64_t X24; + uint64_t X25; + uint64_t X26; + uint64_t X27; + uint64_t X28; + uint64_t X29; + uint64_t LR; + uint64_t SP; + + uint64_t PC; + + /* eflag */ + uint32_t zf; + uint32_t nf; + uint32_t of; + uint32_t cf; +}vm_cpu_t; + +_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); + +_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); +_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); +_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); +_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); + +#define RETURN_PC return BlockDst; diff --git a/miasm/jitter/arch/JitCore_arm.c b/miasm/jitter/arch/JitCore_arm.c new file mode 100644 index 00000000..64f30cf4 --- /dev/null +++ b/miasm/jitter/arch/JitCore_arm.c @@ -0,0 +1,507 @@ +#include +#include "structmember.h" +#include +#include +#include "../compat_py23.h" +#include "../queue.h" +#include "../vm_mngr.h" +#include "../vm_mngr_py.h" +#include "../bn.h" +#include "../JitCore.h" +#include "../op_semantics.h" +#include "JitCore_arm.h" + + + +reg_dict gpreg_dict[] = { + {.name = "R0", .offset = offsetof(vm_cpu_t, R0), .size = 32}, + {.name = "R1", .offset = offsetof(vm_cpu_t, R1), .size = 32}, + {.name = "R2", .offset = offsetof(vm_cpu_t, R2), .size = 32}, + {.name = "R3", .offset = offsetof(vm_cpu_t, R3), .size = 32}, + {.name = "R4", .offset = offsetof(vm_cpu_t, R4), .size = 32}, + {.name = "R5", .offset = offsetof(vm_cpu_t, R5), .size = 32}, + {.name = "R6", 
.offset = offsetof(vm_cpu_t, R6), .size = 32}, + {.name = "R7", .offset = offsetof(vm_cpu_t, R7), .size = 32}, + {.name = "R8", .offset = offsetof(vm_cpu_t, R8), .size = 32}, + {.name = "R9", .offset = offsetof(vm_cpu_t, R9), .size = 32}, + {.name = "R10", .offset = offsetof(vm_cpu_t, R10), .size = 32}, + {.name = "R11", .offset = offsetof(vm_cpu_t, R11), .size = 32}, + {.name = "R12", .offset = offsetof(vm_cpu_t, R12), .size = 32}, + {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 32}, + {.name = "LR", .offset = offsetof(vm_cpu_t, LR), .size = 32}, + {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 32}, + + {.name = "zf", .offset = offsetof(vm_cpu_t, zf), .size = 8}, + {.name = "nf", .offset = offsetof(vm_cpu_t, nf), .size = 8}, + {.name = "of", .offset = offsetof(vm_cpu_t, of), .size = 8}, + {.name = "cf", .offset = offsetof(vm_cpu_t, cf), .size = 8}, + + {.name = "ge0", .offset = offsetof(vm_cpu_t, ge0), .size = 8}, + {.name = "ge1", .offset = offsetof(vm_cpu_t, ge1), .size = 8}, + {.name = "ge2", .offset = offsetof(vm_cpu_t, ge2), .size = 8}, + {.name = "ge3", .offset = offsetof(vm_cpu_t, ge3), .size = 8}, + + {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, + {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, +}; + +/************************** JitCpu object **************************/ + + + + +PyObject* cpu_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(R0); + get_reg(R1); + get_reg(R2); + get_reg(R3); + get_reg(R4); + get_reg(R5); + get_reg(R6); + get_reg(R7); + get_reg(R8); + get_reg(R9); + get_reg(R10); + get_reg(R11); + get_reg(R12); + get_reg(SP); + get_reg(LR); + get_reg(PC); + + get_reg(zf); + get_reg(nf); + get_reg(of); + get_reg(cf); + + get_reg(ge0); + get_reg(ge1); + get_reg(ge2); + get_reg(ge3); + + return dict; +} + + + +PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + PyObject *d_key, 
*d_value = NULL; + Py_ssize_t pos = 0; + char* d_key_name; + uint64_t val; + unsigned int i, found; + + if (!PyArg_ParseTuple(args, "O", &dict)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + PyGetStr(d_key_name, d_key); + PyGetInt(d_value, val); + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(d_key_name, gpreg_dict[i].name)) + continue; + *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unknown key: %s\n", d_key); + RAISE(PyExc_ValueError, "unknown reg"); + } + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject * cpu_init_regs(JitCpu* self) +{ + memset(self->cpu, 0, sizeof(vm_cpu_t)); + + Py_INCREF(Py_None); + return Py_None; +} + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + printf("R0 %.8"PRIX32" R1 %.8"PRIX32" R2 %.8"PRIX32" R3 %.8"PRIX32" ", + vmcpu->R0, vmcpu->R1, vmcpu->R2, vmcpu->R3); + printf("R4 %.8"PRIX32" R5 %.8"PRIX32" R6 %.8"PRIX32" R7 %.8"PRIX32"\n", + vmcpu->R4, vmcpu->R5, vmcpu->R6, vmcpu->R7); + printf("R8 %.8"PRIX32" R9 %.8"PRIX32" R10 %.8"PRIX32" R11 %.8"PRIX32" ", + vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); + printf("R12 %.8"PRIX32" SP %.8"PRIX32" LR %.8"PRIX32" PC %.8"PRIX32" ", + vmcpu->R12, vmcpu->SP, vmcpu->LR, vmcpu->PC); + printf("zf %"PRIX32" nf %"PRIX32" of %"PRIX32" cf %"PRIX32"\n", + vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); +} + + +PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = self->cpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) +{ + return cpu_dump_gpregs(self, args); +} + + + +PyObject* cpu_set_exception(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + 
RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + ((vm_cpu_t*)self->cpu)->exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_exception(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); +} + + + + + +void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) +{ + PyObject *result; + + if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) + return; + result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); + Py_DECREF(result); + +} + +void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) +{ + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 8); +} + +void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) +{ + vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 16); +} + +void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) +{ + vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 32); +} + +void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) +{ + vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 64); +} + +PyObject* vm_set_mem(JitCpu *self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_buffer; + Py_ssize_t py_length; + + char * buffer; + uint64_t size; + uint64_t addr; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + if(!PyBytes_Check(py_buffer)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + size = PyBytes_Size(py_buffer); + PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); + + ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); + if (ret < 0) + RAISE(PyExc_TypeError,"arg must be str"); + check_automod(self, addr, size*8); + + 
Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_set_interrupt_num(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + ((vm_cpu_t*)self->cpu)->interrupt_num = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_interrupt_num(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->interrupt_num)); +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, + "X"}, + {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, + "X"}, + {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, + "X"}, + {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, + "X"}, + {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, + "X"}, + {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, + "X"}, + {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, + "X"}, + {"get_interrupt_num", (PyCFunction)cpu_get_interrupt_num, METH_VARARGS, + "X"}, + {"set_interrupt_num", (PyCFunction)cpu_set_interrupt_num, METH_VARARGS, + "X"}, + {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, + "X"}, + {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + self->cpu = malloc(sizeof(vm_cpu_t)); + if (self->cpu == NULL) { + fprintf(stderr, "cannot alloc vm_cpu_t\n"); + exit(EXIT_FAILURE); + } + return 0; +} + +getset_reg_u32(R0); +getset_reg_u32(R1); +getset_reg_u32(R2); +getset_reg_u32(R3); +getset_reg_u32(R4); +getset_reg_u32(R5); +getset_reg_u32(R6); +getset_reg_u32(R7); +getset_reg_u32(R8); +getset_reg_u32(R9); +getset_reg_u32(R10); +getset_reg_u32(R11); +getset_reg_u32(R12); +getset_reg_u32(SP); +getset_reg_u32(LR); 
+getset_reg_u32(PC); + +getset_reg_u32(zf); +getset_reg_u32(nf); +getset_reg_u32(of); +getset_reg_u32(cf); + +getset_reg_u32(ge0); +getset_reg_u32(ge1); +getset_reg_u32(ge2); +getset_reg_u32(ge3); + +getset_reg_u32(exception_flags); +getset_reg_u32(interrupt_num); + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg_off(exception_flags); + get_reg_off(interrupt_num); + + get_reg_off(R0); + get_reg_off(R1); + get_reg_off(R2); + get_reg_off(R3); + get_reg_off(R4); + get_reg_off(R5); + get_reg_off(R6); + get_reg_off(R7); + get_reg_off(R8); + get_reg_off(R9); + get_reg_off(R10); + get_reg_off(R11); + get_reg_off(R12); + get_reg_off(SP); + get_reg_off(LR); + get_reg_off(PC); + + /* eflag */ + get_reg_off(zf); + get_reg_off(nf); + get_reg_off(of); + get_reg_off(cf); + + get_reg_off(ge0); + get_reg_off(ge1); + get_reg_off(ge2); + get_reg_off(ge3); + + return dict; +} + +static PyGetSetDef JitCpu_getseters[] = { + {"vmmngr", + (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, + "vmmngr", + NULL}, + + {"jitter", + (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, + "jitter", + NULL}, + + + + {"R0" , (getter)JitCpu_get_R0 , (setter)JitCpu_set_R0 , "R0" , NULL}, + {"R1" , (getter)JitCpu_get_R1 , (setter)JitCpu_set_R1 , "R1" , NULL}, + {"R2" , (getter)JitCpu_get_R2 , (setter)JitCpu_set_R2 , "R2" , NULL}, + {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, + {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, + {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, + {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, + {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, + {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, + {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, + {"R10", (getter)JitCpu_get_R10, (setter)JitCpu_set_R10, "R10", NULL}, + {"R11", (getter)JitCpu_get_R11, 
(setter)JitCpu_set_R11, "R11", NULL}, + {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL}, + {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, + {"LR" , (getter)JitCpu_get_LR , (setter)JitCpu_set_LR , "LR" , NULL}, + {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, + + {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, + {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, + {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, + {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, + + {"ge0", (getter)JitCpu_get_ge0, (setter)JitCpu_set_ge0, "ge0", NULL}, + {"ge1", (getter)JitCpu_get_ge1, (setter)JitCpu_set_ge1, "ge1", NULL}, + {"ge2", (getter)JitCpu_get_ge2, (setter)JitCpu_set_ge2, "ge2", NULL}, + {"ge3", (getter)JitCpu_get_ge3, (setter)JitCpu_set_ge3, "ge3", NULL}, + + {"exception_flags", (getter)JitCpu_get_exception_flags, (setter)JitCpu_set_exception_flags, "exception_flags", NULL}, + {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, + + {NULL} /* Sentinel */ +}; + + +static PyTypeObject JitCpuType = { + PyVarObject_HEAD_INIT(NULL, 0) + "JitCore_arm.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 
0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_arm_Methods[] = { + + /* + + */ + {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + + + +MOD_INIT(JitCore_arm) +{ + PyObject *module; + + MOD_DEF(module, "JitCore_arm", "JitCore_arm module", JitCore_arm_Methods); + + if (module == NULL) + return NULL; + + if (PyType_Ready(&JitCpuType) < 0) + return NULL; + + Py_INCREF(&JitCpuType); + if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) + return NULL; + + return module; +} + diff --git a/miasm/jitter/arch/JitCore_arm.h b/miasm/jitter/arch/JitCore_arm.h new file mode 100644 index 00000000..67a1096a --- /dev/null +++ b/miasm/jitter/arch/JitCore_arm.h @@ -0,0 +1,47 @@ + +typedef struct { + uint32_t exception_flags; + uint32_t interrupt_num; + + /* gpregs */ + uint32_t R0; + uint32_t R1; + uint32_t R2; + uint32_t R3; + uint32_t R4; + uint32_t R5; + uint32_t R6; + uint32_t R7; + uint32_t R8; + uint32_t R9; + uint32_t R10; + uint32_t R11; + uint32_t R12; + uint32_t SP; + uint32_t LR; + uint32_t PC; + + /* eflag */ + uint32_t zf; + uint32_t nf; + uint32_t of; + uint32_t cf; + + /* ge */ + uint32_t ge0; + uint32_t ge1; + uint32_t ge2; + uint32_t ge3; + + uint32_t bp_num; +}vm_cpu_t; + + +_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); + +_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); +_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); +_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); +_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); + +#define RETURN_PC return BlockDst; diff --git a/miasm/jitter/arch/JitCore_mep.c b/miasm/jitter/arch/JitCore_mep.c new file mode 100644 index 00000000..6e7f1767 --- /dev/null +++ 
b/miasm/jitter/arch/JitCore_mep.c @@ -0,0 +1,617 @@ +// Inspired from JitCore_mep.c + +#include +#include "structmember.h" +#include + +#include +#include "../compat_py23.h" +#include "../queue.h" +#include "../vm_mngr.h" +#include "../vm_mngr_py.h" +#include "../bn.h" +#include "../JitCore.h" +#include "JitCore_mep.h" + + +reg_dict gpreg_dict[] = { + {.name = "R0", .offset = offsetof(vm_cpu_t, R0), .size = 32}, + {.name = "R1", .offset = offsetof(vm_cpu_t, R1), .size = 32}, + {.name = "R2", .offset = offsetof(vm_cpu_t, R2), .size = 32}, + {.name = "R3", .offset = offsetof(vm_cpu_t, R3), .size = 32}, + {.name = "R4", .offset = offsetof(vm_cpu_t, R4), .size = 32}, + {.name = "R5", .offset = offsetof(vm_cpu_t, R5), .size = 32}, + {.name = "R6", .offset = offsetof(vm_cpu_t, R6), .size = 32}, + {.name = "R7", .offset = offsetof(vm_cpu_t, R7), .size = 32}, + {.name = "R8", .offset = offsetof(vm_cpu_t, R8), .size = 32}, + {.name = "R9", .offset = offsetof(vm_cpu_t, R9), .size = 32}, + {.name = "R10", .offset = offsetof(vm_cpu_t, R10), .size = 32}, + {.name = "R11", .offset = offsetof(vm_cpu_t, R11), .size = 32}, + {.name = "R12", .offset = offsetof(vm_cpu_t, R12), .size = 32}, + {.name = "TP", .offset = offsetof(vm_cpu_t, TP), .size = 32}, + {.name = "GP", .offset = offsetof(vm_cpu_t, GP), .size = 32}, + {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 32}, + + {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 32}, + {.name = "LP", .offset = offsetof(vm_cpu_t, LP), .size = 32}, + {.name = "SAR", .offset = offsetof(vm_cpu_t, SAR), .size = 32}, + {.name = "S3", .offset = offsetof(vm_cpu_t, S3), .size = 32}, + {.name = "RPB", .offset = offsetof(vm_cpu_t, RPB), .size = 32}, + {.name = "RPE", .offset = offsetof(vm_cpu_t, RPE), .size = 32}, + {.name = "RPC", .offset = offsetof(vm_cpu_t, RPC), .size = 32}, + {.name = "HI", .offset = offsetof(vm_cpu_t, HI), .size = 32}, + {.name = "LO", .offset = offsetof(vm_cpu_t, LO), .size = 32}, + {.name = "S9", .offset = 
offsetof(vm_cpu_t, S9), .size = 32}, + {.name = "S10", .offset = offsetof(vm_cpu_t, S10), .size = 32}, + {.name = "S11", .offset = offsetof(vm_cpu_t, S11), .size = 32}, + {.name = "MB0", .offset = offsetof(vm_cpu_t, MB0), .size = 32}, + {.name = "ME0", .offset = offsetof(vm_cpu_t, ME0), .size = 32}, + {.name = "MB1", .offset = offsetof(vm_cpu_t, MB1), .size = 32}, + {.name = "ME1", .offset = offsetof(vm_cpu_t, ME1), .size = 32}, + {.name = "PSW", .offset = offsetof(vm_cpu_t, PSW), .size = 32}, + {.name = "ID", .offset = offsetof(vm_cpu_t, ID), .size = 32}, + {.name = "TMP", .offset = offsetof(vm_cpu_t, TMP), .size = 32}, + {.name = "EPC", .offset = offsetof(vm_cpu_t, EPC), .size = 32}, + {.name = "EXC", .offset = offsetof(vm_cpu_t, EXC), .size = 32}, + {.name = "CFG", .offset = offsetof(vm_cpu_t, CFG), .size = 32}, + {.name = "S22", .offset = offsetof(vm_cpu_t, S22), .size = 32}, + {.name = "NPC", .offset = offsetof(vm_cpu_t, NPC), .size = 32}, + {.name = "DBG", .offset = offsetof(vm_cpu_t, DBG), .size = 32}, + {.name = "DEPC", .offset = offsetof(vm_cpu_t, DEPC), .size = 32}, + {.name = "OPT", .offset = offsetof(vm_cpu_t, OPT), .size = 32}, + {.name = "RCFG", .offset = offsetof(vm_cpu_t, RCFG), .size = 32}, + {.name = "CCFG", .offset = offsetof(vm_cpu_t, CCFG), .size = 32}, + {.name = "S29", .offset = offsetof(vm_cpu_t, S29), .size = 32}, + {.name = "S30", .offset = offsetof(vm_cpu_t, S30), .size = 32}, + {.name = "S31", .offset = offsetof(vm_cpu_t, S31), .size = 32}, + {.name = "S32", .offset = offsetof(vm_cpu_t, S32), .size = 32}, + {.name = "take_jmp", .offset = offsetof(vm_cpu_t, take_jmp), .size = 32}, + {.name = "last_addr", .offset = offsetof(vm_cpu_t, last_addr), .size = 32}, + {.name = "is_repeat_end", .offset = offsetof(vm_cpu_t, is_repeat_end), .size = 32}, + + {.name = "PC_end", .offset = offsetof(vm_cpu_t, PC_end), .size = 32}, + {.name = "RPE_instr_count", .offset = offsetof(vm_cpu_t, RPE_instr_count), .size = 32}, + {.name = "RPC_current", .offset = 
offsetof(vm_cpu_t, RPC_current), .size = 32}, + +}; + +/************************** JitCpu object **************************/ + + + +PyObject* cpu_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(R0); + get_reg(R1); + get_reg(R2); + get_reg(R3); + get_reg(R4); + get_reg(R5); + get_reg(R6); + get_reg(R7); + get_reg(R8); + get_reg(R9); + get_reg(R10); + get_reg(R11); + get_reg(R12); + get_reg(TP); + get_reg(GP); + get_reg(SP); + + get_reg(PC); + get_reg(LP); + get_reg(SAR); + get_reg(S3); + get_reg(RPB); + get_reg(RPE); + get_reg(RPC); + get_reg(HI); + get_reg(LO); + get_reg(S9); + get_reg(S10); + get_reg(S11); + get_reg(MB0); + get_reg(ME0); + get_reg(MB1); + get_reg(ME1); + get_reg(PSW); + get_reg(ID); + get_reg(TMP); + get_reg(EPC); + get_reg(EXC); + get_reg(CFG); + get_reg(S22); + get_reg(NPC); + get_reg(DBG); + get_reg(DEPC); + get_reg(OPT); + get_reg(RCFG); + get_reg(CCFG); + get_reg(S29); + get_reg(S30); + get_reg(S31); + get_reg(S32); + + get_reg(PC_end); + get_reg(RPE_instr_count); + get_reg(RPC_current); + + + return dict; +} + + +PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + PyObject *d_key, *d_value = NULL; + Py_ssize_t pos = 0; + char* d_key_name; + uint64_t val; + unsigned int i, found; + + if (!PyArg_ParseTuple(args, "O", &dict)) + return NULL; + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + PyGetStr(d_key_name, d_key); + PyGetInt(d_value, val); + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(d_key_name, gpreg_dict[i].name)) + continue; + *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unknown key: %s\n", d_key_name); + RAISE(PyExc_ValueError, "unknown reg"); + } + Py_INCREF(Py_None); + return Py_None; +} + + + + +PyObject * cpu_init_regs(JitCpu* self) +{ + memset(self->cpu, 0, 
sizeof(vm_cpu_t)); + + Py_INCREF(Py_None); + return Py_None; + +} + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + printf("R0 %.4"PRIX32" ", vmcpu->R0); + printf("R1 %.4"PRIX32" ", vmcpu->R1); + printf("R2 %.4"PRIX32" ", vmcpu->R2); + printf("R3 %.4"PRIX32" ", vmcpu->R3); + printf("R4 %.4"PRIX32" ", vmcpu->R4); + printf("R5 %.4"PRIX32" ", vmcpu->R5); + printf("R6 %.4"PRIX32" ", vmcpu->R6); + printf("R7 %.4"PRIX32" ", vmcpu->R7); + printf("R8 %.4"PRIX32" ", vmcpu->R8); + printf("R9 %.4"PRIX32" ", vmcpu->R9); + printf("R10 %.4"PRIX32" ", vmcpu->R10); + printf("R11 %.4"PRIX32" ", vmcpu->R11); + printf("R12 %.4"PRIX32" ", vmcpu->R12); + printf("TP %.4"PRIX32" ", vmcpu->TP); + printf("GP %.4"PRIX32" ", vmcpu->GP); + printf("SP %.4"PRIX32" ", vmcpu->SP); + printf("\n"); +} + + +PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = self->cpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) +{ + return cpu_dump_gpregs(self, args); +} + +PyObject* cpu_set_exception(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + return NULL; + + PyGetInt(item1, i); + + ((vm_cpu_t*)self->cpu)->exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_exception(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); +} + +void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) +{ + PyObject *result; + + if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) + return; + result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); + Py_DECREF(result); + +} + +void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) +{ + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 8); +} + +void MEM_WRITE_16(JitCpu* jitcpu, uint64_t 
addr, uint16_t src) +{ + vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 16); +} + +void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) +{ + vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 32); +} + +void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) +{ + vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 64); +} + + +PyObject* vm_set_mem(JitCpu *self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_buffer; + Py_ssize_t py_length; + + char * buffer; + uint64_t size; + uint64_t addr; + int ret = 0x1337; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) + return NULL; + + PyGetInt(py_addr, addr); + + if(!PyBytes_Check(py_buffer)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + size = PyBytes_Size(py_buffer); + PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); + + ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); + if (ret < 0) + RAISE(PyExc_TypeError,"arg must be str"); + check_automod(self, addr, size*8); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, "X"}, + {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, "X"}, + {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, "X"}, + {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, "X"}, + {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, "X"}, + {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, "X"}, + {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, "X"}, + {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, "X"}, + {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ 
+ self->cpu = malloc(sizeof(vm_cpu_t)); + if (self->cpu == NULL) { + fprintf(stderr, "cannot alloc vm_cpu_t\n"); + exit(0); + } + return 0; +} + +getset_reg_u32(R0); +getset_reg_u32(R1); +getset_reg_u32(R2); +getset_reg_u32(R3); +getset_reg_u32(R4); +getset_reg_u32(R5); +getset_reg_u32(R6); +getset_reg_u32(R7); +getset_reg_u32(R8); +getset_reg_u32(R9); +getset_reg_u32(R10); +getset_reg_u32(R11); +getset_reg_u32(R12); +getset_reg_u32(TP); +getset_reg_u32(GP); +getset_reg_u32(SP); + +getset_reg_u32(PC); +getset_reg_u32(LP); +getset_reg_u32(SAR); +getset_reg_u32(S3); +getset_reg_u32(RPB); +getset_reg_u32(RPE); +getset_reg_u32(RPC); +getset_reg_u32(HI); +getset_reg_u32(LO); +getset_reg_u32(S9); +getset_reg_u32(S10); +getset_reg_u32(S11); +getset_reg_u32(MB0); +getset_reg_u32(ME0); +getset_reg_u32(MB1); +getset_reg_u32(ME1); +getset_reg_u32(PSW); +getset_reg_u32(ID); +getset_reg_u32(TMP); +getset_reg_u32(EPC); +getset_reg_u32(EXC); +getset_reg_u32(CFG); +getset_reg_u32(S22); +getset_reg_u32(NPC); +getset_reg_u32(DBG); +getset_reg_u32(DEPC); +getset_reg_u32(OPT); +getset_reg_u32(RCFG); +getset_reg_u32(CCFG); +getset_reg_u32(S29); +getset_reg_u32(S30); +getset_reg_u32(S31); +getset_reg_u32(S32); + +getset_reg_u32(PC_end); +getset_reg_u32(RPE_instr_count); +getset_reg_u32(RPC_current); + + + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + get_reg_off(exception_flags); + + get_reg_off(R0); + get_reg_off(R1); + get_reg_off(R2); + get_reg_off(R3); + get_reg_off(R4); + get_reg_off(R5); + get_reg_off(R6); + get_reg_off(R7); + get_reg_off(R8); + get_reg_off(R9); + get_reg_off(R10); + get_reg_off(R11); + get_reg_off(R12); + get_reg_off(TP); + get_reg_off(GP); + get_reg_off(SP); + + get_reg_off(PC); + get_reg_off(LP); + get_reg_off(SAR); + get_reg_off(S3); + get_reg_off(RPB); + get_reg_off(RPE); + get_reg_off(RPC); + get_reg_off(HI); + get_reg_off(LO); + get_reg_off(S9); + get_reg_off(S10); + get_reg_off(S11); + get_reg_off(MB0); + 
get_reg_off(ME0); + get_reg_off(MB1); + get_reg_off(ME1); + get_reg_off(PSW); + get_reg_off(ID); + get_reg_off(TMP); + get_reg_off(EPC); + get_reg_off(EXC); + get_reg_off(CFG); + get_reg_off(S22); + get_reg_off(NPC); + get_reg_off(DBG); + get_reg_off(DEPC); + get_reg_off(OPT); + get_reg_off(RCFG); + get_reg_off(CCFG); + get_reg_off(S29); + get_reg_off(S30); + get_reg_off(S31); + get_reg_off(S32); + + get_reg_off(PC_end); + get_reg_off(RPE_instr_count); + get_reg_off(RPC_current); + + + return dict; +} + + + + +static PyGetSetDef JitCpu_getseters[] = { + {"vmmngr", + (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, + "vmmngr", + NULL}, + + {"jitter", + (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, + "jitter", + NULL}, + + + {"R0" , (getter)JitCpu_get_R0 , (setter)JitCpu_set_R0 , "R0" , NULL}, + {"R1" , (getter)JitCpu_get_R1 , (setter)JitCpu_set_R1 , "R1" , NULL}, + {"R2" , (getter)JitCpu_get_R2 , (setter)JitCpu_set_R2 , "R2" , NULL}, + {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, + {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, + {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, + {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, + {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, + {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, + {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, + {"R10" , (getter)JitCpu_get_R10 , (setter)JitCpu_set_R10 , "R10" , NULL}, + {"R11" , (getter)JitCpu_get_R11 , (setter)JitCpu_set_R11 , "R11" , NULL}, + {"R12" , (getter)JitCpu_get_R12 , (setter)JitCpu_set_R12 , "R12" , NULL}, + {"TP" , (getter)JitCpu_get_TP , (setter)JitCpu_set_TP , "TP" , NULL}, + {"GP" , (getter)JitCpu_get_GP , (setter)JitCpu_set_GP , "GP" , NULL}, + {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, + + {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, + {"LP" , 
(getter)JitCpu_get_LP , (setter)JitCpu_set_LP , "LP" , NULL}, + {"SAR" , (getter)JitCpu_get_SAR , (setter)JitCpu_set_SAR , "SAR" , NULL}, + {"S3" , (getter)JitCpu_get_S3 , (setter)JitCpu_set_S3 , "S3" , NULL}, + {"RPB" , (getter)JitCpu_get_RPB , (setter)JitCpu_set_RPB , "RPB" , NULL}, + {"RPE" , (getter)JitCpu_get_RPE , (setter)JitCpu_set_RPE , "RPE" , NULL}, + {"RPC" , (getter)JitCpu_get_RPC , (setter)JitCpu_set_RPC , "RPC" , NULL}, + {"HI" , (getter)JitCpu_get_HI , (setter)JitCpu_set_HI , "HI" , NULL}, + {"LO" , (getter)JitCpu_get_LO , (setter)JitCpu_set_LO , "LO" , NULL}, + {"S9" , (getter)JitCpu_get_S9 , (setter)JitCpu_set_S9 , "S9" , NULL}, + {"S10" , (getter)JitCpu_get_S10 , (setter)JitCpu_set_S10 , "S10" , NULL}, + {"S11" , (getter)JitCpu_get_S11 , (setter)JitCpu_set_S11 , "S11" , NULL}, + {"MB0" , (getter)JitCpu_get_MB0 , (setter)JitCpu_set_MB0 , "MB0" , NULL}, + {"ME0" , (getter)JitCpu_get_ME0 , (setter)JitCpu_set_ME0 , "ME0" , NULL}, + {"MB1" , (getter)JitCpu_get_MB1 , (setter)JitCpu_set_MB1 , "MB1" , NULL}, + {"ME1" , (getter)JitCpu_get_ME1 , (setter)JitCpu_set_ME1 , "ME1" , NULL}, + {"PSW" , (getter)JitCpu_get_PSW , (setter)JitCpu_set_PSW , "PSW" , NULL}, + {"ID" , (getter)JitCpu_get_ID , (setter)JitCpu_set_ID , "ID" , NULL}, + {"TMP" , (getter)JitCpu_get_TMP , (setter)JitCpu_set_TMP , "TMP" , NULL}, + {"EPC" , (getter)JitCpu_get_EPC , (setter)JitCpu_set_EPC , "EPC" , NULL}, + {"EXC" , (getter)JitCpu_get_EXC , (setter)JitCpu_set_EXC , "EXC" , NULL}, + {"CFG" , (getter)JitCpu_get_CFG , (setter)JitCpu_set_CFG , "CFG" , NULL}, + {"S22" , (getter)JitCpu_get_S22 , (setter)JitCpu_set_S22 , "S22" , NULL}, + {"NPC" , (getter)JitCpu_get_NPC , (setter)JitCpu_set_NPC , "NPC" , NULL}, + {"DBG" , (getter)JitCpu_get_DBG , (setter)JitCpu_set_DBG , "DBG" , NULL}, + {"DEPC" , (getter)JitCpu_get_DEPC , (setter)JitCpu_set_DEPC , "DEPC" , NULL}, + {"OPT" , (getter)JitCpu_get_OPT , (setter)JitCpu_set_OPT , "OPT" , NULL}, + {"RCFG" , (getter)JitCpu_get_RCFG , 
(setter)JitCpu_set_RCFG , "RCFG" , NULL}, + {"CCFG" , (getter)JitCpu_get_CCFG , (setter)JitCpu_set_CCFG , "CCFG" , NULL}, + {"S29" , (getter)JitCpu_get_S29 , (setter)JitCpu_set_S29 , "S29" , NULL}, + {"S30" , (getter)JitCpu_get_S30 , (setter)JitCpu_set_S30 , "S30" , NULL}, + {"S31" , (getter)JitCpu_get_S31 , (setter)JitCpu_set_S31 , "S31" , NULL}, + {"S32" , (getter)JitCpu_get_S32 , (setter)JitCpu_set_S32 , "S32" , NULL}, + + {"PC_end" , (getter)JitCpu_get_PC_end , (setter)JitCpu_set_PC_end , "PC_end" , NULL}, + {"RPE_instr_count" , (getter)JitCpu_get_RPE_instr_count , (setter)JitCpu_set_RPE_instr_count , "RPE_instr_count" , NULL}, + {"RPC_current" , (getter)JitCpu_get_RPC_current , (setter)JitCpu_set_RPC_current , "RPC_current" , NULL}, + + + + {NULL} /* Sentinel */ +}; + + + +static PyTypeObject JitCpuType = { + PyVarObject_HEAD_INIT(NULL, 0) + "JitCore_mep.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_mep_Methods[] = { + + /* + + */ + {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + + 
+ +MOD_INIT(JitCore_mep) +{ + PyObject *module; + + MOD_DEF(module, "JitCore_mep", "JitCore_mep module", JitCore_mep_Methods); + + if (module == NULL) + return NULL; + + if (PyType_Ready(&JitCpuType) < 0) + return NULL; + + Py_INCREF(&JitCpuType); + if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) + return NULL; + + return module; +} diff --git a/miasm/jitter/arch/JitCore_mep.h b/miasm/jitter/arch/JitCore_mep.h new file mode 100644 index 00000000..bcf2283e --- /dev/null +++ b/miasm/jitter/arch/JitCore_mep.h @@ -0,0 +1,82 @@ +// Inspired from JitCore_msp430.h + +typedef struct { + /* miasm flags */ + uint32_t exception_flags; + + /* gpregs */ + uint32_t R0; + uint32_t R1; + uint32_t R2; + uint32_t R3; + uint32_t R4; + uint32_t R5; + uint32_t R6; + uint32_t R7; + uint32_t R8; + uint32_t R9; + uint32_t R10; + uint32_t R11; + uint32_t R12; + uint32_t TP; + uint32_t GP; + uint32_t SP; + + /* csregs */ + uint32_t PC; + uint32_t LP; + uint32_t SAR; + uint32_t S3; + uint32_t RPB; + uint32_t RPE; + uint32_t RPC; + uint32_t HI; + uint32_t LO; + uint32_t S9; + uint32_t S10; + uint32_t S11; + uint32_t MB0; + uint32_t ME0; + uint32_t MB1; + uint32_t ME1; + uint32_t PSW; + uint32_t ID; + uint32_t TMP; + uint32_t EPC; + uint32_t EXC; + uint32_t CFG; + uint32_t S22; + uint32_t NPC; + uint32_t DBG; + uint32_t DEPC; + uint32_t OPT; + uint32_t RCFG; + uint32_t CCFG; + uint32_t S29; + uint32_t S30; + uint32_t S31; + uint32_t S32; + + /* miasm specific regs */ + uint32_t PC_end; + uint32_t RPE_instr_count; + uint32_t RPC_current; + + + uint32_t take_jmp; + uint32_t last_addr; + uint32_t is_repeat_end; + uint32_t in_erepeat; + + /* flags */ + +} vm_cpu_t; + +_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); + +_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); +_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); +_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); +_MIASM_EXPORT void 
MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); + +#define RETURN_PC return BlockDst; diff --git a/miasm/jitter/arch/JitCore_mips32.c b/miasm/jitter/arch/JitCore_mips32.c new file mode 100644 index 00000000..1455fec9 --- /dev/null +++ b/miasm/jitter/arch/JitCore_mips32.c @@ -0,0 +1,531 @@ +#include +#include "structmember.h" +#include +#include +#include "../compat_py23.h" +#include "../queue.h" +#include "../vm_mngr.h" +#include "../vm_mngr_py.h" +#include "../bn.h" +#include "../JitCore.h" +#include "../op_semantics.h" +#include "JitCore_mips32.h" + + + +reg_dict gpreg_dict[] = { {.name = "ZERO", .offset = offsetof(vm_cpu_t, ZERO), .size = 32}, + {.name = "AT", .offset = offsetof(vm_cpu_t, AT), .size = 32}, + {.name = "V0", .offset = offsetof(vm_cpu_t, V0), .size = 32}, + {.name = "V1", .offset = offsetof(vm_cpu_t, V1), .size = 32}, + {.name = "A0", .offset = offsetof(vm_cpu_t, A0), .size = 32}, + {.name = "A1", .offset = offsetof(vm_cpu_t, A1), .size = 32}, + {.name = "A2", .offset = offsetof(vm_cpu_t, A2), .size = 32}, + {.name = "A3", .offset = offsetof(vm_cpu_t, A3), .size = 32}, + {.name = "T0", .offset = offsetof(vm_cpu_t, T0), .size = 32}, + {.name = "T1", .offset = offsetof(vm_cpu_t, T1), .size = 32}, + {.name = "T2", .offset = offsetof(vm_cpu_t, T2), .size = 32}, + {.name = "T3", .offset = offsetof(vm_cpu_t, T3), .size = 32}, + {.name = "T4", .offset = offsetof(vm_cpu_t, T4), .size = 32}, + {.name = "T5", .offset = offsetof(vm_cpu_t, T5), .size = 32}, + {.name = "T6", .offset = offsetof(vm_cpu_t, T6), .size = 32}, + {.name = "T7", .offset = offsetof(vm_cpu_t, T7), .size = 32}, + {.name = "S0", .offset = offsetof(vm_cpu_t, S0), .size = 32}, + {.name = "S1", .offset = offsetof(vm_cpu_t, S1), .size = 32}, + {.name = "S2", .offset = offsetof(vm_cpu_t, S2), .size = 32}, + {.name = "S3", .offset = offsetof(vm_cpu_t, S3), .size = 32}, + {.name = "S4", .offset = offsetof(vm_cpu_t, S4), .size = 32}, + {.name = "S5", .offset = offsetof(vm_cpu_t, S5), 
.size = 32}, + {.name = "S6", .offset = offsetof(vm_cpu_t, S6), .size = 32}, + {.name = "S7", .offset = offsetof(vm_cpu_t, S7), .size = 32}, + {.name = "T8", .offset = offsetof(vm_cpu_t, T8), .size = 32}, + {.name = "T9", .offset = offsetof(vm_cpu_t, T9), .size = 32}, + {.name = "K0", .offset = offsetof(vm_cpu_t, K0), .size = 32}, + {.name = "K1", .offset = offsetof(vm_cpu_t, K1), .size = 32}, + {.name = "GP", .offset = offsetof(vm_cpu_t, GP), .size = 32}, + {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 32}, + {.name = "FP", .offset = offsetof(vm_cpu_t, FP), .size = 32}, + {.name = "RA", .offset = offsetof(vm_cpu_t, RA), .size = 32}, + {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 32}, + {.name = "PC_FETCH", .offset = offsetof(vm_cpu_t, PC_FETCH), .size = 32}, + {.name = "R_LO", .offset = offsetof(vm_cpu_t, R_LO), .size = 32}, + {.name = "R_HI", .offset = offsetof(vm_cpu_t, R_HI), .size = 32}, +}; + +/************************** JitCpu object **************************/ + + + +PyObject* cpu_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(ZERO); + get_reg(AT); + get_reg(V0); + get_reg(V1); + get_reg(A0); + get_reg(A1); + get_reg(A2); + get_reg(A3); + get_reg(T0); + get_reg(T1); + get_reg(T2); + get_reg(T3); + get_reg(T4); + get_reg(T5); + get_reg(T6); + get_reg(T7); + get_reg(S0); + get_reg(S1); + get_reg(S2); + get_reg(S3); + get_reg(S4); + get_reg(S5); + get_reg(S6); + get_reg(S7); + get_reg(T8); + get_reg(T9); + get_reg(K0); + get_reg(K1); + get_reg(GP); + get_reg(SP); + get_reg(FP); + get_reg(RA); + get_reg(PC); + get_reg(PC_FETCH); + get_reg(R_LO); + get_reg(R_HI); + + return dict; +} + + + + +PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + PyObject *d_key, *d_value = NULL; + Py_ssize_t pos = 0; + char* d_key_name; + uint64_t val; + unsigned int i, found; + + if (!PyArg_ParseTuple(args, "O", &dict)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + 
if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + PyGetStr(d_key_name, d_key); + PyGetInt(d_value, val); + + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(d_key_name, gpreg_dict[i].name)) + continue; + *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + found = 1; + break; + } + + if (found) + continue; + fprintf(stderr, "unknown key: %s\n", d_key_name); + RAISE(PyExc_ValueError, "unknown reg"); + } + Py_INCREF(Py_None); + return Py_None; +} + + + + +PyObject * cpu_init_regs(JitCpu* self) +{ + memset(self->cpu, 0, sizeof(vm_cpu_t)); + + Py_INCREF(Py_None); + return Py_None; + +} + + +void dump_gpregs(vm_cpu_t* vmcpu) +{ + + printf("ZR %.8"PRIX32" AT %.8"PRIX32" V0 %.8"PRIX32" V1 %.8"PRIX32" ", + vmcpu->ZERO, vmcpu->AT, vmcpu->V0, vmcpu->V1); + printf("A0 %.8"PRIX32" A1 %.8"PRIX32" A2 %.8"PRIX32" A3 %.8"PRIX32" ", + vmcpu->A0, vmcpu->A1, vmcpu->A2, vmcpu->A3); + printf("T0 %.8"PRIX32" T1 %.8"PRIX32" T2 %.8"PRIX32" T3 %.8"PRIX32" ", + vmcpu->T0, vmcpu->T1, vmcpu->T2, vmcpu->T3); + printf("T4 %.8"PRIX32" T5 %.8"PRIX32" T6 %.8"PRIX32" T7 %.8"PRIX32"\n", + vmcpu->T4, vmcpu->T5, vmcpu->T6, vmcpu->T7); + printf("S0 %.8"PRIX32" S1 %.8"PRIX32" S2 %.8"PRIX32" S3 %.8"PRIX32" ", + vmcpu->S0, vmcpu->S1, vmcpu->S2, vmcpu->S3); + printf("S4 %.8"PRIX32" S5 %.8"PRIX32" S6 %.8"PRIX32" S7 %.8"PRIX32" ", + vmcpu->S4, vmcpu->S5, vmcpu->S6, vmcpu->S7); + printf("T8 %.8"PRIX32" T9 %.8"PRIX32" K0 %.8"PRIX32" K1 %.8"PRIX32" ", + vmcpu->T8, vmcpu->T9, vmcpu->K0, vmcpu->K1); + printf("GP %.8"PRIX32" SP %.8"PRIX32" FP %.8"PRIX32" RA %.8"PRIX32"\n", + vmcpu->GP, vmcpu->SP, vmcpu->FP, vmcpu->RA); + printf("PC %.8"PRIX32"\n", + vmcpu->PC); +} + + +PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = self->cpu; + dump_gpregs(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* cpu_set_exception(JitCpu* self, PyObject* 
args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + ((vm_cpu_t*)self->cpu)->exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_exception(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); +} + + + + + + +void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) +{ + PyObject *result; + + if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) + return; + result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); + Py_DECREF(result); + +} + + +void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) +{ + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 8); +} + +void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) +{ + vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 16); +} + +void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) +{ + vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 32); +} + +void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) +{ + vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); + check_automod(jitcpu, addr, 64); +} + + +PyObject* vm_set_mem(JitCpu *self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_buffer; + Py_ssize_t py_length; + + char * buffer; + uint64_t size; + uint64_t addr; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + if(!PyBytes_Check(py_buffer)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + size = PyBytes_Size(py_buffer); + PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); + + ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); + if (ret < 
0) + RAISE(PyExc_TypeError,"arg must be str"); + check_automod(self, addr, size*8); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, + "X"}, + {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, + "X"}, + {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, + "X"}, + {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, + "X"}, + {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, + "X"}, + {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, + "X"}, + {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, + "X"}, + {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + self->cpu = malloc(sizeof(vm_cpu_t)); + if (self->cpu == NULL) { + fprintf(stderr, "cannot alloc vm_cpu_t\n"); + exit(EXIT_FAILURE); + } + return 0; +} + +getset_reg_u32(ZERO); +getset_reg_u32(AT); +getset_reg_u32(V0); +getset_reg_u32(V1); +getset_reg_u32(A0); +getset_reg_u32(A1); +getset_reg_u32(A2); +getset_reg_u32(A3); +getset_reg_u32(T0); +getset_reg_u32(T1); +getset_reg_u32(T2); +getset_reg_u32(T3); +getset_reg_u32(T4); +getset_reg_u32(T5); +getset_reg_u32(T6); +getset_reg_u32(T7); +getset_reg_u32(S0); +getset_reg_u32(S1); +getset_reg_u32(S2); +getset_reg_u32(S3); +getset_reg_u32(S4); +getset_reg_u32(S5); +getset_reg_u32(S6); +getset_reg_u32(S7); +getset_reg_u32(T8); +getset_reg_u32(T9); +getset_reg_u32(K0); +getset_reg_u32(K1); +getset_reg_u32(GP); +getset_reg_u32(SP); +getset_reg_u32(FP); +getset_reg_u32(RA); +getset_reg_u32(PC); +getset_reg_u32(PC_FETCH); +getset_reg_u32(R_LO); +getset_reg_u32(R_HI); + + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg_off(exception_flags); + + + get_reg_off(ZERO); + get_reg_off(AT); + get_reg_off(V0); + 
get_reg_off(V1); + get_reg_off(A0); + get_reg_off(A1); + get_reg_off(A2); + get_reg_off(A3); + get_reg_off(T0); + get_reg_off(T1); + get_reg_off(T2); + get_reg_off(T3); + get_reg_off(T4); + get_reg_off(T5); + get_reg_off(T6); + get_reg_off(T7); + get_reg_off(S0); + get_reg_off(S1); + get_reg_off(S2); + get_reg_off(S3); + get_reg_off(S4); + get_reg_off(S5); + get_reg_off(S6); + get_reg_off(S7); + get_reg_off(T8); + get_reg_off(T9); + get_reg_off(K0); + get_reg_off(K1); + get_reg_off(GP); + get_reg_off(SP); + get_reg_off(FP); + get_reg_off(RA); + get_reg_off(PC); + get_reg_off(PC_FETCH); + get_reg_off(R_LO); + get_reg_off(R_HI); + + return dict; +} + + +static PyGetSetDef JitCpu_getseters[] = { + {"vmmngr", + (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, + "vmmngr", + NULL}, + + {"jitter", + (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, + "jitter", + NULL}, + + {"ZERO" , (getter)JitCpu_get_ZERO , (setter)JitCpu_set_ZERO , "ZERO" , NULL}, + {"AT" , (getter)JitCpu_get_AT , (setter)JitCpu_set_AT , "AT" , NULL}, + {"V0" , (getter)JitCpu_get_V0 , (setter)JitCpu_set_V0 , "V0" , NULL}, + {"V1" , (getter)JitCpu_get_V1 , (setter)JitCpu_set_V1 , "V1" , NULL}, + {"A0" , (getter)JitCpu_get_A0 , (setter)JitCpu_set_A0 , "A0" , NULL}, + {"A1" , (getter)JitCpu_get_A1 , (setter)JitCpu_set_A1 , "A1" , NULL}, + {"A2" , (getter)JitCpu_get_A2 , (setter)JitCpu_set_A2 , "A2" , NULL}, + {"A3" , (getter)JitCpu_get_A3 , (setter)JitCpu_set_A3 , "A3" , NULL}, + {"T0" , (getter)JitCpu_get_T0 , (setter)JitCpu_set_T0 , "T0" , NULL}, + {"T1" , (getter)JitCpu_get_T1 , (setter)JitCpu_set_T1 , "T1" , NULL}, + {"T2" , (getter)JitCpu_get_T2 , (setter)JitCpu_set_T2 , "T2" , NULL}, + {"T3" , (getter)JitCpu_get_T3 , (setter)JitCpu_set_T3 , "T3" , NULL}, + {"T4" , (getter)JitCpu_get_T4 , (setter)JitCpu_set_T4 , "T4" , NULL}, + {"T5" , (getter)JitCpu_get_T5 , (setter)JitCpu_set_T5 , "T5" , NULL}, + {"T6" , (getter)JitCpu_get_T6 , (setter)JitCpu_set_T6 , "T6" , NULL}, + {"T7" , 
(getter)JitCpu_get_T7 , (setter)JitCpu_set_T7 , "T7" , NULL}, + {"S0" , (getter)JitCpu_get_S0 , (setter)JitCpu_set_S0 , "S0" , NULL}, + {"S1" , (getter)JitCpu_get_S1 , (setter)JitCpu_set_S1 , "S1" , NULL}, + {"S2" , (getter)JitCpu_get_S2 , (setter)JitCpu_set_S2 , "S2" , NULL}, + {"S3" , (getter)JitCpu_get_S3 , (setter)JitCpu_set_S3 , "S3" , NULL}, + {"S4" , (getter)JitCpu_get_S4 , (setter)JitCpu_set_S4 , "S4" , NULL}, + {"S5" , (getter)JitCpu_get_S5 , (setter)JitCpu_set_S5 , "S5" , NULL}, + {"S6" , (getter)JitCpu_get_S6 , (setter)JitCpu_set_S6 , "S6" , NULL}, + {"S7" , (getter)JitCpu_get_S7 , (setter)JitCpu_set_S7 , "S7" , NULL}, + {"T8" , (getter)JitCpu_get_T8 , (setter)JitCpu_set_T8 , "T8" , NULL}, + {"T9" , (getter)JitCpu_get_T9 , (setter)JitCpu_set_T9 , "T9" , NULL}, + {"K0" , (getter)JitCpu_get_K0 , (setter)JitCpu_set_K0 , "K0" , NULL}, + {"K1" , (getter)JitCpu_get_K1 , (setter)JitCpu_set_K1 , "K1" , NULL}, + {"GP" , (getter)JitCpu_get_GP , (setter)JitCpu_set_GP , "GP" , NULL}, + {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, + {"FP" , (getter)JitCpu_get_FP , (setter)JitCpu_set_FP , "FP" , NULL}, + {"RA" , (getter)JitCpu_get_RA , (setter)JitCpu_set_RA , "RA" , NULL}, + {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, + {"PC_FETCH" , (getter)JitCpu_get_PC_FETCH , (setter)JitCpu_set_PC_FETCH , "PC_FETCH" , NULL}, + {"R_LO" , (getter)JitCpu_get_R_LO , (setter)JitCpu_set_R_LO , "R_LO" , NULL}, + {"R_HI" , (getter)JitCpu_get_R_HI , (setter)JitCpu_set_R_HI , "R_HI" , NULL}, + + {NULL} /* Sentinel */ +}; + + +static PyTypeObject JitCpuType = { + PyVarObject_HEAD_INIT(NULL, 0) + "JitCore_mips32.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ 
+ 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_mips32_Methods[] = { + + /* + + */ + {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + + + + + +MOD_INIT(JitCore_mips32) +{ + PyObject *module; + + MOD_DEF(module, "JitCore_mips32", "JitCore_mips32 module", JitCore_mips32_Methods); + + if (module == NULL) + return NULL; + + if (PyType_Ready(&JitCpuType) < 0) + return NULL; + + Py_INCREF(&JitCpuType); + if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) + return NULL; + + return module; +} diff --git a/miasm/jitter/arch/JitCore_mips32.h b/miasm/jitter/arch/JitCore_mips32.h new file mode 100644 index 00000000..e20d5133 --- /dev/null +++ b/miasm/jitter/arch/JitCore_mips32.h @@ -0,0 +1,343 @@ + +typedef struct { + uint32_t exception_flags; + + /* gpregs */ + + uint32_t ZERO; + uint32_t AT; + uint32_t V0; + uint32_t V1; + uint32_t A0; + uint32_t A1; + uint32_t A2; + uint32_t A3; + uint32_t T0; + uint32_t T1; + uint32_t T2; + uint32_t T3; + uint32_t T4; + uint32_t T5; + uint32_t T6; + uint32_t T7; + uint32_t S0; + uint32_t S1; + uint32_t S2; + uint32_t S3; + uint32_t S4; + uint32_t S5; + uint32_t S6; + uint32_t S7; + uint32_t T8; + uint32_t T9; + uint32_t K0; + uint32_t K1; + uint32_t GP; + uint32_t SP; + uint32_t FP; + uint32_t RA; + uint32_t PC; + uint32_t PC_FETCH; + uint32_t R_LO; + uint32_t R_HI; + + + 
double F0; + double F1; + double F2; + double F3; + double F4; + double F5; + double F6; + double F7; + double F8; + double F9; + double F10; + double F11; + double F12; + double F13; + double F14; + double F15; + double F16; + double F17; + double F18; + double F19; + double F20; + double F21; + double F22; + double F23; + double F24; + double F25; + double F26; + double F27; + double F28; + double F29; + double F30; + double F31; + + uint32_t INDEX; + uint32_t CPR0_1; + uint32_t CPR0_2; + uint32_t CPR0_3; + uint32_t CPR0_4; + uint32_t CPR0_5; + uint32_t CPR0_6; + uint32_t CPR0_7; + uint32_t CPR0_8; + uint32_t CPR0_9; + uint32_t CPR0_10; + uint32_t CPR0_11; + uint32_t CPR0_12; + uint32_t CPR0_13; + uint32_t CPR0_14; + uint32_t CPR0_15; + uint32_t ENTRYLO0; + uint32_t CPR0_17; + uint32_t CPR0_18; + uint32_t CPR0_19; + uint32_t CPR0_20; + uint32_t CPR0_21; + uint32_t CPR0_22; + uint32_t CPR0_23; + uint32_t ENTRYLO1; + uint32_t CPR0_25; + uint32_t CPR0_26; + uint32_t CPR0_27; + uint32_t CPR0_28; + uint32_t CPR0_29; + uint32_t CPR0_30; + uint32_t CPR0_31; + uint32_t CPR0_32; + uint32_t CPR0_33; + uint32_t CPR0_34; + uint32_t CPR0_35; + uint32_t CPR0_36; + uint32_t CPR0_37; + uint32_t CPR0_38; + uint32_t CPR0_39; + uint32_t PAGEMASK; + uint32_t CPR0_41; + uint32_t CPR0_42; + uint32_t CPR0_43; + uint32_t CPR0_44; + uint32_t CPR0_45; + uint32_t CPR0_46; + uint32_t CPR0_47; + uint32_t CPR0_48; + uint32_t CPR0_49; + uint32_t CPR0_50; + uint32_t CPR0_51; + uint32_t CPR0_52; + uint32_t CPR0_53; + uint32_t CPR0_54; + uint32_t CPR0_55; + uint32_t CPR0_56; + uint32_t CPR0_57; + uint32_t CPR0_58; + uint32_t CPR0_59; + uint32_t CPR0_60; + uint32_t CPR0_61; + uint32_t CPR0_62; + uint32_t CPR0_63; + uint32_t CPR0_64; + uint32_t CPR0_65; + uint32_t CPR0_66; + uint32_t CPR0_67; + uint32_t CPR0_68; + uint32_t CPR0_69; + uint32_t CPR0_70; + uint32_t CPR0_71; + uint32_t COUNT; + uint32_t CPR0_73; + uint32_t CPR0_74; + uint32_t CPR0_75; + uint32_t CPR0_76; + uint32_t CPR0_77; + uint32_t 
CPR0_78; + uint32_t CPR0_79; + uint32_t ENTRYHI; + uint32_t CPR0_81; + uint32_t CPR0_82; + uint32_t CPR0_83; + uint32_t CPR0_84; + uint32_t CPR0_85; + uint32_t CPR0_86; + uint32_t CPR0_87; + uint32_t CPR0_88; + uint32_t CPR0_89; + uint32_t CPR0_90; + uint32_t CPR0_91; + uint32_t CPR0_92; + uint32_t CPR0_93; + uint32_t CPR0_94; + uint32_t CPR0_95; + uint32_t CPR0_96; + uint32_t CPR0_97; + uint32_t CPR0_98; + uint32_t CPR0_99; + uint32_t CPR0_100; + uint32_t CPR0_101; + uint32_t CPR0_102; + uint32_t CPR0_103; + uint32_t CAUSE; + uint32_t CPR0_105; + uint32_t CPR0_106; + uint32_t CPR0_107; + uint32_t CPR0_108; + uint32_t CPR0_109; + uint32_t CPR0_110; + uint32_t CPR0_111; + uint32_t EPC; + uint32_t CPR0_113; + uint32_t CPR0_114; + uint32_t CPR0_115; + uint32_t CPR0_116; + uint32_t CPR0_117; + uint32_t CPR0_118; + uint32_t CPR0_119; + uint32_t CPR0_120; + uint32_t CPR0_121; + uint32_t CPR0_122; + uint32_t CPR0_123; + uint32_t CPR0_124; + uint32_t CPR0_125; + uint32_t CPR0_126; + uint32_t CPR0_127; + uint32_t CONFIG; + uint32_t CPR0_129; + uint32_t CPR0_130; + uint32_t CPR0_131; + uint32_t CPR0_132; + uint32_t CPR0_133; + uint32_t CPR0_134; + uint32_t CPR0_135; + uint32_t CPR0_136; + uint32_t CPR0_137; + uint32_t CPR0_138; + uint32_t CPR0_139; + uint32_t CPR0_140; + uint32_t CPR0_141; + uint32_t CPR0_142; + uint32_t CPR0_143; + uint32_t CPR0_144; + uint32_t CPR0_145; + uint32_t CPR0_146; + uint32_t CPR0_147; + uint32_t CPR0_148; + uint32_t CPR0_149; + uint32_t CPR0_150; + uint32_t CPR0_151; + uint32_t WATCHHI; + uint32_t CPR0_153; + uint32_t CPR0_154; + uint32_t CPR0_155; + uint32_t CPR0_156; + uint32_t CPR0_157; + uint32_t CPR0_158; + uint32_t CPR0_159; + uint32_t CPR0_160; + uint32_t CPR0_161; + uint32_t CPR0_162; + uint32_t CPR0_163; + uint32_t CPR0_164; + uint32_t CPR0_165; + uint32_t CPR0_166; + uint32_t CPR0_167; + uint32_t CPR0_168; + uint32_t CPR0_169; + uint32_t CPR0_170; + uint32_t CPR0_171; + uint32_t CPR0_172; + uint32_t CPR0_173; + uint32_t CPR0_174; + 
uint32_t CPR0_175; + uint32_t CPR0_176; + uint32_t CPR0_177; + uint32_t CPR0_178; + uint32_t CPR0_179; + uint32_t CPR0_180; + uint32_t CPR0_181; + uint32_t CPR0_182; + uint32_t CPR0_183; + uint32_t CPR0_184; + uint32_t CPR0_185; + uint32_t CPR0_186; + uint32_t CPR0_187; + uint32_t CPR0_188; + uint32_t CPR0_189; + uint32_t CPR0_190; + uint32_t CPR0_191; + uint32_t CPR0_192; + uint32_t CPR0_193; + uint32_t CPR0_194; + uint32_t CPR0_195; + uint32_t CPR0_196; + uint32_t CPR0_197; + uint32_t CPR0_198; + uint32_t CPR0_199; + uint32_t CPR0_200; + uint32_t CPR0_201; + uint32_t CPR0_202; + uint32_t CPR0_203; + uint32_t CPR0_204; + uint32_t CPR0_205; + uint32_t CPR0_206; + uint32_t CPR0_207; + uint32_t CPR0_208; + uint32_t CPR0_209; + uint32_t CPR0_210; + uint32_t CPR0_211; + uint32_t CPR0_212; + uint32_t CPR0_213; + uint32_t CPR0_214; + uint32_t CPR0_215; + uint32_t CPR0_216; + uint32_t CPR0_217; + uint32_t CPR0_218; + uint32_t CPR0_219; + uint32_t CPR0_220; + uint32_t CPR0_221; + uint32_t CPR0_222; + uint32_t CPR0_223; + uint32_t CPR0_224; + uint32_t CPR0_225; + uint32_t CPR0_226; + uint32_t CPR0_227; + uint32_t CPR0_228; + uint32_t CPR0_229; + uint32_t CPR0_230; + uint32_t CPR0_231; + uint32_t CPR0_232; + uint32_t CPR0_233; + uint32_t CPR0_234; + uint32_t CPR0_235; + uint32_t CPR0_236; + uint32_t CPR0_237; + uint32_t CPR0_238; + uint32_t CPR0_239; + uint32_t CPR0_240; + uint32_t CPR0_241; + uint32_t CPR0_242; + uint32_t CPR0_243; + uint32_t CPR0_244; + uint32_t CPR0_245; + uint32_t CPR0_246; + uint32_t CPR0_247; + uint32_t CPR0_248; + uint32_t CPR0_249; + uint32_t CPR0_250; + uint32_t CPR0_251; + uint32_t CPR0_252; + uint32_t CPR0_253; + uint32_t CPR0_254; + uint32_t CPR0_255; +}vm_cpu_t; + +_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); + +_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); +_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); +_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t 
src);
+_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src);
+
+#define RETURN_PC return BlockDst;
diff --git a/miasm/jitter/arch/JitCore_msp430.c b/miasm/jitter/arch/JitCore_msp430.c
new file mode 100644
index 00000000..c21296c7
--- /dev/null
+++ b/miasm/jitter/arch/JitCore_msp430.c
@@ -0,0 +1,477 @@
+#include <Python.h>
+#include "structmember.h"
+#include <stdint.h>
+#include <inttypes.h>
+#include "../compat_py23.h"
+#include "../queue.h"
+#include "../vm_mngr.h"
+#include "../vm_mngr_py.h"
+#include "../bn.h"
+#include "../JitCore.h"
+#include "JitCore_msp430.h"
+
+
+/*
+ * Register name -> byte offset inside vm_cpu_t.
+ *
+ * cpu_set_gpreg() looks a key up by name in this table and stores the value
+ * at the recorded offset, so every entry must point at the field that
+ * carries its own name. The status bits (cpuoff, gie, osc, scg0, scg1, res)
+ * used to reuse the offset of zf, which made set_gpreg() on any of them
+ * overwrite zf instead of the requested field.
+ */
+reg_dict gpreg_dict[] = {
+    {.name = "PC", .offset = offsetof(vm_cpu_t, PC)},
+    {.name = "SP", .offset = offsetof(vm_cpu_t, SP)},
+    //{.name = "SR", .offset = offsetof(vm_cpu_t, SR)},
+    {.name = "R3", .offset = offsetof(vm_cpu_t, R3)},
+    {.name = "R4", .offset = offsetof(vm_cpu_t, R4)},
+    {.name = "R5", .offset = offsetof(vm_cpu_t, R5)},
+    {.name = "R6", .offset = offsetof(vm_cpu_t, R6)},
+    {.name = "R7", .offset = offsetof(vm_cpu_t, R7)},
+    {.name = "R8", .offset = offsetof(vm_cpu_t, R8)},
+    {.name = "R9", .offset = offsetof(vm_cpu_t, R9)},
+    {.name = "R10", .offset = offsetof(vm_cpu_t, R10)},
+    {.name = "R11", .offset = offsetof(vm_cpu_t, R11)},
+    {.name = "R12", .offset = offsetof(vm_cpu_t, R12)},
+    {.name = "R13", .offset = offsetof(vm_cpu_t, R13)},
+    {.name = "R14", .offset = offsetof(vm_cpu_t, R14)},
+    {.name = "R15", .offset = offsetof(vm_cpu_t, R15)},
+
+    {.name = "zf", .offset = offsetof(vm_cpu_t, zf)},
+    {.name = "nf", .offset = offsetof(vm_cpu_t, nf)},
+    {.name = "of", .offset = offsetof(vm_cpu_t, of)},
+    {.name = "cf", .offset = offsetof(vm_cpu_t, cf)},
+
+    {.name = "cpuoff", .offset = offsetof(vm_cpu_t, cpuoff)},
+    {.name = "gie", .offset = offsetof(vm_cpu_t, gie)},
+    {.name = "osc", .offset = offsetof(vm_cpu_t, osc)},
+    {.name = "scg0", .offset = offsetof(vm_cpu_t, scg0)},
+    {.name = "scg1", .offset = offsetof(vm_cpu_t, scg1)},
+    {.name = "res", .offset = offsetof(vm_cpu_t, res)},
+
+};
+
+/************************** JitCpu object **************************/
+
+
+
+/*
+ * Implementation of JitCpu.get_gpreg(): return a new dict built from the
+ * registers and flags listed below.
+ *
+ * get_reg() is a macro defined outside this file; presumably it stores the
+ * named vm_cpu_t field into `dict` using `o` as scratch (confirm against
+ * its definition).
+ */
+PyObject* cpu_get_gpreg(JitCpu* self)
+{
+    PyObject *dict = PyDict_New();
+    PyObject *o;
+
+    get_reg(PC);
+    get_reg(SP);
+    //get_reg(SR);
+    get_reg(R3);
+    get_reg(R4);
+    get_reg(R5);
+    get_reg(R6);
+    get_reg(R7);
+    get_reg(R8);
+    get_reg(R9);
+    get_reg(R10);
+    get_reg(R11);
+    get_reg(R12);
+    get_reg(R13);
+    get_reg(R14);
+    get_reg(R15);
+
+    get_reg(zf);
+    get_reg(nf);
+    get_reg(of);
+    get_reg(cf);
+
+    get_reg(cpuoff);
+    get_reg(gie);
+    get_reg(osc);
+    get_reg(scg0);
+    get_reg(scg1);
+    get_reg(res);
+
+
+    return dict;
+}
+
+
+/*
+ * Implementation of JitCpu.set_gpreg(dict): for every (name, value) pair,
+ * look the name up in gpreg_dict and store the value as a uint32_t at the
+ * recorded offset inside self->cpu.
+ *
+ * Raises TypeError when the argument is not a dict and ValueError (after
+ * printing the key on stderr) for a name absent from gpreg_dict. Pairs
+ * processed before the failing one have already been written.
+ * PyGetStr()/PyGetInt() are conversion macros defined elsewhere.
+ */
+PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args)
+{
+    PyObject* dict;
+    PyObject *d_key, *d_value = NULL;
+    Py_ssize_t pos = 0;
+    char* d_key_name;
+    uint64_t val;
+    unsigned int i, found;
+
+    if (!PyArg_ParseTuple(args, "O", &dict))
+        RAISE(PyExc_TypeError,"Cannot parse arguments");
+    if(!PyDict_Check(dict))
+        RAISE(PyExc_TypeError, "arg must be dict");
+    while(PyDict_Next(dict, &pos, &d_key, &d_value)){
+        PyGetStr(d_key_name, d_key);
+        PyGetInt(d_value, val);
+        found = 0;
+        for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){
+            if (strcmp(d_key_name, gpreg_dict[i].name))
+                continue;
+            /* val is 64-bit; the store keeps only its low 32 bits */
+            *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val;
+            found = 1;
+            break;
+        }
+
+        if (found)
+            continue;
+        fprintf(stderr, "unknown key: %s\n", d_key_name);
+        RAISE(PyExc_ValueError, "unknown reg");
+    }
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+
+
+
+/* Implementation of JitCpu.init_regs(): zero the whole vm_cpu_t. */
+PyObject * cpu_init_regs(JitCpu* self)
+{
+    memset(self->cpu, 0, sizeof(vm_cpu_t));
+
+    Py_INCREF(Py_None);
+    return Py_None;
+
+}
+
+/* Print PC, SP, R3..R15 and the zf/nf/of/cf flags in hex on stdout. */
+void dump_gpregs(vm_cpu_t* vmcpu)
+{
+
+    printf("PC %.4"PRIX32" SP %.4"PRIX32" R3 %.4"PRIX32" ",
+           vmcpu->PC, vmcpu->SP, vmcpu->R3);
+    printf("R4 %.4"PRIX32" R5 %.4"PRIX32" R6 %.4"PRIX32" R7 %.4"PRIX32"\n",
+           vmcpu->R4, vmcpu->R5, vmcpu->R6, vmcpu->R7);
+    printf("R8 %.4"PRIX32" R9 %.4"PRIX32" R10 %.4"PRIX32" R11 %.4"PRIX32" ",
+           vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11);
+    printf("R12 %.4"PRIX32" R13 %.4"PRIX32" R14 %.4"PRIX32" R15 %.4"PRIX32"\n",
+           vmcpu->R12, vmcpu->R13, vmcpu->R14, vmcpu->R15);
+    printf("zf %"PRIX32" nf %"PRIX32" of %"PRIX32" cf %"PRIX32"\n",
+           vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf);
+}
+
+
+/* Implementation of JitCpu.dump_gpregs(): dump self->cpu, return None. */
+PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args)
+{
+    vm_cpu_t* vmcpu;
+
+    vmcpu = self->cpu;
+    dump_gpregs(vmcpu);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* JitCpu.dump_gpregs_with_attrib(): `args` is ignored, same output as dump_gpregs(). */
+PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args)
+{
+    return cpu_dump_gpregs(self, args);
+}
+
+
+/* Implementation of JitCpu.set_exception(value): overwrite exception_flags. */
+PyObject* cpu_set_exception(JitCpu* self, PyObject* args)
+{
+    PyObject *item1;
+    uint64_t i;
+
+    if (!PyArg_ParseTuple(args, "O", &item1))
+        RAISE(PyExc_TypeError,"Cannot parse arguments");
+
+    PyGetInt(item1, i);
+
+    ((vm_cpu_t*)self->cpu)->exception_flags = i;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* Implementation of JitCpu.get_exception(): exception_flags as a Python int. */
+PyObject* cpu_get_exception(JitCpu* self, PyObject* args)
+{
+    return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags));
+}
+
+
+
+
+
+/*
+ * Call jitter.automod_cb(addr, size) when the VM manager has
+ * EXCEPT_CODE_AUTOMOD set in its exception flags; do nothing otherwise.
+ * Callers in this file pass `size` in bits.
+ */
+void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size)
+{
+    PyObject *result;
+
+    if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD))
+        return;
+    result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size);
+    /*
+     * PyObject_CallMethod() returns NULL when the callback raised;
+     * Py_DECREF(NULL) would crash, so use the NULL-tolerant variant. The
+     * Python error indicator is left set for the caller.
+     */
+    Py_XDECREF(result);
+
+}
+
+/* 8-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src)
+{
+    vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 8);
+}
+
+/* 16-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src)
+{
+    vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 16);
+}
+
+/* 32-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src)
+{
+    vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 32);
+}
+
+/* 64-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src)
+{
+    vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 64);
+}
+
+
+/*
+ * Implementation of JitCpu.set_mem(addr, data): copy the bytes object `data`
+ * into guest memory at `addr`, then run the self-modifying-code check on the
+ * written range.
+ *
+ * Raises TypeError when `data` is not a bytes object or when vm_write_mem()
+ * reports a failure (negative return).
+ */
+PyObject* vm_set_mem(JitCpu *self, PyObject* args)
+{
+    PyObject *py_addr;
+    PyObject *py_buffer;
+    Py_ssize_t py_length;
+
+    char * buffer;
+    uint64_t size;
+    uint64_t addr;
+    int ret;
+
+    if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer))
+        RAISE(PyExc_TypeError,"Cannot parse arguments");
+
+    PyGetInt(py_addr, addr);
+
+    if(!PyBytes_Check(py_buffer))
+        RAISE(PyExc_TypeError,"arg must be bytes");
+
+    /* One call yields both pointer and length; propagate its error. */
+    if (PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length) < 0)
+        return NULL;
+    size = (uint64_t)py_length;
+
+    ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size);
+    if (ret < 0)
+        /* Same exception type as before; the old text blamed the argument type. */
+        RAISE(PyExc_TypeError,"Error in set_mem");
+    check_automod(self, addr, size*8);
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* No data members are exposed directly; registers go through getseters. */
+static PyMemberDef JitCpu_members[] = {
+    {NULL}  /* Sentinel */
+};
+
+/* Python-visible methods of JitCpu. vm_get_mem is defined outside this file. */
+static PyMethodDef JitCpu_methods[] = {
+    {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS,
+     "X"},
+    {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS,
+     "X"},
+    {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS,
+     "X"},
+    {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS,
+     "X"},
+    {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS,
+     "X"},
+    {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS,
+     "X"},
+    {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS,
+     "X"},
+    {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS,
+     "X"},
+    {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS,
+     "X"},
+    {NULL}  /* Sentinel */
+};
+
+/*
+ * tp_init: allocate the vm_cpu_t backing this object. The memory is not
+ * cleared here (see cpu_init_regs); allocation failure terminates the
+ * process.
+ */
+static int
+JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds)
+{
+    self->cpu = malloc(sizeof(vm_cpu_t));
+    if (self->cpu == NULL) {
+        fprintf(stderr, "cannot alloc vm_cpu_t\n");
+        exit(EXIT_FAILURE);
+    }
+    return 0;
+}
+
+/*
+ * getset_reg_u16() is a macro defined elsewhere; each invocation presumably
+ * emits the JitCpu_get_<reg>/JitCpu_set_<reg> pair referenced by
+ * JitCpu_getseters.
+ * NOTE(review): the backing fields in vm_cpu_t are uint32_t -- confirm the
+ * u16 accessor is intended.
+ */
+getset_reg_u16(PC);
+getset_reg_u16(SP);
+getset_reg_u16(R3);
+getset_reg_u16(R4);
+getset_reg_u16(R5);
+getset_reg_u16(R6);
+getset_reg_u16(R7);
+getset_reg_u16(R8);
+getset_reg_u16(R9);
+getset_reg_u16(R10);
+getset_reg_u16(R11);
+getset_reg_u16(R12);
+getset_reg_u16(R13);
+getset_reg_u16(R14);
+getset_reg_u16(R15);
+getset_reg_u16(zf);
+getset_reg_u16(nf);
+getset_reg_u16(of);
+getset_reg_u16(cf);
+getset_reg_u16(cpuoff);
+getset_reg_u16(gie);
+getset_reg_u16(osc);
+getset_reg_u16(scg0);
+getset_reg_u16(scg1);
+getset_reg_u16(res);
+
+
+
+/*
+ * Module-level get_gpreg_offset_all(): return a new dict with one entry per
+ * vm_cpu_t field listed below (exception_flags included).
+ *
+ * get_reg_off() is a macro defined outside this file; presumably it records
+ * the field's byte offset in `dict` using `o` as scratch (confirm against
+ * its definition).
+ */
+PyObject* get_gpreg_offset_all(void)
+{
+    PyObject *dict = PyDict_New();
+    PyObject *o;
+    get_reg_off(exception_flags);
+
+    get_reg_off(PC);
+    get_reg_off(SP);
+    get_reg_off(R3);
+    get_reg_off(R4);
+    get_reg_off(R5);
+    get_reg_off(R6);
+    get_reg_off(R7);
+    get_reg_off(R8);
+    get_reg_off(R9);
+    get_reg_off(R10);
+    get_reg_off(R11);
+    get_reg_off(R12);
+    get_reg_off(R13);
+    get_reg_off(R14);
+    get_reg_off(R15);
+
+    get_reg_off(zf);
+    get_reg_off(nf);
+    get_reg_off(of);
+    get_reg_off(cf);
+    get_reg_off(cpuoff);
+    get_reg_off(gie);
+    get_reg_off(osc);
+    get_reg_off(scg0);
+    get_reg_off(scg1);
+    get_reg_off(res);
+
+    return dict;
+}
+
+
+
+
+/*
+ * Attribute table of JitCpu: vmmngr, jitter, then one read/write attribute
+ * per register or flag. The JitCpu_get_ and JitCpu_set_ functions named here
+ * are not defined in the visible source; the register ones presumably come
+ * from the getset_reg_u16() invocations.
+ */
+static PyGetSetDef JitCpu_getseters[] = {
+    {"vmmngr",
+     (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr,
+     "vmmngr",
+     NULL},
+
+    {"jitter",
+     (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter,
+     "jitter",
+     NULL},
+
+
+    {"PC", (getter)JitCpu_get_PC, (setter)JitCpu_set_PC, "PC", NULL},
+    {"SP", (getter)JitCpu_get_SP, (setter)JitCpu_set_SP, "SP", NULL},
+    {"R3", (getter)JitCpu_get_R3, (setter)JitCpu_set_R3, "R3", NULL},
+    {"R4", (getter)JitCpu_get_R4, (setter)JitCpu_set_R4, "R4", NULL},
+    {"R5", (getter)JitCpu_get_R5, (setter)JitCpu_set_R5, "R5", NULL},
+    {"R6", (getter)JitCpu_get_R6, (setter)JitCpu_set_R6, "R6", NULL},
+    {"R7", (getter)JitCpu_get_R7, (setter)JitCpu_set_R7, "R7", NULL},
+    {"R8", (getter)JitCpu_get_R8, (setter)JitCpu_set_R8, "R8", NULL},
+    {"R9", (getter)JitCpu_get_R9, (setter)JitCpu_set_R9, "R9", NULL},
+    {"R10", (getter)JitCpu_get_R10, (setter)JitCpu_set_R10, "R10", NULL},
+    {"R11", (getter)JitCpu_get_R11, (setter)JitCpu_set_R11, "R11", NULL},
+    {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL},
+    {"R13", (getter)JitCpu_get_R13, (setter)JitCpu_set_R13, "R13", NULL},
+    {"R14", (getter)JitCpu_get_R14, (setter)JitCpu_set_R14, "R14", NULL},
+    {"R15", (getter)JitCpu_get_R15, (setter)JitCpu_set_R15, "R15", NULL},
+    {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL},
+    {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL},
+    {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL},
+    {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL},
+    {"cpuoff", (getter)JitCpu_get_cpuoff, (setter)JitCpu_set_cpuoff, "cpuoff", NULL},
+    {"gie", (getter)JitCpu_get_gie, (setter)JitCpu_set_gie, "gie", NULL},
+    {"osc", (getter)JitCpu_get_osc, (setter)JitCpu_set_osc, "osc", NULL},
+    {"scg0", (getter)JitCpu_get_scg0, (setter)JitCpu_set_scg0, "scg0", NULL},
+    {"scg1", (getter)JitCpu_get_scg1, (setter)JitCpu_set_scg1, "scg1", NULL},
+    {"res", (getter)JitCpu_get_res, (setter)JitCpu_set_res, "res", NULL},
+
+    {NULL}  /* Sentinel */
+};
+
+
+
+/*
+ * Type object for JitCore_msp430.JitCpu. Slots are filled positionally, so
+ * the order of the entries below must not change. JitCpu_dealloc and
+ * JitCpu_new are defined outside this file.
+ */
+static PyTypeObject JitCpuType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "JitCore_msp430.JitCpu",   /*tp_name*/
+    sizeof(JitCpu),            /*tp_basicsize*/
+    0,                         /*tp_itemsize*/
+    (destructor)JitCpu_dealloc,/*tp_dealloc*/
+    0,                         /*tp_print*/
+    0,                         /*tp_getattr*/
+    0,                         /*tp_setattr*/
+    0,                         /*tp_compare*/
+    0,                         /*tp_repr*/
+    0,                         /*tp_as_number*/
+    0,                         /*tp_as_sequence*/
+    0,                         /*tp_as_mapping*/
+    0,                         /*tp_hash */
+    0,                         /*tp_call*/
+    0,                         /*tp_str*/
+    0,                         /*tp_getattro*/
+    0,                         /*tp_setattro*/
+    0,                         /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    "JitCpu objects",          /* tp_doc */
+    0,                         /* tp_traverse */
+    0,                         /* tp_clear */
+    0,                         /* tp_richcompare */
+    0,                         /* tp_weaklistoffset */
+    0,                         /* tp_iter */
+    0,                         /* tp_iternext */
+    JitCpu_methods,            /* tp_methods */
+    JitCpu_members,            /* tp_members */
+    JitCpu_getseters,          /* tp_getset */
+    0,                         /* tp_base */
+    0,                         /* tp_dict */
+    0,                         /* tp_descr_get */
+    0,                         /* tp_descr_set */
+    0,                         /* tp_dictoffset */
+    (initproc)JitCpu_init,     /* tp_init */
+    0,                         /* tp_alloc */
+    JitCpu_new,                /* tp_new */
+};
+
+
+
+/* Module-level functions of JitCore_msp430. */
+static PyMethodDef JitCore_msp430_Methods[] = {
+
+    /*
+
+    */
+    {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS},
+    {NULL, NULL, 0, NULL}  /* Sentinel */
+
+};
+
+
+
+
+/*
+ * Module initialisation: create the module, finalise JitCpuType and publish
+ * it as JitCore_msp430.JitCpu. MOD_INIT/MOD_DEF are macros defined outside
+ * this file.
+ */
+MOD_INIT(JitCore_msp430)
+{
+    PyObject *module;
+
+    MOD_DEF(module, "JitCore_msp430", "JitCore_msp430 module", JitCore_msp430_Methods);
+
+    if (module == NULL)
+        return NULL;
+
+    if (PyType_Ready(&JitCpuType) < 0)
+        return NULL;
+
+    Py_INCREF(&JitCpuType);
+    if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0)
+        return NULL;
+
+    return module;
+}
diff --git a/miasm/jitter/arch/JitCore_msp430.h b/miasm/jitter/arch/JitCore_msp430.h
new file mode 100644
index 00000000..1c802e9e
--- /dev/null
+++ b/miasm/jitter/arch/JitCore_msp430.h
@@ -0,0 +1,44 @@
+
+typedef struct {
+    uint32_t exception_flags;
+
+    /* gpregs */
+    uint32_t PC;
+    uint32_t SP;
+    uint32_t R3;
+    uint32_t R4;
+    uint32_t R5;
+    uint32_t R6;
+    uint32_t R7;
+    uint32_t R8;
+    uint32_t R9;
+    uint32_t R10;
+    uint32_t R11;
+    uint32_t R12;
+    uint32_t R13;
+    uint32_t R14;
+    uint32_t R15;
+
+    /* eflag */
+    uint32_t zf;
+    uint32_t nf;
+    uint32_t of;
+    uint32_t cf;
+
+    uint32_t cpuoff;
+    uint32_t gie;
+    uint32_t osc;
+    uint32_t scg0;
+    uint32_t scg1;
+    uint32_t res;
+
+}vm_cpu_t;
+
+#define RETURN_PC return BlockDst;
+
+_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu);
+
+_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src);
+_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src);
+_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src);
+_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src);
diff --git a/miasm/jitter/arch/JitCore_ppc32.c b/miasm/jitter/arch/JitCore_ppc32.c
new file mode 100644
index 00000000..8a1bb79e
--- /dev/null
+++ b/miasm/jitter/arch/JitCore_ppc32.c
@@ -0,0 +1,344 @@
+#include <Python.h>
+#include "structmember.h"
+#include <stdint.h>
+#include <inttypes.h>
+#include "../compat_py23.h"
+#include "../queue.h"
+#include "../vm_mngr.h"
+#include "../vm_mngr_py.h"
+#include "../bn.h"
+#include "../JitCore.h"
+#include "JitCore_ppc32.h"
+
+/*
+ * Register name -> offset/size table, generated from the X-macro list in
+ * JitCore_ppc32_regs.h. cpu_set_gpreg() resolves register names through it.
+ */
+reg_dict gpreg_dict[] = {
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+    { .name = #_name, .offset = offsetof(struct vm_cpu, _name), .size = _size },
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+};
+
+/*
+ * Implementation of JitCpu.get_gpreg(): return a new dict with one entry per
+ * register of JitCore_ppc32_regs.h. get_reg() is a macro defined outside
+ * this file; presumably it stores the field into `dict` using `o` as
+ * scratch (confirm against its definition).
+ */
+PyObject* cpu_get_gpreg(JitCpu* self)
+{
+    PyObject *dict = PyDict_New();
+    PyObject *o;
+
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+    get_reg(_name);
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+
+    return dict;
+}
+
+
+
+/*
+ * Implementation of JitCpu.set_gpreg(dict): for every (name, value) pair,
+ * look the name up in gpreg_dict and store the value as a uint32_t at the
+ * recorded offset inside self->cpu.
+ *
+ * Raises TypeError when the argument is not a dict and ValueError (after
+ * printing the key on stderr) for an unknown name. Pairs processed before
+ * the failing one have already been written.
+ */
+PyObject *
+cpu_set_gpreg(JitCpu *self, PyObject *args) {
+    PyObject *dict;
+    PyObject *d_key, *d_value = NULL;
+    Py_ssize_t pos = 0;
+    char* d_key_name;
+    uint64_t val;
+    unsigned int i;
+
+    if (!PyArg_ParseTuple(args, "O", &dict))
+        return NULL;
+    if(!PyDict_Check(dict))
+        RAISE(PyExc_TypeError, "arg must be dict");
+
+    while(PyDict_Next(dict, &pos, &d_key, &d_value)) {
+        int found = 0;
+        PyGetStr(d_key_name, d_key);
+        PyGetInt(d_value, val);
+
+        for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){
+            if (strcmp(d_key_name, gpreg_dict[i].name))
+                continue;
+            /* every register is stored as 32 bits; .size is not consulted */
+            *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val;
+            found = 1;
+            break;
+        }
+
+        if (found)
+            continue;
+        fprintf(stderr, "unknown key: %s\n", d_key_name);
+        RAISE(PyExc_ValueError, "unknown reg");
+    }
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+
+/* Implementation of JitCpu.init_regs(): zero the whole struct vm_cpu. */
+PyObject *
+cpu_init_regs(JitCpu *self) {
+    memset(self->cpu, 0, sizeof(struct vm_cpu));
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/*
+ * Print one "name value" cell. *n counts the cells already printed on the
+ * current row (modulo 4); every fourth cell ends with a newline.
+ */
+static void
+dump_gpreg(const char *name, uint32_t val, int *n) {
+    printf("%6s %.8" PRIX32"%c", name, val, (*n + 1) % 4 == 0? '\n':' ');
+    *n = (*n + 1) % 4;
+}
+
+/* Print every register of JitCore_ppc32_regs.h on stdout, four per row. */
+void
+dump_gpregs(struct vm_cpu *vmcpu) {
+    int reg_num = 0;
+
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+    dump_gpreg(#_name, vmcpu->_name, &reg_num);
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+
+    /* terminate a partially filled last row */
+    if ((reg_num % 4) != 0)
+        putchar('\n');
+}
+
+
+/* Implementation of JitCpu.dump_gpregs(): dump self->cpu, return None. */
+PyObject *
+cpu_dump_gpregs(JitCpu *self, PyObject *args) {
+
+    dump_gpregs(self->cpu);
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* JitCpu.dump_gpregs_with_attrib(): `args` is ignored, same output as dump_gpregs(). */
+PyObject *
+cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args)
+{
+    return cpu_dump_gpregs(self, args);
+}
+
+/* Implementation of JitCpu.set_exception(value): overwrite exception_flags. */
+PyObject *
+cpu_set_exception(JitCpu *self, PyObject *args) {
+    PyObject *item1;
+    uint64_t i;
+
+    if (!PyArg_ParseTuple(args, "O", &item1))
+        return NULL;
+
+    PyGetInt(item1, i);
+
+    ((struct vm_cpu *)self->cpu)->exception_flags = i;
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* Implementation of JitCpu.get_exception(): exception_flags as a Python int. */
+PyObject *
+cpu_get_exception(JitCpu *self, PyObject *args) {
+    return PyLong_FromUnsignedLongLong(((struct vm_cpu *)self->cpu)->exception_flags);
+}
+
+/* Implementation of JitCpu.get_spr_access(): spr_access as a Python int. */
+static PyObject *
+cpu_get_spr_access(JitCpu *self, PyObject *args) {
+    return PyLong_FromUnsignedLongLong(((struct vm_cpu *) self->cpu)->spr_access);
+}
+
+/*
+ * Call jitter.automod_cb(addr, size) when the VM manager has
+ * EXCEPT_CODE_AUTOMOD set in its exception flags; do nothing otherwise.
+ * Callers in this file pass `size` in bits.
+ */
+void
+check_automod(JitCpu *jitcpu, uint64_t addr, uint64_t size) {
+    PyObject *result;
+
+    if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD))
+        return;
+    result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size);
+    /*
+     * PyObject_CallMethod() returns NULL when the callback raised;
+     * Py_DECREF(NULL) would crash, so use the NULL-tolerant variant. The
+     * Python error indicator is left set for the caller.
+     */
+    Py_XDECREF(result);
+}
+
+/* 8-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src)
+{
+    vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 8);
+}
+
+/* 16-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src)
+{
+    vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 16);
+}
+
+/* 32-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src)
+{
+    vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 32);
+}
+
+/* 64-bit guest memory store, followed by the self-modifying-code check. */
+void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src)
+{
+    vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src);
+    check_automod(jitcpu, addr, 64);
+}
+
+
+
+/*
+ * Implementation of JitCpu.set_mem(addr, data): copy the bytes object `data`
+ * into guest memory at `addr`, then run the self-modifying-code check on the
+ * written range.
+ *
+ * Raises TypeError when `data` is not a bytes object or when vm_write_mem()
+ * reports a failure (negative return).
+ */
+PyObject *
+vm_set_mem(JitCpu *self, PyObject *args) {
+    PyObject *py_addr;
+    PyObject *py_buffer;
+    Py_ssize_t py_length;
+
+    char *buffer;
+    uint64_t size;
+    uint64_t addr;
+    int ret = 0x1337;
+
+    if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer))
+        return NULL;
+
+    PyGetInt(py_addr, addr);
+
+    if(!PyBytes_Check(py_buffer))
+        RAISE(PyExc_TypeError,"arg must be bytes");
+
+    /* One call yields both pointer and length; propagate its error. */
+    if (PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length) < 0)
+        return NULL;
+    size = (uint64_t)py_length;
+
+    ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size);
+    if (ret < 0)
+        /* Same exception type as before; the old text blamed the argument type. */
+        RAISE(PyExc_TypeError,"Error in set_mem");
+    check_automod(self, addr, size*8);
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+/* No data members are exposed directly; registers go through getseters. */
+static PyMemberDef JitCpu_members[] = {
+    {NULL}  /* Sentinel */
+};
+
+/* Python-visible methods of JitCpu. vm_get_mem is defined outside this file. */
+static PyMethodDef JitCpu_methods[] = {
+    {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, "X"},
+    {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, "X"},
+    {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, "X"},
+    {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, "X"},
+    {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, "X"},
+    {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, "X"},
+    {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, "X"},
+    {"get_spr_access", (PyCFunction)cpu_get_spr_access, METH_VARARGS, "X"},
+    {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, "X"},
+    {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, "X"},
+    {NULL}  /* Sentinel */
+};
+
+/*
+ * tp_init: allocate the struct vm_cpu backing this object. The memory is
+ * not cleared here (see cpu_init_regs); allocation failure terminates the
+ * process.
+ */
+static int
+JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) {
+    self->cpu = malloc(sizeof(struct vm_cpu));
+    if (self->cpu == NULL) {
+        fprintf(stderr, "cannot alloc struct vm_cpu\n");
+        exit(1);
+    }
+    return 0;
+}
+
+
+/*
+ * One getset_reg_u32() invocation per register. The macro is defined
+ * elsewhere; it presumably emits the JitCpu_get_<reg>/JitCpu_set_<reg> pair
+ * referenced by JitCpu_getseters.
+ */
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+getset_reg_u32(_name);
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+
+/*
+ * Module-level get_gpreg_offset_all(): return a new dict with one entry per
+ * register of JitCore_ppc32_regs.h. get_reg_off() is a macro defined outside
+ * this file; presumably it records the field's byte offset in `dict`.
+ */
+PyObject *
+get_gpreg_offset_all(void) {
+    PyObject *dict = PyDict_New();
+    PyObject *o;
+
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+    get_reg_off(_name);
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+
+    return dict;
+}
+
+/*
+ * Attribute table of JitCpu: vmmngr, jitter, then one read/write attribute
+ * per register generated from JitCore_ppc32_regs.h.
+ */
+static PyGetSetDef JitCpu_getseters[] = {
+    {"vmmngr",
+     (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr,
+     "vmmngr",
+     NULL},
+
+    {"jitter",
+     (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter,
+     "jitter",
+     NULL},
+
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+    { #_name, (getter) JitCpu_get_ ## _name , \
+        (setter) JitCpu_set_ ## _name , #_name , NULL},
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+
+    {NULL}  /* Sentinel */
+};
+
+
+/*
+ * Type object for JitCpu. Slots are filled positionally, so the order of the
+ * entries below must not change.
+ * NOTE(review): tp_name says "JitCore_ppc" while the module is created as
+ * "JitCore_ppc32" -- confirm whether the mismatch is intended.
+ */
+static PyTypeObject JitCpuType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "JitCore_ppc.JitCpu",      /*tp_name*/
+    sizeof(JitCpu),            /*tp_basicsize*/
+    0,                         /*tp_itemsize*/
+    (destructor)JitCpu_dealloc,/*tp_dealloc*/
+    0,                         /*tp_print*/
+    0,                         /*tp_getattr*/
+    0,                         /*tp_setattr*/
+    0,                         /*tp_compare*/
+    0,                         /*tp_repr*/
+    0,                         /*tp_as_number*/
+    0,                         /*tp_as_sequence*/
+    0,                         /*tp_as_mapping*/
+    0,                         /*tp_hash */
+    0,                         /*tp_call*/
+    0,                         /*tp_str*/
+    0,                         /*tp_getattro*/
+    0,                         /*tp_setattro*/
+    0,                         /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    "JitCpu objects",          /* tp_doc */
+    0,                         /* tp_traverse */
+    0,                         /* tp_clear */
+    0,                         /* tp_richcompare */
+    0,                         /* tp_weaklistoffset */
+    0,                         /* tp_iter */
+    0,                         /* tp_iternext */
+    JitCpu_methods,            /* tp_methods */
+    JitCpu_members,            /* tp_members */
+    JitCpu_getseters,          /* tp_getset */
+    0,                         /* tp_base */
+    0,                         /* tp_dict */
+    0,                         /* tp_descr_get */
+    0,                         /* tp_descr_set */
+    0,                         /* tp_dictoffset */
+    (initproc)JitCpu_init,     /* tp_init */
+    0,                         /* tp_alloc */
+    JitCpu_new,                /* tp_new */
+};
+
+
+
+/* Module-level functions of JitCore_ppc32. */
+static PyMethodDef JitCore_ppc32_Methods[] = {
+    {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS},
+    {NULL, NULL, 0, NULL}  /* Sentinel */
+};
+
+
+
+/*
+ * Module initialisation: create the module, finalise JitCpuType and publish
+ * it as JitCore_ppc32.JitCpu. MOD_INIT/MOD_DEF are macros defined outside
+ * this file.
+ */
+MOD_INIT(JitCore_ppc32)
+{
+    PyObject *module;
+
+    MOD_DEF(module, "JitCore_ppc32", "JitCore_ppc32 module", JitCore_ppc32_Methods);
+
+    if (module == NULL)
+        return NULL;
+
+    if (PyType_Ready(&JitCpuType) < 0)
+        return NULL;
+
+    Py_INCREF(&JitCpuType);
+    if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0)
+        return NULL;
+
+    return module;
+}
diff --git a/miasm/jitter/arch/JitCore_ppc32.h b/miasm/jitter/arch/JitCore_ppc32.h
new file mode 100644
index 00000000..f2a5200e
--- /dev/null
+++ b/miasm/jitter/arch/JitCore_ppc32.h
@@ -0,0 +1,24 @@
+/*
+ * _size can't be used yet because all register accesses are homogeneously
+ * 32-bit
+ */
+struct vm_cpu {
+#define JITCORE_PPC_REG_EXPAND(_name, _size) \
+    uint32_t _name;
+#include "JitCore_ppc32_regs.h"
+#undef JITCORE_PPC_REG_EXPAND
+
+    uint64_t exception_flags;
+    uint32_t spr_access;
+    uint32_t reserve;
+    uint32_t reserve_address;
+};
+
+_MIASM_EXPORT void dump_gpregs(struct vm_cpu *);
+
+typedef struct vm_cpu vm_cpu_t;
+
+_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src);
+_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src);
+_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src);
+_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src);
diff --git a/miasm/jitter/arch/JitCore_ppc32_regs.h b/miasm/jitter/arch/JitCore_ppc32_regs.h
new file mode 100644
index 00000000..d15b5e51
--- /dev/null
+++ b/miasm/jitter/arch/JitCore_ppc32_regs.h
@@ -0,0 +1,89 @@
+JITCORE_PPC_REG_EXPAND(R0, 32)
+JITCORE_PPC_REG_EXPAND(R1, 32)
+JITCORE_PPC_REG_EXPAND(R2, 32)
+JITCORE_PPC_REG_EXPAND(R3, 32)
+JITCORE_PPC_REG_EXPAND(R4, 32)
+JITCORE_PPC_REG_EXPAND(R5, 32)
+JITCORE_PPC_REG_EXPAND(R6, 32)
+JITCORE_PPC_REG_EXPAND(R7, 32)
+JITCORE_PPC_REG_EXPAND(R8, 32)
+JITCORE_PPC_REG_EXPAND(R9, 32)
+JITCORE_PPC_REG_EXPAND(R10, 32)
+JITCORE_PPC_REG_EXPAND(R11, 32)
+JITCORE_PPC_REG_EXPAND(R12, 32)
+JITCORE_PPC_REG_EXPAND(R13, 32)
+JITCORE_PPC_REG_EXPAND(R14, 32)
+JITCORE_PPC_REG_EXPAND(R15, 32)
+JITCORE_PPC_REG_EXPAND(R16, 32) +JITCORE_PPC_REG_EXPAND(R17, 32) +JITCORE_PPC_REG_EXPAND(R18, 32) +JITCORE_PPC_REG_EXPAND(R19, 32) +JITCORE_PPC_REG_EXPAND(R20, 32) +JITCORE_PPC_REG_EXPAND(R21, 32) +JITCORE_PPC_REG_EXPAND(R22, 32) +JITCORE_PPC_REG_EXPAND(R23, 32) +JITCORE_PPC_REG_EXPAND(R24, 32) +JITCORE_PPC_REG_EXPAND(R25, 32) +JITCORE_PPC_REG_EXPAND(R26, 32) +JITCORE_PPC_REG_EXPAND(R27, 32) +JITCORE_PPC_REG_EXPAND(R28, 32) +JITCORE_PPC_REG_EXPAND(R29, 32) +JITCORE_PPC_REG_EXPAND(R30, 32) +JITCORE_PPC_REG_EXPAND(R31, 32) + +JITCORE_PPC_REG_EXPAND(PC, 32) +JITCORE_PPC_REG_EXPAND(LR, 32) +JITCORE_PPC_REG_EXPAND(CTR, 32) +JITCORE_PPC_REG_EXPAND(MSR, 32) + +JITCORE_PPC_REG_EXPAND(XER_SO, 32) +JITCORE_PPC_REG_EXPAND(XER_OV, 32) +JITCORE_PPC_REG_EXPAND(XER_CA, 32) +JITCORE_PPC_REG_EXPAND(XER_BC, 32) + +JITCORE_PPC_REG_EXPAND(CR0_LT, 8) +JITCORE_PPC_REG_EXPAND(CR0_GT, 8) +JITCORE_PPC_REG_EXPAND(CR0_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR0_SO, 8) +JITCORE_PPC_REG_EXPAND(CR1_LT, 8) +JITCORE_PPC_REG_EXPAND(CR1_GT, 8) +JITCORE_PPC_REG_EXPAND(CR1_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR1_SO, 8) +JITCORE_PPC_REG_EXPAND(CR2_LT, 8) +JITCORE_PPC_REG_EXPAND(CR2_GT, 8) +JITCORE_PPC_REG_EXPAND(CR2_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR2_SO, 8) +JITCORE_PPC_REG_EXPAND(CR3_LT, 8) +JITCORE_PPC_REG_EXPAND(CR3_GT, 8) +JITCORE_PPC_REG_EXPAND(CR3_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR3_SO, 8) +JITCORE_PPC_REG_EXPAND(CR4_LT, 8) +JITCORE_PPC_REG_EXPAND(CR4_GT, 8) +JITCORE_PPC_REG_EXPAND(CR4_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR4_SO, 8) +JITCORE_PPC_REG_EXPAND(CR5_LT, 8) +JITCORE_PPC_REG_EXPAND(CR5_GT, 8) +JITCORE_PPC_REG_EXPAND(CR5_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR5_SO, 8) +JITCORE_PPC_REG_EXPAND(CR6_LT, 8) +JITCORE_PPC_REG_EXPAND(CR6_GT, 8) +JITCORE_PPC_REG_EXPAND(CR6_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR6_SO, 8) +JITCORE_PPC_REG_EXPAND(CR7_LT, 8) +JITCORE_PPC_REG_EXPAND(CR7_GT, 8) +JITCORE_PPC_REG_EXPAND(CR7_EQ, 8) +JITCORE_PPC_REG_EXPAND(CR7_SO, 8) + +JITCORE_PPC_REG_EXPAND(SPRG0, 32) +JITCORE_PPC_REG_EXPAND(SPRG1, 32) 
+JITCORE_PPC_REG_EXPAND(SPRG2, 32) +JITCORE_PPC_REG_EXPAND(SPRG3, 32) +JITCORE_PPC_REG_EXPAND(SRR0, 32) +JITCORE_PPC_REG_EXPAND(SRR1, 32) +JITCORE_PPC_REG_EXPAND(DAR, 32) +JITCORE_PPC_REG_EXPAND(DSISR, 32) +JITCORE_PPC_REG_EXPAND(PIR, 32) +JITCORE_PPC_REG_EXPAND(PVR, 32) +JITCORE_PPC_REG_EXPAND(DEC, 32) +JITCORE_PPC_REG_EXPAND(TBL, 32) +JITCORE_PPC_REG_EXPAND(TBU, 32) diff --git a/miasm/jitter/arch/JitCore_x86.c b/miasm/jitter/arch/JitCore_x86.c new file mode 100644 index 00000000..50ce6bd5 --- /dev/null +++ b/miasm/jitter/arch/JitCore_x86.c @@ -0,0 +1,946 @@ +#include +#include "structmember.h" +#include +#include +#include "../compat_py23.h" +#include "../queue.h" +#include "../vm_mngr.h" +#include "../vm_mngr_py.h" +#include "../bn.h" +#include "../JitCore.h" +#include "../op_semantics.h" +#include "JitCore_x86.h" + + +vm_cpu_t ref_arch_regs; + +reg_dict gpreg_dict[] = { + {.name = "RAX", .offset = offsetof(vm_cpu_t, RAX), .size = 64}, + {.name = "RBX", .offset = offsetof(vm_cpu_t, RBX), .size = 64}, + {.name = "RCX", .offset = offsetof(vm_cpu_t, RCX), .size = 64}, + {.name = "RDX", .offset = offsetof(vm_cpu_t, RDX), .size = 64}, + {.name = "RSI", .offset = offsetof(vm_cpu_t, RSI), .size = 64}, + {.name = "RDI", .offset = offsetof(vm_cpu_t, RDI), .size = 64}, + {.name = "RSP", .offset = offsetof(vm_cpu_t, RSP), .size = 64}, + {.name = "RBP", .offset = offsetof(vm_cpu_t, RBP), .size = 64}, + + {.name = "R8", .offset = offsetof(vm_cpu_t, R8), .size = 64}, + {.name = "R9", .offset = offsetof(vm_cpu_t, R9), .size = 64}, + {.name = "R10", .offset = offsetof(vm_cpu_t, R10), .size = 64}, + {.name = "R11", .offset = offsetof(vm_cpu_t, R11), .size = 64}, + {.name = "R12", .offset = offsetof(vm_cpu_t, R12), .size = 64}, + {.name = "R13", .offset = offsetof(vm_cpu_t, R13), .size = 64}, + {.name = "R14", .offset = offsetof(vm_cpu_t, R14), .size = 64}, + {.name = "R15", .offset = offsetof(vm_cpu_t, R15), .size = 64}, + + {.name = "RIP", .offset = offsetof(vm_cpu_t, RIP), 
.size = 64}, + + {.name = "zf", .offset = offsetof(vm_cpu_t, zf), .size = 8}, + {.name = "nf", .offset = offsetof(vm_cpu_t, nf), .size = 8}, + {.name = "pf", .offset = offsetof(vm_cpu_t, pf), .size = 8}, + {.name = "of", .offset = offsetof(vm_cpu_t, of), .size = 8}, + {.name = "cf", .offset = offsetof(vm_cpu_t, cf), .size = 8}, + {.name = "af", .offset = offsetof(vm_cpu_t, af), .size = 8}, + {.name = "df", .offset = offsetof(vm_cpu_t, df), .size = 8}, + + {.name = "ES", .offset = offsetof(vm_cpu_t, ES), .size = 16}, + {.name = "CS", .offset = offsetof(vm_cpu_t, CS), .size = 16}, + {.name = "SS", .offset = offsetof(vm_cpu_t, SS), .size = 16}, + {.name = "DS", .offset = offsetof(vm_cpu_t, DS), .size = 16}, + {.name = "FS", .offset = offsetof(vm_cpu_t, FS), .size = 16}, + {.name = "GS", .offset = offsetof(vm_cpu_t, GS), .size = 16}, + + {.name = "MM0", .offset = offsetof(vm_cpu_t, MM0), .size = 64}, + {.name = "MM1", .offset = offsetof(vm_cpu_t, MM1), .size = 64}, + {.name = "MM2", .offset = offsetof(vm_cpu_t, MM2), .size = 64}, + {.name = "MM3", .offset = offsetof(vm_cpu_t, MM3), .size = 64}, + {.name = "MM4", .offset = offsetof(vm_cpu_t, MM4), .size = 64}, + {.name = "MM5", .offset = offsetof(vm_cpu_t, MM5), .size = 64}, + {.name = "MM6", .offset = offsetof(vm_cpu_t, MM6), .size = 64}, + {.name = "MM7", .offset = offsetof(vm_cpu_t, MM7), .size = 64}, + + {.name = "XMM0", .offset = offsetof(vm_cpu_t, XMM0), .size = 128}, + {.name = "XMM1", .offset = offsetof(vm_cpu_t, XMM1), .size = 128}, + {.name = "XMM2", .offset = offsetof(vm_cpu_t, XMM2), .size = 128}, + {.name = "XMM3", .offset = offsetof(vm_cpu_t, XMM3), .size = 128}, + {.name = "XMM4", .offset = offsetof(vm_cpu_t, XMM4), .size = 128}, + {.name = "XMM5", .offset = offsetof(vm_cpu_t, XMM5), .size = 128}, + {.name = "XMM6", .offset = offsetof(vm_cpu_t, XMM6), .size = 128}, + {.name = "XMM7", .offset = offsetof(vm_cpu_t, XMM7), .size = 128}, + {.name = "XMM8", .offset = offsetof(vm_cpu_t, XMM8), .size = 128}, + 
{.name = "XMM9", .offset = offsetof(vm_cpu_t, XMM9), .size = 128}, + {.name = "XMM10", .offset = offsetof(vm_cpu_t, XMM10), .size = 128}, + {.name = "XMM11", .offset = offsetof(vm_cpu_t, XMM11), .size = 128}, + {.name = "XMM12", .offset = offsetof(vm_cpu_t, XMM12), .size = 128}, + {.name = "XMM13", .offset = offsetof(vm_cpu_t, XMM13), .size = 128}, + {.name = "XMM14", .offset = offsetof(vm_cpu_t, XMM14), .size = 128}, + {.name = "XMM15", .offset = offsetof(vm_cpu_t, XMM15), .size = 128}, + + {.name = "tsc", .offset = offsetof(vm_cpu_t, tsc), .size = 64}, + + {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, + {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, +}; + + + +/************************** JitCpu object **************************/ + + + + + +PyObject* cpu_get_gpreg(JitCpu* self) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + + get_reg(RAX); + get_reg(RBX); + get_reg(RCX); + get_reg(RDX); + get_reg(RSI); + get_reg(RDI); + get_reg(RSP); + get_reg(RBP); + + get_reg(R8); + get_reg(R9); + get_reg(R10); + get_reg(R11); + get_reg(R12); + get_reg(R13); + get_reg(R14); + get_reg(R15); + + get_reg(RIP); + + get_reg(zf); + get_reg(nf); + get_reg(pf); + get_reg(of); + get_reg(cf); + get_reg(af); + get_reg(df); + + + get_reg(ES); + get_reg(CS); + get_reg(SS); + get_reg(DS); + get_reg(FS); + get_reg(GS); + + get_reg(MM0); + get_reg(MM1); + get_reg(MM2); + get_reg(MM3); + get_reg(MM4); + get_reg(MM5); + get_reg(MM6); + get_reg(MM7); + + get_reg_bn(XMM0, 128); + get_reg_bn(XMM1, 128); + get_reg_bn(XMM2, 128); + get_reg_bn(XMM3, 128); + get_reg_bn(XMM4, 128); + get_reg_bn(XMM5, 128); + get_reg_bn(XMM6, 128); + get_reg_bn(XMM7, 128); + get_reg_bn(XMM8, 128); + get_reg_bn(XMM9, 128); + get_reg_bn(XMM10, 128); + get_reg_bn(XMM11, 128); + get_reg_bn(XMM12, 128); + get_reg_bn(XMM13, 128); + get_reg_bn(XMM14, 128); + get_reg_bn(XMM15, 128); + + get_reg(tsc); + + return dict; +} + + +PyObject* 
cpu_set_gpreg(JitCpu* self, PyObject *args) +{ + PyObject* dict; + PyObject *d_key, *d_value = NULL; + char* d_key_name; + Py_ssize_t pos = 0; + uint64_t val; + unsigned int i, found; + + if (!PyArg_ParseTuple(args, "O", &dict)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + if(!PyDict_Check(dict)) + RAISE(PyExc_TypeError, "arg must be dict"); + while(PyDict_Next(dict, &pos, &d_key, &d_value)){ + PyGetStr(d_key_name, d_key); + found = 0; + for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ + if (strcmp(d_key_name, gpreg_dict[i].name)) + continue; + found = 1; + switch (gpreg_dict[i].size) { + case 8: + PyGetInt(d_value, val); + *((uint8_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + break; + case 16: + PyGetInt(d_value, val); + *((uint16_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + break; + case 32: + PyGetInt(d_value, val); + *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + break; + case 64: + PyGetInt(d_value, val); + *((uint64_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; + break; + case 128: + { + bn_t bn; + int j; + PyObject* py_long = d_value; + PyObject* py_long_new; + PyObject* py_tmp; + PyObject* cst_32; + PyObject* cst_ffffffff; + uint64_t tmp; + + +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(py_long)){ + /* Already PyLong */ + /* Increment ref as we will decement it next */ + Py_INCREF(py_long); + } else { + RAISE(PyExc_TypeError,"arg must be int"); + } +#else + if (PyInt_Check(py_long)){ + tmp = (uint64_t)PyInt_AsLong(py_long); + py_long = PyLong_FromLong((long)tmp); + } else if (PyLong_Check(py_long)){ + /* Already PyLong */ + /* Increment ref as we will decement it next */ + Py_INCREF(py_long); + } + else{ + RAISE(PyExc_TypeError,"arg must be int"); + } +#endif + + + cst_ffffffff = PyLong_FromLong(0xffffffff); + cst_32 = PyLong_FromLong(32); + bn = bignum_from_int(0); + + for (j = 0; j < BN_BYTE_SIZE; j += 4) { + py_tmp = PyObject_CallMethod(py_long, "__and__", "O", cst_ffffffff); + 
tmp = PyLong_AsUnsignedLongMask(py_tmp); + Py_DECREF(py_tmp); + bn = bignum_lshift(bn, 32); + bn = bignum_or(bn, bignum_from_uint64(tmp)); + + py_long_new = PyObject_CallMethod(py_long, "__rshift__", "O", cst_32); + Py_DECREF(py_long); + py_long = py_long_new; + } + Py_DECREF(py_long); + Py_DECREF(cst_32); + Py_DECREF(cst_ffffffff); + *(bn_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset) = bignum_mask(bn, 128); + } + break; + } + break; + } + + if (found) + continue; + fprintf(stderr, "unknown key: %s\n", d_key_name); + RAISE(PyExc_ValueError, "unknown reg"); + } + Py_INCREF(Py_None); + return Py_None; +} + +PyObject * cpu_init_regs(JitCpu* self) +{ + memset(self->cpu, 0, sizeof(vm_cpu_t)); + ((vm_cpu_t*)self->cpu)->tsc = 0x1122334455667788ULL; + ((vm_cpu_t*)self->cpu)->i_f = 1; + Py_INCREF(Py_None); + return Py_None; + +} + +void dump_gpregs_16(vm_cpu_t* vmcpu) +{ + printf("EAX %.8"PRIX32" EBX %.8"PRIX32" ECX %.8"PRIX32" EDX %.8"PRIX32" ", + (uint32_t)(vmcpu->RAX & 0xFFFFFFFF), + (uint32_t)(vmcpu->RBX & 0xFFFFFFFF), + (uint32_t)(vmcpu->RCX & 0xFFFFFFFF), + (uint32_t)(vmcpu->RDX & 0xFFFFFFFF)); + printf("ESI %.8"PRIX32" EDI %.8"PRIX32" ESP %.8"PRIX32" EBP %.8"PRIX32" ", + (uint32_t)(vmcpu->RSI & 0xFFFFFFFF), + (uint32_t)(vmcpu->RDI & 0xFFFFFFFF), + (uint32_t)(vmcpu->RSP & 0xFFFFFFFF), + (uint32_t)(vmcpu->RBP & 0xFFFFFFFF)); + printf("EIP %.8"PRIX32" ", + (uint32_t)(vmcpu->RIP & 0xFFFFFFFF)); + printf("zf %.1d nf %.1d of %.1d cf %.1d\n", + (uint32_t)(vmcpu->zf & 0x1), + (uint32_t)(vmcpu->nf & 0x1), + (uint32_t)(vmcpu->of & 0x1), + (uint32_t)(vmcpu->cf & 0x1)); +} + +void dump_gpregs_32(vm_cpu_t* vmcpu) +{ + + printf("EAX %.8"PRIX32" EBX %.8"PRIX32" ECX %.8"PRIX32" EDX %.8"PRIX32" ", + (uint32_t)(vmcpu->RAX & 0xFFFFFFFF), + (uint32_t)(vmcpu->RBX & 0xFFFFFFFF), + (uint32_t)(vmcpu->RCX & 0xFFFFFFFF), + (uint32_t)(vmcpu->RDX & 0xFFFFFFFF)); + printf("ESI %.8"PRIX32" EDI %.8"PRIX32" ESP %.8"PRIX32" EBP %.8"PRIX32" ", + (uint32_t)(vmcpu->RSI & 0xFFFFFFFF), + 
(uint32_t)(vmcpu->RDI & 0xFFFFFFFF), + (uint32_t)(vmcpu->RSP & 0xFFFFFFFF), + (uint32_t)(vmcpu->RBP & 0xFFFFFFFF)); + printf("EIP %.8"PRIX32" ", + (uint32_t)(vmcpu->RIP & 0xFFFFFFFF)); + printf("zf %.1d nf %.1d of %.1d cf %.1d\n", + (uint32_t)(vmcpu->zf & 0x1), + (uint32_t)(vmcpu->nf & 0x1), + (uint32_t)(vmcpu->of & 0x1), + (uint32_t)(vmcpu->cf & 0x1)); + +} + +void dump_gpregs_64(vm_cpu_t* vmcpu) +{ + + printf("RAX %.16"PRIX64" RBX %.16"PRIX64" RCX %.16"PRIX64" RDX %.16"PRIX64" ", + vmcpu->RAX, vmcpu->RBX, vmcpu->RCX, vmcpu->RDX); + printf("RSI %.16"PRIX64" RDI %.16"PRIX64" RSP %.16"PRIX64" RBP %.16"PRIX64" ", + vmcpu->RSI, vmcpu->RDI, vmcpu->RSP, vmcpu->RBP); + printf("RIP %.16"PRIX64"\n", + vmcpu->RIP); + printf("R8 %.16"PRIX64" R9 %.16"PRIX64" R10 %.16"PRIX64" R11 %.16"PRIX64" ", + vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); + printf("R12 %.16"PRIX64" R13 %.16"PRIX64" R14 %.16"PRIX64" R15 %.16"PRIX64" ", + vmcpu->R12, vmcpu->R13, vmcpu->R14, vmcpu->R15); + + + printf("zf %.1d nf %.1d of %.1d cf %.1d\n", + vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); + +} + +PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + + vmcpu = self->cpu; + dump_gpregs_64(vmcpu); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) +{ + vm_cpu_t* vmcpu; + PyObject *item1; + uint64_t attrib; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, attrib); + + vmcpu = self->cpu; + if (attrib == 16 || attrib == 32) + dump_gpregs_32(vmcpu); + else if (attrib == 64) + dump_gpregs_64(vmcpu); + else { + RAISE(PyExc_TypeError,"Bad attrib"); + } + + Py_INCREF(Py_None); + return Py_None; +} + + + +PyObject* cpu_set_exception(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + 
((vm_cpu_t*)self->cpu)->exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_exception(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); +} + +PyObject* cpu_set_interrupt_num(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + ((vm_cpu_t*)self->cpu)->interrupt_num = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_interrupt_num(JitCpu* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->interrupt_num)); +} + +PyObject* cpu_set_segm_base(JitCpu* self, PyObject* args) +{ + PyObject *item1, *item2; + uint64_t segm_num, segm_base; + + if (!PyArg_ParseTuple(args, "OO", &item1, &item2)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, segm_num); + PyGetInt(item2, segm_base); + ((vm_cpu_t*)self->cpu)->segm_base[segm_num] = segm_base; + + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* cpu_get_segm_base(JitCpu* self, PyObject* args) +{ + PyObject *item1; + uint64_t segm_num; + PyObject* v; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + PyGetInt(item1, segm_num); + v = PyLong_FromLong((long)(((vm_cpu_t*)self->cpu)->segm_base[segm_num])); + return v; +} + +uint64_t segm2addr(JitCpu* jitcpu, uint64_t segm, uint64_t addr) +{ + return addr + ((vm_cpu_t*)jitcpu->cpu)->segm_base[segm]; +} + +void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) +{ + vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); +} + +void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) +{ + vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); +} + +void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) +{ + vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); +} + +void 
MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) +{ + vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); +} + + + +PyObject* vm_set_mem(JitCpu *self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_buffer; + Py_ssize_t py_length; + + char * buffer; + uint64_t size; + uint64_t addr; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + if(!PyBytes_Check(py_buffer)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + size = PyBytes_Size(py_buffer); + PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); + + ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); + if (ret < 0) + RAISE(PyExc_TypeError,"arg must be str"); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyMemberDef JitCpu_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef JitCpu_methods[] = { + {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, + "X"}, + {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, + "X"}, + {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, + "X"}, + {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, + "X"}, + {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, + "X"}, + {"get_segm_base", (PyCFunction)cpu_get_segm_base, METH_VARARGS, + "X"}, + {"set_segm_base", (PyCFunction)cpu_set_segm_base, METH_VARARGS, + "X"}, + {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, + "X"}, + {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, + "X"}, + {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, + "X"}, + {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, + "X"}, + {"get_interrupt_num", (PyCFunction)cpu_get_interrupt_num, METH_VARARGS, + "X"}, + {"set_interrupt_num", (PyCFunction)cpu_set_interrupt_num, METH_VARARGS, + "X"}, + {NULL} /* Sentinel */ +}; + +static int +JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) +{ + self->cpu = 
malloc(sizeof(vm_cpu_t)); + if (self->cpu == NULL) { + fprintf(stderr, "cannot alloc vm_cpu_t\n"); + exit(EXIT_FAILURE); + } + return 0; +} + +#define getset_reg_E_u32(regname) \ + static PyObject *JitCpu_get_E ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint32_t)(((vm_cpu_t*)(self->cpu))->R ## regname & 0xFFFFFFFF )); \ + } \ + static int JitCpu_set_E ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint64_t val; \ + PyGetInt_retneg(value, val); \ + val &= 0xFFFFFFFF; \ + val |= ((vm_cpu_t*)(self->cpu))->R ##regname & 0xFFFFFFFF00000000ULL; \ + ((vm_cpu_t*)(self->cpu))->R ## regname = val; \ + return 0; \ + } + + + +#define getset_reg_R_u16(regname) \ + static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ + { \ + return PyLong_FromUnsignedLongLong((uint16_t)(((vm_cpu_t*)(self->cpu))->R ## regname & 0xFFFF )); \ + } \ + static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ + { \ + uint64_t val; \ + PyGetInt_retneg(value, val); \ + val &= 0xFFFF; \ + val |= ((vm_cpu_t*)(self->cpu))->R ##regname & 0xFFFFFFFFFFFF0000ULL; \ + ((vm_cpu_t*)(self->cpu))->R ## regname = val; \ + return 0; \ + } + + +getset_reg_u64(RAX); +getset_reg_u64(RBX); +getset_reg_u64(RCX); +getset_reg_u64(RDX); +getset_reg_u64(RSI); +getset_reg_u64(RDI); +getset_reg_u64(RSP); +getset_reg_u64(RBP); + +getset_reg_u64(R8); +getset_reg_u64(R9); +getset_reg_u64(R10); +getset_reg_u64(R11); +getset_reg_u64(R12); +getset_reg_u64(R13); +getset_reg_u64(R14); +getset_reg_u64(R15); + +getset_reg_u64(RIP); + +getset_reg_u64(zf); +getset_reg_u64(nf); +getset_reg_u64(pf); +getset_reg_u64(of); +getset_reg_u64(cf); +getset_reg_u64(af); +getset_reg_u64(df); + + +getset_reg_u16(ES); +getset_reg_u16(CS); +getset_reg_u16(SS); +getset_reg_u16(DS); +getset_reg_u16(FS); +getset_reg_u16(GS); + +getset_reg_E_u32(AX); +getset_reg_E_u32(BX); +getset_reg_E_u32(CX); +getset_reg_E_u32(DX); +getset_reg_E_u32(SI); 
+getset_reg_E_u32(DI); +getset_reg_E_u32(SP); +getset_reg_E_u32(BP); +getset_reg_E_u32(IP); + +getset_reg_R_u16(AX); +getset_reg_R_u16(BX); +getset_reg_R_u16(CX); +getset_reg_R_u16(DX); +getset_reg_R_u16(SI); +getset_reg_R_u16(DI); +getset_reg_R_u16(SP); +getset_reg_R_u16(BP); + +getset_reg_R_u16(IP); + +getset_reg_u64(MM0); +getset_reg_u64(MM1); +getset_reg_u64(MM2); +getset_reg_u64(MM3); +getset_reg_u64(MM4); +getset_reg_u64(MM5); +getset_reg_u64(MM6); +getset_reg_u64(MM7); + +getset_reg_bn(XMM0, 128); +getset_reg_bn(XMM1, 128); +getset_reg_bn(XMM2, 128); +getset_reg_bn(XMM3, 128); +getset_reg_bn(XMM4, 128); +getset_reg_bn(XMM5, 128); +getset_reg_bn(XMM6, 128); +getset_reg_bn(XMM7, 128); +getset_reg_bn(XMM8, 128); +getset_reg_bn(XMM9, 128); +getset_reg_bn(XMM10, 128); +getset_reg_bn(XMM11, 128); +getset_reg_bn(XMM12, 128); +getset_reg_bn(XMM13, 128); +getset_reg_bn(XMM14, 128); +getset_reg_bn(XMM15, 128); + +getset_reg_u64(tsc); + +getset_reg_u32(exception_flags); +getset_reg_u32(interrupt_num); + + +PyObject* get_gpreg_offset_all(void) +{ + PyObject *dict = PyDict_New(); + PyObject *o; + get_reg_off(exception_flags); + + get_reg_off(RAX); + get_reg_off(RBX); + get_reg_off(RCX); + get_reg_off(RDX); + get_reg_off(RSI); + get_reg_off(RDI); + get_reg_off(RSP); + get_reg_off(RBP); + get_reg_off(R8); + get_reg_off(R9); + get_reg_off(R10); + get_reg_off(R11); + get_reg_off(R12); + get_reg_off(R13); + get_reg_off(R14); + get_reg_off(R15); + get_reg_off(RIP); + get_reg_off(zf); + get_reg_off(nf); + get_reg_off(pf); + get_reg_off(of); + get_reg_off(cf); + get_reg_off(af); + get_reg_off(df); + get_reg_off(tf); + get_reg_off(i_f); + get_reg_off(iopl_f); + get_reg_off(nt); + get_reg_off(rf); + get_reg_off(vm); + get_reg_off(ac); + get_reg_off(vif); + get_reg_off(vip); + get_reg_off(i_d); + get_reg_off(my_tick); + get_reg_off(cond); + + get_reg_off(float_st0); + get_reg_off(float_st1); + get_reg_off(float_st2); + get_reg_off(float_st3); + get_reg_off(float_st4); + 
get_reg_off(float_st5); + get_reg_off(float_st6); + get_reg_off(float_st7); + + get_reg_off(ES); + get_reg_off(CS); + get_reg_off(SS); + get_reg_off(DS); + get_reg_off(FS); + get_reg_off(GS); + + get_reg_off(MM0); + get_reg_off(MM1); + get_reg_off(MM2); + get_reg_off(MM3); + get_reg_off(MM4); + get_reg_off(MM5); + get_reg_off(MM6); + get_reg_off(MM7); + + get_reg_off(XMM0); + get_reg_off(XMM1); + get_reg_off(XMM2); + get_reg_off(XMM3); + get_reg_off(XMM4); + get_reg_off(XMM5); + get_reg_off(XMM6); + get_reg_off(XMM7); + get_reg_off(XMM8); + get_reg_off(XMM9); + get_reg_off(XMM10); + get_reg_off(XMM11); + get_reg_off(XMM12); + get_reg_off(XMM13); + get_reg_off(XMM14); + get_reg_off(XMM15); + + get_reg_off(tsc); + + get_reg_off(interrupt_num); + get_reg_off(exception_flags); + + get_reg_off(float_stack_ptr); + get_reg_off(reg_float_cs); + get_reg_off(reg_float_eip); + get_reg_off(reg_float_control); + + return dict; +} + + +static PyGetSetDef JitCpu_getseters[] = { + {"vmmngr", + (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, + "vmmngr", + NULL}, + + {"jitter", + (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, + "jitter", + NULL}, + + + {"RAX", (getter)JitCpu_get_RAX, (setter)JitCpu_set_RAX, "RAX", NULL}, + {"RBX", (getter)JitCpu_get_RBX, (setter)JitCpu_set_RBX, "RBX", NULL}, + {"RCX", (getter)JitCpu_get_RCX, (setter)JitCpu_set_RCX, "RCX", NULL}, + {"RDX", (getter)JitCpu_get_RDX, (setter)JitCpu_set_RDX, "RDX", NULL}, + {"RSI", (getter)JitCpu_get_RSI, (setter)JitCpu_set_RSI, "RSI", NULL}, + {"RDI", (getter)JitCpu_get_RDI, (setter)JitCpu_set_RDI, "RDI", NULL}, + {"RSP", (getter)JitCpu_get_RSP, (setter)JitCpu_set_RSP, "RSP", NULL}, + {"RBP", (getter)JitCpu_get_RBP, (setter)JitCpu_set_RBP, "RBP", NULL}, + {"R8", (getter)JitCpu_get_R8, (setter)JitCpu_set_R8, "R8", NULL}, + {"R9", (getter)JitCpu_get_R9, (setter)JitCpu_set_R9, "R9", NULL}, + {"R10", (getter)JitCpu_get_R10, (setter)JitCpu_set_R10, "R10", NULL}, + {"R11", (getter)JitCpu_get_R11, 
(setter)JitCpu_set_R11, "R11", NULL}, + {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL}, + {"R13", (getter)JitCpu_get_R13, (setter)JitCpu_set_R13, "R13", NULL}, + {"R14", (getter)JitCpu_get_R14, (setter)JitCpu_set_R14, "R14", NULL}, + {"R15", (getter)JitCpu_get_R15, (setter)JitCpu_set_R15, "R15", NULL}, + {"RIP", (getter)JitCpu_get_RIP, (setter)JitCpu_set_RIP, "RIP", NULL}, + {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, + {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, + {"pf", (getter)JitCpu_get_pf, (setter)JitCpu_set_pf, "pf", NULL}, + {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, + {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, + {"af", (getter)JitCpu_get_af, (setter)JitCpu_set_af, "af", NULL}, + {"df", (getter)JitCpu_get_df, (setter)JitCpu_set_df, "df", NULL}, + {"ES", (getter)JitCpu_get_ES, (setter)JitCpu_set_ES, "ES", NULL}, + {"CS", (getter)JitCpu_get_CS, (setter)JitCpu_set_CS, "CS", NULL}, + {"SS", (getter)JitCpu_get_SS, (setter)JitCpu_set_SS, "SS", NULL}, + {"DS", (getter)JitCpu_get_DS, (setter)JitCpu_set_DS, "DS", NULL}, + {"FS", (getter)JitCpu_get_FS, (setter)JitCpu_set_FS, "FS", NULL}, + {"GS", (getter)JitCpu_get_GS, (setter)JitCpu_set_GS, "GS", NULL}, + + {"EAX", (getter)JitCpu_get_EAX, (setter)JitCpu_set_EAX, "EAX", NULL}, + {"EBX", (getter)JitCpu_get_EBX, (setter)JitCpu_set_EBX, "EBX", NULL}, + {"ECX", (getter)JitCpu_get_ECX, (setter)JitCpu_set_ECX, "ECX", NULL}, + {"EDX", (getter)JitCpu_get_EDX, (setter)JitCpu_set_EDX, "EDX", NULL}, + {"ESI", (getter)JitCpu_get_ESI, (setter)JitCpu_set_ESI, "ESI", NULL}, + {"EDI", (getter)JitCpu_get_EDI, (setter)JitCpu_set_EDI, "EDI", NULL}, + {"ESP", (getter)JitCpu_get_ESP, (setter)JitCpu_set_ESP, "ESP", NULL}, + {"EBP", (getter)JitCpu_get_EBP, (setter)JitCpu_set_EBP, "EBP", NULL}, + {"EIP", (getter)JitCpu_get_EIP, (setter)JitCpu_set_EIP, "EIP", NULL}, + + {"AX", (getter)JitCpu_get_AX, (setter)JitCpu_set_AX, "AX", 
NULL}, + {"BX", (getter)JitCpu_get_BX, (setter)JitCpu_set_BX, "BX", NULL}, + {"CX", (getter)JitCpu_get_CX, (setter)JitCpu_set_CX, "CX", NULL}, + {"DX", (getter)JitCpu_get_DX, (setter)JitCpu_set_DX, "DX", NULL}, + {"SI", (getter)JitCpu_get_SI, (setter)JitCpu_set_SI, "SI", NULL}, + {"DI", (getter)JitCpu_get_DI, (setter)JitCpu_set_DI, "DI", NULL}, + {"SP", (getter)JitCpu_get_SP, (setter)JitCpu_set_SP, "SP", NULL}, + {"BP", (getter)JitCpu_get_BP, (setter)JitCpu_set_BP, "BP", NULL}, + + {"IP", (getter)JitCpu_get_IP, (setter)JitCpu_set_IP, "IP", NULL}, + + {"MM0", (getter)JitCpu_get_MM0, (setter)JitCpu_set_MM0, "MM0", NULL}, + {"MM1", (getter)JitCpu_get_MM1, (setter)JitCpu_set_MM1, "MM1", NULL}, + {"MM2", (getter)JitCpu_get_MM2, (setter)JitCpu_set_MM2, "MM2", NULL}, + {"MM3", (getter)JitCpu_get_MM3, (setter)JitCpu_set_MM3, "MM3", NULL}, + {"MM4", (getter)JitCpu_get_MM4, (setter)JitCpu_set_MM4, "MM4", NULL}, + {"MM5", (getter)JitCpu_get_MM5, (setter)JitCpu_set_MM5, "MM5", NULL}, + {"MM6", (getter)JitCpu_get_MM6, (setter)JitCpu_set_MM6, "MM6", NULL}, + {"MM7", (getter)JitCpu_get_MM7, (setter)JitCpu_set_MM7, "MM7", NULL}, + + {"XMM0", (getter)JitCpu_get_XMM0, (setter)JitCpu_set_XMM0, "XMM0", NULL}, + {"XMM1", (getter)JitCpu_get_XMM1, (setter)JitCpu_set_XMM1, "XMM1", NULL}, + {"XMM2", (getter)JitCpu_get_XMM2, (setter)JitCpu_set_XMM2, "XMM2", NULL}, + {"XMM3", (getter)JitCpu_get_XMM3, (setter)JitCpu_set_XMM3, "XMM3", NULL}, + {"XMM4", (getter)JitCpu_get_XMM4, (setter)JitCpu_set_XMM4, "XMM4", NULL}, + {"XMM5", (getter)JitCpu_get_XMM5, (setter)JitCpu_set_XMM5, "XMM5", NULL}, + {"XMM6", (getter)JitCpu_get_XMM6, (setter)JitCpu_set_XMM6, "XMM6", NULL}, + {"XMM7", (getter)JitCpu_get_XMM7, (setter)JitCpu_set_XMM7, "XMM7", NULL}, + {"XMM8", (getter)JitCpu_get_XMM8, (setter)JitCpu_set_XMM8, "XMM8", NULL}, + {"XMM9", (getter)JitCpu_get_XMM9, (setter)JitCpu_set_XMM9, "XMM9", NULL}, + {"XMM10", (getter)JitCpu_get_XMM10, (setter)JitCpu_set_XMM10, "XMM10", NULL}, + {"XMM11", 
(getter)JitCpu_get_XMM11, (setter)JitCpu_set_XMM11, "XMM11", NULL}, + {"XMM12", (getter)JitCpu_get_XMM12, (setter)JitCpu_set_XMM12, "XMM12", NULL}, + {"XMM13", (getter)JitCpu_get_XMM13, (setter)JitCpu_set_XMM13, "XMM13", NULL}, + {"XMM14", (getter)JitCpu_get_XMM14, (setter)JitCpu_set_XMM14, "XMM14", NULL}, + {"XMM15", (getter)JitCpu_get_XMM15, (setter)JitCpu_set_XMM15, "XMM15", NULL}, + + {"tsc", (getter)JitCpu_get_tsc, (setter)JitCpu_set_tsc, "tsc", NULL}, + + {"exception_flags", (getter)JitCpu_get_exception_flags, (setter)JitCpu_set_exception_flags, "exception_flags", NULL}, + {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, + + + {NULL} /* Sentinel */ +}; + + +static PyTypeObject JitCpuType = { + PyVarObject_HEAD_INIT(NULL, 0) + "JitCore_x86.JitCpu", /*tp_name*/ + sizeof(JitCpu), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)JitCpu_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "JitCpu objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + JitCpu_methods, /* tp_methods */ + JitCpu_members, /* tp_members */ + JitCpu_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)JitCpu_init, /* tp_init */ + 0, /* tp_alloc */ + JitCpu_new, /* tp_new */ +}; + + + +static PyMethodDef JitCore_x86_Methods[] = { + + /* + + */ + {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + + +MOD_INIT(JitCore_x86) +{ + PyObject *module; + + 
MOD_DEF(module, "JitCore_x86", "JitCore_x86 module", JitCore_x86_Methods); + + if (module == NULL) + return NULL; + + if (PyType_Ready(&JitCpuType) < 0) + return NULL; + + Py_INCREF(&JitCpuType); + if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) + return NULL; + + return module; +} diff --git a/miasm/jitter/arch/JitCore_x86.h b/miasm/jitter/arch/JitCore_x86.h new file mode 100644 index 00000000..27d94d7c --- /dev/null +++ b/miasm/jitter/arch/JitCore_x86.h @@ -0,0 +1,136 @@ +#include "../bn.h" + +#if _WIN32 +#define _MIASM_EXPORT __declspec(dllexport) +#else +#define _MIASM_EXPORT +#endif + +typedef struct { + uint32_t exception_flags; + uint32_t interrupt_num; + + + /* gpregs */ + uint64_t RAX; + uint64_t RBX; + uint64_t RCX; + uint64_t RDX; + uint64_t RSI; + uint64_t RDI; + uint64_t RSP; + uint64_t RBP; + uint64_t R8; + uint64_t R9; + uint64_t R10; + uint64_t R11; + uint64_t R12; + uint64_t R13; + uint64_t R14; + uint64_t R15; + + uint64_t RIP; + + /* eflag */ + uint8_t zf; + uint8_t nf; + uint8_t pf; + uint8_t of; + uint8_t cf; + uint8_t af; + uint8_t df; + + uint8_t tf; + uint8_t i_f; + uint8_t iopl_f; + uint8_t nt; + uint8_t rf; + uint8_t vm; + uint8_t ac; + uint8_t vif; + uint8_t vip; + uint8_t i_d; + + bn_t my_tick; + + bn_t cond; + + uint64_t float_st0; + uint64_t float_st1; + uint64_t float_st2; + uint64_t float_st3; + uint64_t float_st4; + uint64_t float_st5; + uint64_t float_st6; + uint64_t float_st7; + + unsigned int float_c0; + unsigned int float_c1; + unsigned int float_c2; + unsigned int float_c3; + + + unsigned int float_stack_ptr; + + unsigned int reg_float_control; + + unsigned int reg_float_eip; + unsigned int reg_float_cs; + unsigned int reg_float_address; + unsigned int reg_float_ds; + + + uint64_t tsc; + + + uint16_t ES; + uint16_t CS; + uint16_t SS; + uint16_t DS; + uint16_t FS; + uint16_t GS; + + unsigned int cr0; + unsigned int cr3; + + uint64_t MM0; + uint64_t MM1; + uint64_t MM2; + uint64_t MM3; + uint64_t MM4; + 
uint64_t MM5; + uint64_t MM6; + uint64_t MM7; + + /* SSE */ + bn_t XMM0; + bn_t XMM1; + bn_t XMM2; + bn_t XMM3; + bn_t XMM4; + bn_t XMM5; + bn_t XMM6; + bn_t XMM7; + bn_t XMM8; + bn_t XMM9; + bn_t XMM10; + bn_t XMM11; + bn_t XMM12; + bn_t XMM13; + bn_t XMM14; + bn_t XMM15; + + uint32_t segm_base[0x10000]; + +}vm_cpu_t; + +_MIASM_EXPORT void dump_gpregs_32(vm_cpu_t* vmcpu); +_MIASM_EXPORT void dump_gpregs_64(vm_cpu_t* vmcpu); +_MIASM_EXPORT uint64_t segm2addr(JitCpu* jitcpu, uint64_t segm, uint64_t addr); + +_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); +_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); +_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); +_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); + +#define RETURN_PC return BlockDst; diff --git a/miasm/jitter/arch/__init__.py b/miasm/jitter/arch/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/miasm/jitter/bn.c b/miasm/jitter/bn.c new file mode 100644 index 00000000..dd4f34ef --- /dev/null +++ b/miasm/jitter/bn.c @@ -0,0 +1,933 @@ +/* + +Big number library - arithmetic on multiple-precision unsigned integers. + +This library is an implementation of arithmetic on arbitrarily large integers. + +The difference between this and other implementations, is that the data structure +has optimal memory utilization (i.e. a 1024 bit integer takes up 128 bytes RAM), +and all memory is allocated statically: no dynamic allocation for better or worse. + +Primary goals are correctness, clarity of code and clean, portable implementation. +Secondary goal is a memory footprint small enough to make it suitable for use in +embedded applications. + + +The current state is correct functionality and adequate performance. +There may well be room for performance-optimizations and improvements. 
+ +Source: https://github.com/kokke/tiny-bignum-c + +Code slightly modified to support ast generation calculus style from Expr. + +*/ + +#include +#include +#include +#include +#include "bn.h" + +/* Functions for shifting number in-place. */ +static bn_t _lshift_one_bit(bn_t a); +static bn_t _rshift_one_bit(bn_t a); +static bn_t _lshift_word(bn_t a, int nwords); +static bn_t _rshift_word(bn_t a, int nwords); + + + + +/* Public / Exported functions. */ +bn_t bignum_init(void) +{ + int i; + bn_t n; + + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + n.array[i] = 0; + } + + return n; +} + + +bn_t bignum_from_int(DTYPE_TMP i) +{ + bn_t n; + + n = bignum_init(); + /* Endianness issue if machine is not little-endian? */ +#ifdef WORD_SIZE + #if (WORD_SIZE == 1) + n.array[0] = (i & 0x000000ff); + n.array[1] = (i & 0x0000ff00) >> 8; + n.array[2] = (i & 0x00ff0000) >> 16; + n.array[3] = (i & 0xff000000) >> 24; + #elif (WORD_SIZE == 2) + n.array[0] = (i & 0x0000ffff); + n.array[1] = (i & 0xffff0000) >> 16; + #elif (WORD_SIZE == 4) + n.array[0] = i; + DTYPE_TMP num_32 = 32; + DTYPE_TMP tmp = i >> num_32; /* bit-shift with U64 operands to force 64-bit results */ + n.array[1] = tmp; + #endif +#endif + + return n; +} + + + +bn_t bignum_from_uint64(uint64_t i) +{ + bn_t n; + n = bignum_init(); + /* Endianness issue if machine is not little-endian? */ +#ifdef WORD_SIZE + #if (WORD_SIZE == 1) + n.array[0] = (i & 0x000000ff); + n.array[1] = (i & 0x0000ff00) >> 8; + n.array[2] = (i & 0x00ff0000) >> 16; + n.array[3] = (i & 0xff000000) >> 24; + #elif (WORD_SIZE == 2) + n.array[0] = (i & 0x0000ffff); + n.array[1] = (i & 0xffff0000) >> 16; + #elif (WORD_SIZE == 4) + n.array[0] = i; + DTYPE_TMP num_32 = 32; + DTYPE_TMP tmp = i >> num_32; /* bit-shift with U64 operands to force 64-bit results */ + n.array[1] = tmp; + #endif +#endif + + return n; +} + + + + + +int bignum_to_int(bn_t n) +{ + + int ret = 0; + + /* Endianness issue if machine is not little-endian? 
*/ +#if (WORD_SIZE == 1) + ret += n.array[0]; + ret += n.array[1] << 8; + ret += n.array[2] << 16; + ret += n.array[3] << 24; +#elif (WORD_SIZE == 2) + ret += n.array[0]; + ret += n.array[1] << 16; +#elif (WORD_SIZE == 4) + ret += n.array[0]; +#endif + + + return ret; +} + + +uint64_t bignum_to_uint64(bn_t n) +{ + + uint64_t ret = 0; + + /* Endianness issue if machine is not little-endian? */ +#if (WORD_SIZE == 1) + ret += (uint64_t)(n.array[0]); + ret += (uint64_t)(n.array[1]) << 8; + ret += (uint64_t)(n.array[2]) << 16; + ret += (uint64_t)(n.array[3]) << 24; + + ret += (uint64_t)(n.array[4]) << 32; + ret += (uint64_t)(n.array[5]) << 40; + ret += (uint64_t)(n.array[6]) << 48; + ret += (uint64_t)(n.array[7]) << 56; + + +#elif (WORD_SIZE == 2) + ret += (uint64_t)(n.array[0]); + ret += (uint64_t)(n.array[1]) << 16; + ret += (uint64_t)(n.array[2]) << 32; + ret += (uint64_t)(n.array[3]) << 48; +#elif (WORD_SIZE == 4) + ret += n.array[0]; + ret += (uint64_t)(n.array[1]) << 32; +#endif + + return ret; +} + + + + +bn_t bignum_from_string(char* str, int nbytes) +{ + + require(str, "str is null"); + require(nbytes > 0, "nbytes must be positive"); + require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes"); + + bn_t n; + + n = bignum_init(); + + DTYPE tmp; /* DTYPE is defined in bn.h - uint{8,16,32,64}_t */ + int i = nbytes - (2 * WORD_SIZE); /* index into string */ + int j = 0; /* index into array */ + + /* reading last hex-byte "MSB" from string first -> big endian */ + /* MSB ~= most significant byte / block ? :) */ + while (i >= 0) { + tmp = 0; + sscanf(&str[i], SSCANF_FORMAT_STR, &tmp); + n.array[j] = tmp; + i -= (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) back in the string. */ + j += 1; /* step one element forward in the array. 
*/ + } + + return n; +} + +void bignum_to_string(bn_t n, char* str, int nbytes) +{ + require(str, "str is null"); + require(nbytes > 0, "nbytes must be positive"); + require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes"); + + int j = BN_ARRAY_SIZE - 1; /* index into array - reading "MSB" first -> big-endian */ + int i = 0; /* index into string representation. */ + + /* reading last array-element "MSB" first -> big endian */ + while ((j >= 0) && (nbytes > (i + 1))) { + sprintf(&str[i], SPRINTF_FORMAT_STR, n.array[j]); + i += (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) forward in the string. */ + j -= 1; /* step one element back in the array. */ + } + + /* Zero-terminate string */ + str[i] = 0; +} + + + +bn_t bignum_dec(bn_t n) +{ + //require(n, "n is null"); + + DTYPE tmp; /* copy of n */ + DTYPE res; + + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + tmp = n.array[i]; + res = tmp - 1; + n.array[i] = res; + + if (!(res > tmp)) { + break; + } + } + + return n; +} + + +bn_t bignum_inc(bn_t n) +{ + //require(n, "n is null"); + + DTYPE res; + DTYPE_TMP tmp; /* copy of n */ + + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + tmp = n.array[i]; + res = tmp + 1; + n.array[i] = res; + + if (res > tmp) { + break; + } + } + + return n; +} + + + +bn_t bignum_add(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + bn_t c; + + DTYPE_TMP tmp; + int carry = 0; + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + tmp = (DTYPE_TMP)a.array[i] + b.array[i] + carry; + carry = (tmp > MAX_VAL); + c.array[i] = (tmp & MAX_VAL); + } + + return c; +} + + +bn_t bignum_sub(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + bn_t c; + + DTYPE_TMP res; + DTYPE_TMP tmp1; + DTYPE_TMP tmp2; + int borrow = 0; + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + tmp1 = (DTYPE_TMP)a.array[i] + (MAX_VAL + 1); /* + number_base */ + tmp2 = (DTYPE_TMP)b.array[i] + 
borrow;; + res = (tmp1 - tmp2); + c.array[i] = (DTYPE)(res & MAX_VAL); /* "modulo number_base" == "% (number_base - 1)" if number_base is 2^N */ + borrow = (res <= MAX_VAL); + } + + return c; +} + + + + +bn_t bignum_mul(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + + bn_t c; + bn_t row; + bn_t tmp; + int i, j; + + c = bignum_init(); + + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + row = bignum_init(); + + for (j = 0; j < BN_ARRAY_SIZE; ++j) { + if (i + j < BN_ARRAY_SIZE) { + tmp = bignum_init(); + DTYPE_TMP intermediate = ((DTYPE_TMP)a.array[i] * (DTYPE_TMP)b.array[j]); + tmp = bignum_from_int(intermediate); + tmp = _lshift_word(tmp, i + j); + row = bignum_add(tmp, row); + } + } + c = bignum_add(c, row); + } + + return c; +} + + +bn_t bignum_udiv(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + + bn_t c; + bn_t current; + bn_t denom; + bn_t tmp; + + current = bignum_from_int(1); // int current = 1; + denom = bignum_assign(b); // denom = b + tmp = bignum_assign(a); // tmp = a + + const DTYPE_TMP half_max = 1 + (DTYPE_TMP)(MAX_VAL / 2); + bool overflow = false; + + while (bignum_cmp(denom, a) != LARGER) { // while (denom <= a) { + if (denom.array[BN_ARRAY_SIZE - 1] >= half_max) { + overflow = true; + break; + } + current = _lshift_one_bit(current); // current <<= 1; + denom = _lshift_one_bit(denom); // denom <<= 1; + } + if (!overflow) { + denom = _rshift_one_bit(denom); // denom >>= 1; + current = _rshift_one_bit(current); // current >>= 1; + } + c = bignum_init(); // int answer = 0; + + while (!bignum_is_zero(current)) { // while (current != 0) + if (bignum_cmp(tmp, denom) != SMALLER) { // if (dividend >= denom) + tmp = bignum_sub(tmp, denom); // dividend -= denom; + c = bignum_or(c, current); // answer |= current; + } + current = _rshift_one_bit(current); // current >>= 1; + denom = _rshift_one_bit(denom); // denom >>= 1; + } // return answer; + + 
return c; +} + + + +bn_t bignum_lshift(bn_t a, int nbits) +{ + //require(a, "a is null"); + //require(b, "b is null"); + require(nbits >= 0, "no negative shifts"); + + bn_t b; + + b = bignum_assign(a); + /* Handle shift in multiples of word-size */ + const int nbits_pr_word = (WORD_SIZE * 8); + int nwords = nbits / nbits_pr_word; + if (nwords != 0) { + b = _lshift_word(b, nwords); + nbits -= (nwords * nbits_pr_word); + } + + if (nbits != 0) { + int i; + for (i = (BN_ARRAY_SIZE - 1); i > 0; --i) { + b.array[i] = (b.array[i] << nbits) | (b.array[i - 1] >> ((8 * WORD_SIZE) - nbits)); + } + b.array[i] <<= nbits; + } + + return b; +} + + +bn_t bignum_rshift(bn_t a, int nbits) +{ + //require(a, "a is null"); + //require(b, "b is null"); + require(nbits >= 0, "no negative shifts"); + + bn_t b; + + b = bignum_assign(a); + /* Handle shift in multiples of word-size */ + const int nbits_pr_word = (WORD_SIZE * 8); + int nwords = nbits / nbits_pr_word; + + if (nwords != 0) { + b = _rshift_word(b, nwords); + nbits -= (nwords * nbits_pr_word); + } + if (nbits != 0) { + int i; + for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i) { + b.array[i] = (b.array[i] >> nbits) | (b.array[i + 1] << ((8 * WORD_SIZE) - nbits)); + } + b.array[i] >>= nbits; + } + + return b; +} + + + +bn_t bignum_a_rshift(bn_t a, int size, int nbits) +{ + //require(a, "a is null"); + //require(b, "b is null"); + require(nbits >= 0, "no negative shifts"); + require(size > 0, "no negative shifts"); + + bn_t b; + bn_t tmp, mask; + + b = bignum_rshift(a, nbits); + + /* get sign bit */ + tmp = bignum_rshift(a, size - 1); + tmp = bignum_mask(tmp, 1); + + if (!bignum_is_zero(tmp)) { + /* generate sign propag */ + tmp = bignum_from_int(1); + tmp = bignum_lshift(tmp, size); + tmp = bignum_dec(tmp); + + mask = bignum_from_int(1); + mask = bignum_lshift(mask, size - nbits); + mask = bignum_dec(mask); + + tmp = bignum_xor(tmp, mask); + b = bignum_or(b, tmp); + } + + return b; +} + +bn_t bignum_not(bn_t a) +{ + int i; + bn_t b; + + 
for (i = 0; i < BN_ARRAY_SIZE; ++i) { + b.array[i] = ~a.array[i]; + } + + return b; +} + + + +bn_t bignum_umod(bn_t a, bn_t b) +{ + /* + Take divmod and throw away div part + */ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + + bn_t c, d; + bn_t tmp; + + /* c = (a / b) */ + c = bignum_udiv(a, b); + /* tmp = (c * b) */ + tmp = bignum_mul(c, b); + /* c = a - tmp */ + d = bignum_sub(a, tmp); + return d; +} + + +bn_t bignum_and(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + bn_t c; + + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + c.array[i] = (a.array[i] & b.array[i]); + } + + return c; +} + + +bn_t bignum_or(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + bn_t c; + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + c.array[i] = (a.array[i] | b.array[i]); + } + + return c; +} + + +bn_t bignum_xor(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + //require(c, "c is null"); + + bn_t c; + int i; + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + c.array[i] = (a.array[i] ^ b.array[i]); + } + return c; +} + + +int bignum_cmp(bn_t a, bn_t b) +{ + //require(a, "a is null"); + //require(b, "b is null"); + + int i = BN_ARRAY_SIZE; + do { + i -= 1; /* Decrement first, to start with last array element */ + if (a.array[i] > b.array[i]) { + return LARGER; + } + else if (a.array[i] < b.array[i]) { + return SMALLER; + } + } + while (i != 0); + + return EQUAL; +} + + +/* Signed compare bn */ +int bignum_cmp_signed(bn_t a, bn_t b) +{ + int i = BN_ARRAY_SIZE; + do { + i -= 1; /* Decrement first, to start with last array element */ + if ((DTYPE_SIGNED)a.array[i] > (DTYPE_SIGNED)b.array[i]) { + return LARGER; + } + else if ((DTYPE_SIGNED)a.array[i] < (DTYPE_SIGNED)b.array[i]) { + return SMALLER; + } + } + while (i != 0); + + return EQUAL; +} + + +/* Unsigned compare bn */ +int bignum_cmp_unsigned(bn_t a, 
bn_t b) +{ + return bignum_cmp(a, b); +} + + +/* Return 1 if a == b else 0 */ +int bignum_is_equal(bn_t a, bn_t b) +{ + int ret; + ret = bignum_cmp_unsigned(a, b); + if (ret == EQUAL) + return 1; + else + return 0; +} + + +/* Return 1 if a = 0, "no negative shifts"); + + if (nwords >= BN_ARRAY_SIZE) { + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + a.array[i] = 0; + } + return a; + } + + for (i = 0; i < BN_ARRAY_SIZE - nwords; ++i) { + a.array[i] = a.array[i + nwords]; + } + + for (; i < BN_ARRAY_SIZE; ++i) { + a.array[i] = 0; + } + + return a; +} + + +static bn_t _lshift_word(bn_t a, int nwords) +{ + //require(a, "a is null"); + require(nwords >= 0, "no negative shifts"); + + int i; + + if (nwords >= BN_ARRAY_SIZE) { + for (i = 0; i < BN_ARRAY_SIZE; ++i) { + a.array[i] = 0; + } + return a; + } + + /* Shift whole words */ + for (i = (BN_ARRAY_SIZE - 1); i >= nwords; --i) { + a.array[i] = a.array[i - nwords]; + } + /* Zero pad shifted words. */ + for (; i >= 0; --i) { + a.array[i] = 0; + } + + return a; +} + + +static bn_t _lshift_one_bit(bn_t a) +{ + //require(a, "a is null"); + + int i; + for (i = (BN_ARRAY_SIZE - 1); i > 0; --i) { + a.array[i] = (a.array[i] << 1) | (a.array[i - 1] >> ((8 * WORD_SIZE) - 1)); + } + a.array[0] <<= 1; + + return a; +} + + +static bn_t _rshift_one_bit(bn_t a) +{ + //require(a, "a is null"); + + int i; + for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i) { + a.array[i] = (a.array[i] >> 1) | (a.array[i + 1] << ((8 * WORD_SIZE) - 1)); + } + a.array[BN_ARRAY_SIZE - 1] >>= 1; + + return a; +} + + +bn_t bignum_rol(bn_t a, int size, int nbits) +{ + bn_t c; + + c = bignum_or( + bignum_lshift(a, nbits), + bignum_rshift(a, size - nbits) + ); + c = bignum_mask(c, size); + return c; +} + + +bn_t bignum_ror(bn_t a, int size, int nbits) +{ + bn_t c; + + c = bignum_or( + bignum_rshift(a, nbits), + bignum_lshift(a, size - nbits) + ); + c = bignum_mask(c, size); + return c; +} + + +int bignum_getbit(bn_t a, int pos) +{ + int d_pos, bit_pos; + + require(pos < 
BN_BIT_SIZE, "size must be below bignum max size"); + + d_pos = pos / (sizeof(DTYPE) * 8); + bit_pos = pos % (sizeof(DTYPE) * 8); + return !!(a.array[d_pos] & (1 << bit_pos)); + +} + + + +/* + * Count leading zeros - count the number of zero starting at the most + * significant bit + * + * Example: + * - cntleadzeros(size=32, src=2): 30 + * - cntleadzeros(size=32, src=0): 32 + */ +int bignum_cntleadzeros(bn_t n, int size) +{ + int i; + + require(size, "size must be greater than 0"); + require(size <= BN_BIT_SIZE, "size must be below bignum max size"); + + for (i = 0; i < size; i++) { + if (bignum_getbit(n, size - i - 1)) + break; + } + + return i; +} + + + +/* + * Count trailing zeros - count the number of zero starting at the least + * significant bit + * + * Example: + * - cnttrailzeros(size=32, src=2): 1 + * - cnttrailzeros(size=32, src=0): 32 + */ +int bignum_cnttrailzeros(bn_t n, int size) +{ + int i; + + require(size, "size must be greater than 0"); + require(size <= BN_BIT_SIZE, "size must be below bignum max size"); + + for (i = 0; i < size; i++) { + if (bignum_getbit(n, i)) + break; + } + + return i; +} + + + + +bn_t bignum_sdiv(bn_t a, bn_t b, int size) +{ + require(size, "size must be greater than 0"); + require(size <= BN_BIT_SIZE, "size must be below bignum max size"); + + int a_sign, b_sign; + bn_t c; + + a_sign = bignum_getbit(a, size - 1); + b_sign = bignum_getbit(b, size - 1); + + if (a_sign) { + /* neg a */ + printf("a neg\n"); + a = bignum_sub(bignum_from_int(0), a); + a = bignum_mask(a, size - 1); + } + + if (b_sign) { + /* neg b */ + printf("b neg\n"); + b = bignum_sub(bignum_from_int(0), b); + b = bignum_mask(b, size - 1); + } + + c = bignum_udiv(a, b); + if (a_sign ^ b_sign) { + c = bignum_sub(bignum_from_int(0), c); + } + + c = bignum_mask(c, size); + return c; +} + + + +bn_t bignum_smod(bn_t a, bn_t b, int size) +{ + require(size, "size must be greater than 0"); + require(size <= BN_BIT_SIZE, "size must be below bignum max size"); + + bn_t 
c; + + c = bignum_sdiv(a, b, size); + c = bignum_mul(c, b); + c = bignum_sub(a, c); + c = bignum_mask(c, size); + return c; +} diff --git a/miasm/jitter/bn.h b/miasm/jitter/bn.h new file mode 100644 index 00000000..1aa6b432 --- /dev/null +++ b/miasm/jitter/bn.h @@ -0,0 +1,163 @@ +#ifndef __BIGNUM_H__ +#define __BIGNUM_H__ + +#if _WIN32 +#define _MIASM_EXPORT __declspec(dllexport) +#else +#define _MIASM_EXPORT +#endif + +/* + +Big number library - arithmetic on multiple-precision unsigned integers. + +This library is an implementation of arithmetic on arbitrarily large integers. + +The difference between this and other implementations, is that the data structure +has optimal memory utilization (i.e. a 1024 bit integer takes up 128 bytes RAM), +and all memory is allocated statically: no dynamic allocation for better or worse. + +Primary goals are correctness, clarity of code and clean, portable implementation. +Secondary goal is a memory footprint small enough to make it suitable for use in +embedded applications. + + +The current state is correct functionality and adequate performance. +There may well be room for performance-optimizations and improvements. + +Source: https://github.com/kokke/tiny-bignum-c + +Code slightly modified to support ast generation calculus style from Expr. + +*/ + +#include +#include + + +/* This macro defines the word size in bytes of the array that constitues the big-number data structure. */ +#ifndef WORD_SIZE + #define WORD_SIZE 4 +#endif + +#define BN_BYTE_SIZE 32 + +#define BN_BIT_SIZE ((BN_BYTE_SIZE) * 8) + +/* Size of big-numbers in bytes */ +//#define BN_ARRAY_SIZE (128 / WORD_SIZE) +#define BN_ARRAY_SIZE (BN_BYTE_SIZE / WORD_SIZE) + + +/* Here comes the compile-time specialization for how large the underlying array size should be. */ +/* The choices are 1, 2 and 4 bytes in size with uint32, uint64 for WORD_SIZE==4, as temporary. 
*/ +#ifndef WORD_SIZE + #error Must define WORD_SIZE to be 1, 2, 4 +#elif (WORD_SIZE == 1) + /* Data type of array in structure */ + #define DTYPE uint8_t + #define DTYPE_SIGNED int8_t + /* bitmask for getting MSB */ + #define DTYPE_MSB ((DTYPE_TMP)(0x80)) + /* Data-type larger than DTYPE, for holding intermediate results of calculations */ + #define DTYPE_TMP uint32_t + /* sprintf format string */ + #define SPRINTF_FORMAT_STR "%.02x" + #define SSCANF_FORMAT_STR "%2hhx" + /* Max value of integer type */ + #define MAX_VAL ((DTYPE_TMP)0xFF) +#elif (WORD_SIZE == 2) + #define DTYPE uint16_t + #define DTYPE_SIGNED int16_t + #define DTYPE_TMP uint32_t + #define DTYPE_MSB ((DTYPE_TMP)(0x8000)) + #define SPRINTF_FORMAT_STR "%.04x" + #define SSCANF_FORMAT_STR "%4hx" + #define MAX_VAL ((DTYPE_TMP)0xFFFF) +#elif (WORD_SIZE == 4) + #define DTYPE uint32_t + #define DTYPE_SIGNED int32_t + #define DTYPE_TMP uint64_t + #define DTYPE_MSB ((DTYPE_TMP)(0x80000000)) + #define SPRINTF_FORMAT_STR "%.08x" + #define SSCANF_FORMAT_STR "%8x" + #define MAX_VAL ((DTYPE_TMP)0xFFFFFFFF) +#endif +#ifndef DTYPE + #error DTYPE must be defined to uint8_t, uint16_t uint32_t or whatever +#endif + + +/* Custom assert macro - easy to disable */ +#define require(p, msg) assert(p && #msg) + + +/* Data-holding structure: array of DTYPEs */ +typedef struct bn +{ + DTYPE array[BN_ARRAY_SIZE]; +} bn_t; + + + +/* Tokens returned by bignum_cmp() for value comparison */ +enum { SMALLER = -1, EQUAL = 0, LARGER = 1 }; + +/* Initialization functions: */ +_MIASM_EXPORT bn_t bignum_init(void); +_MIASM_EXPORT bn_t bignum_from_int(DTYPE_TMP i); +_MIASM_EXPORT bn_t bignum_from_uint64(uint64_t i); +_MIASM_EXPORT int bignum_to_int(bn_t n); +_MIASM_EXPORT uint64_t bignum_to_uint64(bn_t n); +_MIASM_EXPORT bn_t bignum_from_string(char* str, int nbytes); +_MIASM_EXPORT void bignum_to_string(bn_t n, char* str, int maxsize); + + +/* Basic arithmetic operations: */ +_MIASM_EXPORT bn_t bignum_add(bn_t a, bn_t b); /* c = a + b */ 
+_MIASM_EXPORT bn_t bignum_sub(bn_t a, bn_t b); /* c = a - b */ +_MIASM_EXPORT bn_t bignum_mul(bn_t a, bn_t b); /* c = a * b */ +_MIASM_EXPORT bn_t bignum_udiv(bn_t a, bn_t b); /* c = a / b */ +_MIASM_EXPORT bn_t bignum_umod(bn_t a, bn_t b); /* c = a % b */ +_MIASM_EXPORT bn_t bignum_sdiv(bn_t a, bn_t b, int size); +_MIASM_EXPORT bn_t bignum_smod(bn_t a, bn_t b, int size); +//void bignum_udivmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d); /* c = a/b, d = a%b */ + + + +/* Bitwise operations: */ +_MIASM_EXPORT bn_t bignum_and(bn_t a, bn_t b); /* c = a & b */ +_MIASM_EXPORT bn_t bignum_or(bn_t a, bn_t b); /* c = a | b */ +_MIASM_EXPORT bn_t bignum_xor(bn_t a, bn_t b); /* c = a ^ b */ +_MIASM_EXPORT bn_t bignum_lshift(bn_t a, int nbits); /* b = a << nbits */ +_MIASM_EXPORT bn_t bignum_rshift(bn_t a, int nbits); /* b = a >> nbits */ +_MIASM_EXPORT bn_t bignum_a_rshift(bn_t a, int size, int nbits); /* b = a a>> nbits */ +_MIASM_EXPORT bn_t bignum_not(bn_t a); /* c = ~a */ + +/* Special operators and comparison */ +_MIASM_EXPORT int bignum_cmp(bn_t a, bn_t b); /* Compare: returns LARGER, EQUAL or SMALLER */ +_MIASM_EXPORT int bignum_is_equal(bn_t a, bn_t b); /* Return 1 if a == b else 0 */ +_MIASM_EXPORT int bignum_is_inf_unsigned(bn_t a, bn_t b); /* Return 1 if a 1024 */ +//bn_t bignum_isqrt(bn_t a, bn_t b); /* Integer square root -- e.g. 
isqrt(5) => 2*/ +_MIASM_EXPORT int bignum_cntleadzeros(bn_t n, int size); +_MIASM_EXPORT int bignum_cnttrailzeros(bn_t n, int size); +_MIASM_EXPORT bn_t bignum_assign(bn_t src); /* Copy src into dst -- dst := src */ +_MIASM_EXPORT bn_t bignum_mask(bn_t src, int bits); /* c = src & ((1<address = %s; + return JIT_RET_EXCEPTION; + } + """ + + CODE_EXCEPTION_AT_INSTR = r""" + if (CPU_exception_flag_at_instr) { + %s = %s; + BlockDst->address = %s; + return JIT_RET_EXCEPTION; + } + """ + + CODE_RETURN_EXCEPTION = r""" + return JIT_RET_EXCEPTION; + """ + + CODE_RETURN_NO_EXCEPTION = r""" + %s: + %s = %s; + BlockDst->address = %s; + return JIT_RET_NO_EXCEPTION; + """ + + CODE_CPU_EXCEPTION_POST_INSTR = r""" + if (CPU_exception_flag) { + %s = DST_value; + BlockDst->address = DST_value; + return JIT_RET_EXCEPTION; + } + """ + + CODE_VM_EXCEPTION_POST_INSTR = r""" + check_memory_breakpoint(&(jitcpu->pyvm->vm_mngr)); + check_invalid_code_blocs(&(jitcpu->pyvm->vm_mngr)); + if (VM_exception_flag) { + %s = DST_value; + BlockDst->address = DST_value; + return JIT_RET_EXCEPTION; + } + """ + + CODE_INIT = r""" + int DST_case; + uint64_t DST_value; + vm_cpu_t* mycpu = (vm_cpu_t*)jitcpu->cpu; + + goto %s; + """ + + CODE_BAD_BLOCK = r""" + // Unknown mnemonic + CPU_exception_flag = EXCEPT_UNK_MNEMO; + """ + CODE_RETURN_EXCEPTION + + def __init__(self, ir_arch): + self.ir_arch = ir_arch + self.PC = self.ir_arch.pc + self.translator = TranslatorC(self.ir_arch.loc_db) + self.init_arch_C() + + def init_arch_C(self): + """Iinitialize jitter internals""" + self.id_to_c_id = {} + for reg in self.ir_arch.arch.regs.all_regs_ids: + self.id_to_c_id[reg] = ExprId('mycpu->%s' % reg, reg.size) + + self.C_PC = self.id_to_c(self.PC) + + def dst_to_c(self, src): + """Translate Expr @src into C code""" + if not isinstance(src, Expr): + src = ExprInt(src, self.PC.size) + return self.id_to_c(src) + + def patch_c_id(self, expr): + """Replace ExprId in @expr with corresponding C variables""" + return 
expr.replace_expr(self.id_to_c_id) + + def id_to_c(self, expr): + """Translate Expr @expr into corresponding C code""" + return self.translator.from_expr(self.patch_c_id(expr)) + + def add_label_index(self, dst2index, loc_key): + """Insert @lbl to the dictionary @dst2index with a uniq value + @dst2index: LocKey -> uniq value + @loc_key: LocKey instance""" + + if loc_key not in dst2index: + dst2index[loc_key] = len(dst2index) + + def assignblk_to_irbloc(self, instr, assignblk): + """ + Ensure IRDst is always set in the head @assignblk of the @instr + @instr: an instruction instance + @assignblk: Assignblk instance + """ + new_assignblk = dict(assignblk) + if self.ir_arch.IRDst not in assignblk: + offset = instr.offset + instr.l + loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) + dst = ExprLoc(loc_key, self.ir_arch.IRDst.size) + new_assignblk[self.ir_arch.IRDst] = dst + irs = [AssignBlock(new_assignblk, instr)] + return IRBlock(self.ir_arch.get_loc_key_for_instr(instr), irs) + + def block2assignblks(self, block): + """ + Return the list of irblocks for a native @block + @block: AsmBlock + """ + irblocks_list = [] + for instr in block.lines: + assignblk_head, assignblks_extra = self.ir_arch.instr2ir(instr) + # Keep result in ordered list as first element is the assignblk head + # The remainings order is not really important + irblock_head = self.assignblk_to_irbloc(instr, assignblk_head) + irblocks = [irblock_head] + assignblks_extra + + # Simplify high level operators + out = [] + for irblock in irblocks: + new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) + new_irblock = new_irblock.simplify(expr_simp_high_to_explicit)[1] + out.append(new_irblock) + irblocks = out + + for irblock in irblocks: + assert irblock.dst is not None + irblocks_list.append(irblocks) + + return irblocks_list + + def add_local_var(self, dst_var, dst_index, expr): + """ + Add local variable used to store temporay result + @dst_var: dictionary 
of Expr -> local_var_expr + @dst_index : dictionary of size -> local var count + @expr: Expression source + """ + size = expr.size + if size < 8: + size = 8 + if size not in dst_index: + raise RuntimeError("Unsupported operand size %s", size) + var_num = dst_index[size] + dst = ExprId("var_%.2d_%.2d" % (size, var_num), size) + dst_index[size] += 1 + dst_var[expr] = dst + return dst + + def get_mem_prefetch(self, assignblk): + """ + Generate temporary variables used to fetch memory used in the @assignblk + Return a dictionary: ExprMem -> temporary variable + @assignblk: AssignBlock instance + """ + mem_index = {8: 0, 16: 0, 32: 0, 64: 0, 128:0} + mem_var = {} + + # Prefetch memory read + for expr in assignblk.get_r(mem_read=True): + if not isinstance(expr, ExprMem): + continue + var_num = mem_index[expr.size] + mem_index[expr.size] += 1 + var = ExprId( + "prefetch_%.2d_%.2d" % (expr.size, var_num), expr.size + ) + mem_var[expr] = var + + # Generate memory prefetch + return mem_var + + def gen_c_assignments(self, assignblk): + """ + Return C information used to generate the C code of the @assignblk + @assignblk: an AssignBlock instance + """ + c_var = [] + c_main = [] + c_mem = [] + c_updt = [] + c_prefetch = [] + + dst_index = {8: 0, 16: 0, 32: 0, 64: 0, 128:0} + dst_var = {} + + prefetchers = self.get_mem_prefetch(assignblk) + + for expr, prefetcher in viewitems(prefetchers): + str_src = self.id_to_c(expr) + str_dst = self.id_to_c(prefetcher) + c_prefetch.append('%s = %s;' % (str_dst, str_src)) + + for var in viewvalues(prefetchers): + if var.size <= self.translator.NATIVE_INT_MAX_SIZE: + c_var.append("uint%d_t %s;" % (var.size, var)) + else: + c_var.append("bn_t %s; // %d" % (var, var.size)) + + for dst, src in viewitems(assignblk): + src = src.replace_expr(prefetchers) + if dst == self.ir_arch.IRDst: + pass + elif isinstance(dst, ExprId): + new_dst = self.add_local_var(dst_var, dst_index, dst) + if dst in self.ir_arch.arch.regs.regs_flt_expr: + # Don't mask float 
assignment + c_main.append( + '%s = (%s);' % (self.id_to_c(new_dst), self.id_to_c(src))) + elif new_dst.size <= self.translator.NATIVE_INT_MAX_SIZE: + c_main.append( + '%s = (%s)&%s;' % (self.id_to_c(new_dst), + self.id_to_c(src), + SIZE_TO_MASK[src.size])) + else: + c_main.append( + '%s = bignum_mask(%s, %d);' % ( + self.id_to_c(new_dst), + self.id_to_c(src), + src.size + ) + ) + elif isinstance(dst, ExprMem): + ptr = dst.ptr.replace_expr(prefetchers) + if ptr.size <= self.translator.NATIVE_INT_MAX_SIZE: + new_dst = ExprMem(ptr, dst.size) + str_dst = self.id_to_c(new_dst).replace('MEM_LOOKUP', 'MEM_WRITE') + c_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src))) + else: + ptr_str = self.id_to_c(ptr) + if ptr.size <= self.translator.NATIVE_INT_MAX_SIZE: + c_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src))) + else: + if src.size <= self.translator.NATIVE_INT_MAX_SIZE: + c_mem.append('MEM_WRITE_BN_INT(jitcpu, %d, %s, %s);' % ( + src.size, ptr_str, self.id_to_c(src)) + ) + else: + c_mem.append('MEM_WRITE_BN_BN(jitcpu, %d, %s, %s);' % ( + src.size, ptr_str, self.id_to_c(src)) + ) + else: + raise ValueError("Unknown dst") + + for dst, new_dst in viewitems(dst_var): + if dst == self.ir_arch.IRDst: + continue + + c_updt.append('%s = %s;' % (self.id_to_c(dst), self.id_to_c(new_dst))) + if dst.size <= self.translator.NATIVE_INT_MAX_SIZE: + c_var.append("uint%d_t %s;" % (new_dst.size, new_dst)) + else: + c_var.append("bn_t %s; // %d" % (new_dst, new_dst.size)) + + return c_prefetch, c_var, c_main, c_mem, c_updt + + def gen_check_memory_exception(self, address): + """Generate C code to check memory exceptions + @address: address of the faulty instruction""" + dst = self.dst_to_c(address) + return (self.CODE_EXCEPTION_MEM_AT_INSTR % (self.C_PC, dst, dst)).split('\n') + + def gen_check_cpu_exception(self, address): + """Generate C code to check cpu exceptions + @address: address of the faulty instruction""" + dst = self.dst_to_c(address) + return 
(self.CODE_EXCEPTION_AT_INSTR % (self.C_PC, dst, dst)).split('\n') + + def traverse_expr_dst(self, expr, dst2index): + """ + Generate the index of the destination label for the @expr + @dst2index: dictionary to link label to its index + """ + + if isinstance(expr, ExprCond): + src1, src1b = self.traverse_expr_dst(expr.src1, dst2index) + src2, src2b = self.traverse_expr_dst(expr.src2, dst2index) + cond = self.id_to_c(expr.cond) + if not expr.cond.size <= self.translator.NATIVE_INT_MAX_SIZE: + cond = "(!bignum_is_zero(%s))" % cond + + return ("((%s)?(%s):(%s))" % (cond, src1, src2), + "((%s)?(%s):(%s))" % (cond, src1b, src2b)) + if isinstance(expr, ExprInt): + offset = int(expr) + loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) + self.add_label_index(dst2index, loc_key) + out = hex(offset) + return ("%s" % dst2index[loc_key], out) + if expr.is_loc(): + loc_key = expr.loc_key + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) + if offset is not None: + self.add_label_index(dst2index, loc_key) + out = hex(offset) + return ("%s" % dst2index[loc_key], out) + self.add_label_index(dst2index, loc_key) + out = hex(0) + return ("%s" % dst2index[loc_key], out) + dst2index[expr] = -1 + return ("-1", self.id_to_c(expr)) + + def gen_assignblk_dst(self, dst): + """Generate C code to handle instruction destination + @dst: instruction destination Expr""" + dst2index = {} + (ret, retb) = self.traverse_expr_dst(dst, dst2index) + ret = "DST_case = %s;" % ret + retb = 'DST_value = %s;' % retb + return ['// %s' % dst2index, + '%s' % ret, + '%s' % retb], dst2index + + def gen_post_instr_checks(self, attrib): + """Generate C code for handling potential exceptions + @attrib: Attributes instance""" + out = [] + if attrib.mem_read | attrib.mem_write: + out += (self.CODE_VM_EXCEPTION_POST_INSTR % (self.C_PC)).split('\n') + if attrib.set_exception: + out += (self.CODE_CPU_EXCEPTION_POST_INSTR % (self.C_PC)).split('\n') + + if attrib.mem_read | attrib.mem_write: 
+ out.append("reset_memory_access(&(jitcpu->pyvm->vm_mngr));") + + return out + + def gen_pre_code(self, instr_attrib): + """Callback to generate code BEFORE the instruction execution + @instr_attrib: Attributes instance""" + + out = [] + + if instr_attrib.log_mn: + out.append( + 'printf("%.8X %s\\n");' % ( + instr_attrib.instr.offset, + instr_attrib.instr.to_string(self.ir_arch.loc_db) + ) + ) + return out + + def gen_post_code(self, attrib, pc_value): + """Callback to generate code AFTER the instruction execution + @attrib: Attributes instance""" + out = [] + if attrib.log_regs: + # Update PC for dump_gpregs + out.append("%s = %s;" % (self.C_PC, pc_value)) + out.append('dump_gpregs(jitcpu->cpu);') + return out + + def gen_goto_code(self, attrib, instr_offsets, dst): + """Generate C code for a potential destination @dst + @attrib: instruction Attributes + @instr_offsets: instructions offsets list + @dst: potential instruction destination""" + + out = [] + if isinstance(dst, Expr): + out += self.gen_post_code(attrib, "DST_value") + out.append('BlockDst->address = DST_value;') + out += self.gen_post_instr_checks(attrib) + out.append('\t\treturn JIT_RET_NO_EXCEPTION;') + return out + + assert isinstance(dst, LocKey) + offset = self.ir_arch.loc_db.get_location_offset(dst) + if offset is None: + # Generate goto for local labels + return ['goto %s;' % dst] + if (offset > attrib.instr.offset and + offset in instr_offsets): + # Only generate goto for next instructions. + # (consecutive instructions) + out += self.gen_post_code(attrib, "0x%x" % offset) + out += self.gen_post_instr_checks(attrib) + out.append('goto %s;' % dst) + else: + out += self.gen_post_code(attrib, "0x%x" % offset) + out.append('BlockDst->address = DST_value;') + out += self.gen_post_instr_checks(attrib) + out.append('\t\treturn JIT_RET_NO_EXCEPTION;') + return out + + def gen_dst_goto(self, attrib, instr_offsets, dst2index): + """ + Generate code for possible @dst2index. 
def gen_c_code(self, attrib, c_dst, c_assignmnts):
    """
    Generate the C code for assignblk.
    @attrib: Attributes instance
    @c_dst: irdst C code
    @c_assignmnts: (prefetch, var, main, mem, updt) lists of C lines

    The lines are wrapped in a C block and laid out section by section, each
    one introduced by a comment; exception checks are inserted only when the
    matching condition holds.
    """
    prefetch, variables, main, mem_updates, updates = c_assignmnts

    code = ["{", "// var"]
    code.extend(variables)
    code.append("// Prefetch")
    code.extend(prefetch)
    code.append("// Dst")
    code.extend(c_dst)
    code.append("// Main")
    code.extend(main)

    code.append("// Check op/mem exceptions")
    if prefetch:
        # Check memory access if assignblk has memory read
        code.extend(self.gen_check_memory_exception(attrib.instr.offset))

    code.append("// Mem updt")
    code.extend(mem_updates)

    code.append("// Check exception Mem write")
    if attrib.mem_write:
        # Check memory write exceptions
        code.extend(self.gen_check_memory_exception(attrib.instr.offset))

    code.append("// Updt")
    code.extend(updates)

    code.append("// Checks exception")
    if attrib.set_exception:
        # Check post assignblk exception flags
        code.extend(self.gen_check_cpu_exception(attrib.instr.offset))

    code.append("}")
    return code
def gen_init(self, block):
    """
    Generate the init C code for a @block
    @block: an asm_bloc instance

    Return the init C lines together with the offsets of every instruction
    of the block, followed by the offset of the location right after it.
    """
    offsets = []
    for instr in block.lines:
        offsets.append(instr.offset)

    next_loc_key = self.get_block_post_label(block)
    offsets.append(self.ir_arch.loc_db.get_location_offset(next_loc_key))

    init_lines = (self.CODE_INIT % block.loc_key).split("\n")
    return init_lines, offsets
#ifndef __COMPAT_PY23_H__
#define __COMPAT_PY23_H__

/*
 * Helpers hiding the Python 2 / Python 3 C-API differences.
 *
 * The PyGet* macros expand to bare statements (an if/else chain, or an `if`
 * followed by an assignment): use them as standalone statements inside a
 * braced block, never as the unbraced body of another if/else.
 *
 * PyGetInt and PyGetStr use a RAISE(exception, message) macro that is not
 * defined in this header; the including file has to provide it.
 */

#if PY_MAJOR_VERSION >= 3

/* Store the Python int @item in @value as uint64_t, RAISE TypeError otherwise */
#define PyGetInt(item, value)						\
	if (PyLong_Check(item)){					\
		value = (uint64_t)PyLong_AsUnsignedLongLong(item);	\
	}								\
	else{								\
		RAISE(PyExc_TypeError,"arg must be int");		\
	}


/* Same as PyGetInt, for functions reporting errors by returning -1 */
#define PyGetInt_retneg(item, value)					\
	if (PyLong_Check(item)){					\
		value = (uint64_t)PyLong_AsUnsignedLongLong(item);	\
	}								\
	else{								\
		PyErr_SetString(PyExc_TypeError, "Arg must be int");	\
		return -1;						\
	}

/* Store the UTF-8 buffer of the str object @name in @dest */
#define PyGetStr(dest, name)						\
	if (!PyUnicode_Check((name)))					\
		RAISE(PyExc_TypeError,"Page name must be str");		\
	(dest) = PyUnicode_AsUTF8((name))



#else

/* Store the Python int or long @item in @value as uint64_t */
#define PyGetInt(item, value)						\
	if (PyInt_Check(item)){						\
		value = (uint64_t)PyInt_AsLong(item);			\
	}								\
	else if (PyLong_Check(item)){					\
		value = (uint64_t)PyLong_AsUnsignedLongLong(item);	\
	}								\
	else{								\
		RAISE(PyExc_TypeError,"arg must be int");		\
	}


/* Same as PyGetInt, for functions reporting errors by returning -1 */
#define PyGetInt_retneg(item, value)					\
	if (PyInt_Check(item)){						\
		value = (uint64_t)PyInt_AsLong(item);			\
	}								\
	else if (PyLong_Check(item)){					\
		value = (uint64_t)PyLong_AsUnsignedLongLong(item);	\
	}								\
	else{								\
		PyErr_SetString(PyExc_TypeError, "Arg must be int");	\
		return -1;						\
	}


/* Store the internal buffer of the (byte) string object @name in @dest */
#define PyGetStr(dest, name)						\
	if (!PyString_Check((name)))					\
		RAISE(PyExc_TypeError,"Page name must be bytes");	\
	(dest) = PyString_AsString((name))

#endif



#if PY_MAJOR_VERSION >= 3

/* Declare the module initialisation function, named PyInit_<name> */
#define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)

/* Create the module object in @ob from a static PyModuleDef */
#define MOD_DEF(ob, name, doc, methods)				  \
	static struct PyModuleDef moduledef = {			  \
		PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \
	ob = PyModule_Create(&moduledef);
#else

/* Declare the module initialisation function, named init<name> */
#define MOD_INIT(name) PyMODINIT_FUNC init##name(void)

/* Create the module object in @ob with Py_InitModule3 */
#define MOD_DEF(ob, name, doc, methods)			\
	ob = Py_InitModule3(name, methods, doc);
#endif





#endif
# VM Mngr Exceptions

# Flag combined (OR-ed) into several of the exception codes below
EXCEPT_DO_NOT_UPDATE_PC = 1 << 25
EXCEPT_NUM_UPDT_EIP = 1 << 11

EXCEPT_CODE_AUTOMOD = 1 << 0
EXCEPT_SOFT_BP = 1 << 1
EXCEPT_INT_XX = 1 << 2
EXCEPT_SPR_ACCESS = 1 << 3
EXCEPT_BREAKPOINT_MEMORY = 1 << 10
# Deprecated alias of EXCEPT_BREAKPOINT_MEMORY
EXCEPT_BREAKPOINT_INTERN = EXCEPT_BREAKPOINT_MEMORY

# Exceptions that also carry EXCEPT_DO_NOT_UPDATE_PC
EXCEPT_ACCESS_VIOL = EXCEPT_DO_NOT_UPDATE_PC | (1 << 14)
EXCEPT_DIV_BY_ZERO = EXCEPT_DO_NOT_UPDATE_PC | (1 << 16)
EXCEPT_PRIV_INSN = EXCEPT_DO_NOT_UPDATE_PC | (1 << 17)
EXCEPT_ILLEGAL_INSN = EXCEPT_DO_NOT_UPDATE_PC | (1 << 18)
EXCEPT_UNK_MNEMO = EXCEPT_DO_NOT_UPDATE_PC | (1 << 19)

# VM Mngr constants

PAGE_READ = 1
PAGE_WRITE = 2
PAGE_EXEC = 4

BREAKPOINT_READ = 1
BREAKPOINT_WRITE = 2
def mem_write(self, dest, data):
    """Memory write wrapper for symbolic execution
    @dest: ExprMem instance
    @data: Expr instance

    @data must simplify to an ExprInt; a RuntimeError is raised otherwise.
    The value is stored through the CPU and the access is recorded on the VM.
    """

    # Get the content to write
    simplified = self.expr_simp(data)
    if not isinstance(simplified, m2_expr.ExprInt):
        raise RuntimeError("A simplification is missing: %s" % simplified)
    value = simplified.arg.arg

    # Format information
    addr = dest.ptr.arg.arg
    nbytes = simplified.size // 8
    # Hex digits of the value (most significant first), without the "0x"
    # prefix nor the "L" suffix, left-padded with zeros to 2 digits per byte
    digits = hex(value).replace("0x", "").replace("L", "")
    raw = decode_hex(digits.rjust(nbytes * 2, "0"))

    if self.vm.is_little_endian():
        raw = raw[::-1]

    # Write in VmMngr context
    self.cpu.set_mem(addr, raw)
    self.vm.add_mem_write(addr, len(raw))
def _simp_handle_x86_cpuid(self, e_s, expr):
    """From miasm/jitter/op_semantics.h: x86_cpuid

    Replace an "x86_cpuid" operation whose arguments are all integers by the
    matching entry of the x86_cpuid table; any other expression is returned
    unchanged.
    """
    if expr.op != "x86_cpuid":
        return expr

    if not all(arg.is_int() for arg in expr.args):
        return expr
    leaf, reg_num = [int(arg) for arg in expr.args]

    # A missing table entry is left to raise on purpose
    return m2_expr.ExprInt(self.x86_cpuid[leaf][reg_num], expr.size)
def set_options(self, **kwargs):
    """Set options relative to the backend

    Every keyword argument is stored in self.options, replacing the entry of
    the same name when there is one.
    """
    for option, value in kwargs.items():
        self.options[option] = value
def set_block_min_max(self, cur_block):
    """Update cur_block to set min/max address

    @cur_block: block receiving the ad_min / ad_max attributes

    ad_min is the offset of the first instruction and ad_max the address
    right after the last one (its offset plus its length). A block without
    any instruction is given a one byte range starting at its own location.
    """

    if cur_block.lines:
        first_instr = cur_block.lines[0]
        last_instr = cur_block.lines[-1]
        cur_block.ad_min = first_instr.offset
        cur_block.ad_max = last_instr.offset + last_instr.l
    else:
        # 1 byte block for unknown mnemonic
        # The location database is reached through the instance: there is
        # no `ir_arch` name in this scope.
        offset = self.ir_arch.loc_db.get_location_offset(cur_block.loc_key)
        cur_block.ad_min = offset
        cur_block.ad_max = offset + 1
def disasm_and_jit_block(self, addr, vm):
    """Disassemble a new block and JiT it
    @addr: address of the block to disassemble (LocKey or int)
    @vm: VmMngr instance

    Return the disassembled block. An AsmBlockBad result is returned as is,
    without being registered nor jitted. A RuntimeError is raised when @addr
    is a LocKey without a known offset.
    """

    # Turn a location key into its offset
    if isinstance(addr, LocKey):
        addr = self.ir_arch.loc_db.get_location_offset(addr)
        if addr is None:
            raise RuntimeError("Unknown offset for LocKey")

    # Prepare disassembler
    self.mdis.lines_wd = self.options["jit_maxline"]

    # Disassemble it
    new_block = self.mdis.dis_block(addr)
    if isinstance(new_block, AsmBlockBad):
        return new_block

    # Logging
    if self.log_newbloc:
        print(new_block.to_string(self.mdis.loc_db))

    # Update label -> block
    self.loc_key_to_block[new_block.loc_key] = new_block

    # Store min/max block address needed in jit automod code
    self.set_block_min_max(new_block)

    # JiT it
    self.add_block(new_block)

    # Update jitcode mem range
    self.add_block_to_mem_interval(vm, new_block)
    return new_block
def __updt_jitcode_mem_range(self, vm):
    """Rebuild the VM blocks address memory range
    @vm: VmMngr instance

    The VM code block pool is emptied, then refilled from
    self.blocks_mem_interval.
    """

    # Reset the current pool
    vm.reset_code_bloc_pool()

    # Add blocks in the pool; the upper bound is handed over incremented by 1
    for first, last in self.blocks_mem_interval:
        vm.add_code_bloc(first, last + 1)
def updt_automod_code(self, vm):
    """Remove jitted code updated by memory write
    @vm: VmMngr instance

    The (start, stop) pairs reported by vm.get_memory_write() are collected
    and handed to updt_automod_code_range.
    """
    written = [(start, stop) for start, stop in vm.get_memory_write()]
    self.updt_automod_code_range(vm, written)
def gen_core(arch, attrib):
    """Return the C prologue shared by every jitted source file.

    @arch: architecture; its name selects the arch/JitCore_<name>.h header
    @attrib: not used by this function

    The prologue includes the jitter headers located next to this module,
    then defines the RAISE macro.
    """
    lib_dir = os.path.dirname(os.path.realpath(__file__))

    headers = [
        "queue.h",
        "op_semantics.h",
        "vm_mngr.h",
        "vm_mngr_py.h",
        "bn.h",
        "JitCore.h",
        "arch/JitCore_%s.h" % arch.name,
    ]
    includes = "".join(
        '#include "%s/%s"\n' % (lib_dir, header) for header in headers
    )

    raise_macro = r'''
#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;}
'''
    return includes + raise_macro
def gen_c_code(self, block):
    """
    Return the C code corresponding to the @block
    @block: block handed to the code generator

    The lines produced by the code generator are wrapped in the definition
    of the exported entry function, then passed to gen_C_source.
    """
    signature = '_MIASM_EXPORT int %s(block_id * BlockDst, JitCpu* jitcpu)' % self.FUNCNAME
    body = self.codegen.gen_c(
        block,
        log_mn=self.log_mn,
        log_regs=self.log_regs
    )
    func_lines = [signature + '{'] + body + ['}\n']

    return self.gen_C_source(self.ir_arch, func_lines)
class JitCore_Gcc(JitCore_Cc_Base):
    "JiT management, using a C compiler as backend"

    def __init__(self, ir_arch, bin_stream):
        super(JitCore_Gcc, self).__init__(ir_arch, bin_stream)
        self.exec_wrapper = Jitgcc.gcc_exec_block

    def deleteCB(self, offset):
        """Free the state associated to @offset and delete it
        @offset: gcc state offset
        """
        # Pick the platform function unloading a shared library
        if is_win:
            flib = _ctypes.FreeLibrary
        else:
            flib = _ctypes.dlclose
        flib(self.states[offset]._handle)
        del self.states[offset]

    def load_code(self, label, fname_so):
        """Load the shared object @fname_so and register its entry point
        @label: location of the jitted block
        @fname_so: path of the compiled shared object
        """
        lib = ctypes.cdll.LoadLibrary(fname_so)
        func = getattr(lib, self.FUNCNAME)
        addr = ctypes.cast(func, ctypes.c_void_p).value
        offset = self.ir_arch.loc_db.get_location_offset(label)
        self.offset_to_jitted_func[offset] = addr
        # Keep the library object: deleteCB needs its handle to unload it
        self.states[offset] = lib

    def add_block(self, block):
        """Add a bloc to JiT and JiT it.
        @block: block to jit

        The compiled object is cached in self.tempdir under the hash of the
        block; the compiler only runs when no usable cache file exists.
        """
        import sys

        block_hash = self.hash_block(block)
        ext = sysconfig.get_config_var('EXT_SUFFIX')
        if ext is None:
            ext = ".so" if not is_win else ".pyd"
        fname_out = os.path.join(self.tempdir, "%s%s" % (block_hash, ext))

        if not os.access(fname_out, os.R_OK | os.X_OK):
            func_code = self.gen_c_code(block)

            # Create unique C file; the file object closes the descriptor
            # even if the write fails
            fdesc, fname_in = tempfile.mkstemp(suffix=".c")
            with os.fdopen(fdesc, "wb") as c_file:
                c_file.write(func_code.encode())

            # Create unique SO file
            fdesc, fname_tmp = tempfile.mkstemp(suffix=ext)
            os.close(fdesc)

            inc_dir = ["-I%s" % inc for inc in self.include_files]
            libs = ["%s" % lib for lib in self.libs]
            if is_win:
                # Link against the import library of the running interpreter
                libs.append(
                    os.path.join(
                        get_python_inc(),
                        "..",
                        "libs",
                        "python%d%d.lib" % sys.version_info[:2]
                    )
                )
                cl = [
                    "cl", "/nologo", "/W3", "/MP",
                    "/Od", "/DNDEBUG", "/D_WINDOWS", "/Gm-", "/EHsc",
                    "/RTC1", "/MD", "/GS",
                    fname_in
                ] + inc_dir + libs
                cl += ["/link", "/DLL", "/OUT:" + fname_tmp]
                out_dir, _ = os.path.split(fname_tmp)
                check_call(cl, cwd=out_dir)
                basename_out, _ = os.path.splitext(fname_tmp)
                basename_in, _ = os.path.splitext(os.path.basename(fname_in))
                # Remove the side files left by the compiler and the linker
                for artifact_ext in ('.obj', '.exp', '.lib'):
                    artifact_out_path = os.path.join(
                        out_dir,
                        basename_out + artifact_ext
                    )
                    if os.path.isfile(artifact_out_path):
                        os.remove(artifact_out_path)
                    artifact_in_path = os.path.join(
                        out_dir,
                        basename_in + artifact_ext
                    )
                    if os.path.isfile(artifact_in_path):
                        os.remove(artifact_in_path)
            else:
                args = [
                    "cc",
                    "-O3",
                    "-shared",
                    "-fPIC",
                    fname_in,
                    "-o",
                    fname_tmp
                ] + inc_dir + libs
                check_call(args)

            # Move temporary file to final file
            try:
                os.rename(fname_tmp, fname_out)
            except OSError as e:
                # On Windows, os.rename works slightly differently than on
                # Linux; quoting the documentation:
                # "On Unix, if dst exists and is a file, it will be replaced
                # silently if the user has permission. The operation may fail
                # on some Unix flavors if src and dst are on different
                # filesystems. If successful, the renaming will be an atomic
                # operation (this is a POSIX requirement). On Windows, if dst
                # already exists, OSError will be raised even if it is a file;
                # there may be no way to implement an atomic rename when dst
                # names an existing file."
                # [Error 183] Cannot create a file when that file already exists
                #
                # OSError is caught rather than WindowsError: the latter name
                # only exists on Windows. `winerror` is absent elsewhere, so
                # any other failure is re-raised.
                if getattr(e, "winerror", None) != 183:
                    raise
                os.remove(fname_tmp)
            os.remove(fname_in)

        self.load_code(block.loc_key, fname_out)

    @staticmethod
    def gen_C_source(ir_arch, func_code):
        """Return the complete C source of a jitted function
        @ir_arch: ir instance; its arch and attrib are given to gen_core
        @func_code: list of C lines of the function
        """
        c_source = "\n".join(func_code)

        c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source
        # The generated code uses the Python C API (see the RAISE macro)
        c_source = "#define PARITY_IMPORT\n#include <Python.h>\n" + c_source
        return c_source
def add_block(self, block):
    """Add a block to JiT and JiT it.
    @block: the block to add

    The function pointer is loaded from the "<hash>.bc" cache file of
    self.tempdir when that file is readable; otherwise the function is built
    in the LLVM context. Either way the pointer is registered under the
    offset of the block.
    """

    block_hash = self.hash_block(block)
    cache_path = os.path.join(self.tempdir, "%s.bc" % block_hash)

    if os.access(cache_path, os.R_OK):
        # The cache file exists: function can be loaded from cache
        func_ptr = self.context.get_ptr_from_cache(cache_path, self.FUNCNAME)
    else:
        # Build a function in the context
        llvm_func = LLVMFunction(self.context, self.FUNCNAME)

        # Set log level
        llvm_func.log_regs = self.log_regs
        llvm_func.log_mn = self.log_mn

        # Import asm block
        llvm_func.from_asmblock(block)

        # Verify
        if self.options["safe_mode"] is True:
            llvm_func.verify()

        # Optimise
        if self.options["optimise"] is True:
            llvm_func.optimise()

        # Log
        if self.options["log_func"] is True:
            print(llvm_func)
        if self.options["log_assembly"] is True:
            print(llvm_func.get_assembly())

        # Propagate the cache filename
        self.context.set_cache_filename(llvm_func, cache_path)

        # Get a pointer on the function for JiT
        func_ptr = llvm_func.get_function_pointer()

    # Store a pointer on the function jitted code
    offset = self.ir_arch.loc_db.get_location_offset(block.loc_key)
    self.offset_to_jitted_func[offset] = func_ptr
EmulatedSymbExec + + def __init__(self, ir_arch, bin_stream): + super(JitCore_Python, self).__init__(ir_arch, bin_stream) + self.ir_arch = ir_arch + self.ircfg = self.ir_arch.new_ircfg() + + # CPU & VM (None for now) will be set later + + self.symbexec = self.SymbExecClass( + None, None, + self.ir_arch, {}, + sb_expr_simp=expr_simp_explicit + ) + self.symbexec.enable_emulated_simplifications() + + def set_cpu_vm(self, cpu, vm): + self.symbexec.cpu = cpu + self.symbexec.vm = vm + + def load(self): + "Preload symbols according to current architecture" + self.symbexec.reset_regs() + + def arch_specific(self): + """Return arch specific information for the current architecture""" + arch = self.ir_arch.arch + has_delayslot = False + if arch.name == "mips32": + from miasm.arch.mips32.jit import mipsCGen + cgen_class = mipsCGen + has_delayslot = True + elif arch.name == "arm": + from miasm.arch.arm.jit import arm_CGen + cgen_class = arm_CGen + else: + from miasm.jitter.codegen import CGen + cgen_class = CGen + return cgen_class(self.ir_arch), has_delayslot + + def add_block(self, asmblock): + """Create a python function corresponding to an AsmBlock + @asmblock: AsmBlock + """ + + # TODO: merge duplicate code with CGen, llvmconvert + codegen, has_delayslot = self.arch_specific() + irblocks_list = codegen.block2assignblks(asmblock) + instr_offsets = [line.offset for line in asmblock.lines] + + loc_db = self.ir_arch.loc_db + local_loc_keys = [] + for irblocks in irblocks_list: + for irblock in irblocks: + local_loc_keys.append(irblock.loc_key) + + def myfunc(cpu): + """Execute the function according to cpu and vmmngr states + @cpu: JitCpu instance + """ + # Get virtual memory handler + vmmngr = cpu.vmmngr + + # Get execution engine (EmulatedSymbExec instance) + exec_engine = self.symbexec + + # Refresh CPU values according to @cpu instance + exec_engine.update_engine_from_cpu() + + # Get initial loc_key + cur_loc_key = asmblock.loc_key + + # Update PC helper + update_pc = 
lambda value: setattr(cpu, self.ir_arch.pc.name, value) + + while True: + # Retrieve the expected irblock + for instr, irblocks in zip(asmblock.lines, irblocks_list): + for index, irblock in enumerate(irblocks): + if irblock.loc_key == cur_loc_key: + break + else: + continue + break + else: + raise RuntimeError("Unable to find the block for %r" % cur_loc_key) + + instr_attrib, irblocks_attributes = codegen.get_attributes( + instr, irblocks, self.log_mn, self.log_regs + ) + irblock_attributes = irblocks_attributes[index] + + # Do IRBlock + new_irblock = self.ir_arch.irbloc_fix_regs_for_mode( + irblock, self.ir_arch.attrib + ) + if index == 0: + # Pre code + if instr_attrib.log_mn: + print("%.8X %s" % ( + instr_attrib.instr.offset, + instr_attrib.instr.to_string(loc_db) + )) + + # Exec IRBlock + instr = instr_attrib.instr + + for index, assignblk in enumerate(irblock): + attributes = irblock_attributes[index] + + # Eval current instruction (in IR) + exec_engine.eval_updt_assignblk(assignblk) + + # Check memory access / write exception + # TODO: insert a check between memory reads and writes + if attributes.mem_read or attributes.mem_write: + # Restricted exception + flag = ~csts.EXCEPT_CODE_AUTOMOD & csts.EXCEPT_DO_NOT_UPDATE_PC + if (vmmngr.get_exception() & flag != 0): + # Do not update registers + update_pc(instr.offset) + return instr.offset + + # Update registers values + exec_engine.update_cpu_from_engine() + + # Check post assignblk exception flags + if attributes.set_exception: + # Restricted exception + if cpu.get_exception() > csts.EXCEPT_NUM_UPDT_EIP: + # Update PC + update_pc(instr.offset) + return instr.offset + + dst = exec_engine.eval_expr(self.ir_arch.IRDst) + if dst.is_int(): + loc_key = loc_db.get_or_create_offset_location(int(dst)) + dst = ExprLoc(loc_key, dst.size) + + assert dst.is_loc() + loc_key = dst.loc_key + offset = loc_db.get_location_offset(loc_key) + if offset is None: + # Avoid checks on generated label + cur_loc_key = loc_key + 
continue + + if instr_attrib.log_regs: + update_pc(offset) + cpu.dump_gpregs_with_attrib(self.ir_arch.attrib) + + # Post-instr checks + if instr_attrib.mem_read | instr_attrib.mem_write: + vmmngr.check_memory_breakpoint() + vmmngr.check_invalid_code_blocs() + if vmmngr.get_exception(): + update_pc(offset) + return offset + + if instr_attrib.set_exception: + if cpu.get_exception(): + update_pc(offset) + return offset + + if instr_attrib.mem_read | instr_attrib.mem_write: + vmmngr.reset_memory_access() + + # Manage resulting address + if (loc_key in local_loc_keys and + offset > instr.offset): + # Forward local jump + # Note: a backward local jump has to be promoted to extern, + # for max_exec_per_call support + cur_loc_key = loc_key + continue + + # Delay slot + if has_delayslot: + delay_slot_set = exec_engine.eval_expr(codegen.delay_slot_set) + if delay_slot_set.is_int() and int(delay_slot_set) != 0: + return int(exec_engine.eval_expr(codegen.delay_slot_dst)) + + # Extern of asmblock, must have an offset + assert offset is not None + return offset + + # Associate myfunc with current loc_key + offset = loc_db.get_location_offset(asmblock.loc_key) + assert offset is not None + self.offset_to_jitted_func[offset] = myfunc + + def exec_wrapper(self, loc_key, cpu, _offset_to_jitted_func, _stop_offsets, + _max_exec_per_call): + """Call the function @loc_key with @cpu + @loc_key: function's loc_key + @cpu: JitCpu instance + """ + + # Get Python function corresponding to @loc_key + fc_ptr = self.offset_to_jitted_func[loc_key] + + # Execute the function + return fc_ptr(cpu) diff --git a/miasm/jitter/jitload.py b/miasm/jitter/jitload.py new file mode 100644 index 00000000..292cf498 --- /dev/null +++ b/miasm/jitter/jitload.py @@ -0,0 +1,547 @@ +import logging +import warnings +from functools import wraps +from collections import Sequence, namedtuple + +from future.utils import viewitems + +from miasm.jitter.csts import * +from miasm.core.utils import * +from 
miasm.core.bin_stream import bin_stream_vm +from miasm.jitter.emulatedsymbexec import EmulatedSymbExec +from miasm.jitter.codegen import CGen +from miasm.jitter.jitcore_cc_base import JitCore_Cc_Base + +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log = logging.getLogger('jitload.py') +log.addHandler(hnd) +log.setLevel(logging.CRITICAL) +log_func = logging.getLogger('jit function call') +log_func.addHandler(hnd) +log_func.setLevel(logging.CRITICAL) + +try: + from miasm.jitter import VmMngr +except ImportError: + log.error('cannot import VmMngr') + + +def named_arguments(func): + """Function decorator to allow the use of .func_args_*() methods + with either the number of arguments or the list of the argument + names. + + The wrapper is also used to log the argument values. + + @func: function + + """ + @wraps(func) + def newfunc(self, args): + if isinstance(args, Sequence): + ret_ad, arg_vals = func(self, len(args)) + arg_vals = namedtuple("args", args)(*arg_vals) + # func_name(arguments) return address + log_func.info( + '%s(%s) ret addr: %s', + get_caller_name(1), + ', '.join( + "%s=0x%x" % (field, value) + for field, value in viewitems(arg_vals._asdict()) + ), + hex(ret_ad) + ) + return ret_ad, namedtuple("args", args)(*arg_vals) + else: + ret_ad, arg_vals = func(self, args) + # func_name(arguments) return address + log_func.info('%s(%s) ret addr: %s', + get_caller_name(1), + ', '.join(hex(arg) for arg in arg_vals), + hex(ret_ad)) + return ret_ad, arg_vals + return newfunc + + +class CallbackHandler(object): + + "Handle a list of callback" + + def __init__(self): + self.callbacks = {} # Key -> [callback list] + + def add_callback(self, key, callback): + """Add a callback to the key @key, iff the @callback isn't already + assigned to it""" + if callback not in self.callbacks.get(key, []): + self.callbacks[key] = self.callbacks.get(key, []) + [callback] + + def set_callback(self, key, *args): + "Set the list of 
callback for key 'key'" + self.callbacks[key] = list(args) + + def get_callbacks(self, key): + "Return the list of callbacks associated to key 'key'" + return self.callbacks.get(key, []) + + def remove_callback(self, callback): + """Remove the callback from the list. + Return the list of empty keys (removed)""" + + to_check = set() + for key, cb_list in viewitems(self.callbacks): + try: + cb_list.remove(callback) + to_check.add(key) + except ValueError: + pass + + empty_keys = [] + for key in to_check: + if len(self.callbacks[key]) == 0: + empty_keys.append(key) + del(self.callbacks[key]) + + return empty_keys + + def has_callbacks(self, key): + return key in self.callbacks + + def remove_key(self, key): + """Remove and return all callbacks associated to @key""" + callbacks = self.callbacks.get(key, []) + del self.callbacks[key] + return callbacks + + def call_callbacks(self, key, *args): + """Call callbacks associated to key 'key' with arguments args. While + callbacks return True, continue with next callback. + Iterator on other results.""" + + res = True + + for c in self.get_callbacks(key): + res = c(*args) + if res is not True: + yield res + + def __call__(self, key, *args): + "Wrapper for call_callbacks" + return self.call_callbacks(key, *args) + + +class CallbackHandlerBitflag(CallbackHandler): + + "Handle a list of callback with conditions on bitflag" + + def call_callbacks(self, bitflag, *args): + """Call each callbacks associated with bit set in bitflag. While + callbacks return True, continue with next callback. 
+ Iterator on other results""" + + for bitflag_expected in self.callbacks: + if bitflag_expected & bitflag == bitflag_expected: + # If the flag matched + for res in super(CallbackHandlerBitflag, + self).call_callbacks(bitflag_expected, *args): + if res is not True: + yield res + + +class ExceptionHandle(object): + + "Return type for exception handler" + + def __init__(self, except_flag): + self.except_flag = except_flag + + @classmethod + def memoryBreakpoint(cls): + return cls(EXCEPT_BREAKPOINT_MEMORY) + + def __eq__(self, to_cmp): + if not isinstance(to_cmp, ExceptionHandle): + return False + return (self.except_flag == to_cmp.except_flag) + + def __ne__(self, to_cmp): + return not self.__eq__(to_cmp) + + +class Jitter(object): + + "Main class for JIT handling" + + C_Gen = CGen + + def __init__(self, ir_arch, jit_type="gcc"): + """Init an instance of jitter. + @ir_arch: ir instance for this architecture + @jit_type: JiT backend to use. Available options are: + - "gcc" + - "llvm" + - "python" + """ + + self.arch = ir_arch.arch + self.attrib = ir_arch.attrib + arch_name = ir_arch.arch.name # (ir_arch.arch.name, ir_arch.attrib) + + try: + if arch_name == "x86": + from miasm.jitter.arch import JitCore_x86 as jcore + elif arch_name == "arm": + from miasm.jitter.arch import JitCore_arm as jcore + elif arch_name == "armt": + from miasm.jitter.arch import JitCore_arm as jcore + ir_arch.arch.name = 'arm' + elif arch_name == "aarch64": + from miasm.jitter.arch import JitCore_aarch64 as jcore + elif arch_name == "msp430": + from miasm.jitter.arch import JitCore_msp430 as jcore + elif arch_name == "mips32": + from miasm.jitter.arch import JitCore_mips32 as jcore + elif arch_name == "ppc32": + from miasm.jitter.arch import JitCore_ppc32 as jcore + elif arch_name == "mep": + from miasm.jitter.arch import JitCore_mep as jcore + else: + raise ValueError("unknown jit arch: %s" % arch_name) + except ImportError: + raise RuntimeError('Unsupported jit arch: %s' % arch_name) + + 
self.vm = VmMngr.Vm() + self.cpu = jcore.JitCpu() + self.ir_arch = ir_arch + self.bs = bin_stream_vm(self.vm) + self.ircfg = self.ir_arch.new_ircfg() + + self.symbexec = EmulatedSymbExec( + self.cpu, self.vm, self.ir_arch, {} + ) + self.symbexec.reset_regs() + + try: + if jit_type == "llvm": + from miasm.jitter.jitcore_llvm import JitCore_LLVM as JitCore + elif jit_type == "python": + from miasm.jitter.jitcore_python import JitCore_Python as JitCore + elif jit_type == "gcc": + from miasm.jitter.jitcore_gcc import JitCore_Gcc as JitCore + else: + raise ValueError("Unknown jitter %s" % jit_type) + except ImportError: + raise RuntimeError('Unsupported jitter: %s' % jit_type) + + self.jit = JitCore(self.ir_arch, self.bs) + if isinstance(self.jit, JitCore_Cc_Base): + self.jit.init_codegen(self.C_Gen(self.ir_arch)) + elif jit_type == "python": + self.jit.set_cpu_vm(self.cpu, self.vm) + + self.cpu.init_regs() + self.vm.init_memory_page_pool() + self.vm.init_code_bloc_pool() + self.vm.init_memory_breakpoint() + + self.jit.load() + self.cpu.vmmngr = self.vm + self.cpu.jitter = self.jit + self.stack_size = 0x10000 + self.stack_base = 0x1230000 + + # Init callback handler + self.breakpoints_handler = CallbackHandler() + self.exceptions_handler = CallbackHandlerBitflag() + self.init_exceptions_handler() + self.exec_cb = None + + def init_exceptions_handler(self): + "Add common exceptions handlers" + + def exception_automod(jitter): + "Tell the JiT backend to update blocks modified" + + self.jit.updt_automod_code(jitter.vm) + self.vm.set_exception(0) + + return True + + def exception_memory_breakpoint(jitter): + "Stop the execution and return an identifier" + return ExceptionHandle.memoryBreakpoint() + + self.add_exception_handler(EXCEPT_CODE_AUTOMOD, exception_automod) + self.add_exception_handler(EXCEPT_BREAKPOINT_MEMORY, + exception_memory_breakpoint) + + def add_breakpoint(self, addr, callback): + """Add a callback associated with addr. 
+ @addr: breakpoint address + @callback: function with definition (jitter instance) + """ + self.breakpoints_handler.add_callback(addr, callback) + self.jit.add_disassembly_splits(addr) + # De-jit previously jitted blocks + self.jit.updt_automod_code_range(self.vm, [(addr, addr)]) + + def set_breakpoint(self, addr, *args): + """Set callbacks associated with addr. + @addr: breakpoint address + @args: functions with definition (jitter instance) + """ + self.breakpoints_handler.set_callback(addr, *args) + self.jit.add_disassembly_splits(addr) + + def get_breakpoint(self, addr): + """ + Return breakpoints handlers for address @addr + @addr: integer + """ + return self.breakpoints_handler.get_callbacks(addr) + + def remove_breakpoints_by_callback(self, callback): + """Remove callbacks associated with breakpoint. + @callback: callback to remove + """ + empty_keys = self.breakpoints_handler.remove_callback(callback) + for key in empty_keys: + self.jit.remove_disassembly_splits(key) + + def remove_breakpoints_by_address(self, address): + """Remove all breakpoints associated with @address. + @address: address of breakpoints to remove + """ + callbacks = self.breakpoints_handler.remove_key(address) + if callbacks: + self.jit.remove_disassembly_splits(address) + + def add_exception_handler(self, flag, callback): + """Add a callback associated with an exception flag. + @flag: bitflag + @callback: function with definition (jitter instance) + """ + self.exceptions_handler.add_callback(flag, callback) + + def run_at(self, pc): + """Wrapper on JiT backend. Run the code at PC and return the next PC. + @pc: address of code to run""" + + return self.jit.run_at( + self.cpu, pc, + set(self.breakpoints_handler.callbacks) + ) + + def runiter_once(self, pc): + """Iterator on callbacks results on code running from PC. 
+ Check exceptions before breakpoints.""" + + self.pc = pc + # Callback called before exec + if self.exec_cb is not None: + res = self.exec_cb(self) + if res is not True: + yield res + + # Check breakpoints + old_pc = self.pc + for res in self.breakpoints_handler.call_callbacks(self.pc, self): + if res is not True: + if isinstance(res, collections.Iterator): + # If the breakpoint is a generator, yield it step by step + for tmp in res: + yield tmp + else: + yield res + + # Check exceptions (raised by breakpoints) + exception_flag = self.get_exception() + for res in self.exceptions_handler(exception_flag, self): + if res is not True: + if isinstance(res, collections.Iterator): + for tmp in res: + yield tmp + else: + yield res + + # If a callback changed pc, re call every callback + if old_pc != self.pc: + return + + # Exceptions should never be activated before run + assert(self.get_exception() == 0) + + # Run the bloc at PC + self.pc = self.run_at(self.pc) + + # Check exceptions (raised by the execution of the block) + exception_flag = self.get_exception() + for res in self.exceptions_handler(exception_flag, self): + if res is not True: + if isinstance(res, collections.Iterator): + for tmp in res: + yield tmp + else: + yield res + + def init_run(self, pc): + """Create an iterator on pc with runiter. + @pc: address of code to run + """ + self.run_iterator = self.runiter_once(pc) + self.pc = pc + self.run = True + + def continue_run(self, step=False): + """PRE: init_run. + Continue the run of the current session until iterator returns or run is + set to False. + If step is True, run only one time. 
+ Return the iterator value""" + + while self.run: + try: + return next(self.run_iterator) + except StopIteration: + pass + + self.run_iterator = self.runiter_once(self.pc) + + if step is True: + return None + + return None + + def init_stack(self): + self.vm.add_memory_page( + self.stack_base, + PAGE_READ | PAGE_WRITE, + b"\x00" * self.stack_size, + "Stack") + sp = self.arch.getsp(self.attrib) + setattr(self.cpu, sp.name, self.stack_base + self.stack_size) + # regs = self.cpu.get_gpreg() + # regs[sp.name] = self.stack_base+self.stack_size + # self.cpu.set_gpreg(regs) + + def get_exception(self): + return self.cpu.get_exception() | self.vm.get_exception() + + # commun functions + def get_str_ansi(self, addr, max_char=None): + """Get ansi str from vm. + @addr: address in memory + @max_char: maximum len""" + l = 0 + tmp = addr + while ((max_char is None or l < max_char) and + self.vm.get_mem(tmp, 1) != b"\x00"): + tmp += 1 + l += 1 + return self.vm.get_mem(addr, l) + + def get_str_unic(self, addr, max_char=None): + """Get unicode str from vm. + @addr: address in memory + @max_char: maximum len""" + l = 0 + tmp = addr + while ((max_char is None or l < max_char) and + self.vm.get_mem(tmp, 2) != b"\x00\x00"): + tmp += 2 + l += 2 + s = self.vm.get_mem(addr, l) + s = s.decode("utf-16le") + return s + + def set_str_ansi(self, addr, s): + """Set an ansi string in memory""" + s = s + b"\x00" + self.vm.set_mem(addr, s) + + def set_str_unic(self, addr, s): + """Set an unicode string in memory""" + s = b"\x00".join(list(s)) + b'\x00' * 3 + self.vm.set_mem(addr, s) + + @staticmethod + def handle_lib(jitter): + """Resolve the name of the function which cause the handler call. Then + call the corresponding handler from users callback. 
+ """ + fname = jitter.libs.fad2cname[jitter.pc] + if fname in jitter.user_globals: + func = jitter.user_globals[fname] + else: + log.debug('%r', fname) + raise ValueError('unknown api', hex(jitter.pc), repr(fname)) + ret = func(jitter) + jitter.pc = getattr(jitter.cpu, jitter.ir_arch.pc.name) + + # Don't break on a None return + if ret is None: + return True + else: + return ret + + def handle_function(self, f_addr): + """Add a breakpoint which will trigger the function handler""" + self.add_breakpoint(f_addr, self.handle_lib) + + def add_lib_handler(self, libs, user_globals=None): + """Add a function to handle libs call with breakpoints + @libs: libimp instance + @user_globals: dictionary for defined user function + """ + if user_globals is None: + user_globals = {} + + self.libs = libs + out = {} + for name, func in viewitems(user_globals): + name = force_bytes(name) + out[name] = func + self.user_globals = out + + for f_addr in libs.fad2cname: + self.handle_function(f_addr) + + def eval_expr(self, expr): + """Eval expression @expr in the context of the current instance. 
Side + effects are passed on it""" + self.symbexec.update_engine_from_cpu() + ret = self.symbexec.eval_updt_expr(expr) + self.symbexec.update_cpu_from_engine() + + return ret + + def set_trace_log(self, + trace_instr=True, trace_regs=True, + trace_new_blocks=False): + """ + Activate/Deactivate trace log options + + @trace_instr: activate instructions tracing log + @trace_regs: activate registers tracing log + @trace_new_blocks: dump new code blocks log + """ + + # As trace state changes, clear already jitted blocks + self.jit.clear_jitted_blocks() + + self.jit.log_mn = trace_instr + self.jit.log_regs = trace_regs + self.jit.log_newbloc = trace_new_blocks + + +class jitter(Jitter): + """ + DEPRECATED object + Use Jitter instead of jitter + """ + + + def __init__(self, *args, **kwargs): + warnings.warn("Deprecated API: use Jitter") + super(jitter, self).__init__(*args, **kwargs) diff --git a/miasm/jitter/llvmconvert.py b/miasm/jitter/llvmconvert.py new file mode 100644 index 00000000..d0e0407b --- /dev/null +++ b/miasm/jitter/llvmconvert.py @@ -0,0 +1,1926 @@ +# +# +# Miasm2 Extension: # +# - Miasm2 IR to LLVM IR # +# - JiT # +# +# Requires: # +# - llvmlite (tested on v0.15) # +# +# Authors : Fabrice DESCLAUX (CEA/DAM), Camille MOUGEY (CEA/DAM) # +# +# + +from builtins import zip +from builtins import range +import os +from llvmlite import binding as llvm +from llvmlite import ir as llvm_ir +from builtins import int as int_types + +from future.utils import viewitems, viewvalues + +from miasm.expression.expression import ExprId, ExprInt, ExprMem, ExprSlice, \ + ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey, Expr, \ + TOK_EQUAL, \ + TOK_INF_SIGNED, TOK_INF_UNSIGNED, \ + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED + +import miasm.jitter.csts as m2_csts +import miasm.core.asmblock as m2_asmblock +from miasm.jitter.codegen import CGen, Attributes +from miasm.expression.expression_helper import possible_values + + +class LLVMType(llvm_ir.Type): + + "Handle LLVM Type" + + 
int_cache = {} + + @classmethod + def IntType(cls, size=32): + try: + return cls.int_cache[size] + except KeyError: + cls.int_cache[size] = llvm_ir.IntType(size) + return cls.int_cache[size] + + @classmethod + def pointer(cls, addr): + "Generic pointer for execution" + return llvm_e.GenericValue.pointer(addr) + + @classmethod + def generic(cls, e): + "Generic value for execution" + if isinstance(e, ExprInt): + return llvm_e.GenericValue.int(LLVMType.IntType(e.size), int(e.arg)) + elif isinstance(e, llvm_e.GenericValue): + return e + else: + raise ValueError() + + @classmethod + def fptype(cls, size): + """Return the floating type corresponding to precision @size""" + if size == 32: + precision = llvm_ir.FloatType() + elif size == 64: + precision = llvm_ir.DoubleType() + else: + raise RuntimeError("Unsupported precision: %x", size) + return precision + + +class LLVMContext(object): + + "Context for llvm binding. Stand for a LLVM Module" + + known_fc = {} + + def __init__(self, name="mod"): + "Initialize a context with a module named 'name'" + # Initialize llvm + llvm.initialize() + llvm.initialize_native_target() + llvm.initialize_native_asmprinter() + + # Initialize target for compilation + target = llvm.Target.from_default_triple() + self.target_machine = target.create_target_machine() + self.init_exec_engine() + + def canonize_label_name(self, label): + """Canonize @label names to a common form. 
+ @label: str or asmlabel instance""" + if isinstance(label, str): + return label + elif isinstance(label, LocKey): + return str(label) + else: + raise ValueError("label must either be str or LocKey") + + def optimise_level(self, level=2): + """Set the optimisation level to @level from 0 to 2 + 0: non-optimized + 2: optimized + """ + + # Set up the optimiser pipeline + pmb = llvm.create_pass_manager_builder() + pmb.opt_level = level + pm = llvm.create_module_pass_manager() + pmb.populate(pm) + self.pass_manager = pm + + def init_exec_engine(self): + mod = llvm.parse_assembly("") + engine = llvm.create_mcjit_compiler(mod, + self.target_machine) + self.exec_engine = engine + + def new_module(self, name="mod"): + """Create a module, with needed functions""" + self.mod = llvm_ir.Module(name=name) + self.add_fc(self.known_fc) + self.add_op() + + def get_execengine(self): + "Return the Execution Engine associated with this context" + return self.exec_engine + + def get_passmanager(self): + "Return the Pass Manager associated with this context" + return self.pass_manager + + def get_module(self): + "Return the module associated with this context" + return self.mod + + def add_shared_library(self, filename): + "Load the shared library 'filename'" + return llvm.load_library_permanently(filename) + + def add_fc(self, fc, readonly=False): + "Add function into known_fc" + + for name, detail in viewitems(fc): + fnty = llvm_ir.FunctionType(detail["ret"], detail["args"]) + fn = llvm_ir.Function(self.mod, fnty, name=name) + if readonly: + fn.attributes.add("readonly") + + def add_op(self): + "Add operations functions" + + i8 = LLVMType.IntType(8) + p8 = llvm_ir.PointerType(i8) + itype = LLVMType.IntType(64) + ftype = llvm_ir.FloatType() + dtype = llvm_ir.DoubleType() + fc = {"llvm.ctpop.i8": {"ret": i8, + "args": [i8]}, + "llvm.nearbyint.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.nearbyint.f64": {"ret": dtype, + "args": [dtype]}, + "llvm.trunc.f32": {"ret": ftype, + "args": 
[ftype]}, + "segm2addr": {"ret": itype, + "args": [p8, + itype, + itype]}, + "x86_cpuid": {"ret": itype, + "args": [itype, + itype]}, + "fpu_fcom_c0": {"ret": itype, + "args": [dtype, + dtype]}, + "fpu_fcom_c1": {"ret": itype, + "args": [dtype, + dtype]}, + "fpu_fcom_c2": {"ret": itype, + "args": [dtype, + dtype]}, + "fpu_fcom_c3": {"ret": itype, + "args": [dtype, + dtype]}, + "llvm.sqrt.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.sqrt.f64": {"ret": dtype, + "args": [dtype]}, + "llvm.fabs.f32": {"ret": ftype, + "args": [ftype]}, + "llvm.fabs.f64": {"ret": dtype, + "args": [dtype]}, + } + + for k in [8, 16]: + fc["bcdadd_%s" % k] = {"ret": LLVMType.IntType(k), + "args": [LLVMType.IntType(k), + LLVMType.IntType(k)]} + fc["bcdadd_cf_%s" % k] = {"ret": LLVMType.IntType(k), + "args": [LLVMType.IntType(k), + LLVMType.IntType(k)]} + self.add_fc(fc, readonly=True) + + + def memory_lookup(self, func, addr, size): + """Perform a memory lookup at @addr of size @size (in bit)""" + raise NotImplementedError("Abstract method") + + def memory_write(self, func, addr, size, value): + """Perform a memory write at @addr of size @size (in bit) with LLVM IR @value""" + raise NotImplementedError("Abstract method") + + +class LLVMContext_JIT(LLVMContext): + + """Extend LLVMContext_JIT in order to handle memory management and custom + operations""" + + def __init__(self, library_filenames, ir_arch, name="mod"): + "Init a LLVMContext object, and load the mem management shared library" + self.library_filenames = library_filenames + self.ir_arch = ir_arch + self.arch_specific() + self.load_libraries() + LLVMContext.__init__(self, name) + self.vmcpu = {} + + def load_libraries(self): + # Get LLVM specific functions + name = "libLLVM-%d.%d" % (llvm.llvm_version_info[0], + llvm.llvm_version_info[1], + ) + try: + # On Windows, no need to add ".dll" + self.add_shared_library(name) + except RuntimeError: + try: + # On Linux, ".so" is needed + self.add_shared_library("%s.so" % name) + except 
RuntimeError: + pass + + # Load additional libraries + for lib_fname in self.library_filenames: + self.add_shared_library(lib_fname) + + def new_module(self, name="mod"): + LLVMContext.new_module(self, name) + self.add_memlookups() + self.add_get_exceptionflag() + self.add_log_functions() + + def arch_specific(self): + arch = self.ir_arch.arch + if arch.name == "x86": + self.PC = arch.regs.RIP + self.logging_func = "dump_gpregs_%d" % self.ir_arch.attrib + else: + self.PC = self.ir_arch.pc + self.logging_func = "dump_gpregs" + if arch.name == "mips32": + from miasm.arch.mips32.jit import mipsCGen + self.cgen_class = mipsCGen + self.has_delayslot = True + elif arch.name == "arm": + from miasm.arch.arm.jit import arm_CGen + self.cgen_class = arm_CGen + self.has_delayslot = False + else: + self.cgen_class = CGen + self.has_delayslot = False + + def add_memlookups(self): + "Add MEM_LOOKUP functions" + + fc = {} + p8 = llvm_ir.PointerType(LLVMType.IntType(8)) + for i in [8, 16, 32, 64]: + fc["MEM_LOOKUP_%02d" % i] = {"ret": LLVMType.IntType(i), + "args": [p8, + LLVMType.IntType(64)]} + + fc["MEM_WRITE_%02d" % i] = {"ret": llvm_ir.VoidType(), + "args": [p8, + LLVMType.IntType(64), + LLVMType.IntType(i)]} + + fc["MEM_LOOKUP_INT_BN_TO_PTR"] = {"ret": llvm_ir.VoidType(), + "args": [ + p8, + LLVMType.IntType(32), + LLVMType.IntType(64), + p8 + ]} + fc["MEM_WRITE_INT_BN_FROM_PTR"] = {"ret": llvm_ir.VoidType(), + "args": [ + p8, + LLVMType.IntType(32), + LLVMType.IntType(64), + p8, + ]} + + fc["reset_memory_access"] = {"ret": llvm_ir.VoidType(), + "args": [p8, + ]} + fc["check_memory_breakpoint"] = {"ret": llvm_ir.VoidType(), + "args": [p8, + ]} + fc["check_invalid_code_blocs"] = {"ret": llvm_ir.VoidType(), + "args": [p8, + ]} + self.add_fc(fc) + + def add_get_exceptionflag(self): + "Add 'get_exception_flag' function" + p8 = llvm_ir.PointerType(LLVMType.IntType(8)) + self.add_fc({"get_exception_flag": {"ret": LLVMType.IntType(64), + "args": [p8]}}, readonly=True) + + def 
add_log_functions(self): + "Add functions for state logging" + + p8 = llvm_ir.PointerType(LLVMType.IntType(8)) + self.add_fc({self.logging_func: {"ret": llvm_ir.VoidType(), + "args": [p8]}}, + readonly=True) + + def set_vmcpu(self, lookup_table): + "Set the correspondence between register name and vmcpu offset" + + self.vmcpu = lookup_table + + def memory_lookup(self, func, addr, size): + """Perform a memory lookup at @addr of size @size (in bit)""" + builder = func.builder + if size <= 64: + fc_name = "MEM_LOOKUP_%02d" % size + fc_ptr = self.mod.get_global(fc_name) + addr_casted = builder.zext(addr, LLVMType.IntType(64)) + ret = builder.call( + fc_ptr, [func.local_vars["jitcpu"],addr_casted] + ) + else: + # Miasm uses a memory lookup function which returns a bn_t for its + # result. We cannot simply translate this into IntType. The trick + # here is to use the function MEM_LOOKUP_INT_BN_TO_PTR which has a + # different interface: the resulting bn_t is passed through a char* + # argument. + # + # WARNING: Here, we use the fact that the serialisation of LLVM + # IntType is the *same* as the bn_t structure. 
+ + fc_name = "MEM_LOOKUP_INT_BN_TO_PTR" + fc_ptr = self.mod.get_global(fc_name) + addr_casted = builder.zext(addr, LLVMType.IntType(64)) + size_cst = llvm_ir.Constant(LLVMType.IntType(32), size) + + value_ptr = builder.alloca(llvm_ir.IntType(size)) + value_ptr_u8 = builder.bitcast( + value_ptr, + LLVMType.IntType(8).as_pointer() + ) + + + builder.call( + fc_ptr, + [ + func.local_vars["jitcpu"], + size_cst, + addr_casted, + value_ptr_u8 + ] + ) + ret = builder.load(value_ptr) + + return ret + + def memory_write(self, func, addr, size, value): + """Perform a memory write at @addr of size @size (in bit) with LLVM IR @value""" + # Function call + builder = func.builder + if size <= 64: + fc_name = "MEM_WRITE_%02d" % size + fc_ptr = self.mod.get_global(fc_name) + dst_casted = builder.zext(addr, LLVMType.IntType(64)) + builder.call( + fc_ptr, + [ + func.local_vars["jitcpu"], + dst_casted, + value + ] + ) + else: + # The same trick as described in MEM_LOOKUP_INT_BN_TO_PTR is used + # here. + + fc_name = "MEM_WRITE_INT_BN_FROM_PTR" + fc_ptr = self.mod.get_global(fc_name) + addr_casted = builder.zext(addr, LLVMType.IntType(64)) + size_cst = llvm_ir.Constant(LLVMType.IntType(32), size) + + ret = builder.alloca(value.type) + builder.store(value, ret) + value_ptr = builder.bitcast(ret, llvm_ir.IntType(8).as_pointer()) + + builder.call( + fc_ptr, + [ + func.local_vars["jitcpu"], + size_cst, + addr_casted, + value_ptr, + ] + ) + + + @staticmethod + def cache_notify(module, buffer): + """Called when @module has been compiled to @buffer""" + if not hasattr(module, "fname_out"): + return + fname_out = module.fname_out + + if os.access(fname_out, os.R_OK): + # No need to overwrite + return + + open(fname_out, "wb").write(buffer) + + @staticmethod + def cache_getbuffer(module): + """Return a compiled buffer for @module if available""" + if not hasattr(module, "fname_out"): + return None + + fname_out = module.fname_out + if os.access(fname_out, os.R_OK): + return open(fname_out, 
"rb").read() + return None + + def enable_cache(self): + "Enable cache of compiled object" + # Load shared libraries + for lib_fname in self.library_filenames: + self.add_shared_library(lib_fname) + + # Activate cache + self.exec_engine.set_object_cache( + self.cache_notify, + self.cache_getbuffer + ) + + def set_cache_filename(self, func, fname_out): + "Set the filename @fname_out to use for cache for @func" + # Use a custom attribute to propagate the cache filename + func.as_llvm_mod().fname_out = fname_out + + def get_ptr_from_cache(self, file_name, func_name): + "Load @file_name and return a pointer on the jitter @func_name" + # We use an empty module to avoid losing time on function building + empty_module = llvm.parse_assembly("") + empty_module.fname_out = file_name + + engine = self.exec_engine + engine.add_module(empty_module) + engine.finalize_object() + return engine.get_function_address(func_name) + + +class LLVMContext_IRCompilation(LLVMContext): + + """Extend LLVMContext in order to handle memory management and custom + operations for Miasm IR compilation""" + + def memory_lookup(self, func, addr, size): + """Perform a memory lookup at @addr of size @size (in bit)""" + builder = func.builder + int_size = LLVMType.IntType(size) + ptr_casted = builder.inttoptr( + addr, + llvm_ir.PointerType(int_size) + ) + return builder.load(ptr_casted) + + def memory_write(self, func, addr, size, value): + """Perform a memory write at @addr of size @size (in bit) with LLVM IR @value""" + builder = func.builder + int_size = LLVMType.IntType(size) + ptr_casted = builder.inttoptr( + addr, + llvm_ir.PointerType(int_size) + ) + return builder.store(value, ptr_casted) + + +class LLVMFunction(object): + """Represent a LLVM function + + Implementation note: + A new module is created each time to avoid cumulative lag (if @new_module) + """ + + # Default logging values + log_mn = False + log_regs = True + + # Operation translation + ## Basics + op_translate = {'x86_cpuid': 
'x86_cpuid', + } + ## Add the size as first argument + op_translate_with_size = {} + ## Add the size as suffix + op_translate_with_suffix_size = { + 'bcdadd': 'bcdadd', + 'bcdadd_cf': 'bcdadd_cf', + } + + def __init__(self, llvm_context, name="fc", new_module=True): + "Create a new function with name @name" + self.llvm_context = llvm_context + if new_module: + self.llvm_context.new_module() + self.mod = self.llvm_context.get_module() + + self.my_args = [] # (Expr, LLVMType, Name) + self.ret_type = None + self.builder = None + self.entry_bbl = None + + self.branch_counter = 0 + self.name = name + self._llvm_mod = None + + # Constructor utils + + def new_branch_name(self): + "Return a new branch name" + self.branch_counter += 1 + return str(self.branch_counter) + + def append_basic_block(self, label, overwrite=True): + """Add a new basic block to the current function. + @label: str or asmlabel + @overwrite: if False, do nothing if a bbl with the same name already exists + Return the corresponding LLVM Basic Block""" + name = self.llvm_context.canonize_label_name(label) + bbl = self.name2bbl.get(name, None) + if not overwrite and bbl is not None: + return bbl + bbl = self.fc.append_basic_block(name) + self.name2bbl[name] = bbl + + return bbl + + def CreateEntryBlockAlloca(self, var_type, default_value=None): + """Create an alloca instruction at the beginning of the current fc + @default_value: if set, store the default_value just after the allocation + """ + builder = self.builder + current_bbl = builder.basic_block + builder.position_at_start(self.entry_bbl) + + ret = builder.alloca(var_type) + if default_value is not None: + builder.store(default_value, ret) + builder.position_at_end(current_bbl) + return ret + + def get_ptr_by_expr(self, expr): + """"Return a pointer casted corresponding to ExprId expr. 
If it is not + already computed, compute it at the end of entry_bloc""" + + name = expr.name + + ptr_casted = self.local_vars_pointers.get(name, None) + if ptr_casted is not None: + # If the pointer has already been computed + return ptr_casted + + # Get current objects + builder = self.builder + current_bbl = builder.basic_block + + # Go at the right position + entry_bloc_bbl = self.entry_bbl + builder.position_at_end(entry_bloc_bbl) + + # Compute the pointer address + offset = self.llvm_context.vmcpu[name] + + # Pointer cast + ptr = builder.gep( + self.local_vars["vmcpu"], + [ + llvm_ir.Constant( + LLVMType.IntType(), + offset + ) + ] + ) + pointee_type = LLVMType.IntType(expr.size) + ptr_casted = builder.bitcast( + ptr, + llvm_ir.PointerType(pointee_type) + ) + # Store in cache + self.local_vars_pointers[name] = ptr_casted + + # Reset builder + builder.position_at_end(current_bbl) + + return ptr_casted + + def update_cache(self, name, value): + "Add 'name' = 'value' to the cache iff main_stream = True" + + if self.main_stream is True: + self.expr_cache[name] = value + + def set_ret(self, var): + "Cast @var and return it at the end of current bbl" + if var.type.width < 64: + var_casted = self.builder.zext(var, LLVMType.IntType(64)) + else: + var_casted = var + self.builder.ret(var_casted) + + def get_basic_block_by_loc_key(self, loc_key): + "Return the bbl corresponding to label, None otherwise" + return self.name2bbl.get( + self.llvm_context.canonize_label_name(loc_key), + None + ) + + def global_constant(self, name, value): + """ + Inspired from numba/cgutils.py + + Get or create a (LLVM module-)global constant with *name* or *value*. + """ + if name in self.mod.globals: + return self.mod.globals[name] + data = llvm_ir.GlobalVariable(self.mod, value.type, name=name) + data.global_constant = True + data.initializer = value + return data + + def make_bytearray(self, buf): + """ + Inspired from numba/cgutils.py + + Make a byte array constant from *buf*. 
+ """ + b = bytearray(buf) + n = len(b) + return llvm_ir.Constant(llvm_ir.ArrayType(llvm_ir.IntType(8), n), b) + + def printf(self, format, *args): + """ + Inspired from numba/cgutils.py + + Calls printf(). + Argument `format` is expected to be a Python string. + Values to be printed are listed in `args`. + + Note: There is no checking to ensure there is correct number of values + in `args` and there type matches the declaration in the format string. + """ + assert isinstance(format, str) + mod = self.mod + # Make global constant for format string + cstring = llvm_ir.IntType(8).as_pointer() + fmt_bytes = self.make_bytearray((format + '\00').encode('ascii')) + + base_name = "printf_format" + count = 0 + while "%s_%d" % (base_name, count) in self.mod.globals: + count += 1 + global_fmt = self.global_constant( + "%s_%d" % (base_name, count), + fmt_bytes + ) + fnty = llvm_ir.FunctionType( + llvm_ir.IntType(32), + [cstring], + var_arg=True + ) + # Insert printf() + fn = mod.globals.get('printf', None) + if fn is None: + fn = llvm_ir.Function(mod, fnty, name="printf") + # Call + ptr_fmt = self.builder.bitcast(global_fmt, cstring) + return self.builder.call(fn, [ptr_fmt] + list(args)) + + # Effective constructors + + def assign(self, src, dst): + "Assign from LLVM src to M2 dst" + + # Destination + builder = self.builder + + if isinstance(dst, ExprId): + ptr_casted = self.get_ptr_by_expr(dst) + builder.store(src, ptr_casted) + + elif isinstance(dst, ExprMem): + addr = self.add_ir(dst.ptr) + self.llvm_context.memory_write(self, addr, dst.size, src) + else: + raise Exception("UnknownAssignmentType") + + def init_fc(self): + "Init the function" + + # Build type for fc signature + fc_type = llvm_ir.FunctionType( + self.ret_type, + [k[1] for k in self.my_args] + ) + + # Add fc in module + try: + fc = llvm_ir.Function(self.mod, fc_type, name=self.name) + except llvm.LLVMException: + # Overwrite the previous function + previous_fc = self.mod.get_global(self.name) + 
previous_fc.delete() + fc = self.mod.add_function(fc_type, self.name) + + # Name args + for i, a in enumerate(self.my_args): + fc.args[i].name = a[2] + + # Initialize local variable pool + self.local_vars = {} + self.local_vars_pointers = {} + for i, a in enumerate(self.my_args): + self.local_vars[a[2]] = fc.args[i] + + # Init cache + self.expr_cache = {} + self.main_stream = True + self.name2bbl = {} + + # Function link + self.fc = fc + + # Add a first BasicBlock + self.entry_bbl = self.append_basic_block("entry") + + # Instruction builder + self.builder = llvm_ir.IRBuilder(self.entry_bbl) + + def add_ir(self, expr): + "Add a Miasm2 IR to the last bbl. Return the var created" + + if self.main_stream is True and expr in self.expr_cache: + return self.expr_cache[expr] + + builder = self.builder + + if isinstance(expr, ExprInt): + ret = llvm_ir.Constant(LLVMType.IntType(expr.size), int(expr.arg)) + self.update_cache(expr, ret) + return ret + + if expr.is_loc(): + offset = self.llvm_context.ir_arch.loc_db.get_location_offset( + expr.loc_key + ) + ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) + self.update_cache(expr, ret) + return ret + + if isinstance(expr, ExprId): + name = expr.name + try: + # If expr.name is already known (args) + return self.local_vars[name] + except KeyError: + pass + + ptr_casted = self.get_ptr_by_expr(expr) + + var = builder.load(ptr_casted, name) + self.update_cache(expr, var) + return var + + if isinstance(expr, ExprOp): + op = expr.op + + if (op in self.op_translate or + op in self.op_translate_with_size or + op in self.op_translate_with_suffix_size): + args = [self.add_ir(arg) for arg in expr.args] + arg_size = expr.args[0].size + + if op in self.op_translate_with_size: + fc_name = self.op_translate_with_size[op] + arg_size_cst = llvm_ir.Constant(LLVMType.IntType(64), + arg_size) + args = [arg_size_cst] + args + elif op in self.op_translate: + fc_name = self.op_translate[op] + elif op in self.op_translate_with_suffix_size: + 
fc_name = "%s_%s" % (self.op_translate[op], arg_size) + + fc_ptr = self.mod.get_global(fc_name) + + # Cast args if needed + casted_args = [] + for i, arg in enumerate(args): + if arg.type.width < fc_ptr.args[i].type.width: + casted_args.append( + builder.zext( + arg, + fc_ptr.args[i].type + ) + ) + else: + casted_args.append(arg) + ret = builder.call(fc_ptr, casted_args) + + # Cast ret if needed + ret_size = fc_ptr.return_value.type.width + if ret_size > expr.size: + ret = builder.trunc(ret, LLVMType.IntType(expr.size)) + + self.update_cache(expr, ret) + return ret + + if op == "-": + # Unsupported op '-' with more than 1 arg + assert len(expr.args) == 1 + zero = LLVMType.IntType(expr.size)(0) + ret = builder.sub(zero, self.add_ir(expr.args[0])) + self.update_cache(expr, ret) + return ret + + if op == "parity": + assert len(expr.args) == 1 + arg = self.add_ir(expr.args[0]) + truncated = builder.trunc(arg, LLVMType.IntType(8)) + bitcount = builder.call( + self.mod.get_global("llvm.ctpop.i8"), + [truncated] + ) + ret = builder.not_(builder.trunc(bitcount, LLVMType.IntType(1))) + self.update_cache(expr, ret) + return ret + + if op in ["cntleadzeros", "cnttrailzeros"]: + assert len(expr.args) == 1 + arg = self.add_ir(expr.args[0]) + func_name = { + "cntleadzeros": "ctlz", + "cnttrailzeros": "cttz", + }[op] + func_llvm_name = "llvm.%s.i%d" % (func_name, expr.size) + func_sig = { + func_llvm_name: { + "ret": LLVMType.IntType(expr.size), + "args": [LLVMType.IntType(expr.args[0].size)] + } + } + try: + self.mod.get_global(func_llvm_name) + except KeyError: + self.llvm_context.add_fc(func_sig, readonly=True) + ret = builder.call( + self.mod.get_global(func_llvm_name), + [arg] + ) + self.update_cache(expr, ret) + return ret + + + if op.startswith('zeroExt_'): + arg = expr.args[0] + if expr.size == arg.size: + return arg + new_expr = ExprCompose(arg, ExprInt(0, expr.size - arg.size)) + return self.add_ir(new_expr) + + if op.startswith("signExt_"): + arg = expr.args[0] + 
add_size = expr.size - arg.size + new_expr = ExprCompose( + arg, + ExprCond( + arg.msb(), + ExprInt(size2mask(add_size), add_size), + ExprInt(0, add_size) + ) + ) + return self.add_ir(new_expr) + + + if op == "segm": + fc_ptr = self.mod.get_global("segm2addr") + + # Cast args if needed + args = [self.add_ir(arg) for arg in expr.args] + casted_args = [] + for i, arg in enumerate(args, 1): + if arg.type.width < fc_ptr.args[i].type.width: + casted_args.append( + builder.zext( + arg, + fc_ptr.args[i].type + ) + ) + else: + casted_args.append(arg) + + ret = builder.call( + fc_ptr, + [self.local_vars["jitcpu"]] + casted_args + ) + if ret.type.width > expr.size: + ret = builder.trunc(ret, LLVMType.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["smod", "sdiv", "umod", "udiv"]: + assert len(expr.args) == 2 + + arg_b = self.add_ir(expr.args[1]) + arg_a = self.add_ir(expr.args[0]) + + if op == "smod": + callback = builder.srem + elif op == "sdiv": + callback = builder.sdiv + elif op == "umod": + callback = builder.urem + elif op == "udiv": + callback = builder.udiv + + ret = callback(arg_a, arg_b) + self.update_cache(expr, ret) + return ret + + unsigned_cmps = { + "==": "==", + ">", "<<", "a>>"]: + assert len(expr.args) == 2 + # Undefined behavior must be enforced to 0 + count = self.add_ir(expr.args[1]) + value = self.add_ir(expr.args[0]) + itype = LLVMType.IntType(expr.size) + cond_ok = self.builder.icmp_unsigned( + "<", + count, + itype(expr.size) + ) + zero = itype(0) + if op == ">>": + callback = builder.lshr + elif op == "<<": + callback = builder.shl + elif op == "a>>": + callback = builder.ashr + # x a>> size is 0 or -1, depending on x sign + cond_neg = self.builder.icmp_signed("<", value, zero) + zero = self.builder.select(cond_neg, itype(-1), zero) + + ret = self.builder.select( + cond_ok, + callback(value, count), + zero + ) + self.update_cache(expr, ret) + return ret + + + if op in ['<<<', '>>>']: + assert len(expr.args) == 2 + # 
First compute rotation modulus size + count = self.add_ir(expr.args[1]) + value = self.add_ir(expr.args[0]) + itype = LLVMType.IntType(expr.size) + expr_size = itype(expr.size) + + # As shift of expr_size is undefined, we urem the shifters + shift = builder.urem(count, expr_size) + shift_inv = builder.urem( + builder.sub(expr_size, shift), + expr_size + ) + + if op == '<<<': + part_a = builder.shl(value, shift) + part_b = builder.lshr(value, shift_inv) + else: + part_a = builder.lshr(value, shift) + part_b = builder.shl(value, shift_inv) + ret = builder.or_(part_a, part_b) + self.update_cache(expr, ret) + return ret + + if op == "sint_to_fp": + fptype = LLVMType.fptype(expr.size) + arg = self.add_ir(expr.args[0]) + ret = builder.sitofp(arg, fptype) + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op.startswith("fp_to_sint"): + size_arg = expr.args[0].size + fptype_orig = LLVMType.fptype(size_arg) + arg = self.add_ir(expr.args[0]) + arg = builder.bitcast(arg, fptype_orig) + # Enforce IEEE-754 behavior. 
This could be enhanced with + # 'llvm.experimental.constrained.nearbyint' + if size_arg == 32: + func = self.mod.get_global("llvm.nearbyint.f32") + elif size_arg == 64: + func = self.mod.get_global("llvm.nearbyint.f64") + else: + raise RuntimeError("Unsupported size") + rounded = builder.call(func, [arg]) + ret = builder.fptoui(rounded, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op.startswith("fpconvert_fp"): + assert len(expr.args) == 1 + size_arg = expr.args[0].size + fptype = LLVMType.fptype(expr.size) + fptype_orig = LLVMType.fptype(size_arg) + arg = self.add_ir(expr.args[0]) + arg = builder.bitcast(arg, fptype_orig) + if expr.size > size_arg: + fc = builder.fpext + elif expr.size < size_arg: + fc = builder.fptrunc + else: + raise RuntimeError("Not supported, same size") + ret = fc(arg, fptype) + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op.startswith("fpround_"): + assert len(expr.args) == 1 + fptype = LLVMType.fptype(expr.size) + arg = self.add_ir(expr.args[0]) + arg = builder.bitcast(arg, fptype) + if op == "fpround_towardszero" and expr.size == 32: + fc = self.mod.get_global("llvm.trunc.f32") + else: + raise RuntimeError("Not supported, same size") + rounded = builder.call(fc, [arg]) + ret = builder.bitcast(rounded, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["fcom_c0", "fcom_c1", "fcom_c2", "fcom_c3"]: + arg1 = self.add_ir(expr.args[0]) + arg2 = self.add_ir(expr.args[0]) + fc_name = "fpu_%s" % op + fc_ptr = self.mod.get_global(fc_name) + casted_args = [ + builder.bitcast(arg1, llvm_ir.DoubleType()), + builder.bitcast(arg2, llvm_ir.DoubleType()), + ] + ret = builder.call(fc_ptr, casted_args) + + # Cast ret if needed + ret_size = fc_ptr.return_value.type.width + if ret_size > expr.size: + ret = builder.trunc(ret, LLVMType.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["fsqrt", "fabs"]: + 
arg = self.add_ir(expr.args[0]) + if op == "fsqrt": + op = "sqrt" + + # Apply the correct func + if expr.size == 32: + arg = builder.bitcast(arg, llvm_ir.FloatType()) + ret = builder.call( + self.mod.get_global("llvm.%s.f32" % op), + [arg] + ) + elif expr.size == 64: + arg = builder.bitcast(arg, llvm_ir.DoubleType()) + ret = builder.call( + self.mod.get_global("llvm.%s.f64" % op), + [arg] + ) + else: + raise RuntimeError("Unsupported precision: %x", expr.size) + + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in ["fadd", "fmul", "fsub", "fdiv"]: + # More than 2 args not yet supported + assert len(expr.args) == 2 + arg1 = self.add_ir(expr.args[0]) + arg2 = self.add_ir(expr.args[1]) + precision = LLVMType.fptype(expr.size) + arg1 = builder.bitcast(arg1, precision) + arg2 = builder.bitcast(arg2, precision) + if op == "fadd": + ret = builder.fadd(arg1, arg2) + elif op == "fmul": + ret = builder.fmul(arg1, arg2) + elif op == "fsub": + ret = builder.fsub(arg1, arg2) + elif op == "fdiv": + ret = builder.fdiv(arg1, arg2) + ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) + self.update_cache(expr, ret) + return ret + + if op in [ + TOK_EQUAL, + TOK_INF_SIGNED, + TOK_INF_EQUAL_SIGNED, + TOK_INF_UNSIGNED, + TOK_INF_EQUAL_UNSIGNED, + ]: + if op == TOK_EQUAL: + opname = "==" + callback = builder.icmp_unsigned + elif op == TOK_INF_SIGNED: + opname = "<" + callback = builder.icmp_signed + elif op == TOK_INF_UNSIGNED: + opname = "<" + callback = builder.icmp_unsigned + elif op == TOK_INF_EQUAL_SIGNED: + opname = "<=" + callback = builder.icmp_signed + elif op == TOK_INF_EQUAL_UNSIGNED: + opname = "<" + callback = builder.icmp_unsigned + + left = self.add_ir(expr.args[0]) + right = self.add_ir(expr.args[1]) + + ret = callback(opname, left, right) + self.update_cache(expr, ret) + + return ret + + if len(expr.args) > 1: + + if op == "*": + callback = builder.mul + elif op == "+": + callback = builder.add + elif op 
== "&": + callback = builder.and_ + elif op == "^": + callback = builder.xor + elif op == "|": + callback = builder.or_ + elif op == "%": + callback = builder.urem + elif op == "/": + callback = builder.udiv + else: + raise NotImplementedError('Unknown op: %s' % op) + + last = self.add_ir(expr.args[0]) + + for i in range(1, len(expr.args)): + last = callback(last, + self.add_ir(expr.args[i])) + + self.update_cache(expr, last) + + return last + + raise NotImplementedError() + + if isinstance(expr, ExprMem): + + addr = self.add_ir(expr.ptr) + return self.llvm_context.memory_lookup(self, addr, expr.size) + + if isinstance(expr, ExprCond): + # Compute cond + cond = self.add_ir(expr.cond) + zero_casted = LLVMType.IntType(expr.cond.size)(0) + condition_bool = builder.icmp_unsigned("!=", cond, + zero_casted) + then_value = self.add_ir(expr.src1) + else_value = self.add_ir(expr.src2) + ret = builder.select(condition_bool, then_value, else_value) + + self.update_cache(expr, ret) + return ret + + if isinstance(expr, ExprSlice): + + src = self.add_ir(expr.arg) + + # Remove trailing bits + if expr.start != 0: + to_shr = llvm_ir.Constant( + LLVMType.IntType(expr.arg.size), + expr.start + ) + shred = builder.lshr(src, to_shr) + else: + shred = src + + # Remove leading bits + to_and = llvm_ir.Constant( + LLVMType.IntType(expr.arg.size), + (1 << (expr.stop - expr.start)) - 1 + ) + anded = builder.and_(shred, + to_and) + + # Cast into e.size + ret = builder.trunc( + anded, + LLVMType.IntType(expr.size) + ) + + self.update_cache(expr, ret) + return ret + + if isinstance(expr, ExprCompose): + + args = [] + + # Build each part + for start, src in expr.iter_args(): + # src & size + src = self.add_ir(src) + src_casted = builder.zext( + src, + LLVMType.IntType(expr.size) + ) + to_and = llvm_ir.Constant( + LLVMType.IntType(expr.size), + (1 << src.type.width) - 1 + ) + anded = builder.and_(src_casted, + to_and) + + if (start != 0): + # result << start + to_shl = llvm_ir.Constant( + 
LLVMType.IntType(expr.size), + start + ) + shled = builder.shl(anded, to_shl) + final = shled + else: + # Optimisation + final = anded + + args.append(final) + + # result = part1 | part2 | ... + last = args[0] + for i in range(1, len(expr.args)): + last = builder.or_(last, args[i]) + + self.update_cache(expr, last) + return last + + raise Exception("UnkownExpression", expr.__class__.__name__) + + # JiT specifics + + def check_memory_exception(self, offset, restricted_exception=False): + """Add a check for memory errors. + @offset: offset of the current exception (int or Instruction) + If restricted_exception, check only for exception which do not + require a pc update, and do not consider automod exception""" + + # VmMngr "get_exception_flag" return's size + size = 64 + t_size = LLVMType.IntType(size) + + # Get exception flag value + # TODO: avoid costly call using a structure deref + builder = self.builder + fc_ptr = self.mod.get_global("get_exception_flag") + exceptionflag = builder.call(fc_ptr, [self.local_vars["vmmngr"]]) + + if restricted_exception is True: + flag = ~m2_csts.EXCEPT_CODE_AUTOMOD & m2_csts.EXCEPT_DO_NOT_UPDATE_PC + m2_flag = llvm_ir.Constant(t_size, flag) + exceptionflag = builder.and_(exceptionflag, m2_flag) + + # Compute cond + zero_casted = llvm_ir.Constant(t_size, 0) + condition_bool = builder.icmp_unsigned( + "!=", + exceptionflag, + zero_casted + ) + + # Create bbls + branch_id = self.new_branch_name() + then_block = self.append_basic_block('then%s' % branch_id) + merge_block = self.append_basic_block('ifcond%s' % branch_id) + + builder.cbranch(condition_bool, then_block, merge_block) + + # Deactivate object caching + current_main_stream = self.main_stream + self.main_stream = False + + # Then Bloc + builder.position_at_end(then_block) + PC = self.llvm_context.PC + if isinstance(offset, int_types): + offset = self.add_ir(ExprInt(offset, PC.size)) + self.assign(offset, PC) + self.assign(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) + 
self.set_ret(offset) + + builder.position_at_end(merge_block) + # Reactivate object caching + self.main_stream = current_main_stream + + def check_cpu_exception(self, offset, restricted_exception=False): + """Add a check for CPU errors. + @offset: offset of the current exception (int or Instruction) + If restricted_exception, check only for exception which do not + require a pc update""" + + # Get exception flag value + builder = self.builder + m2_exception_flag = self.llvm_context.ir_arch.arch.regs.exception_flags + t_size = LLVMType.IntType(m2_exception_flag.size) + exceptionflag = self.add_ir(m2_exception_flag) + + # Compute cond + if restricted_exception is True: + flag = m2_csts.EXCEPT_NUM_UPDT_EIP + condition_bool = builder.icmp_unsigned( + ">", + exceptionflag, + llvm_ir.Constant(t_size, flag) + ) + else: + zero_casted = llvm_ir.Constant(t_size, 0) + condition_bool = builder.icmp_unsigned( + "!=", + exceptionflag, + zero_casted + ) + + # Create bbls + branch_id = self.new_branch_name() + then_block = self.append_basic_block('then%s' % branch_id) + merge_block = self.append_basic_block('ifcond%s' % branch_id) + + builder.cbranch(condition_bool, then_block, merge_block) + + # Deactivate object caching + current_main_stream = self.main_stream + self.main_stream = False + + # Then Bloc + builder.position_at_end(then_block) + PC = self.llvm_context.PC + if isinstance(offset, int_types): + offset = self.add_ir(ExprInt(offset, PC.size)) + self.assign(offset, PC) + self.assign(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) + self.set_ret(offset) + + builder.position_at_end(merge_block) + # Reactivate object caching + self.main_stream = current_main_stream + + def gen_pre_code(self, instr_attrib): + if instr_attrib.log_mn: + loc_db = self.llvm_context.ir_arch.loc_db + self.printf( + "%.8X %s\n" % ( + instr_attrib.instr.offset, + instr_attrib.instr.to_string(loc_db) + ) + ) + + def gen_post_code(self, attributes, pc_value): + if attributes.log_regs: + # Update PC 
for dump_gpregs + PC = self.llvm_context.PC + t_size = LLVMType.IntType(PC.size) + dst = self.builder.zext(t_size(pc_value), t_size) + self.assign(dst, PC) + + fc_ptr = self.mod.get_global(self.llvm_context.logging_func) + self.builder.call(fc_ptr, [self.local_vars["vmcpu"]]) + + def gen_post_instr_checks(self, attrib, next_instr): + if attrib.mem_read | attrib.mem_write: + fc_ptr = self.mod.get_global("check_memory_breakpoint") + self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) + fc_ptr = self.mod.get_global("check_invalid_code_blocs") + self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) + self.check_memory_exception(next_instr, restricted_exception=False) + + if attrib.set_exception: + self.check_cpu_exception(next_instr, restricted_exception=False) + + if attrib.mem_read | attrib.mem_write: + fc_ptr = self.mod.get_global("reset_memory_access") + self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) + + def expr2cases(self, expr): + """ + Evaluate @expr and return: + - switch value -> dst + - evaluation of the switch value (if any) + """ + + to_eval = expr + dst2case = {} + case2dst = {} + for i, solution in enumerate(possible_values(expr)): + value = solution.value + index = dst2case.get(value, i) + to_eval = to_eval.replace_expr({value: ExprInt(index, value.size)}) + dst2case[value] = index + if value.is_int() or value.is_loc(): + case2dst[i] = value + else: + case2dst[i] = self.add_ir(value) + + + evaluated = self.add_ir(to_eval) + return case2dst, evaluated + + def gen_jump2dst(self, attrib, instr_offsets, dst): + """Generate the code for a jump to @dst with final check for error + + Several cases have to be considered: + - jump to an offset out of the current ASM BBL (JMP 0x11223344) + - jump to an offset inside the current ASM BBL (Go to next instruction) + - jump to an offset back in the current ASM BBL (For max_exec jit + option on self loops) + - jump to a generated IR label, which must be jitted in this same + function (REP MOVSB) + - jump to 
a computed offset (CALL @32[0x11223344]) + + """ + PC = self.llvm_context.PC + # We are no longer in the main stream, deactivate cache + self.main_stream = False + + offset = None + if isinstance(dst, ExprInt): + offset = int(dst) + loc_key = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(offset) + dst = ExprLoc(loc_key, dst.size) + + if isinstance(dst, ExprLoc): + loc_key = dst.loc_key + bbl = self.get_basic_block_by_loc_key(loc_key) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) + if bbl is not None: + # "local" jump, inside this function + if offset is None: + # Avoid checks on generated label + self.builder.branch(bbl) + return + + if (offset in instr_offsets and + offset > attrib.instr.offset): + # forward local jump (ie. next instruction) + self.gen_post_code(attrib, offset) + self.gen_post_instr_checks(attrib, offset) + self.builder.branch(bbl) + return + + # reaching this point means a backward local jump, promote it to + # extern + + # "extern" jump on a defined offset, return to the caller + dst = self.add_ir(ExprInt(offset, PC.size)) + + # "extern" jump with a computed value, return to the caller + assert isinstance(dst, (llvm_ir.Instruction, llvm_ir.Value)) + # Cast @dst, if needed + # for instance, x86_32: IRDst is 32 bits, so is @dst; PC is 64 bits + if dst.type.width != PC.size: + dst = self.builder.zext(dst, LLVMType.IntType(PC.size)) + + self.gen_post_code(attrib, offset) + self.assign(dst, PC) + self.gen_post_instr_checks(attrib, dst) + self.assign(self.add_ir(ExprInt(0, 8)), ExprId("status", 32)) + self.set_ret(dst) + + + def gen_irblock(self, instr_attrib, attributes, instr_offsets, irblock): + """ + Generate the code for an @irblock + @instr_attrib: an Attributes instance or the instruction to translate + @attributes: list of Attributes corresponding to irblock assignments + @instr_offsets: offset of all asmblock's instructions + @irblock: an irblock instance + """ + + case2dst = None + case_value = 
None + instr = instr_attrib.instr + + for index, assignblk in enumerate(irblock): + # Enable cache + self.main_stream = True + self.expr_cache = {} + + # Prefetch memory + for element in assignblk.get_r(mem_read=True): + if isinstance(element, ExprMem): + self.add_ir(element) + + # Evaluate expressions + values = {} + for dst, src in viewitems(assignblk): + if dst == self.llvm_context.ir_arch.IRDst: + case2dst, case_value = self.expr2cases(src) + else: + values[dst] = self.add_ir(src) + + # Check memory access exception + if attributes[index].mem_read: + self.check_memory_exception( + instr.offset, + restricted_exception=True + ) + + # Update the memory + for dst, src in viewitems(values): + if isinstance(dst, ExprMem): + self.assign(src, dst) + + # Check memory write exception + if attributes[index].mem_write: + self.check_memory_exception( + instr.offset, + restricted_exception=True + ) + + # Update registers values + for dst, src in viewitems(values): + if not isinstance(dst, ExprMem): + self.assign(src, dst) + + # Check post assignblk exception flags + if attributes[index].set_exception: + self.check_cpu_exception( + instr.offset, + restricted_exception=True + ) + + # Destination + assert case2dst is not None + if len(case2dst) == 1: + # Avoid switch in this common case + self.gen_jump2dst( + instr_attrib, + instr_offsets, + next(iter(viewvalues(case2dst))) + ) + else: + current_bbl = self.builder.basic_block + + # Gen the out cases + branch_id = self.new_branch_name() + case2bbl = {} + for case, dst in list(viewitems(case2dst)): + name = "switch_%s_%d" % (branch_id, case) + bbl = self.append_basic_block(name) + case2bbl[case] = bbl + self.builder.position_at_start(bbl) + self.gen_jump2dst(instr_attrib, instr_offsets, dst) + + # Jump on the correct output + self.builder.position_at_end(current_bbl) + switch = self.builder.switch(case_value, case2bbl[0]) + for i, bbl in viewitems(case2bbl): + if i == 0: + # Default case is case 0, arbitrary + continue + 
def gen_bad_block(self, asmblock):
    """
    Translate an asm_bad_block into a CPU exception

    The generated code assigns 1 to "status", stores EXCEPT_UNK_MNEMO in the
    architecture's exception_flags register and hands the offset of
    @asmblock to set_ret as a 64 bit integer.
    @asmblock: the bad block to translate (only its loc_key is read)
    """
    m2_exception_flag = self.llvm_context.ir_arch.arch.regs.exception_flags
    # Integer type matching the width of the exception flag register
    t_size = LLVMType.IntType(m2_exception_flag.size)

    # status = 1
    self.assign(
        self.add_ir(ExprInt(1, 8)),
        ExprId("status", 32)
    )
    # exception_flags = EXCEPT_UNK_MNEMO
    self.assign(
        t_size(m2_csts.EXCEPT_UNK_MNEMO),
        m2_exception_flag
    )

    # The value handed to set_ret is the offset of the bad block itself
    offset = self.llvm_context.ir_arch.loc_db.get_location_offset(
        asmblock.loc_key
    )
    self.set_ret(LLVMType.IntType(64)(offset))
def from_asmblock(self, asmblock):
    """Build the function from an asmblock (asm_block instance).
    Prototype : f(i8* jitcpu, i8* vmcpu, i8* vmmngr, i8* status)

    An AsmBlockBad instance is delegated to gen_bad_block; nothing else is
    generated for it.
    """

    # Function signature: four i8* arguments
    for arg_name in ("jitcpu", "vmcpu", "vmmngr", "status"):
        self.my_args.append(
            (
                ExprId(arg_name, 32),
                llvm_ir.PointerType(LLVMType.IntType(8)),
                arg_name,
            )
        )

    # 64 bit return value
    self.ret_type = LLVMType.IntType(64)

    # Initialise the function
    self.init_fc()
    self.local_vars_pointers["status"] = self.local_vars["status"]

    if isinstance(asmblock, m2_asmblock.AsmBlockBad):
        self.gen_bad_block(asmblock)
        return

    entry_bbl = self.entry_bbl
    builder = self.builder
    ir_arch = self.llvm_context.ir_arch

    # One basic block per instruction offset (targets of label branches)
    for line in asmblock.lines:
        self.append_basic_block(
            ir_arch.loc_db.get_or_create_offset_location(line.offset)
        )

    # TODO: merge duplicate code with CGen
    codegen = self.llvm_context.cgen_class(self.llvm_context.ir_arch)
    irblocks_list = codegen.block2assignblks(asmblock)
    instr_offsets = [line.offset for line in asmblock.lines]

    # Prepare for delayslot: local storage for the delay slot variables and
    # a basic block for the post-block label
    if self.llvm_context.has_delayslot:
        for slot_var in (codegen.delay_slot_dst, codegen.delay_slot_set):
            slot_type = LLVMType.IntType(slot_var.size)
            self.local_vars_pointers[slot_var.name] = (
                self.CreateEntryBlockAlloca(
                    slot_type,
                    default_value=slot_type(0)
                )
            )
        post_loc_key = codegen.get_block_post_label(asmblock)
        instr_offsets.append(
            ir_arch.loc_db.get_location_offset(post_loc_key)
        )
        self.append_basic_block(post_loc_key)

    # Add content
    builder.position_at_end(entry_bbl)

    for instr, irblocks in zip(asmblock.lines, irblocks_list):
        instr_attrib, irblocks_attributes = codegen.get_attributes(
            instr,
            irblocks,
            self.log_mn,
            self.log_regs
        )

        # Pre-create basic blocks
        for irblock in irblocks:
            self.append_basic_block(irblock.loc_key, overwrite=False)

        # Generate the corresponding code
        for index, irblock in enumerate(irblocks):
            fixed_irblock = ir_arch.irbloc_fix_regs_for_mode(
                irblock, ir_arch.attrib
            )

            # Move the builder to the basic block of this irblock
            target_bbl = self.get_basic_block_by_loc_key(
                fixed_irblock.loc_key
            )
            self.builder.position_at_end(target_bbl)

            # Pre-code is emitted once per instruction, on its first irblock
            if index == 0:
                self.gen_pre_code(instr_attrib)
            self.gen_irblock(
                instr_attrib,
                irblocks_attributes[index],
                instr_offsets,
                fixed_irblock
            )

    # Gen finalize (see codegen::CGen) is unreachable, except with delayslot
    self.gen_finalize(asmblock, codegen)

    # Branch entry_bbl on first label
    builder.position_at_end(entry_bbl)
    builder.branch(self.get_basic_block_by_loc_key(asmblock.loc_key))
class LLVMFunction_IRCompilation(LLVMFunction):
    """LLVMFunction made for IR export, in conjunction with
    LLVMContext_IRCompilation.

    This class offers only the basics, and decision must be made by the class
    user on how actual registers, ABI, etc. are reflected


    Example of use:
    >>> context = LLVMContext_IRCompilation()
    >>> context.ir_arch = ir
    >>>
    >>> func = LLVMFunction_IRCompilation(context, name="test")
    >>> func.ret_type = llvm_ir.VoidType()
    >>> func.init_fc()
    >>>
    >>> # Insert here function additional inits
    >>> XX = func.builder.alloca(...)
    >>> func.local_vars_pointers["EAX"] = XX
    >>> #
    >>>
    >>> func.from_ircfg(ircfg)
    """

    def init_fc(self):
        """Initialise the function, then make sure a global variable named
        after IRDst exists in the module and create the 'exit' basic block"""
        super(LLVMFunction_IRCompilation, self).init_fc()

        # Create a global IRDst if not any
        IRDst = self.llvm_context.ir_arch.IRDst
        if str(IRDst) not in self.mod.globals:
            llvm_ir.GlobalVariable(self.mod, LLVMType.IntType(IRDst.size),
                                   name=str(IRDst))

        # Create an 'exit' basic block, the final leave
        self.exit_bbl = self.append_basic_block("exit")

    def gen_jump2dst(self, _attrib, _instr_offsets, dst):
        """Generate the code for a jump to @dst

        @_attrib, @_instr_offsets: unused, kept for signature compatibility
        with the parent class
        @dst: Expr instance or already generated LLVM value

        A destination resolving to a basic block of this function becomes a
        direct branch. Any other destination is stored in the IRDst global
        and followed by a branch to the 'exit' basic block.
        """
        self.main_stream = False

        if isinstance(dst, Expr):
            if dst.is_int():
                # Same LocationDB accessor as the other offset -> location
                # lookups of this file
                loc_db = self.llvm_context.ir_arch.loc_db
                loc = loc_db.get_or_create_offset_location(int(dst))
                dst = ExprLoc(loc, dst.size)
            assert dst.is_loc()
            bbl = self.get_basic_block_by_loc_key(dst.loc_key)
            if bbl is not None:
                # "local" jump, inside this function
                self.builder.branch(bbl)
                return

            # extern jump
            dst = self.add_ir(dst)

        # Emulate indirect jump with:
        #   @IRDst = dst
        #   goto exit
        # The global is looked up under the name used by init_fc to create it
        irdst_name = str(self.llvm_context.ir_arch.IRDst)
        self.builder.store(dst, self.mod.get_global(irdst_name))
        self.builder.branch(self.exit_bbl)

    def gen_irblock(self, irblock):
        """Generate the code for @irblock, using default Attributes for the
        instruction and for each of its assignblks"""
        instr_attrib = Attributes()
        attributes = [Attributes() for _ in range(len(irblock.assignblks))]
        instr_offsets = None
        return super(LLVMFunction_IRCompilation, self).gen_irblock(
            instr_attrib, attributes, instr_offsets, irblock
        )

    def from_ircfg(self, ircfg, append_ret=True):
        """Build the function from the IRCFG @ircfg

        @ircfg: IRCFG instance; it must have exactly one head
        @append_ret: (optional) if True, terminate the 'exit' basic block
        with a `ret void`

        On return, the builder is positioned at the end of the 'exit' basic
        block.
        """
        # Create basic blocks
        for loc_key in ircfg.blocks:
            self.append_basic_block(loc_key)

        # Add IRBlocks
        for label, irblock in viewitems(ircfg.blocks):
            self.builder.position_at_end(self.get_basic_block_by_loc_key(label))
            self.gen_irblock(irblock)

        # Branch the entry BBL on the IRCFG head
        self.builder.position_at_end(self.entry_bbl)
        heads = ircfg.heads()
        assert len(heads) == 1
        starting_label = list(heads).pop()
        self.builder.branch(self.get_basic_block_by_loc_key(starting_label))

        # Returns with the builder on the exit block
        self.builder.position_at_end(self.exit_bbl)

        if append_ret:
            self.builder.ret_void()
def fill_loc_db_with_symbols(elf, loc_db, base_addr=0):
    """Parse the elfesteem's ELF @elf to extract symbols, and fill the
    LocationDB instance @loc_db with parsed symbols.

    The ELF is considered mapped at @base_addr
    @elf: elfesteem's ELF instance
    @loc_db: LocationDB used to retrieve symbols' offset
    @base_addr: addr to reloc to (if any)
    """
    # First pass: register 'symbols' / 'reltab' entries under a free name and
    # remember every section owning a symbol table
    symtab_sections = []
    for header in elf.sh:
        if hasattr(header, 'symbols'):
            for sym_name, sym in viewitems(header.symbols):
                if sym_name and sym.value != 0:
                    free_name = loc_db.find_free_name(sym_name)
                    loc_db.add_location(free_name, sym.value, strict=False)

        if hasattr(header, 'reltab'):
            for rel in header.reltab:
                if rel.sym and rel.offset != 0:
                    free_name = loc_db.find_free_name(rel.sym)
                    loc_db.add_location(free_name, rel.offset, strict=False)

        if hasattr(header, 'symtab'):
            log.debug("Find %d symbols in %r", len(header.symtab),
                      header)
            symtab_sections.append(header)
        elif isinstance(
                header,
                (elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed)
        ):
            log.debug("Find GNU version related section, unsupported for now")

    # Second pass: walk the symbol tables
    for section in symtab_sections:
        for entry in section.symtab:
            # Here, the computation of vaddr assumes 'elf' is an executable
            # or a shared object file

            # For relocatable file, entry.value is an offset from the section
            # base -> not handled here
            info = entry.info
            st_bind = info >> 4
            st_type = info & 0xF

            if st_type not in (
                    elf_csts.STT_NOTYPE,
                    elf_csts.STT_OBJECT,
                    elf_csts.STT_FUNC,
                    elf_csts.STT_COMMON,
                    elf_csts.STT_GNU_IFUNC,
            ):
                # Ignore symbols useless in linking
                continue

            if st_bind not in (elf_csts.STB_GLOBAL, elf_csts.STB_WEAK):
                # Ignore local & others symbols
                continue
            weak = st_bind == elf_csts.STB_WEAK

            shndx = entry.shndx
            if shndx == 0:
                # SHN_UNDEF
                continue
            # SHN_ABS
            absolute = shndx == 0xfff1
            if absolute:
                log.debug("Absolute symbol %r - %x", entry.name,
                          entry.value)
            elif 0xff00 <= shndx <= 0xffff:
                # Reserved index (between SHN_LORESERV and SHN_HIRESERVE)
                raise RuntimeError("Unsupported reserved index: %r" % entry)

            name = entry.name
            if name == "":
                # Ignore empty symbol
                log.debug("Empty symbol %r", entry)
                continue

            # Absolute symbols are not relocated
            vaddr = entry.value if absolute else entry.value + base_addr

            # 'weak' information is only used to force global symbols for now
            known_loc = loc_db.get_name_location(name)
            if known_loc is not None:
                if weak:
                    # Weak symbol, this is ok to already exists, skip it
                    continue
                # Global symbol, force it
                loc_db.remove_location_name(known_loc, name)

            loc_at_vaddr = loc_db.get_offset_location(vaddr)
            if loc_at_vaddr is None:
                loc_db.add_location(name=name, offset=vaddr)
            else:
                loc_db.add_location_name(loc_at_vaddr, name)
def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False,
                **kargs):
    """
    Very dirty elf loader
    TODO XXX: implement real loader

    @vm: VmMngr instance
    @fdata: data buffer to parse
    @name: (optional) name used to tag the created memory pages
    @base_addr: (optional) address added to each segment virtual address
    @loc_db: (optional) LocationDB to fill with the ELF symbols
    @apply_reloc: (optional) if True, apply relocations (x86 only)
    Return the corresponding ELF instance.

    Extra arguments are passed to ELF instantiation.
    """
    elf = elf_init.ELF(fdata, **kargs)
    mapped = interval()
    segment_data = {}

    for segment in elf.ph.phlist:
        header = segment.ph
        if header.type != elf_csts.PT_LOAD:
            continue
        log.debug(
            '0x%x 0x%x 0x%x 0x%x 0x%x', header.vaddr, header.memsz,
            header.offset, header.filesz, header.type)
        start = header.vaddr + base_addr
        segment_data[start] = elf._content[
            header.offset:header.offset + header.filesz
        ]

        # Page boundaries (0x1000 bytes) surrounding the segment
        page_start = start & ~0xFFF
        page_end = (start + max(header.memsz, header.filesz) + 0xFFF) & ~0xFFF

        # -2: Trick to avoid merging 2 consecutive pages
        mapped += [(page_start, page_end - 2)]

    # Map zero-filled pages, then copy the segments content
    for low, high in mapped.intervals:
        vm.add_memory_page(
            low,
            PAGE_READ | PAGE_WRITE,
            b"\x00" * (high + 2 - low),
            repr(name)
        )

    for vaddr, content in viewitems(segment_data):
        vm.set_mem(vaddr, content)

    if loc_db is not None:
        fill_loc_db_with_symbols(elf, loc_db, base_addr)

    if apply_reloc:
        arch = guess_arch(elf)

        # Collect relocation sections, rejecting unknown section types
        reloc_sections = []
        for section in elf.sh:
            if not hasattr(section, 'reltab'):
                continue
            if not isinstance(section, elf_init.RelATable):
                if not isinstance(section, elf_init.RelTable):
                    raise RuntimeError(
                        "Unknown relocation section type: %r" % section
                    )
                if arch == "x86_64":
                    log.warning("REL section should not happen in x86_64")
            reloc_sections.append(section)

        for section in reloc_sections:
            if arch in ["x86_64", "x86_32"]:
                apply_reloc_x86(elf, vm, section, base_addr, loc_db)
            else:
                log.debug("Unsupported relocation for arch %r" % arch)

    return elf
def get_pe_dependencies(pe_obj):
    """Return the set of lowercased library names imported by @pe_obj
    (empty set if it has no import descriptor)
    @pe_obj: pe object"""

    descriptors = pe_obj.DirImport.impdesc
    if descriptors is None:
        return set()
    return {
        descriptor.dlldescname.name.lower() for descriptor in descriptors
    }
def is_redirected_export(pe_obj, addr):
    """Test if the @addr is a forwarded export address. If so, return
    dllname/function name couple. If not, return False.

    An export address is a forwarded export if the rva is in the export
    directory of the pe.

    The returned dllname is lowercased. The function part is an int when the
    forwarder designates an ordinal ("dll.#12"), a str otherwise.

    @pe_obj: PE instance
    @addr: virtual address of the function to test
    """

    export_dir = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT]
    addr_rva = pe_obj.virt2rva(addr)
    if not (export_dir.rva <= addr_rva < export_dir.rva + export_dir.size):
        return False

    # The forwarder is a NUL-terminated "dllname.function" string
    addr_end = pe_obj.virt.find(b'\x00', addr)
    data = pe_obj.virt.get(addr, addr_end)

    # With Python 3, the buffer content is bytes: decode it before using str
    # operations (latin-1 maps each byte to one character and cannot fail)
    if not isinstance(data, str):
        data = data.decode("latin-1")

    dllname, func_info = data.split('.', 1)
    dllname = dllname.lower()

    # Test if function is forwarded using ordinal
    if func_info.startswith('#'):
        func_info = int(func_info[1:])
    return dllname, func_info
parse + @align_s: (optional) If False, keep gaps between section + @load_hdr: (optional) If False, do not load the NThdr in memory + Return the corresponding PE instance. + + Extra arguments are passed to PE instantiation. + If all sections are aligned, they will be mapped on several different pages + Otherwise, a big page is created, containing all sections + """ + + # Parse and build a PE instance + pe = pe_init.PE(fdata, **kargs) + + # Check if all section are aligned + aligned = True + for section in pe.SHList: + if section.addr & 0xFFF: + aligned = False + break + + if aligned: + # Loader NT header + if load_hdr: + # Header length + hdr_len = max(0x200, pe.NThdr.sizeofheaders) + # Page minimum size + min_len = min(pe.SHList[0].addr, 0x1000) + + # Get and pad the pe_hdr + pe_hdr = ( + pe.content[:hdr_len] + + max(0, (min_len - hdr_len)) * b"\x00" + ) + vm.add_memory_page( + pe.NThdr.ImageBase, + PAGE_READ | PAGE_WRITE, + pe_hdr, + "%r: PE Header" % name + ) + + # Align sections size + if align_s: + # Use the next section address to compute the new size + for i, section in enumerate(pe.SHList[:-1]): + new_size = pe.SHList[i + 1].addr - section.addr + section.size = new_size + section.rawsize = new_size + section.data = strpatchwork.StrPatchwork( + section.data[:new_size] + ) + section.offset = section.addr + + # Last section alignment + last_section = pe.SHList[-1] + last_section.size = (last_section.size + 0xfff) & 0xfffff000 + + # Pad sections with null bytes and map them + for section in pe.SHList: + data = bytes(section.data) + data += b"\x00" * (section.size - len(data)) + attrib = PAGE_READ + if section.flags & 0x80000000: + attrib |= PAGE_WRITE + vm.add_memory_page( + pe.rva2virt(section.addr), + attrib, + data, + "%r: %r" % (name, section.name) + ) + + return pe + + # At least one section is not aligned + log.warning('PE is not aligned, creating big section') + min_addr = 0 if load_hdr else None + max_addr = None + data = "" + + for i, section in 
def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
    """Call vm_load_pe on @fname_in and update @libs accordingly
    @vm: VmMngr instance
    @fname_in: library name
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return the corresponding PE instance
    Extra arguments are passed to vm_load_pe
    """

    log.info('Loading module %r', fname_in)

    # The library is read from @lib_path_base, but registered as @fname_in
    module_path = os.path.join(lib_path_base, fname_in)
    with open(module_path, "rb") as handle:
        loaded = vm_load_pe(vm, handle.read(), name=fname_in, **kargs)

    libs.add_export_lib(loaded, fname_in)
    return loaded
def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base,
                           patch_vm_imp=True, **kargs):
    """Call preload_pe on each PE instance of @lib_imgs
    @lib_imgs: dictionary Filename -> PE instances
    @libs: libimp_pe instance
    @lib_path_base: unused
    @patch_vm_imp: (optional) if True, write the resolved addresses in memory
    Extra arguments: 'vm', the VmMngr instance forwarded to preload_pe. It is
    mandatory when @patch_vm_imp is True, as preload_pe then writes into it.

    Raise TypeError if 'vm' is needed but not provided.
    """
    # preload_pe expects (vm, e, runtime_lib, patch_vm_imp); this function
    # has no positional 'vm' argument, so it is taken from the extra arguments
    vm = kargs.get("vm")
    for pe_obj in viewvalues(lib_imgs):
        if vm is None and patch_vm_imp:
            raise TypeError(
                "vm_fix_imports_pe_libs requires a 'vm' keyword argument "
                "when patch_vm_imp is True"
            )
        preload_pe(vm, pe_obj, libs, patch_vm_imp=patch_vm_imp)
e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva + size = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size + log.debug('dirres 0x%x', ad) + if ad != 0: + mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad + mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = size + mye.DirRes = pe.DirRes.unpack(mye.img_rva, ad, mye) + log.debug('%r', mye.DirRes) + s_res = mye.SHList.add_section( + name="myres", + rawsize=len(mye.DirRes) + ) + mye.DirRes.set_rva(s_res.addr) + # generation + open(fname, 'wb').write(bytes(mye)) + return mye + + +class libimp_pe(libimp): + + def __init__(self, *args, **kwargs): + super(libimp_pe, self).__init__(*args, **kwargs) + # dependency -> redirector + self.created_redirected_imports = {} + + def add_export_lib(self, e, name): + if name in self.created_redirected_imports: + log.error("%r has previously been created due to redirect\ + imports due to %r. Change the loading order.", + name, self.created_redirected_imports[name]) + raise RuntimeError('Bad import: loading previously created import') + + self.all_exported_lib.append(e) + # will add real lib addresses to database + if name in self.name2off: + ad = self.name2off[name] + if e is not None and name in self.fake_libs: + log.error( + "You are trying to load %r but it has been faked previously. 
Try loading this module earlier.", name) + raise RuntimeError("Bad import") + else: + log.debug('new lib %s', name) + ad = e.NThdr.ImageBase + libad = ad + self.name2off[name] = ad + self.libbase2lastad[ad] = ad + 0x1 + self.lib_imp2ad[ad] = {} + self.lib_imp2dstad[ad] = {} + self.libbase_ad += 0x1000 + + ads = get_export_name_addr_list(e) + todo = ads + # done = [] + while todo: + # for imp_ord_or_name, ad in ads: + imp_ord_or_name, ad = todo.pop() + + # if export is a redirection, search redirected dll + # and get function real addr + ret = is_redirected_export(e, ad) + if ret: + exp_dname, exp_fname = ret + exp_dname = exp_dname + '.dll' + exp_dname = exp_dname.lower() + # if dll auto refes in redirection + if exp_dname == name: + libad_tmp = self.name2off[exp_dname] + if not exp_fname in self.lib_imp2ad[libad_tmp]: + # schedule func + todo = [(imp_ord_or_name, ad)] + todo + continue + else: + # import redirected lib from non loaded dll + if not exp_dname in self.name2off: + self.created_redirected_imports.setdefault( + exp_dname, set()).add(name) + + # Ensure import entry is created + new_lib_base = self.lib_get_add_base(exp_dname) + # Ensure function entry is created + _ = self.lib_get_add_func(new_lib_base, exp_fname) + + libad_tmp = self.name2off[exp_dname] + ad = self.lib_imp2ad[libad_tmp][exp_fname] + + self.lib_imp2ad[libad][imp_ord_or_name] = ad + name_inv = dict( + (value, key) for key, value in viewitems(self.name2off) + ) + c_name = canon_libname_libfunc( + name_inv[libad], imp_ord_or_name) + self.fad2cname[ad] = c_name + self.cname2addr[c_name] = ad + log.debug("Add func %s %s", hex(ad), c_name) + self.fad2info[ad] = libad, imp_ord_or_name + + def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs): + """Gen a new DirImport description + @target_pe: PE instance + @filter_import: (boolean f(pe, address)) restrict addresses to keep + """ + + new_lib = [] + for lib_name, ad in viewitems(self.name2off): + # Build an 
IMAGE_IMPORT_DESCRIPTOR + + # Get fixed addresses + out_ads = dict() # addr -> func_name + for func_name, dst_addresses in viewitems(self.lib_imp2dstad[ad]): + out_ads.update({addr: func_name for addr in dst_addresses}) + + # Filter available addresses according to @filter_import + all_ads = [ + addr for addr in list(out_ads) if filter_import(target_pe, addr) + ] + + if not all_ads: + continue + + # Keep non-NULL elements + all_ads.sort(key=str) + for i, x in enumerate(all_ads): + if x not in [0, None]: + break + all_ads = all_ads[i:] + log.debug('ads: %s', list(map(hex, all_ads))) + + while all_ads: + # Find libname's Import Address Table + othunk = all_ads[0] + i = 0 + while (i + 1 < len(all_ads) and + all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]): + i += 1 + # 'i + 1' is IAT's length + + # Effectively build an IMAGE_IMPORT_DESCRIPTOR + funcs = [out_ads[addr] for addr in all_ads[:i + 1]] + try: + rva = target_pe.virt2rva(othunk) + except pe.InvalidOffset: + pass + else: + new_lib.append(({"name": lib_name, + "firstthunk": rva}, + funcs) + ) + + # Update elements to handle + all_ads = all_ads[i + 1:] + + return new_lib + + +def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib, + lib_path_base, **kwargs): + """Load a binary and all its dependencies. Returns a dictionary containing + the association between binaries names and it's pe object + + @vm: virtual memory manager instance + @fname: full path of the binary + @name2module: dict containing association between name and pe + object. Updated. 
+ @runtime_lib: libimp instance + @lib_path_base: directory of the libraries containing dependencies + + """ + + todo = [(fname, fname, 0)] + weight2name = {} + done = set() + + # Walk dependencies recursively + while todo: + name, fname, weight = todo.pop() + if name in done: + continue + done.add(name) + weight2name.setdefault(weight, set()).add(name) + if name in name2module: + pe_obj = name2module[name] + else: + try: + with open(fname, "rb") as fstream: + log.info('Loading module name %r', fname) + pe_obj = vm_load_pe( + vm, fstream.read(), name=fname, **kwargs) + except IOError: + log.error('Cannot open %s' % fname) + name2module[name] = None + continue + name2module[name] = pe_obj + + new_dependencies = get_pe_dependencies(pe_obj) + todo += [(name, os.path.join(lib_path_base, name), weight - 1) + for name in new_dependencies] + + ordered_modules = sorted(viewitems(weight2name)) + for _, modules in ordered_modules: + for name in modules: + pe_obj = name2module[name] + if pe_obj is None: + continue + # Fix imports + if pe_obj.DirExport: + runtime_lib.add_export_lib(pe_obj, name) + + for pe_obj in viewvalues(name2module): + if pe_obj is None: + continue + preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True) + + return name2module + +# machine -> arch +PE_machine = {0x14c: "x86_32", + 0x8664: "x86_64", + } + + +def guess_arch(pe): + """Return the architecture specified by the PE container @pe. 
+ If unknown, return None""" + return PE_machine.get(pe.Coffhdr.machine, None) diff --git a/miasm/jitter/loader/utils.py b/miasm/jitter/loader/utils.py new file mode 100644 index 00000000..a32729e1 --- /dev/null +++ b/miasm/jitter/loader/utils.py @@ -0,0 +1,100 @@ +from builtins import int as int_types +import logging + +from future.utils import viewitems, viewvalues + +from miasm.core.utils import force_bytes + +log = logging.getLogger('loader_common') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.INFO) + + +def canon_libname_libfunc(libname, libfunc): + libname = force_bytes(libname) + dn = libname.split(b'.')[0] + if isinstance(libfunc, int_types): + return str(dn), libfunc + else: + libfunc = force_bytes(libfunc) + return b"%s_%s" % (dn, libfunc) + + +class libimp(object): + + def __init__(self, lib_base_ad=0x71111000, **kargs): + self.name2off = {} + self.libbase2lastad = {} + self.libbase_ad = lib_base_ad + self.lib_imp2ad = {} + self.lib_imp2dstad = {} + self.fad2cname = {} + self.cname2addr = {} + self.fad2info = {} + self.all_exported_lib = [] + self.fake_libs = set() + + def lib_get_add_base(self, name): + name = force_bytes(name) + name = name.lower().strip(b' ') + if not b"." 
in name: + log.debug('warning adding .dll to modulename') + name += b'.dll' + log.debug(name) + + if name in self.name2off: + ad = self.name2off[name] + else: + ad = self.libbase_ad + log.warning("Create dummy entry for %r", name) + self.fake_libs.add(name) + self.name2off[name] = ad + self.libbase2lastad[ad] = ad + 0x4 + self.lib_imp2ad[ad] = {} + self.lib_imp2dstad[ad] = {} + self.libbase_ad += 0x1000 + return ad + + def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None): + if not libad in viewvalues(self.name2off): + raise ValueError('unknown lib base!', hex(libad)) + + # test if not ordinatl + # if imp_ord_or_name >0x10000: + # imp_ord_or_name = vm_get_str(imp_ord_or_name, 0x100) + # imp_ord_or_name = imp_ord_or_name[:imp_ord_or_name.find('\x00')] + + #/!\ can have multiple dst ad + if not imp_ord_or_name in self.lib_imp2dstad[libad]: + self.lib_imp2dstad[libad][imp_ord_or_name] = set() + self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) + + if imp_ord_or_name in self.lib_imp2ad[libad]: + return self.lib_imp2ad[libad][imp_ord_or_name] + # log.debug('new imp %s %s' % (imp_ord_or_name, dst_ad)) + ad = self.libbase2lastad[libad] + self.libbase2lastad[libad] += 0x10 # arbitrary + self.lib_imp2ad[libad][imp_ord_or_name] = ad + + name_inv = dict( + (value, key) for key, value in viewitems(self.name2off) + ) + c_name = canon_libname_libfunc(name_inv[libad], imp_ord_or_name) + self.fad2cname[ad] = c_name + self.cname2addr[c_name] = ad + self.fad2info[ad] = libad, imp_ord_or_name + return ad + + def check_dst_ad(self): + for ad in self.lib_imp2dstad: + all_ads = sorted(viewvalues(self.lib_imp2dstad[ad])) + for i, x in enumerate(all_ads[:-1]): + if x is None or all_ads[i + 1] is None: + return False + if x + 4 != all_ads[i + 1]: + return False + return True + + diff --git a/miasm/jitter/op_semantics.c b/miasm/jitter/op_semantics.c new file mode 100644 index 00000000..46e6cca1 --- /dev/null +++ b/miasm/jitter/op_semantics.c @@ -0,0 +1,749 @@ +#include 
+#include +#include +#include +#include +#include "op_semantics.h" + +const uint8_t parity_table[256] = { + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, + 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, +}; + +uint16_t bcdadd_16(uint16_t a, uint16_t b) +{ + int carry = 0; + int i,j = 0; + uint16_t res = 0; + int nib_a, nib_b; + for (i = 0; i < 16; i += 4) { + nib_a = (a >> i) & (0xF); + nib_b = (b >> i) & (0xF); + + j = (carry + nib_a + nib_b); + if (j >= 10) { + carry = 1; + j -= 10; + j &=0xf; + } + else { + carry = 0; + } + res += j << i; + } + return res; +} + +uint16_t bcdadd_cf_16(uint16_t a, uint16_t b) +{ + int carry = 0; + int i,j = 0; + int nib_a, nib_b; + for (i = 0; i < 16; i += 4) { + nib_a = (a >> i) & (0xF); + nib_b = (b >> i) & (0xF); + + j = (carry + nib_a + nib_b); + if (j >= 10) { + carry = 1; + j -= 10; + j &=0xf; + } + else { + carry = 0; + } + } 
+ return carry; +} + +unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b) +{ + unsigned int mask; + + switch (size) { + case 8: mask = 0xff; break; + case 16: mask = 0xffff; break; + case 32: mask = 0xffffffff; break; + default: fprintf(stderr, "inv size in mul %d\n", size); exit(EXIT_FAILURE); + } + + a &= mask; + b &= mask; + return ((int64_t)a * (int64_t) b) & mask; +} + +unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b) +{ + uint64_t res = 0; + unsigned int mask; + + switch (size) { + case 8: mask = 0xff; break; + case 16: mask = 0xffff; break; + case 32: mask = 0xffffffff; break; + default: fprintf(stderr, "inv size in mul %d\n", size); exit(EXIT_FAILURE); + } + + a &= mask; + b &= mask; + res = ((uint64_t)a * (uint64_t)b); + return (res >> 32) & mask; +} + + +unsigned int imul_lo_op_08(char a, char b) +{ + return a*b; +} + +unsigned int imul_lo_op_16(short a, short b) +{ + return a*b; +} + +unsigned int imul_lo_op_32(int a, int b) +{ + return a*b; +} + +int imul_hi_op_08(char a, char b) +{ + int64_t res = 0; + res = a*b; + return (int)(res>>8); +} + +int imul_hi_op_16(short a, short b) +{ + int64_t res = 0; + res = a*b; + return (int)(res>>16); +} + +int imul_hi_op_32(int a, int b) +{ + int64_t res = 0; + res = (int64_t)a*(int64_t)b; + return (int)(res>>32ULL); +} + +unsigned int umul16_lo(unsigned short a, unsigned short b) +{ + return (a*b) & 0xffff; +} + +unsigned int umul16_hi(unsigned short a, unsigned short b) +{ + uint32_t c; + c = a*b; + return (c>>16) & 0xffff; +} + +uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b) +{ + uint64_t tmp; + + b = b & 0x3F; + b %= size; + switch(size){ + case 8: + tmp = (a << b) | ((a & 0xFF) >> (size - b)); + return tmp & 0xFF; + case 16: + tmp = (a << b) | ((a & 0xFFFF) >> (size - b)); + return tmp & 0xFFFF; + case 32: + tmp = (a << b) | ((a & 0xFFFFFFFF) >> (size - b)); + return tmp & 0xFFFFFFFF; + case 64: + tmp = (a << b) | ((a&0xFFFFFFFFFFFFFFFF) >> (size - b)); + 
return tmp & 0xFFFFFFFFFFFFFFFF; + + /* Support cases for rcl */ + case 9: + tmp = (a << b) | ((a & 0x1FF) >> (size - b)); + return tmp & 0x1FF; + case 17: + tmp = (a << b) | ((a & 0x1FFFF) >> (size - b)); + return tmp & 0x1FFFF; + case 33: + tmp = (a << b) | ((a & 0x1FFFFFFFF) >> (size - b)); + return tmp & 0x1FFFFFFFF; + /* TODO XXX: support rcl in 64 bit mode */ + + default: + fprintf(stderr, "inv size in rotleft %"PRIX64"\n", size); + exit(EXIT_FAILURE); + } +} + +uint64_t rot_right(uint64_t size, uint64_t a, uint64_t b) +{ + uint64_t tmp; + + b = b & 0x3F; + b %= size; + switch(size){ + case 8: + tmp = ((a & 0xFF) >> b) | (a << (size - b)); + return tmp & 0xff; + case 16: + tmp = ((a & 0xFFFF) >> b) | (a << (size - b)); + return tmp & 0xFFFF; + case 32: + tmp = ((a & 0xFFFFFFFF) >> b) | (a << (size - b)); + return tmp & 0xFFFFFFFF; + case 64: + tmp = ((a & 0xFFFFFFFFFFFFFFFF) >> b) | (a << (size - b)); + return tmp & 0xFFFFFFFFFFFFFFFF; + + /* Support cases for rcr */ + case 9: + tmp = ((a & 0x1FF) >> b) | (a << (size - b)); + return tmp & 0x1FF; + case 17: + tmp = ((a & 0x1FFFF) >> b) | (a << (size - b)); + return tmp & 0x1FFFF; + case 33: + tmp = ((a & 0x1FFFFFFFF) >> b) | (a << (size - b)); + return tmp & 0x1FFFFFFFF; + /* TODO XXX: support rcr in 64 bit mode */ + + default: + fprintf(stderr, "inv size in rotright %"PRIX64"\n", size); + exit(EXIT_FAILURE); + } +} + +/* + * Count leading zeros - count the number of zero starting at the most + * significant bit + * + * Example: + * - cntleadzeros(size=32, src=2): 30 + * - cntleadzeros(size=32, src=0): 32 + */ +uint64_t cntleadzeros(uint64_t size, uint64_t src) +{ + int64_t i; + + for (i=(int64_t)size-1; i>=0; i--){ + if (src & (1ull << i)) + return (uint64_t)(size - (i + 1)); + } + return (uint64_t)size; +} + +/* + * Count trailing zeros - count the number of zero starting at the least + * significant bit + * + * Example: + * - cnttrailzeros(size=32, src=2): 1 + * - cnttrailzeros(size=32, src=0): 32 + */ 
+unsigned int cnttrailzeros(uint64_t size, uint64_t src) +{ + uint64_t i; + for (i=0; i3){ + fprintf(stderr, "not implemented x86_cpuid reg %x\n", reg_num); + exit(EXIT_FAILURE); + } + // cases are output: EAX: 0; EBX: 1; ECX: 2; EDX: 3 + if (a == 0){ + switch(reg_num){ + case 0: + return 0xa; + // "GenuineIntel" + case 1: + return 0x756E6547; + case 2: + return 0x6C65746E; + case 3: + return 0x49656E69; + } + } + + else if (a == 1){ + switch(reg_num){ + case 0: + // Using a version too high will enable recent + // instruction set + return 0x000006FB; + //return 0x00020652; + case 1: + //return 0x02040800; + return 0x00000800; + case 2: + //return 0x0004E3BD; + return 0x00000209; + case 3: + return (/* fpu */ 1 << 0) | + (/* tsc */ 1 << 4) | + (/* cx8 */ 1 << 8) | + (/* cmov */ 1 << 15) | + (/* mmx */ 1 << 23) | + (/* sse */ 1 << 25) | + (/* sse2 */ 1 << 26) | + (/* ia64 */ 1 << 30); + } + } + // Cache and TLB + else if (a == 2){ + switch(reg_num){ + case 0: + return 0x00000000; + case 1: + return 0x00000000; + case 2: + return 0x00000000; + case 3: + return 0x00000000; + } + } + // Intel thread/core and cache topology + else if (a == 4){ + switch(reg_num){ + case 0: + return 0x00000000; + case 1: + return 0x00000000; + case 2: + return 0x00000000; + case 3: + return 0x00000000; + } + } + // Extended features + else if (a == 7){ + switch(reg_num){ + case 0: + return 0x00000000; + case 1: + return (/* fsgsbase */ 1 << 0) | (/* bmi1 */ 1 << 3); + case 2: + return 0x00000000; + case 3: + return 0x00000000; + } + } + else{ + fprintf(stderr, "WARNING not implemented x86_cpuid index %X!\n", a); + exit(EXIT_FAILURE); + } + return 0; +} + +//#define DEBUG_MIASM_DOUBLE + +void dump_float(void) +{ + /* + printf("%e\n", vmmngr.float_st0); + printf("%e\n", vmmngr.float_st1); + printf("%e\n", vmmngr.float_st2); + printf("%e\n", vmmngr.float_st3); + printf("%e\n", vmmngr.float_st4); + printf("%e\n", vmmngr.float_st5); + printf("%e\n", vmmngr.float_st6); + printf("%e\n", 
vmmngr.float_st7); + */ +} + +uint32_t fpu_fadd32(uint32_t a, uint32_t b) +{ + float c; + c = *((float*)&a) + *((float*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e + %e -> %e\n", a, b, c); +#endif + return *((uint32_t*)&c); +} + +uint64_t fpu_fadd64(uint64_t a, uint64_t b) +{ + double c; + c = *((double*)&a) + *((double*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e + %e -> %e\n", a, b, c); +#endif + return *((uint64_t*)&c); +} + +uint32_t fpu_fsub32(uint32_t a, uint32_t b) +{ + float c; + c = *((float*)&a) - *((float*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e + %e -> %e\n", a, b, c); +#endif + return *((uint32_t*)&c); +} + +uint64_t fpu_fsub64(uint64_t a, uint64_t b) +{ + double c; + c = *((double*)&a) - *((double*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e + %e -> %e\n", a, b, c); +#endif + return *((uint64_t*)&c); +} + +uint32_t fpu_fmul32(uint32_t a, uint32_t b) +{ + float c; + c = *((float*)&a) * *((float*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e * %e -> %e\n", a, b, c); +#endif + return *((uint32_t*)&c); +} + +uint64_t fpu_fmul64(uint64_t a, uint64_t b) +{ + double c; + c = *((double*)&a) * *((double*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e * %e -> %e\n", a, b, c); +#endif + return *((uint64_t*)&c); +} + +uint32_t fpu_fdiv32(uint32_t a, uint32_t b) +{ + float c; + c = *((float*)&a) / *((float*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e * %e -> %e\n", a, b, c); +#endif + return *((uint32_t*)&c); +} + +uint64_t fpu_fdiv64(uint64_t a, uint64_t b) +{ + double c; + c = *((double*)&a) / *((double*)&b); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e * %e -> %e\n", a, b, c); +#endif + return *((uint64_t*)&c); +} + +double fpu_ftan(double a) +{ + double b; + b = tan(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e tan %e\n", a, b); +#endif + return b; +} + +double fpu_frndint(double a) +{ + int64_t b; + double c; + b = 
(int64_t)a; + c = (double)b; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e double %e\n", a, c); +#endif + return c; +} + +double fpu_fsin(double a) +{ + double b; + b = sin(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e sin %e\n", a, b); +#endif + return b; +} + +double fpu_fcos(double a) +{ + double b; + b = cos(a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e cos %e\n", a, b); +#endif + return b; +} + + +double fpu_fscale(double a, double b) +{ + double c; + c = a * exp2(trunc(b)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e *exp2 %e -> %e\n", a, b, c); +#endif + return c; +} + +double fpu_f2xm1(double a) +{ + double b; + b = exp2(a)-1; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e exp2 -1 %e\n", a, b); +#endif + return b; +} + +uint32_t fpu_fsqrt32(uint32_t a) +{ + float b; + b = sqrtf(*((float*)&a)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e sqrt %e\n", a, b); +#endif + return *((uint32_t*)&b); +} + +uint64_t fpu_fsqrt64(uint64_t a) +{ + double b; + b = sqrt(*((double*)&a)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e sqrt %e\n", a, b); +#endif + return *((uint64_t*)&b); +} + +uint64_t fpu_fabs64(uint64_t a) +{ + double b; + b = abs(*((double*)&a)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e abs %e\n", a, b); +#endif + return *((uint64_t*)&b); +} + +uint64_t fpu_fprem64(uint64_t a, uint64_t b) +{ + double c; + c = fmod(*((double*)&a), *((double*)&b)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e %% %e -> %e\n", a, b, c); +#endif + return *((uint64_t*)&c); +} + +double fpu_fchs(double a) +{ + double b; + b = -a; +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf(" - %e -> %e\n", a, b); +#endif + return b; +} + +double fpu_fyl2x(double a, double b) +{ + double c; + c = b * (log(a) / log(2)); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("%e * log(%e) -> %e\n", b, a, c); +#endif + return c; +} + +double fpu_fpatan(double a, double b) +{ + double c; + c 
= atan2(b, a); +#ifdef DEBUG_MIASM_DOUBLE + dump_float(); + printf("arctan(%e / %e) -> %e\n", b, a, c); +#endif + return c; +} + +unsigned int fpu_fcom_c0(double a, double b) +{ + if (isnan(a) || isnan(b)) + return 1; + if (a>=b) + return 0; + return 1; +} +unsigned int fpu_fcom_c1(double a, double b) +{ + //XXX + return 0; +} +unsigned int fpu_fcom_c2(double a, double b) +{ + if (isnan(a) || isnan(b)) + return 1; + return 0; +} +unsigned int fpu_fcom_c3(double a, double b) +{ + if (isnan(a) || isnan(b)) + return 1; + if (a==b) + return 1; + return 0; +} + +uint64_t sint_to_fp_64(int64_t a) +{ + double result = (double) a; + return *((uint64_t*)&result); +} + +uint32_t sint_to_fp_32(int32_t a) +{ + float result = (float) a; + return *((uint32_t*)&result); +} + +int32_t fp32_to_sint32(uint32_t a) +{ + // Enforce nearbyint (IEEE-754 behavior) + float rounded = *((float*)&a); + rounded = nearbyintf(rounded); + return (int32_t) rounded; +} + +int64_t fp64_to_sint64(uint64_t a) +{ + // Enforce nearbyint (IEEE-754 behavior) + double rounded = *((double*)&a); + rounded = nearbyint(rounded); + return (int64_t) rounded; +} + +int32_t fp64_to_sint32(uint64_t a) +{ + // Enforce nearbyint (IEEE-754 behavior) + double rounded = *((double*)&a); + rounded = nearbyint(rounded); + return (int32_t) rounded; +} + +uint32_t fp64_to_fp32(uint64_t a) +{ + float result = (float) *((double*)&a); + return *((uint32_t*)&result); +} + +uint64_t fp32_to_fp64(uint32_t a) +{ + double result = (double) *((float*)&a); + return *((uint64_t*)&result); +} + +uint32_t fpround_towardszero_fp32(uint32_t a) +{ + float rounded = *((float*)&a); + rounded = truncf(rounded); + return *((uint32_t*)&rounded); +} + +uint64_t fpround_towardszero_fp64(uint64_t a) +{ + double rounded = *((float*)&a); + rounded = trunc(rounded); + return *((uint64_t*)&rounded); +} + + +UDIV(8) +UDIV(16) +UDIV(32) +UDIV(64) + +UMOD(8) +UMOD(16) +UMOD(32) +UMOD(64) + +SDIV(8) +SDIV(16) +SDIV(32) +SDIV(64) + +SMOD(8) +SMOD(16) 
+SMOD(32) +SMOD(64) diff --git a/miasm/jitter/op_semantics.h b/miasm/jitter/op_semantics.h new file mode 100644 index 00000000..690cfb35 --- /dev/null +++ b/miasm/jitter/op_semantics.h @@ -0,0 +1,167 @@ +#ifndef OP_SEMANTICS_H +#define OP_SEMANTICS_H + +#include + +#if _WIN32 +#define _MIASM_EXPORT __declspec(dllexport) +#define _MIASM_IMPORT __declspec(dllimport) +#else +#define _MIASM_EXPORT +#define _MIASM_IMPORT +#endif + +#define CC_P 1 +#ifdef PARITY_IMPORT +_MIASM_IMPORT extern const uint8_t parity_table[256]; +#else +_MIASM_EXPORT extern const uint8_t parity_table[256]; +#endif +#define parity(a) parity_table[(a) & 0xFF] + +_MIASM_EXPORT unsigned int my_imul08(unsigned int a, unsigned int b); +_MIASM_EXPORT unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b); +_MIASM_EXPORT unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b); +_MIASM_EXPORT unsigned int imul_lo_op_08(char a, char b); +_MIASM_EXPORT unsigned int imul_lo_op_16(short a, short b); +_MIASM_EXPORT unsigned int imul_lo_op_32(int a, int b); +_MIASM_EXPORT int imul_hi_op_08(char a, char b); +_MIASM_EXPORT int imul_hi_op_16(short a, short b); +_MIASM_EXPORT int imul_hi_op_32(int a, int b); + + +_MIASM_EXPORT unsigned int umul16_lo(unsigned short a, unsigned short b); +_MIASM_EXPORT unsigned int umul16_hi(unsigned short a, unsigned short b); + + +_MIASM_EXPORT uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b); +_MIASM_EXPORT uint64_t rot_right(uint64_t size, uint64_t a, uint64_t b); + +_MIASM_EXPORT uint64_t cntleadzeros(uint64_t size, uint64_t src); +_MIASM_EXPORT unsigned int cnttrailzeros(uint64_t size, uint64_t src); + +#define UDIV(sizeA) \ + uint ## sizeA ## _t udiv ## sizeA (uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ + { \ + uint ## sizeA ## _t r; \ + if (b == 0) { \ + fprintf(stderr, "Should not happen\n"); \ + exit(EXIT_FAILURE); \ + } \ + r = a/b; \ + return r; \ + } + + +#define UMOD(sizeA) \ + uint ## sizeA ## _t umod ## sizeA (uint 
## sizeA ## _t a, uint ## sizeA ## _t b) \ + { \ + uint ## sizeA ## _t r; \ + if (b == 0) { \ + fprintf(stderr, "Should not happen\n"); \ + exit(EXIT_FAILURE); \ + } \ + r = a%b; \ + return r; \ + } + + +#define SDIV(sizeA) \ + int ## sizeA ## _t sdiv ## sizeA (int ## sizeA ## _t a, int ## sizeA ## _t b) \ + { \ + int ## sizeA ## _t r; \ + if (b == 0) { \ + fprintf(stderr, "Should not happen\n"); \ + exit(EXIT_FAILURE); \ + } \ + r = a/b; \ + return r; \ + } + + +#define SMOD(sizeA) \ + int ## sizeA ## _t smod ## sizeA (int ## sizeA ## _t a, int ## sizeA ## _t b) \ + { \ + int ## sizeA ## _t r; \ + if (b == 0) { \ + fprintf(stderr, "Should not happen\n"); \ + exit(EXIT_FAILURE); \ + } \ + r = a%b; \ + return r; \ + } + +_MIASM_EXPORT uint64_t udiv64(uint64_t a, uint64_t b); +_MIASM_EXPORT uint64_t umod64(uint64_t a, uint64_t b); +_MIASM_EXPORT int64_t sdiv64(int64_t a, int64_t b); +_MIASM_EXPORT int64_t smod64(int64_t a, int64_t b); + +_MIASM_EXPORT uint32_t udiv32(uint32_t a, uint32_t b); +_MIASM_EXPORT uint32_t umod32(uint32_t a, uint32_t b); +_MIASM_EXPORT int32_t sdiv32(int32_t a, int32_t b); +_MIASM_EXPORT int32_t smod32(int32_t a, int32_t b); + +_MIASM_EXPORT uint16_t udiv16(uint16_t a, uint16_t b); +_MIASM_EXPORT uint16_t umod16(uint16_t a, uint16_t b); +_MIASM_EXPORT int16_t sdiv16(int16_t a, int16_t b); +_MIASM_EXPORT int16_t smod16(int16_t a, int16_t b); + +_MIASM_EXPORT uint8_t udiv8(uint8_t a, uint8_t b); +_MIASM_EXPORT uint8_t umod8(uint8_t a, uint8_t b); +_MIASM_EXPORT int8_t sdiv8(int8_t a, int8_t b); +_MIASM_EXPORT int8_t smod8(int8_t a, int8_t b); + +_MIASM_EXPORT unsigned int x86_cpuid(unsigned int a, unsigned int reg_num); + +_MIASM_EXPORT uint32_t fpu_fadd32(uint32_t a, uint32_t b); +_MIASM_EXPORT uint64_t fpu_fadd64(uint64_t a, uint64_t b); +_MIASM_EXPORT uint32_t fpu_fsub32(uint32_t a, uint32_t b); +_MIASM_EXPORT uint64_t fpu_fsub64(uint64_t a, uint64_t b); +_MIASM_EXPORT uint32_t fpu_fmul32(uint32_t a, uint32_t b); +_MIASM_EXPORT uint64_t 
fpu_fmul64(uint64_t a, uint64_t b); +_MIASM_EXPORT uint32_t fpu_fdiv32(uint32_t a, uint32_t b); +_MIASM_EXPORT uint64_t fpu_fdiv64(uint64_t a, uint64_t b); +_MIASM_EXPORT double fpu_ftan(double a); +_MIASM_EXPORT double fpu_frndint(double a); +_MIASM_EXPORT double fpu_fsin(double a); +_MIASM_EXPORT double fpu_fcos(double a); +_MIASM_EXPORT double fpu_fscale(double a, double b); +_MIASM_EXPORT double fpu_f2xm1(double a); +_MIASM_EXPORT uint32_t fpu_fsqrt32(uint32_t a); +_MIASM_EXPORT uint64_t fpu_fsqrt64(uint64_t a); +_MIASM_EXPORT uint64_t fpu_fabs64(uint64_t a); +_MIASM_EXPORT uint64_t fpu_fprem64(uint64_t a, uint64_t b); +_MIASM_EXPORT double fpu_fchs(double a); +_MIASM_EXPORT double fpu_fyl2x(double a, double b); +_MIASM_EXPORT double fpu_fpatan(double a, double b); +_MIASM_EXPORT unsigned int fpu_fcom_c0(double a, double b); +_MIASM_EXPORT unsigned int fpu_fcom_c1(double a, double b); +_MIASM_EXPORT unsigned int fpu_fcom_c2(double a, double b); +_MIASM_EXPORT unsigned int fpu_fcom_c3(double a, double b); + +_MIASM_EXPORT uint64_t sint_to_fp_64(int64_t a); +_MIASM_EXPORT uint32_t sint_to_fp_32(int32_t a); +_MIASM_EXPORT int32_t fp32_to_sint32(uint32_t a); +_MIASM_EXPORT int64_t fp64_to_sint64(uint64_t a); +_MIASM_EXPORT int32_t fp64_to_sint32(uint64_t a); +_MIASM_EXPORT uint32_t fp64_to_fp32(uint64_t a); +_MIASM_EXPORT uint64_t fp32_to_fp64(uint32_t a); +_MIASM_EXPORT uint32_t fpround_towardszero_fp32(uint32_t a); +_MIASM_EXPORT uint64_t fpround_towardszero_fp64(uint64_t a); + +#define SHIFT_RIGHT_ARITH(size, value, shift) \ + ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? \ + (((int ## size ## _t) (value)) < 0 ? -1 : 0) : \ + (((int ## size ## _t) (value)) >> (shift)))) + +#define SHIFT_RIGHT_LOGIC(size, value, shift) \ + ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? 
\ + 0 : \ + (((uint ## size ## _t) (value)) >> (shift)))) + +#define SHIFT_LEFT_LOGIC(size, value, shift) \ + ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? \ + 0 : \ + (((uint ## size ## _t) (value)) << (shift)))) + +#endif diff --git a/miasm/jitter/queue.h b/miasm/jitter/queue.h new file mode 100644 index 00000000..0caf72fb --- /dev/null +++ b/miasm/jitter/queue.h @@ -0,0 +1,553 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD$ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +//#include + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. 
+ * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
+ * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * + */ +#define QUEUE_MACRO_DEBUG 0 +#if QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define TRACEBUF +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. 
+ */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = SLIST_FIRST((head)); \ + while (SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_NEXT(curelm, field) = \ + SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ + } \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +/* + * Singly-linked Tail queue declarations. + */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. 
+ */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? 
\ + NULL : \ + ((struct type *) \ + ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = STAILQ_FIRST((head)); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + if ((STAILQ_NEXT(curelm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ + } \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do { \ + if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
+ */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define LIST_INSERT_HEAD(head, elm, field) do { \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_REMOVE(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ +} while (0) + +/* + * Tail queue declarations. 
+ */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. + */ +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + 
QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT((elm)->field.tqe_next); \ + TRASHIT((elm)->field.tqe_prev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + + +#ifdef _KERNEL + +/* + * XXX insque() and 
remque() are an old way of handling certain queues. + * They bogusly assumes that all queue heads look alike. + */ + +struct quehead { + struct quehead *qh_link; + struct quehead *qh_rlink; +}; + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +static __inline void +insque(void *a, void *b) +{ + struct quehead *element = (struct quehead *)a, + *head = (struct quehead *)b; + + element->qh_link = head->qh_link; + element->qh_rlink = head; + head->qh_link = element; + element->qh_link->qh_rlink = element; +} + +static __inline void +remque(void *a) +{ + struct quehead *element = (struct quehead *)a; + + element->qh_link->qh_rlink = element->qh_rlink; + element->qh_rlink->qh_link = element->qh_link; + element->qh_rlink = 0; +} + +#else /* !(__GNUC__ || __INTEL_COMPILER) */ + +void insque(void *a, void *b); +void remque(void *a); + +#endif /* __GNUC__ || __INTEL_COMPILER */ + +#endif /* _KERNEL */ + +#endif /* !_SYS_QUEUE_H_ */ diff --git a/miasm/jitter/vm_mngr.c b/miasm/jitter/vm_mngr.c new file mode 100644 index 00000000..bd1de2f4 --- /dev/null +++ b/miasm/jitter/vm_mngr.c @@ -0,0 +1,926 @@ +/* +** Copyright (C) 2011 EADS France, Fabrice Desclaux +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ +#include "vm_mngr.h" + +#include + +#include +#include + +#include "queue.h" + + + +/****************memory manager**************/ + + + + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +// #define DEBUG_MIASM_AUTOMOD_CODE + +void memory_access_list_init(struct memory_access_list * access) +{ + access->array = NULL; + access->allocated = 0; + access->num = 0; +} + +void memory_access_list_reset(struct memory_access_list * access) +{ + if (access->array) { + free(access->array); + access->array = NULL; + } + access->allocated = 0; + access->num = 0; +} + +void memory_access_list_add(struct memory_access_list * access, uint64_t start, uint64_t stop) +{ + if (access->num >= access->allocated) { + if (access->allocated == 0) + access->allocated = 1; + else + access->allocated *= 2; + access->array = realloc(access->array, access->allocated * sizeof(struct memory_access)); + if (access->array == NULL) { + fprintf(stderr, "cannot realloc struct memory_access access->array\n"); + exit(EXIT_FAILURE); + } + } + access->array[access->num].start = start; + access->array[access->num].stop = stop; + access->num += 1; +} + + + +uint16_t set_endian16(vm_mngr_t* vm_mngr, uint16_t val) +{ + if (vm_mngr->sex == __BYTE_ORDER) + return val; + else + return Endian16_Swap(val); +} + +uint32_t set_endian32(vm_mngr_t* vm_mngr, uint32_t val) +{ + if (vm_mngr->sex == __BYTE_ORDER) + return val; + else + return Endian32_Swap(val); +} + +uint64_t set_endian64(vm_mngr_t* vm_mngr, uint64_t val) +{ + if (vm_mngr->sex == __BYTE_ORDER) + return val; + else + return Endian64_Swap(val); +} + +void print_val(uint64_t base, uint64_t addr) +{ + uint64_t *ptr = (uint64_t *) (intptr_t) addr; + fprintf(stderr, "addr 0x%"PRIX64" val 0x%"PRIX64"\n", addr-base, *ptr); +} + +int midpoint(int imin, int imax) +{ + return (imin + imax) / 2; +} + + +int find_page_node(struct memory_page_node * array, uint64_t key, int imin, int imax) +{ + // continue searching while [imin,imax] 
is not empty + while (imin <= imax) { + // calculate the midpoint for roughly equal partition + int imid = midpoint(imin, imax); + if(array[imid].ad <= key && key < array[imid].ad + array[imid].size) + // key found at index imid + return imid; + // determine which subarray to search + else if (array[imid].ad < key) + // change min index to search upper subarray + imin = imid + 1; + else + // change max index to search lower subarray + imax = imid - 1; + } + // key was not found + return -1; +} + +struct memory_page_node * get_memory_page_from_address(vm_mngr_t* vm_mngr, uint64_t ad, int raise_exception) +{ + struct memory_page_node * mpn; + int i; + + i = find_page_node(vm_mngr->memory_pages_array, + ad, + 0, + vm_mngr->memory_pages_number - 1); + if (i >= 0) { + mpn = &vm_mngr->memory_pages_array[i]; + if ((mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) + return mpn; + } + if (raise_exception) { + fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + } + return NULL; +} + +static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t ad) +{ + struct memory_page_node * mpn; + unsigned char * addr; + uint64_t ret = 0; + struct memory_breakpoint_info * b; + + + mpn = get_memory_page_from_address(vm_mngr, ad, 1); + if (!mpn) + return 0; + + if ((mpn->access & PAGE_READ) == 0){ + fprintf(stderr, "access to non readable page!! 
%"PRIX64"\n", ad); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + return 0; + } + + /* check read breakpoint */ + LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ + if ((b->access & BREAKPOINT_READ) == 0) + continue; + if ((b->ad <= ad) && (ad < b->ad + b->size)) + vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; + } + + + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + + /* read fits in a page */ + if (ad - mpn->ad + my_size/8 <= mpn->size){ + switch(my_size){ + case 8: + ret = *((unsigned char*)addr)&0xFF; + break; + case 16: + ret = *((unsigned short*)addr)&0xFFFF; + ret = set_endian16(vm_mngr, (uint16_t)ret); + break; + case 32: + ret = *((unsigned int*)addr)&0xFFFFFFFF; + ret = set_endian32(vm_mngr, (uint32_t)ret); + break; + case 64: + ret = *((uint64_t*)addr)&0xFFFFFFFFFFFFFFFFULL; + ret = set_endian64(vm_mngr, ret); + break; + default: + fprintf(stderr, "Bad memory access size %d\n", my_size); + exit(EXIT_FAILURE); + break; + } + } + /* read is multiple page wide */ + else{ + unsigned int new_size = my_size; + int index = 0; + while (new_size){ + mpn = get_memory_page_from_address(vm_mngr, ad, 1); + if (!mpn) + return 0; + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + ret |= ((uint64_t)(*((unsigned char*)addr)&0xFF))<<(index); + index +=8; + new_size -= 8; + ad ++; + } + switch(my_size){ + case 8: + ret = ret; + break; + case 16: + ret = set_endian16(vm_mngr, (uint16_t)ret); + break; + case 32: + ret = set_endian32(vm_mngr, (uint32_t)ret); + break; + case 64: + ret = set_endian64(vm_mngr, ret); + break; + default: + fprintf(stderr, "Bad memory access size %d\n", my_size); + exit(EXIT_FAILURE); + break; + } + } + return ret; +} + +static void memory_page_write(vm_mngr_t* vm_mngr, unsigned int my_size, + uint64_t ad, uint64_t src) +{ + struct memory_page_node * mpn; + unsigned char * addr; + struct memory_breakpoint_info * b; + + mpn = get_memory_page_from_address(vm_mngr, ad, 1); + if (!mpn) + return; + + if ((mpn->access & 
PAGE_WRITE) == 0){ + fprintf(stderr, "access to non writable page!! %"PRIX64"\n", ad); + vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; + return ; + } + + /* check read breakpoint*/ + LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ + if ((b->access & BREAKPOINT_WRITE) == 0) + continue; + if ((b->ad <= ad) && (ad < b->ad + b->size)) + vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; + } + + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + + /* write fits in a page */ + if (ad - mpn->ad + my_size/8 <= mpn->size){ + switch(my_size){ + case 8: + *((unsigned char*)addr) = src&0xFF; + break; + case 16: + src = set_endian16(vm_mngr, (uint16_t)src); + *((unsigned short*)addr) = src&0xFFFF; + break; + case 32: + src = set_endian32(vm_mngr, (uint32_t)src); + *((unsigned int*)addr) = src&0xFFFFFFFF; + break; + case 64: + src = set_endian64(vm_mngr, src); + *((uint64_t*)addr) = src&0xFFFFFFFFFFFFFFFFULL; + break; + default: + fprintf(stderr, "Bad memory access size %d\n", my_size); + exit(EXIT_FAILURE); + break; + } + } + /* write is multiple page wide */ + else{ + switch(my_size){ + + case 8: + src = src; + break; + case 16: + src = set_endian16(vm_mngr, (uint16_t)src); + break; + case 32: + src = set_endian32(vm_mngr, (uint32_t)src); + break; + case 64: + src = set_endian64(vm_mngr, src); + break; + default: + fprintf(stderr, "Bad memory access size %d\n", my_size); + exit(EXIT_FAILURE); + break; + } + while (my_size){ + mpn = get_memory_page_from_address(vm_mngr, ad, 1); + if (!mpn) + return; + + addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; + *((unsigned char*)addr) = src&0xFF; + my_size -= 8; + src >>=8; + ad ++; + } + } +} + +// ################## + +void dump_code_bloc(vm_mngr_t* vm_mngr) +{ + struct code_bloc_node * cbp; + LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ + fprintf(stderr, "%"PRIX64"%"PRIX64"\n", cbp->ad_start, cbp->ad_stop); + } + +} + +void add_range_to_list(struct memory_access_list * access, uint64_t addr1, uint64_t 
addr2) +{ + if (access->num > 0) { + /* Check match on upper bound */ + if (access->array[access->num-1].stop == addr1) { + access->array[access->num-1].stop = addr2; + return; + } + + /* Check match on lower bound */ + if (access->array[0].start == addr2) { + access->array[0].start = addr1; + return; + } + } + + /* No merge, add to the list */ + memory_access_list_add(access, addr1, addr2); +} + + +void add_mem_read(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t size) +{ + add_range_to_list(&(vm_mngr->memory_r), addr, addr + size); +} + +void add_mem_write(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t size) +{ + add_range_to_list(&(vm_mngr->memory_w), addr, addr + size); +} + +void check_invalid_code_blocs(vm_mngr_t* vm_mngr) +{ + int i; + struct code_bloc_node * cbp; + for (i=0;imemory_w.num; i++) { + if (vm_mngr->exception_flags & EXCEPT_CODE_AUTOMOD) + break; + if (vm_mngr->memory_w.array[i].stop <= vm_mngr->code_bloc_pool_ad_min || + vm_mngr->memory_w.array[i].start >=vm_mngr->code_bloc_pool_ad_max) + continue; + + LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ + if ((cbp->ad_start < vm_mngr->memory_w.array[i].stop) && + (vm_mngr->memory_w.array[i].start < cbp->ad_stop)){ +#ifdef DEBUG_MIASM_AUTOMOD_CODE + fprintf(stderr, "**********************************\n"); + fprintf(stderr, "self modifying code %"PRIX64" %"PRIX64"\n", + vm_mngr->memory_w.array[i].start, + vm_mngr->memory_w.array[i].stop); + fprintf(stderr, "**********************************\n"); +#endif + vm_mngr->exception_flags |= EXCEPT_CODE_AUTOMOD; + break; + } + } + } +} + + +void check_memory_breakpoint(vm_mngr_t* vm_mngr) +{ + int i; + struct memory_breakpoint_info * memory_bp; + + /* Check memory breakpoints */ + LIST_FOREACH(memory_bp, &vm_mngr->memory_breakpoint_pool, next) { + if (vm_mngr->exception_flags & EXCEPT_BREAKPOINT_MEMORY) + break; + if (memory_bp->access & BREAKPOINT_READ) { + for (i=0;imemory_r.num; i++) { + if ((memory_bp->ad < vm_mngr->memory_r.array[i].stop) && + 
(vm_mngr->memory_r.array[i].start < memory_bp->ad + memory_bp->size)) { + vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; + break; + } + } + } + if (memory_bp->access & BREAKPOINT_WRITE) { + for (i=0;imemory_w.num; i++) { + if ((memory_bp->ad < vm_mngr->memory_w.array[i].stop) && + (vm_mngr->memory_w.array[i].start < memory_bp->ad + memory_bp->size)) { + vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; + break; + } + } + } + } +} + + +PyObject* get_memory_pylist(vm_mngr_t* vm_mngr, struct memory_access_list* memory_list) +{ + int i; + PyObject *pylist; + PyObject *range; + pylist = PyList_New(memory_list->num); + for (i=0;inum;i++) { + range = PyTuple_New(2); + PyTuple_SetItem(range, 0, PyLong_FromUnsignedLongLong((uint64_t)memory_list->array[i].start)); + PyTuple_SetItem(range, 1, PyLong_FromUnsignedLongLong((uint64_t)memory_list->array[i].stop)); + PyList_SetItem(pylist, i, range); + } + return pylist; + +} + +PyObject* get_memory_read(vm_mngr_t* vm_mngr) +{ + return get_memory_pylist(vm_mngr, &vm_mngr->memory_r); +} + +PyObject* get_memory_write(vm_mngr_t* vm_mngr) +{ + return get_memory_pylist(vm_mngr, &vm_mngr->memory_w); +} + +void vm_MEM_WRITE_08(vm_mngr_t* vm_mngr, uint64_t addr, unsigned char src) +{ + add_mem_write(vm_mngr, addr, 1); + memory_page_write(vm_mngr, 8, addr, src); +} + +void vm_MEM_WRITE_16(vm_mngr_t* vm_mngr, uint64_t addr, unsigned short src) +{ + add_mem_write(vm_mngr, addr, 2); + memory_page_write(vm_mngr, 16, addr, src); +} +void vm_MEM_WRITE_32(vm_mngr_t* vm_mngr, uint64_t addr, unsigned int src) +{ + add_mem_write(vm_mngr, addr, 4); + memory_page_write(vm_mngr, 32, addr, src); +} +void vm_MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src) +{ + add_mem_write(vm_mngr, addr, 8); + memory_page_write(vm_mngr, 64, addr, src); +} + +unsigned char vm_MEM_LOOKUP_08(vm_mngr_t* vm_mngr, uint64_t addr) +{ + unsigned char ret; + add_mem_read(vm_mngr, addr, 1); + ret = (unsigned char)memory_page_read(vm_mngr, 8, addr); + return 
ret; +} +unsigned short vm_MEM_LOOKUP_16(vm_mngr_t* vm_mngr, uint64_t addr) +{ + unsigned short ret; + add_mem_read(vm_mngr, addr, 2); + ret = (unsigned short)memory_page_read(vm_mngr, 16, addr); + return ret; +} +unsigned int vm_MEM_LOOKUP_32(vm_mngr_t* vm_mngr, uint64_t addr) +{ + unsigned int ret; + add_mem_read(vm_mngr, addr, 4); + ret = (unsigned int)memory_page_read(vm_mngr, 32, addr); + return ret; +} +uint64_t vm_MEM_LOOKUP_64(vm_mngr_t* vm_mngr, uint64_t addr) +{ + uint64_t ret; + add_mem_read(vm_mngr, addr, 8); + ret = memory_page_read(vm_mngr, 64, addr); + return ret; +} + + +int vm_read_mem(vm_mngr_t* vm_mngr, uint64_t addr, char** buffer_ptr, uint64_t size) +{ + char* buffer; + uint64_t len; + struct memory_page_node * mpn; + + buffer = malloc(size); + *buffer_ptr = buffer; + if (!buffer){ + fprintf(stderr, "Error: cannot alloc read\n"); + exit(EXIT_FAILURE); + } + + /* read is multiple page wide */ + while (size){ + mpn = get_memory_page_from_address(vm_mngr, addr, 1); + if (!mpn){ + free(*buffer_ptr); + PyErr_SetString(PyExc_RuntimeError, "Error: cannot find address"); + return -1; + } + + len = MIN(size, mpn->size - (addr - mpn->ad)); + memcpy(buffer, (char*)mpn->ad_hp + (addr - mpn->ad), len); + buffer += len; + addr += len; + size -= len; + } + + return 0; +} + +int vm_write_mem(vm_mngr_t* vm_mngr, uint64_t addr, char *buffer, uint64_t size) +{ + uint64_t len; + struct memory_page_node * mpn; + + /* write is multiple page wide */ + while (size){ + mpn = get_memory_page_from_address(vm_mngr, addr, 1); + if (!mpn){ + PyErr_SetString(PyExc_RuntimeError, "Error: cannot find address"); + return -1; + } + + len = MIN(size, mpn->size - (addr - mpn->ad)); + memcpy((char*)mpn->ad_hp + (addr-mpn->ad), buffer, len); + buffer += len; + addr += len; + size -= len; + } + + return 0; +} + + + +int is_mapped(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t size) +{ + uint64_t len; + struct memory_page_node * mpn; + + /* test multiple page wide */ + while (size){ + mpn 
= get_memory_page_from_address(vm_mngr, addr, 0); + if (!mpn) + return 0; + + len = MIN(size, mpn->size - (addr - mpn->ad)); + addr += len; + size -= len; + } + + return 1; +} + +struct memory_page_node * create_memory_page_node(uint64_t ad, unsigned int size, unsigned int access, char* name) +{ + struct memory_page_node * mpn; + void* ad_hp; + + mpn = malloc(sizeof(*mpn)); + if (!mpn){ + fprintf(stderr, "Error: cannot alloc mpn\n"); + return NULL; + } + ad_hp = malloc(size); + if (!ad_hp){ + free(mpn); + fprintf(stderr, "Error: cannot alloc %d\n", size); + return NULL; + } + mpn->name = malloc(strlen(name) + 1); + if (!mpn->name){ + free(mpn); + free(ad_hp); + fprintf(stderr, "Error: cannot alloc\n"); + return NULL; + } + + mpn->ad = ad; + mpn->size = size; + mpn->access = access; + mpn->ad_hp = ad_hp; + strcpy(mpn->name, name); + + return mpn; +} + + +struct code_bloc_node * create_code_bloc_node(uint64_t ad_start, uint64_t ad_stop) +{ + struct code_bloc_node * cbp; + + cbp = malloc(sizeof(*cbp)); + if (!cbp){ + fprintf(stderr, "Error: cannot alloc cbp\n"); + exit(EXIT_FAILURE); + } + + cbp->ad_start = ad_start; + cbp->ad_stop = ad_stop; + + return cbp; +} + + +void add_code_bloc(vm_mngr_t* vm_mngr, struct code_bloc_node* cbp) +{ + LIST_INSERT_HEAD(&vm_mngr->code_bloc_pool, cbp, next); + if (vm_mngr->code_bloc_pool_ad_min> cbp->ad_start) + vm_mngr->code_bloc_pool_ad_min = cbp->ad_start; + if (vm_mngr->code_bloc_pool_ad_max< cbp->ad_stop) + vm_mngr->code_bloc_pool_ad_max = cbp->ad_stop; +} + +void dump_code_bloc_pool(vm_mngr_t* vm_mngr) +{ + struct code_bloc_node * cbp; + + LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ + printf("ad start %"PRIX64" ad_stop %"PRIX64"\n", + cbp->ad_start, + cbp->ad_stop); + } +} + + +void init_memory_page_pool(vm_mngr_t* vm_mngr) +{ + + vm_mngr->memory_pages_number = 0; + vm_mngr->memory_pages_array = NULL; +} + +void init_code_bloc_pool(vm_mngr_t* vm_mngr) +{ + LIST_INIT(&vm_mngr->code_bloc_pool); + 
vm_mngr->code_bloc_pool_ad_min = 0xffffffffffffffffULL; + vm_mngr->code_bloc_pool_ad_max = 0; + + memory_access_list_init(&(vm_mngr->memory_r)); + memory_access_list_init(&(vm_mngr->memory_w)); + + +} + +void init_memory_breakpoint(vm_mngr_t* vm_mngr) +{ + LIST_INIT(&vm_mngr->memory_breakpoint_pool); +} + + +void reset_memory_page_pool(vm_mngr_t* vm_mngr) +{ + struct memory_page_node * mpn; + int i; + for (i=0;imemory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; + free(mpn->ad_hp); + free(mpn->name); + } + free(vm_mngr->memory_pages_array); + vm_mngr->memory_pages_array = NULL; + vm_mngr->memory_pages_number = 0; +} + + +void reset_code_bloc_pool(vm_mngr_t* vm_mngr) +{ + struct code_bloc_node * cbp; + + + while (!LIST_EMPTY(&vm_mngr->code_bloc_pool)) { + cbp = LIST_FIRST(&vm_mngr->code_bloc_pool); + LIST_REMOVE(cbp, next); + free(cbp); + } + vm_mngr->code_bloc_pool_ad_min = 0xffffffffffffffffULL; + vm_mngr->code_bloc_pool_ad_max = 0; +} + +void reset_memory_access(vm_mngr_t* vm_mngr) +{ + memory_access_list_reset(&(vm_mngr->memory_r)); + memory_access_list_reset(&(vm_mngr->memory_w)); +} + +void reset_memory_breakpoint(vm_mngr_t* vm_mngr) +{ + struct memory_breakpoint_info * mpn; + + while (!LIST_EMPTY(&vm_mngr->memory_breakpoint_pool)) { + mpn = LIST_FIRST(&vm_mngr->memory_breakpoint_pool); + LIST_REMOVE(mpn, next); + free(mpn); + } + +} + + + +/* We don't use dichotomy here for the insertion */ +int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) +{ + struct memory_page_node * mpn; + int i; + + for (i=0;imemory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; + if (mpn->ad >= mpn_a->ad + mpn_a->size) + continue; + if (mpn->ad + mpn->size <= mpn_a->ad) + continue; + fprintf(stderr, + "Error: attempt to add page (0x%"PRIX64" 0x%"PRIX64") " + "overlapping page (0x%"PRIX64" 0x%"PRIX64")\n", + mpn_a->ad, mpn_a->ad + mpn_a->size, + mpn->ad, mpn->ad + mpn->size); + + return 1; + } + + return 0; +} + + +/* We don't use 
dichotomy here for the insertion */ +void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) +{ + struct memory_page_node * mpn; + int i; + + for (i=0; i < vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; + if (mpn->ad < mpn_a->ad) + continue; + break; + } + vm_mngr->memory_pages_array = realloc(vm_mngr->memory_pages_array, + sizeof(struct memory_page_node) * + (vm_mngr->memory_pages_number+1)); + if (vm_mngr->memory_pages_array == NULL) { + fprintf(stderr, "cannot realloc struct memory_page_node vm_mngr->memory_pages_array\n"); + exit(EXIT_FAILURE); + } + + + memmove(&vm_mngr->memory_pages_array[i+1], + &vm_mngr->memory_pages_array[i], + sizeof(struct memory_page_node) * (vm_mngr->memory_pages_number - i) + ); + + vm_mngr->memory_pages_array[i] = *mpn_a; + vm_mngr->memory_pages_number ++; + +} + +/* Return a char* representing the repr of vm_mngr_t object */ +char* dump(vm_mngr_t* vm_mngr) +{ + char buf[0x100]; + int length; + char *buf_final; + int i; + char buf_addr[0x20]; + char buf_size[0x20]; + struct memory_page_node * mpn; + /* 0x1234567812345678 0x1234567812345678 */ + char* intro = "Addr Size Access Comment\n"; + size_t total_len = strlen(intro) + 1; + + buf_final = malloc(total_len); + if (buf_final == NULL) { + fprintf(stderr, "Error: cannot alloc char* buf_final\n"); + exit(EXIT_FAILURE); + } + strcpy(buf_final, intro); + for (i=0; i< vm_mngr->memory_pages_number; i++) { + mpn = &vm_mngr->memory_pages_array[i]; + snprintf(buf_addr, sizeof(buf_addr), + "0x%"PRIX64, (uint64_t)mpn->ad); + snprintf(buf_size, sizeof(buf_size), + "0x%"PRIX64, (uint64_t)mpn->size); + + length = snprintf(buf, sizeof(buf) - 1, + "%-18s %-18s %c%c%c %s", + buf_addr, + buf_size, + mpn->access & PAGE_READ? 'R':'_', + mpn->access & PAGE_WRITE? 'W':'_', + mpn->access & PAGE_EXEC? 
'X':'_', + mpn->name + ); + strcat(buf, "\n"); + total_len += length + 1 + 1; + buf_final = realloc(buf_final, total_len); + if (buf_final == NULL) { + fprintf(stderr, "cannot realloc char* buf_final\n"); + exit(EXIT_FAILURE); + } + strcat(buf_final, buf); + } + + return buf_final; +} + +void dump_memory_breakpoint_pool(vm_mngr_t* vm_mngr) +{ + struct memory_breakpoint_info * mpn; + + LIST_FOREACH(mpn, &vm_mngr->memory_breakpoint_pool, next){ + printf("ad %"PRIX64" size %"PRIX64" access %"PRIX64"\n", + mpn->ad, + mpn->size, + mpn->access + ); + } +} + + +void add_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t size, unsigned int access) +{ + struct memory_breakpoint_info * mpn_a; + mpn_a = malloc(sizeof(*mpn_a)); + if (!mpn_a) { + fprintf(stderr, "Error: cannot alloc\n"); + exit(EXIT_FAILURE); + } + mpn_a->ad = ad; + mpn_a->size = size; + mpn_a->access = access; + + LIST_INSERT_HEAD(&vm_mngr->memory_breakpoint_pool, mpn_a, next); + +} + +void remove_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, unsigned int access) +{ + struct memory_breakpoint_info * mpn; + + LIST_FOREACH(mpn, &vm_mngr->memory_breakpoint_pool, next){ + if (mpn->ad == ad && mpn->access == access) + LIST_REMOVE(mpn, next); + } + +} + + +/********************************************/ + +void hexdump(char* m, unsigned int l) +{ + unsigned int i, j, last; + last = 0; + for (i=0;iexception_flags; +} + + diff --git a/miasm/jitter/vm_mngr.h b/miasm/jitter/vm_mngr.h new file mode 100644 index 00000000..660e6998 --- /dev/null +++ b/miasm/jitter/vm_mngr.h @@ -0,0 +1,302 @@ +/* +** Copyright (C) 2011 EADS France, Fabrice Desclaux +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. 
+** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ +#ifndef CODENAT_H +#define CODENAT_H + +#if defined(_WIN32) || defined(_WIN64) +#define _CRT_SECURE_NO_WARNINGS +#endif + +#if _WIN32 +#define _MIASM_EXPORT __declspec(dllexport) +#else +#define _MIASM_EXPORT +#endif + +#include +#include + +#include "queue.h" + +#ifdef __APPLE__ +#define __BYTE_ORDER __BYTE_ORDER__ +#elif defined(__NetBSD__) || defined(__OpenBSD__) +#define __BYTE_ORDER _BYTE_ORDER +#define __BIG_ENDIAN _BIG_ENDIAN +#define __LITTLE_ENDIAN _LITTLE_ENDIAN +#elif defined(_WIN32) || defined(_WIN64) +#define __BIG_ENDIAN '>' +#define __LITTLE_ENDIAN '<' +#define __BYTE_ORDER __LITTLE_ENDIAN +#endif + + +#define Endian16_Swap(value) \ + ((((uint16_t)((value) & 0x00FF)) << 8) | \ + (((uint16_t)((value) & 0xFF00)) >> 8)) + +#define Endian32_Swap(value) \ + ((((uint32_t)((value) & 0x000000FF)) << 24) | \ + (((uint32_t)((value) & 0x0000FF00)) << 8) | \ + (((uint32_t)((value) & 0x00FF0000)) >> 8) | \ + (((uint32_t)((value) & 0xFF000000)) >> 24)) + +#define Endian64_Swap(value) \ + (((((uint64_t)value)<<56) & 0xFF00000000000000ULL) | \ + ((((uint64_t)value)<<40) & 0x00FF000000000000ULL) | \ + ((((uint64_t)value)<<24) & 0x0000FF0000000000ULL) | \ + ((((uint64_t)value)<< 8) & 0x000000FF00000000ULL) | \ + ((((uint64_t)value)>> 8) & 0x00000000FF000000ULL) | \ + ((((uint64_t)value)>>24) & 0x0000000000FF0000ULL) | \ + ((((uint64_t)value)>>40) & 0x000000000000FF00ULL) | \ + ((((uint64_t)value)>>56) & 0x00000000000000FFULL)) + + +LIST_HEAD(code_bloc_list_head, code_bloc_node); 
+LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); + + +#define BREAKPOINT_READ 1 +#define BREAKPOINT_WRITE 2 + +#define BREAK_SIGALARM 1<<5 + +#define MAX_MEMORY_PAGE_POOL_TAB 0x100000 +#define MEMORY_PAGE_POOL_MASK_BIT 12 +#define PAGE_SIZE (1< +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License along +** with this program; if not, write to the Free Software Foundation, Inc., +** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ +#include +#include "structmember.h" +#include +#include +#include +#include "compat_py23.h" +#include "queue.h" +#include "vm_mngr.h" +#include "vm_mngr_py.h" + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +extern struct memory_page_list_head memory_page_pool; +extern struct code_bloc_list_head code_bloc_pool; + +#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} + + + +/* XXX POC signals */ +VmMngr* global_vmmngr; + +PyObject* _vm_get_exception(unsigned int xcpt) +{ + PyObject*p; + + if (!xcpt) + p = NULL; + else if (xcpt & EXCEPT_CODE_AUTOMOD) + p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_CODE_AUTOMOD" ); + else if (xcpt & EXCEPT_UNK_EIP) + p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNK_EIP" ); + else if (xcpt & EXCEPT_UNK_MEM_AD) + p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNK_MEM_AD" ); + + else p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNKNOWN" ); + return p; +} + +static void sig_alarm(int signo) +{ 
+ global_vmmngr->vm_mngr.exception_flags |= BREAK_SIGALARM; + return; +} + +PyObject* set_alarm(VmMngr* self) +{ + global_vmmngr = self; + signal(SIGALRM, sig_alarm); + + Py_INCREF(Py_None); + return Py_None; +} + + + +PyObject* vm_add_memory_page(VmMngr* self, PyObject* args) +{ + PyObject *addr; + PyObject *access; + PyObject *item_str; + PyObject *name=NULL; + uint64_t buf_size; + char* buf_data; + Py_ssize_t length; + uint64_t page_addr; + uint64_t page_access; + char* name_ptr; + + struct memory_page_node * mpn; + + if (!PyArg_ParseTuple(args, "OOO|O", &addr, &access, &item_str, &name)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(addr, page_addr); + PyGetInt(access, page_access); + + if(!PyBytes_Check(item_str)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + buf_size = PyBytes_Size(item_str); + PyBytes_AsStringAndSize(item_str, &buf_data, &length); + + if (name == NULL) { + name_ptr = (char*)""; + } else { + PyGetStr(name_ptr, name); + } + mpn = create_memory_page_node(page_addr, (unsigned int)buf_size, (unsigned int)page_access, name_ptr); + if (mpn == NULL) + RAISE(PyExc_TypeError,"cannot create page"); + if (is_mpn_in_tab(&self->vm_mngr, mpn)) { + free(mpn->ad_hp); + free(mpn); + RAISE(PyExc_TypeError,"known page in memory"); + } + + memcpy(mpn->ad_hp, buf_data, buf_size); + add_memory_page(&self->vm_mngr, mpn); + + Py_INCREF(Py_None); + return Py_None; +} + + + +PyObject* vm_set_mem_access(VmMngr* self, PyObject* args) +{ + PyObject *addr; + PyObject *access; + uint64_t page_addr; + uint64_t page_access; + struct memory_page_node * mpn; + + if (!PyArg_ParseTuple(args, "OO", &addr, &access)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(addr, page_addr); + PyGetInt(access, page_access); + + mpn = get_memory_page_from_address(&self->vm_mngr, page_addr, 1); + if (!mpn){ + PyErr_SetString(PyExc_RuntimeError, "cannot find address"); + return 0; + } + + mpn->access = page_access; + + Py_INCREF(Py_None); + return Py_None; 
+} + +PyObject* vm_set_mem(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_buffer; + Py_ssize_t py_length; + + char * buffer; + uint64_t size; + uint64_t addr; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + if (!PyBytes_Check(py_buffer)) + RAISE(PyExc_TypeError,"arg must be bytes"); + + size = PyBytes_Size(py_buffer); + PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); + + ret = vm_write_mem(&self->vm_mngr, addr, buffer, size); + if (ret < 0) + RAISE(PyExc_TypeError, "Error in set_mem"); + + add_mem_write(&self->vm_mngr, addr, size); + check_invalid_code_blocs(&self->vm_mngr); + + Py_INCREF(Py_None); + return Py_None; +} + + + +PyObject* vm_get_mem_access(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + uint64_t page_addr; + struct memory_page_node * mpn; + + if (!PyArg_ParseTuple(args, "O", &py_addr)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, page_addr); + + mpn = get_memory_page_from_address(&self->vm_mngr, page_addr, 1); + if (!mpn){ + PyErr_SetString(PyExc_RuntimeError, "cannot find address"); + return 0; + } + + return PyLong_FromUnsignedLongLong((uint64_t)mpn->access); +} + +PyObject* vm_get_mem(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_len; + + uint64_t addr; + uint64_t size; + PyObject *obj_out; + char * buf_out; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_len)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + PyGetInt(py_len, size); + + ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, size); + if (ret < 0) { + RAISE(PyExc_RuntimeError,"Cannot find address"); + } + + obj_out = PyBytes_FromStringAndSize(buf_out, size); + free(buf_out); + return obj_out; +} + +PyObject* vm_get_u8(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + + uint64_t addr; + PyObject *obj_out; + char * buf_out; + int ret; + 
uint32_t value; + + if (!PyArg_ParseTuple(args, "O", &py_addr)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 1); + if (ret < 0) { + RAISE(PyExc_RuntimeError,"Cannot find address"); + } + + value = *(uint8_t*)buf_out; + + obj_out = PyLong_FromUnsignedLongLong(value); + free(buf_out); + return obj_out; +} + +PyObject* vm_get_u16(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + + uint64_t addr; + PyObject *obj_out; + char * buf_out; + int ret; + uint16_t value; + + if (!PyArg_ParseTuple(args, "O", &py_addr)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 2); + if (ret < 0) { + RAISE(PyExc_RuntimeError,"Cannot find address"); + } + + value = set_endian16(&self->vm_mngr, *(uint16_t*)buf_out); + + obj_out = PyLong_FromUnsignedLongLong(value); + free(buf_out); + return obj_out; +} + +PyObject* vm_get_u32(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + + uint64_t addr; + PyObject *obj_out; + char * buf_out; + int ret; + uint32_t value; + + if (!PyArg_ParseTuple(args, "O", &py_addr)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 4); + if (ret < 0) { + RAISE(PyExc_RuntimeError,"Cannot find address"); + } + + value = set_endian32(&self->vm_mngr, *(uint32_t*)buf_out); + + obj_out = PyLong_FromUnsignedLongLong(value); + free(buf_out); + return obj_out; +} + + +PyObject* vm_get_u64(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + + uint64_t addr; + PyObject *obj_out; + char * buf_out; + int ret; + uint64_t value; + + if (!PyArg_ParseTuple(args, "O", &py_addr)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + + ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 8); + if (ret < 0) { + RAISE(PyExc_RuntimeError,"Cannot find address"); + } + + value = 
set_endian64(&self->vm_mngr, *(uint64_t*)buf_out); + + obj_out = PyLong_FromUnsignedLongLong(value); + free(buf_out); + return obj_out; +} + + +PyObject* vm_set_u8(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_val; + uint64_t value; + uint64_t addr; + uint8_t final_value; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + PyGetInt(py_val, value); + + if (value > 0xFF) { + fprintf(stderr, "Warning: int to big\n"); + } + + final_value = value; + + ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 1); + if (ret < 0) + RAISE(PyExc_TypeError, "Error in set_mem"); + + add_mem_write(&self->vm_mngr, addr, 1); + check_invalid_code_blocs(&self->vm_mngr); + + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_set_u16(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_val; + uint64_t value; + uint64_t addr; + uint16_t final_value; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + PyGetInt(py_val, value); + + if (value > 0xFFFF) { + fprintf(stderr, "Warning: int to big\n"); + } + + final_value = set_endian16(&self->vm_mngr, value); + + ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 2); + if (ret < 0) + RAISE(PyExc_TypeError, "Error in set_mem"); + + add_mem_write(&self->vm_mngr, addr, 2); + check_invalid_code_blocs(&self->vm_mngr); + + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_set_u32(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_val; + uint64_t value; + uint64_t addr; + uint32_t final_value; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + PyGetInt(py_val, value); + + if (value > 0xFFFFFFFF) { + fprintf(stderr, "Warning: int to big\n"); + } + + final_value = 
set_endian32(&self->vm_mngr, value); + + ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 4); + if (ret < 0) + RAISE(PyExc_TypeError, "Error in set_mem"); + + add_mem_write(&self->vm_mngr, addr, 4); + check_invalid_code_blocs(&self->vm_mngr); + + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_set_u64(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_val; + uint64_t value; + uint64_t addr; + uint64_t final_value; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + PyGetInt(py_val, value); + + final_value = set_endian64(&self->vm_mngr, value); + + ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 8); + if (ret < 0) + RAISE(PyExc_TypeError, "Error in set_mem"); + + add_mem_write(&self->vm_mngr, addr, 8); + check_invalid_code_blocs(&self->vm_mngr); + + Py_INCREF(Py_None); + return Py_None; +} + + + + + +PyObject* vm_add_memory_breakpoint(VmMngr* self, PyObject* args) +{ + PyObject *ad; + PyObject *size; + PyObject *access; + + uint64_t b_ad; + uint64_t b_size; + uint64_t b_access; + + if (!PyArg_ParseTuple(args, "OOO", &ad, &size, &access)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(ad, b_ad); + PyGetInt(size, b_size); + PyGetInt(access, b_access); + + add_memory_breakpoint(&self->vm_mngr, b_ad, b_size, (unsigned int)b_access); + + /* Raise exception in the following pattern: + - set_mem(XXX) + - add_memory_breakpoint(XXX) + -> Here, there is a pending breakpoint not raise + */ + check_memory_breakpoint(&self->vm_mngr); + + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_remove_memory_breakpoint(VmMngr* self, PyObject* args) +{ + PyObject *ad; + PyObject *access; + uint64_t b_ad; + uint64_t b_access; + + if (!PyArg_ParseTuple(args, "OO", &ad, &access)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(ad, b_ad); + PyGetInt(access, b_access); + 
remove_memory_breakpoint(&self->vm_mngr, b_ad, (unsigned int)b_access); + + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_set_exception(VmMngr* self, PyObject* args) +{ + PyObject *item1; + uint64_t i; + + if (!PyArg_ParseTuple(args, "O", &item1)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, i); + + self->vm_mngr.exception_flags = i; + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_get_exception(VmMngr* self, PyObject* args) +{ + return PyLong_FromUnsignedLongLong((uint64_t)self->vm_mngr.exception_flags); +} + + + + +PyObject* vm_init_memory_page_pool(VmMngr* self, PyObject* args) +{ + init_memory_page_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_init_code_bloc_pool(VmMngr* self, PyObject* args) +{ + init_code_bloc_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_init_memory_breakpoint(VmMngr* self, PyObject* args) +{ + init_memory_breakpoint(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_reset_memory_breakpoint(VmMngr* self, PyObject* args) +{ + reset_memory_breakpoint(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_reset_memory_access(VmMngr* self, PyObject* args) +{ + reset_memory_access(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* py_add_mem_read(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_size; + uint64_t addr; + uint64_t size; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_size)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(py_addr, addr); + PyGetInt(py_size, size); + add_mem_read(&self->vm_mngr, addr, size); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* py_add_mem_write(VmMngr* self, PyObject* args) +{ + PyObject *py_addr; + PyObject *py_size; + uint64_t addr; + uint64_t size; + + if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_size)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + 
PyGetInt(py_addr, addr); + PyGetInt(py_size, size); + add_mem_write(&self->vm_mngr, addr, size); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_check_invalid_code_blocs(VmMngr* self, PyObject* args) +{ + check_invalid_code_blocs(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_check_memory_breakpoint(VmMngr* self, PyObject* args) +{ + check_memory_breakpoint(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + +PyObject *vm_dump(PyObject* self) +{ + char* buf_final; + PyObject* ret_obj; + + buf_final = dump(&((VmMngr* )self)->vm_mngr); + ret_obj = PyUnicode_FromString(buf_final); + free(buf_final); + return ret_obj; +} + +PyObject* vm_dump_memory_breakpoint(VmMngr* self, PyObject* args) +{ + dump_memory_breakpoint_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; +} + + +PyObject* vm_get_all_memory(VmMngr* self, PyObject* args) +{ + PyObject *o; + struct memory_page_node * mpn; + PyObject *dict; + PyObject *dict2; + int i; + + + dict = PyDict_New(); + + for (i=0;ivm_mngr.memory_pages_number; i++) { + mpn = &self->vm_mngr.memory_pages_array[i]; + + dict2 = PyDict_New(); + + o = PyBytes_FromStringAndSize(mpn->ad_hp, mpn->size); + PyDict_SetItemString(dict2, "data", o); + Py_DECREF(o); + + o = PyLong_FromLong((long)mpn->size); + PyDict_SetItemString(dict2, "size", o); + Py_DECREF(o); + + o = PyLong_FromLong((long)mpn->access); + PyDict_SetItemString(dict2, "access", o); + Py_DECREF(o); + + o = PyLong_FromUnsignedLongLong(mpn->ad); + PyDict_SetItem(dict, o, dict2); + Py_DECREF(o); + Py_DECREF(dict2); + } + return dict; +} + + +PyObject* vm_reset_memory_page_pool(VmMngr* self, PyObject* args) +{ + reset_memory_page_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + +PyObject* vm_reset_code_bloc_pool(VmMngr* self, PyObject* args) +{ + reset_code_bloc_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + + +PyObject* vm_add_code_bloc(VmMngr *self, PyObject *args) +{ + PyObject 
*item1; + PyObject *item2; + uint64_t ad_start, ad_stop, ad_code = 0; + + struct code_bloc_node * cbp; + + if (!PyArg_ParseTuple(args, "OO", &item1, &item2)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(item1, ad_start); + PyGetInt(item2, ad_stop); + + cbp = create_code_bloc_node(ad_start, ad_stop); + cbp->ad_start = ad_start; + cbp->ad_stop = ad_stop; + cbp->ad_code = ad_code; + add_code_bloc(&self->vm_mngr, cbp); + + Py_INCREF(Py_None); + return Py_None; +} + +PyObject* vm_dump_code_bloc_pool(VmMngr* self) +{ + dump_code_bloc_pool(&self->vm_mngr); + Py_INCREF(Py_None); + return Py_None; + +} + + + +PyObject* vm_is_mapped(VmMngr* self, PyObject* args) +{ + PyObject *ad; + PyObject *size; + uint64_t b_ad; + uint64_t b_size; + int ret; + + if (!PyArg_ParseTuple(args, "OO", &ad, &size)) + RAISE(PyExc_TypeError,"Cannot parse arguments"); + + PyGetInt(ad, b_ad); + PyGetInt(size, b_size); + ret = is_mapped(&self->vm_mngr, b_ad, b_size); + return PyLong_FromUnsignedLongLong((uint64_t)ret); +} + +PyObject* vm_get_memory_read(VmMngr* self, PyObject* args) +{ + PyObject* result; + result = get_memory_read(&self->vm_mngr); + Py_INCREF(result); + return result; +} + +PyObject* vm_get_memory_write(VmMngr* self, PyObject* args) +{ + PyObject* result; + result = get_memory_write(&self->vm_mngr); + Py_INCREF(result); + return result; +} + + + +static PyObject * +vm_set_big_endian(VmMngr *self, PyObject *value, void *closure) +{ + self->vm_mngr.sex = __BIG_ENDIAN; + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject * +vm_set_little_endian(VmMngr *self, PyObject *value, void *closure) +{ + self->vm_mngr.sex = __LITTLE_ENDIAN; + Py_INCREF(Py_None); + return Py_None; +} + + +static PyObject * +vm_is_little_endian(VmMngr *self, PyObject *value, void *closure) +{ + if (self->vm_mngr.sex == __BIG_ENDIAN) { + return PyLong_FromUnsignedLongLong(0); + } else { + return PyLong_FromUnsignedLongLong(1); + } +} + + +static void +VmMngr_dealloc(VmMngr* self) +{ + 
vm_reset_memory_page_pool(self, NULL); + vm_reset_code_bloc_pool(self, NULL); + vm_reset_memory_breakpoint(self, NULL); + Py_TYPE(self)->tp_free((PyObject*)self); +} + + +static PyObject * +VmMngr_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + VmMngr *self; + + self = (VmMngr *)type->tp_alloc(type, 0); + return (PyObject *)self; +} + +static PyObject * +VmMngr_get_vmmngr(VmMngr *self, void *closure) +{ + return PyLong_FromUnsignedLongLong((uint64_t)(intptr_t)&(self->vm_mngr)); +} + +static int +VmMngr_set_vmmngr(VmMngr *self, PyObject *value, void *closure) +{ + PyErr_SetString(PyExc_TypeError, "immutable vmmngr"); + return -1; +} + +static PyMemberDef VmMngr_members[] = { + {NULL} /* Sentinel */ +}; + +static PyMethodDef VmMngr_methods[] = { + {"init_memory_page_pool", (PyCFunction)vm_init_memory_page_pool, METH_VARARGS, + "init_memory_page_pool() -> Initialize the VmMngr memory"}, + {"init_memory_breakpoint", (PyCFunction)vm_init_memory_breakpoint, METH_VARARGS, + "init_memory_breakpoint() -> Initialize the VmMngr memory breakpoints"}, + {"init_code_bloc_pool",(PyCFunction)vm_init_code_bloc_pool, METH_VARARGS, + "init_code_bloc_pool() -> Initialize the VmMngr jitted code blocks"}, + {"set_mem_access", (PyCFunction)vm_set_mem_access, METH_VARARGS, + "set_mem_access(address, access) -> Change the protection of the page at @address with @access"}, + {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, + "set_mem(address, data) -> Set a @data in memory at @address"}, + {"is_mapped", (PyCFunction)vm_is_mapped, METH_VARARGS, + "is_mapped(address, size) -> Check if the memory region at @address of @size bytes is fully mapped"}, + {"add_code_bloc",(PyCFunction)vm_add_code_bloc, METH_VARARGS, + "add_code_bloc(address_start, address_stop) -> Add a jitted code block between [@address_start, @address_stop["}, + {"get_mem_access", (PyCFunction)vm_get_mem_access, METH_VARARGS, + "get_mem_access(address) -> Retrieve the memory protection of the page at @address"}, + 
{"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, + "get_mem(addr, size) -> Get the memory content at @address of @size bytes"}, + + {"get_u8", (PyCFunction)vm_get_u8, METH_VARARGS, + "get_u8(addr) -> Get a u8 at @address of @size bytes (vm endianness)"}, + {"get_u16", (PyCFunction)vm_get_u16, METH_VARARGS, + "get_u16(addr) -> Get a u16 at @address of @size bytes (vm endianness)"}, + {"get_u32", (PyCFunction)vm_get_u32, METH_VARARGS, + "get_u32(addr) -> Get a u32 at @address of @size bytes (vm endianness)"}, + {"get_u64", (PyCFunction)vm_get_u64, METH_VARARGS, + "get_u64(addr) -> Get a u64 at @address of @size bytes (vm endianness)"}, + + + {"set_u8", (PyCFunction)vm_set_u8, METH_VARARGS, + "set_u8(addr, value) -> Set a u8 at @address of @size bytes (vm endianness)"}, + {"set_u16", (PyCFunction)vm_set_u16, METH_VARARGS, + "set_u16(addr, value) -> Set a u16 at @address of @size bytes (vm endianness)"}, + {"set_u32", (PyCFunction)vm_set_u32, METH_VARARGS, + "set_u32(addr, value) -> Set a u32 at @address of @size bytes (vm endianness)"}, + {"set_u64", (PyCFunction)vm_set_u64, METH_VARARGS, + "set_u64(addr, value) -> Set a u64 at @address of @size bytes (vm endianness)"}, + + {"add_memory_page",(PyCFunction)vm_add_memory_page, METH_VARARGS, + "add_memory_page(address, access, content [, cmt]) -> Maps a memory page at @address of len(@content) bytes containing @content with protection @access\n" + "@cmt is a comment linked to the memory page"}, + {"add_memory_breakpoint",(PyCFunction)vm_add_memory_breakpoint, METH_VARARGS, + "add_memory_breakpoint(address, size, access) -> Add a memory breakpoint at @address of @size bytes with @access type"}, + {"remove_memory_breakpoint",(PyCFunction)vm_remove_memory_breakpoint, METH_VARARGS, + "remove_memory_breakpoint(address, access) -> Remove a memory breakpoint at @address with @access type"}, + {"set_exception", (PyCFunction)vm_set_exception, METH_VARARGS, + "set_exception(exception) -> Set the VmMngr exception flags to 
@exception"}, + {"dump_memory_breakpoint", (PyCFunction)vm_dump_memory_breakpoint, METH_VARARGS, + "dump_memory_breakpoint() -> Lists each memory breakpoint"}, + {"get_all_memory",(PyCFunction)vm_get_all_memory, METH_VARARGS, + "get_all_memory() -> Returns a dictionary representing the VmMngr memory.\n" + "Keys are the addresses of each memory page.\n" + "Values are another dictionary containing page properties ('data', 'size', 'access')" + }, + {"reset_memory_page_pool", (PyCFunction)vm_reset_memory_page_pool, METH_VARARGS, + "reset_memory_page_pool() -> Remove all memory pages"}, + {"reset_memory_breakpoint", (PyCFunction)vm_reset_memory_breakpoint, METH_VARARGS, + "reset_memory_breakpoint() -> Remove all memory breakpoints"}, + {"reset_code_bloc_pool", (PyCFunction)vm_reset_code_bloc_pool, METH_VARARGS, + "reset_code_bloc_pool() -> Remove all jitted blocks"}, + {"set_alarm", (PyCFunction)set_alarm, METH_VARARGS, + "set_alarm() -> Force a timer based alarm during a code emulation"}, + {"get_exception",(PyCFunction)vm_get_exception, METH_VARARGS, + "get_exception() -> Returns the VmMngr exception flags"}, + {"set_big_endian",(PyCFunction)vm_set_big_endian, METH_VARARGS, + "set_big_endian() -> Set the VmMngr to Big Endian"}, + {"set_little_endian",(PyCFunction)vm_set_little_endian, METH_VARARGS, + "set_little_endian() -> Set the VmMngr to Little Endian"}, + {"is_little_endian",(PyCFunction)vm_is_little_endian, METH_VARARGS, + "is_little_endian() -> Return True if the VmMngr is Little Endian"}, + {"get_memory_read",(PyCFunction)vm_get_memory_read, METH_VARARGS, + "get_memory_read() -> Retrieve last instruction READ access\n" + "This function is only valid in a memory breakpoint callback." + }, + {"get_memory_write",(PyCFunction)vm_get_memory_write, METH_VARARGS, + "get_memory_write() -> Retrieve last instruction WRITE access\n" + "This function is only valid in a memory breakpoint callback." 
+ }, + {"reset_memory_access",(PyCFunction)vm_reset_memory_access, METH_VARARGS, + "reset_memory_access() -> Reset last memory READ/WRITE"}, + {"add_mem_read",(PyCFunction)py_add_mem_read, METH_VARARGS, + "add_mem_read(address, size) -> Add a READ access at @address of @size bytes"}, + {"add_mem_write",(PyCFunction)py_add_mem_write, METH_VARARGS, + "add_mem_write(address, size) -> Add a WRITE access at @address of @size bytes"}, + {"check_invalid_code_blocs",(PyCFunction)vm_check_invalid_code_blocs, METH_VARARGS, + "check_invalid_code_blocs() -> Set the AUTOMOD flag in exception in case of automodified code"}, + {"check_memory_breakpoint",(PyCFunction)vm_check_memory_breakpoint, METH_VARARGS, + "check_memory_breakpoint() -> Set the BREAKPOINT_MEMORY flag in exception in case of memory breakpoint occurred"}, + + {NULL} /* Sentinel */ +}; + +static int +VmMngr_init(VmMngr *self, PyObject *args, PyObject *kwds) +{ + memset(&(self->vm_mngr), 0, sizeof(self->vm_mngr)); + return 0; +} + +static PyGetSetDef VmMngr_getseters[] = { + {"vmmngr", + (getter)VmMngr_get_vmmngr, (setter)VmMngr_set_vmmngr, + "vmmngr object", + NULL}, + {NULL} /* Sentinel */ +}; + +static PyTypeObject VmMngrType = { + PyVarObject_HEAD_INIT(NULL, 0) + "VmMngr", /*tp_name*/ + sizeof(VmMngr), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)VmMngr_dealloc,/*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + vm_dump, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "VmMngr object", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + VmMngr_methods, /* tp_methods */ + VmMngr_members, /* tp_members */ + VmMngr_getseters, /* tp_getset */ + 0, /* tp_base */ + 
0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)VmMngr_init, /* tp_init */ + 0, /* tp_alloc */ + VmMngr_new, /* tp_new */ +}; + +static PyMethodDef VmMngr_Methods[] = { + {NULL, NULL, 0, NULL} /* Sentinel */ + +}; + +char vm_mngr_mod_docs[] = "vm_mngr module."; +char vm_mngr_mod_name[] = "VmMngr"; + + +MOD_INIT(VmMngr) +{ + PyObject *module; + + MOD_DEF(module, "VmMngr", "vm_mngr module", VmMngr_Methods); + + if (module == NULL) + return NULL; + + if (PyType_Ready(&VmMngrType) < 0) + return NULL; + + Py_INCREF(&VmMngrType); + if (PyModule_AddObject(module, "Vm", (PyObject *)&VmMngrType) < 0) + return NULL; + + return module; +} diff --git a/miasm/jitter/vm_mngr_py.h b/miasm/jitter/vm_mngr_py.h new file mode 100644 index 00000000..e2e43c65 --- /dev/null +++ b/miasm/jitter/vm_mngr_py.h @@ -0,0 +1,15 @@ +#ifndef VM_MNGR_PY_H +#define VM_MNGR_PY_H + +#ifdef _WIN32 +#define SIGALRM 0 +#endif + +typedef struct { + PyObject_HEAD + PyObject *vmmngr; + vm_mngr_t vm_mngr; +} VmMngr; + + +#endif// VM_MNGR_PY_H diff --git a/miasm/os_dep/__init__.py b/miasm/os_dep/__init__.py new file mode 100644 index 00000000..6aa660d8 --- /dev/null +++ b/miasm/os_dep/__init__.py @@ -0,0 +1 @@ +"Operating System specific methods" diff --git a/miasm/os_dep/common.py b/miasm/os_dep/common.py new file mode 100644 index 00000000..87602b3c --- /dev/null +++ b/miasm/os_dep/common.py @@ -0,0 +1,168 @@ +import os + +from future.utils import viewitems + +from miasm.core.utils import force_bytes +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.core.utils import get_caller_name +from miasm.core.utils import pck64, upck64 + +BASE_SB_PATH = "file_sb" + + +def get_str_ansi(jitter, ad_str, max_char=None): + l = 0 + tmp = ad_str + while ((max_char is None or l < max_char) and + jitter.vm.get_mem(tmp, 1) != b"\x00"): + tmp += 1 + l += 1 + return jitter.vm.get_mem(ad_str, l) + + +def get_str_unic(jitter, ad_str, max_char=None): + l = 0 + tmp 
= ad_str + while ((max_char is None or l < max_char) and + jitter.vm.get_mem(tmp, 2) != b"\x00\x00"): + tmp += 2 + l += 2 + s = jitter.vm.get_mem(ad_str, l) + s = s.decode("utf-16le") + return s + + +def set_str_ansi(value): + value = force_bytes(value) + return value + b"\x00" + + +def set_str_unic(value): + try: + value = value.decode() + except AttributeError: + pass + return value.encode("utf-16le") + b'\x00' * 2 + + +class heap(object): + + "Light heap simulation" + + addr = 0x20000000 + align = 0x1000 + size = 32 + mask = (1 << size) - 1 + + def next_addr(self, size): + """ + @size: the size to allocate + return the future checnk address + """ + ret = self.addr + self.addr = (self.addr + size + self.align - 1) + self.addr &= self.mask ^ (self.align - 1) + return ret + + def alloc(self, jitter, size, perm=PAGE_READ | PAGE_WRITE): + """ + @jitter: a jitter instance + @size: the size to allocate + @perm: permission flags (see vm_alloc doc) + """ + return self.vm_alloc(jitter.vm, size, perm) + + def vm_alloc(self, vm, size, perm=PAGE_READ | PAGE_WRITE): + """ + @vm: a VmMngr instance + @size: the size to allocate + @perm: permission flags (PAGE_READ, PAGE_WRITE, PAGE_EXEC or any `|` + combination of them); default is PAGE_READ|PAGE_WRITE + """ + addr = self.next_addr(size) + vm.add_memory_page( + addr, + perm, + b"\x00" * (size), + "Heap alloc by %s" % get_caller_name(2) + ) + return addr + + def get_size(self, vm, ptr): + """ + @vm: a VmMngr instance + @size: ptr to get the size of the associated allocation. + + `ptr` can be the base address of a previous allocation, or an address + within the allocated range. The size of the whole allocation is always + returned, regardless ptr is the base address or not. 
+ """ + assert vm.is_mapped(ptr, 1) + data = vm.get_all_memory() + ptr_page = data.get(ptr, None) + if ptr_page is None: + for address, page_info in viewitems(data): + if address <= ptr < address + page_info["size"]: + ptr_page = page_info + break + else: + raise RuntimeError("Must never happen (unmapped but mark as mapped by API)") + return ptr_page["size"] + + +def windows_to_sbpath(path): + """Convert a Windows path to a valid filename within the sandbox + base directory. + + """ + path = [elt for elt in path.lower().replace('/', '_').split('\\') if elt] + return os.path.join(BASE_SB_PATH, *path) + + +def unix_to_sbpath(path): + """Convert a POSIX path to a valid filename within the sandbox + base directory. + + """ + path = [elt for elt in path.split('/') if elt] + return os.path.join(BASE_SB_PATH, *path) + +def get_fmt_args(fmt, cur_arg, get_str, get_arg_n): + idx = 0 + fmt = get_str(fmt) + if isinstance(fmt, bytes): + chars_format = b'%cdfsuxX' + char_percent = b'%' + char_string = b's' + output = b"" + else: + chars_format = u'%cdfsuxX' + char_percent = u'%' + char_string = u's' + output = u"" + + while True: + if idx == len(fmt): + break + char = fmt[idx:idx+1] + idx += 1 + if char == char_percent: + token = char_percent + while True: + char = fmt[idx:idx+1] + idx += 1 + token += char + if char in chars_format: + break + if char == char_percent: + output += char + continue + if token.endswith(char_string): + addr = get_arg_n(cur_arg) + arg = get_str(addr) + else: + arg = get_arg_n(cur_arg) + char = token % arg + cur_arg += 1 + output += char + return output diff --git a/miasm/os_dep/linux/__init__.py b/miasm/os_dep/linux/__init__.py new file mode 100644 index 00000000..4434ce50 --- /dev/null +++ b/miasm/os_dep/linux/__init__.py @@ -0,0 +1 @@ +# Linux emulation diff --git a/miasm/os_dep/linux/environment.py b/miasm/os_dep/linux/environment.py new file mode 100644 index 00000000..8826abb7 --- /dev/null +++ b/miasm/os_dep/linux/environment.py @@ -0,0 +1,916 @@ 
class FileDescriptor(object):
    """Base abstraction of a file descriptor of the emulated system

    According to inode(7), the possible file types are:
    - socket
    - symbolic link
    - regular file
    - block device
    - directory
    - character device
    - FIFO

    The class attributes below are the default values reported by `stat`.
    """

    # File type part of st_mode
    file_type = None
    # Permission part of st_mode (9 least significant bits)
    file_mode = 0o0777
    # st_dev (device containing the file) / st_rdev
    cont_device_id = None
    device_id = 0
    # st_ino
    inode = None
    # st_nlink (number of hard links)
    nlink = 0
    # st_uid / st_gid
    uid = None
    gid = None
    # st_size / st_blksize / st_blocks
    size = 0
    blksize = 0
    blocks = 0
    # Access, modification and status change times
    atime = 0
    atimensec = 0
    mtime = 0
    mtimensec = 0
    ctime = 0
    ctimensec = 0

    def __init__(self, number):
        self.is_closed = False
        self.number = number

    def stat(self):
        """Return the StatInfo built from this descriptor's attributes"""
        fields = {
            "st_mode": self.file_type | self.file_mode,
            "st_dev": self.cont_device_id,
            "st_rdev": self.device_id,
            "st_ino": self.inode,
            "st_nlink": self.nlink,
            "st_uid": self.uid,
            "st_gid": self.gid,
            "st_size": self.size,
            "st_blksize": self.blksize,
            "st_blocks": self.blocks,
            "st_atime": self.atime,
            "st_atimensec": self.atimensec,
            "st_mtime": self.mtime,
            "st_mtimensec": self.mtimensec,
            "st_ctime": self.ctime,
            "st_ctimensec": self.ctimensec,
        }
        return StatInfo(**fields)

    def close(self):
        """Flag the descriptor as closed"""
        self.is_closed = True
class FileSystem(object):
    """File system abstraction
    Provides standard operations on the filesystem, (a bit like FUSE)

    The API of FileSystem only takes sandbox-side paths. FileSystem should be
    the only object able to interact with real paths, outside the sandbox.

    Thus, if `resolve_path` is correctly implemented and used, it should not be
    possible to modify files outside the sandboxed path
    """

    device_id = 0x1234 # ID of device containing file (stat.st_dev)
    blocksize = 0x1000 # Size of block on this filesystem
    f_type = 0xef53 # (Type of filesystem) EXT4_SUPER_MAGIC
    nb_total_block = 0x1000
    nb_free_block = 0x100
    nb_avail_block = nb_free_block # Available to unprivileged user
    nb_total_fnode = 100 # Total file nodes in filesystem
    nb_free_fnode = 50
    max_filename_len = 256
    fragment_size = 0
    mount_flags = 0

    def __init__(self, base_path, linux_env):
        """
        @base_path: real directory used as the root of the sandbox
        @linux_env: LinuxEnvironment instance owning this filesystem
        """
        self.base_path = base_path
        self.linux_env = linux_env
        # Paths (or objects with a 'match' method, such as compiled regexps)
        # returned unchanged by `resolve_path`
        self.passthrough = []
        self.path_to_inode = {} # Real path (post-resolution) -> inode number

    def resolve_path(self, path, follow_link=True):
        """Resolve @path to the corresponding sandboxed path

        @path: sandbox-side path, absolute or relative
        @follow_link: if set, symbolic links are recursively resolved

        The path is interpreted relatively to the sandbox root, chroot-like:
        '..' components cannot climb above `base_path`.
        """
        # Remove '../', etc.
        path = os.path.normpath(path)

        # Passthrough
        for passthrough in self.passthrough:
            if hasattr(passthrough, "match"):
                if passthrough.match(path):
                    return path
            elif passthrough == path:
                return path

        # Anchor the path at the sandbox root before normalizing, so that
        # leading '..' components (which 'normpath' keeps on relative paths)
        # are dropped instead of escaping the sandbox once joined to the base.
        # 'lstrip' also covers the leading '//' preserved by POSIX normpath.
        path = os.path.normpath(os.path.sep + path).lstrip(os.path.sep)

        base_path = os.path.abspath(self.base_path)
        out_path = os.path.join(base_path, path)
        if not out_path.startswith(base_path + os.path.sep):
            raise RuntimeError("Path %r escapes the sandbox" % out_path)
        if os.path.islink(out_path):
            link_target = os.readlink(out_path)
            # Link can be absolute or relative -> absolute
            link = os.path.normpath(os.path.join(os.path.dirname(path), link_target))
            if follow_link:
                out_path = self.resolve_path(link)
            else:
                # NOTE(review): this is the sandbox-side link target, not a
                # real path -- confirm this is what callers expect
                out_path = link
        return out_path

    def get_path_inode(self, real_path):
        """Return the inode number of @real_path, allocating one if needed"""
        inode = self.path_to_inode.setdefault(real_path, len(self.path_to_inode))
        return inode

    def exists(self, path):
        """Return True if the sandbox-side @path exists"""
        sb_path = self.resolve_path(path)
        return os.path.exists(sb_path)

    def readlink(self, path):
        """Return the target of the link @path, None if it is not a link"""
        sb_path = self.resolve_path(path, follow_link=False)
        if not os.path.islink(sb_path):
            return None
        return os.readlink(sb_path)

    def statfs(self):
        """Return the StatFSInfo describing this filesystem"""
        return StatFSInfo(
            f_type=self.f_type, f_bsize=self.blocksize,
            f_blocks=self.nb_total_block, f_bfree=self.nb_free_block,
            f_bavail=self.nb_avail_block, f_files=self.nb_total_fnode,
            f_ffree=self.nb_free_fnode, f_fsid=self.device_id,
            f_namelen=self.max_filename_len,
            f_frsize=self.fragment_size, f_flags=self.mount_flags, f_spare=0)

    def getattr_(self, path, follow_link=True):
        """Return the stat information of @path

        The file is opened through `open_`, stat-ed, then closed.
        """
        sb_path = self.resolve_path(path, follow_link=follow_link)
        flags = self.linux_env.O_RDONLY
        if os.path.isdir(sb_path):
            flags |= self.linux_env.O_DIRECTORY

        fd = self.open_(path, flags, follow_link=follow_link)
        info = self.linux_env.fstat(fd)
        self.linux_env.close(fd)
        return info

    def open_(self, path, flags, follow_link=True):
        """Open the sandbox-side @path and register its file descriptor

        @path: sandbox-side path
        @flags: guest 'open' flags (only read-only access is implemented)
        @follow_link: forwarded to `resolve_path`

        Return the new file descriptor number, or -1 if the path does not
        exist. Raise RuntimeError on unsupported access modes or file types.
        """
        path = self.resolve_path(path, follow_link=follow_link)
        if not os.path.exists(path):
            # ENOENT (No such file or directory)
            return -1
        fd = self.linux_env.next_fd()
        acc_mode = flags & self.linux_env.O_ACCMODE

        if os.path.isdir(path):
            assert flags & self.linux_env.O_DIRECTORY == self.linux_env.O_DIRECTORY
            if acc_mode == self.linux_env.O_RDONLY:
                fdesc = FileDescriptorDirectory(fd, flags, self, path)
            else:
                raise RuntimeError("Not implemented")
        elif os.path.isfile(path):
            # @flags are guest flags: compare against the guest constant;
            # the host constant is only used for the real 'os.open'
            if acc_mode == self.linux_env.O_RDONLY:
                # Read only
                real_fd = os.open(path, os.O_RDONLY)
            else:
                raise RuntimeError("Not implemented")
            fdesc = FileDescriptorRegularFile(fd, flags, self, real_fd)

        elif os.path.islink(path):
            raise RuntimeError("Not implemented")
        else:
            raise RuntimeError("Unknown file type for %r" % path)

        self.linux_env.file_descriptors[fd] = fdesc
        # Set stat info
        fdesc.cont_device_id = self.device_id
        fdesc.inode = self.get_path_inode(path)
        fdesc.uid = self.linux_env.user_uid
        fdesc.gid = self.linux_env.user_gid
        size = os.path.getsize(path)
        fdesc.size = size
        fdesc.blksize = self.blocksize
        # Number of 512-byte blocks, rounded up
        fdesc.blocks = (size + 511) // 512
        return fd
None + + # Filesystem + filesystem_base = "file_sb" + file_descriptors = None + + # Current process + process_tid = 1000 + process_pid = 1000 + + # Syscall restrictions + ioctl_allowed = None # list of (fd, cmd), None value for wildcard + ioctl_disallowed = None # list of (fd, cmd), None value for wildcard + + # Time + base_time = 1531900000 + + # Arch specific constant + O_ACCMODE = None + O_CLOEXEC = None + O_DIRECTORY = None + O_LARGEFILE = None + O_NONBLOCK = None + O_RDONLY = None + + def __init__(self): + stdin = FileDescriptorSTDIN(0) + stdout = FileDescriptorSTDOUT(1) + stderr = FileDescriptorSTDERR(2) + for std in [stdin, stdout, stderr]: + std.uid = self.user_uid + std.gid = self.user_gid + self.file_descriptors = { + 0: stdin, + 1: stdout, + 2: stderr, + } + self.ioctl_allowed = [ + (0, termios.TCGETS), + (0, termios.TIOCGWINSZ), + (0, termios.TIOCSWINSZ), + (1, termios.TCGETS), + (1, termios.TIOCGWINSZ), + (1, termios.TIOCSWINSZ), + ] + self.ioctl_disallowed = [ + (2, termios.TCGETS), + (0, termios.TCSETSW), + ] + self.filesystem = FileSystem(self.filesystem_base, self) + self.network = Networking(self) + + def next_fd(self): + return len(self.file_descriptors) + + def clock_gettime(self): + out = self.base_time + self.base_time += 1 + return out + + def open_(self, path, flags, follow_link=True): + """Stub for 'open' syscall""" + return self.filesystem.open_(path, flags, follow_link=follow_link) + + def socket(self, family, type_, protocol): + """Stub for 'socket' syscall""" + return self.network.socket(family, type_, protocol) + + def fstat(self, fd): + """Get file status through fd""" + fdesc = self.file_descriptors.get(fd) + if fdesc is None: + return None + return fdesc.stat() + + def stat(self, path): + """Get file status through path""" + return self.filesystem.getattr_(path) + + def lstat(self, path): + """Get file status through path (not following links)""" + return self.filesystem.getattr_(path, follow_link=False) + + def close(self, fd): + 
def getdents(self, fd, count, packing_callback):
    """Stub for 'getdents' syscall

    'getdents64' must be handled by caller (only the structure layout is
    modified)

    @fd: getdents' fd argument
    @count: getdents' count argument (maximum size of the output, in bytes)
    @packing_callback(cur_len, d_ino, d_type, name) -> entry

    Return the concatenation of the packed entries, as bytes.
    Raise RuntimeError if @fd is not a directory.
    """
    fdesc = self.file_descriptors[fd]
    if not isinstance(fdesc, FileDescriptorDirectory):
        raise RuntimeError("Not implemented")

    # Packed entries are accumulated as bytes: starting from a text string
    # fails on Python 3 as soon as the first entry is appended
    entries = []
    out_len = 0
    # fdesc.listdir continues from where it stopped
    for name in fdesc.listdir():
        d_ino = 1 # Not the real one
        d_type = 0 # DT_UNKNOWN (getdents(2) "All applications must properly
                   # handle a return of DT_UNKNOWN.")
        entry = packing_callback(out_len, d_ino, d_type, name)

        if out_len + len(entry) > count:
            # Report to a further call
            fdesc.cur_listdir.append(name)
            break
        entries.append(entry)
        out_len += len(entry)
    return b"".join(entries)
unsigned short ws_col; /* columns, in characters */ + # unsigned short ws_xpixel; /* horizontal size, pixels */ + # unsigned short ws_ypixel; /* vertical size, pixels */ + # }; + return 1000, 360, 1000, 1000 + elif cmd == termios.TIOCSWINSZ: + # Ignore it + return + else: + raise RuntimeError("Not implemented") + + elif disallowed: + return False + + else: + raise KeyError("Unknown ioctl fd:%x cmd:%x" % (fd, cmd)) + + def mmap(self, addr, len_, prot, flags, fd, off, vmmngr): + """Stub for 'mmap' syscall + + 'mmap2' must be implemented by calling this function with off * 4096 + """ + if addr == 0: + addr = self.mmap_current + self.mmap_current += (len_ + 0x1000) & ~0xfff + + all_mem = vmmngr.get_all_memory() + mapped = interval( + [ + (start, start + info["size"] - 1) + for start, info in viewitems(all_mem) + ] + ) + + MAP_FIXED = 0x10 + if flags & MAP_FIXED: + # Alloc missing and override + missing = interval([(addr, addr + len_ - 1)]) - mapped + for start, stop in missing: + vmmngr.add_memory_page( + start, + PAGE_READ|PAGE_WRITE, + b"\x00" * (stop - start + 1), + "mmap allocated" + ) + else: + # Find first candidate segment nearby addr + for start, stop in mapped: + if stop < addr: + continue + rounded = (stop + 1 + 0x1000) & ~0xfff + if (interval([(rounded, rounded + len_)]) & mapped).empty: + addr = rounded + break + else: + assert (interval([(addr, addr + len_)]) & mapped).empty + + vmmngr.add_memory_page( + addr, + PAGE_READ|PAGE_WRITE, + b"\x00" * len_, + "mmap allocated" + ) + + + if fd == 0xffffffff: + if off != 0: + raise RuntimeError("Not implemented") + data = b"\x00" * len_ + else: + fdesc = self.file_descriptors[fd] + cur_pos = fdesc.tell() + fdesc.seek(off) + data = fdesc.read(len_) + fdesc.seek(cur_pos) + + vmmngr.set_mem(addr, data) + return addr + + def brk(self, addr, vmmngr): + """Stub for 'brk' syscall""" + if addr == 0: + addr = self.brk_current + else: + all_mem = vmmngr.get_all_memory() + mapped = interval( + [ + (start, start + info["size"] 
class AuxVec(object):
    """Auxiliary vector abstraction, filled with default values
    (mainly based on https://lwn.net/Articles/519085)

    # Standard usage
    >>> auxv = AuxVec(elf_base_addr, cont_target.entry_point, linux_env)

    # Enable AT_SECURE
    >>> auxv = AuxVec(..., AT_SECURE=1)
    # Modify AT_RANDOM
    >>> auxv = AuxVec(..., AT_RANDOM=b"\\x00"*0x10)

    # Using AuxVec instance for stack preparation
    # First, fill memory with vectors data
    >>> for AT_number, data in auxv.data_to_map():
            dest_ptr = ...
            copy_to_dest(data, dest_ptr)
            auxv.ptrs[AT_number] = dest_ptr
    # Then, get the key: value (with value being sometime a pointer)
    >>> for auxid, auxval in auxv.iteritems():
            ...
    """

    AT_PHDR = 3
    AT_PHNUM = 5
    AT_PAGESZ = 6
    AT_ENTRY = 9
    AT_UID = 11
    AT_EUID = 12
    AT_GID = 13
    AT_EGID = 14
    AT_PLATFORM = 15
    AT_HWCAP = 16
    AT_SECURE = 23
    AT_RANDOM = 25
    AT_SYSINFO_EHDR = 33

    def __init__(self, elf_phdr_vaddr, entry_point, linux_env, **kwargs):
        """Instantiate an AuxVec, with required elements:
        - elf_phdr_vaddr: virtual address of the ELF's PHDR in memory
        - entry_point: virtual address of the ELF entry point
        - linux_env: LinuxEnvironment instance, used to provides some of the
          option values

        Others options can be overridden by named arguments, using the name
        of the corresponding constant (for instance AT_SECURE=1)
        """
        self.info = {
            self.AT_PHDR: elf_phdr_vaddr,
            self.AT_PHNUM: 9,
            self.AT_PAGESZ: 0x1000,
            self.AT_ENTRY: entry_point,
            self.AT_UID: linux_env.user_uid,
            self.AT_EUID: linux_env.user_euid,
            self.AT_GID: linux_env.user_gid,
            self.AT_EGID: linux_env.user_egid,
            self.AT_PLATFORM: linux_env.platform_arch,
            self.AT_HWCAP: 0,
            self.AT_SECURE: 0,
            self.AT_RANDOM: b"\x00" * 0x10,
            # vDSO is not mandatory
            self.AT_SYSINFO_EHDR: None,
        }
        # Keyword names are strings while `info` is indexed by AT_* numbers:
        # translate known constant names so the override replaces the default
        # entry. Other names are stored unchanged, as before.
        for name, value in kwargs.items():
            key = getattr(self, name, name) if name.startswith("AT_") else name
            self.info[key] = value
        self.ptrs = {} # info key -> corresponding virtual address

    def data_to_map(self):
        """Iterator on (AT_number, data)
        Once the data has been mapped, the corresponding ptr must be set in
        'self.ptrs[AT_number]'
        """
        for AT_number in [self.AT_PLATFORM, self.AT_RANDOM]:
            yield (AT_number, self.info[AT_number])

    def iteritems(self):
        """Iterator on auxiliary vector id and values

        A value mapped in memory is replaced by its pointer (`self.ptrs`);
        entries whose value is None are skipped.
        """
        for AT_number, value in self.info.items():
            if AT_number in self.ptrs:
                value = self.ptrs[AT_number]
            if value is None:
                # AT to ignore
                continue
            yield (AT_number, value)

    items = iteritems
@hlt_address (default to 0x13371acc): stopping address + + Example of use: + >>> jitter = machine.jitter() + >>> jitter.init_stack() + >>> linux_env = LinuxEnvironment_x86_64() + >>> argv = ["/bin/ls", "-lah"] + >>> envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} + >>> auxv = AuxVec(elf_base_addr, entry_point, linux_env) + >>> prepare_loader_x86_64(jitter, argv, envp, auxv, linux_env) + # One may want to enable syscall handling here + # The program can now run from the loader + >>> jitter.init_run(ld_entry_point) + >>> jitter.continue_run() + """ + # Stack layout looks like + # [data] + # - auxv values + # - envp name=value + # - argv arguments + # [auxiliary vector] + # [environment pointer] + # [argument vector] + + for AT_number, data in auxv.data_to_map(): + data += b"\x00" + jitter.cpu.RSP -= len(data) + ptr = jitter.cpu.RSP + jitter.vm.set_mem(ptr, data) + auxv.ptrs[AT_number] = ptr + + env_ptrs = [] + for name, value in viewitems(envp): + env = b"%s=%s\x00" % (name, value) + jitter.cpu.RSP -= len(env) + ptr = jitter.cpu.RSP + jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + + argv_ptrs = [] + for arg in argv: + arg += b"\x00" + jitter.cpu.RSP -= len(arg) + ptr = jitter.cpu.RSP + jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + jitter.push_uint64_t(hlt_address) + jitter.push_uint64_t(0) + jitter.push_uint64_t(0) + for auxid, auxval in viewitems(auxv): + jitter.push_uint64_t(auxval) + jitter.push_uint64_t(auxid) + jitter.push_uint64_t(0) + for ptr in reversed(env_ptrs): + jitter.push_uint64_t(ptr) + jitter.push_uint64_t(0) + for ptr in reversed(argv_ptrs): + jitter.push_uint64_t(ptr) + jitter.push_uint64_t(len(argv)) + + + +def _arml__kuser_get_tls(linux_env, jitter): + # __kuser_get_tls + jitter.pc = jitter.cpu.LR + jitter.cpu.R0 = linux_env.tls + return True + +def _arml__kuser_cmpxchg(jitter): + oldval = jitter.cpu.R0 + newval = jitter.cpu.R1 + ptr = jitter.cpu.R2 + + value = struct.unpack(">> jitter = machine.jitter() + >>> 
jitter.init_stack() + >>> linux_env = LinuxEnvironment_arml() + >>> argv = ["/bin/ls", "-lah"] + >>> envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} + >>> auxv = AuxVec(elf_base_addr, entry_point, linux_env) + >>> prepare_loader_arml(jitter, argv, envp, auxv, linux_env) + # One may want to enable syscall handling here + # The program can now run from the loader + >>> jitter.init_run(ld_entry_point) + >>> jitter.continue_run() + """ + # Stack layout looks like + # [data] + # - auxv values + # - envp name=value + # - argv arguments + # [auxiliary vector] + # [environment pointer] + # [argument vector] + + for AT_number, data in auxv.data_to_map(): + data += b"\x00" + jitter.cpu.SP -= len(data) + ptr = jitter.cpu.SP + jitter.vm.set_mem(ptr, data) + auxv.ptrs[AT_number] = ptr + + env_ptrs = [] + for name, value in viewitems(envp): + env = b"%s=%s\x00" % (name, value) + jitter.cpu.SP -= len(env) + ptr = jitter.cpu.SP + jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + + argv_ptrs = [] + for arg in argv: + arg += b"\x00" + jitter.cpu.SP -= len(arg) + ptr = jitter.cpu.SP + jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + jitter.push_uint32_t(hlt_address) + jitter.push_uint32_t(0) + jitter.push_uint32_t(0) + for auxid, auxval in viewitems(auxv): + jitter.push_uint32_t(auxval) + jitter.push_uint32_t(auxid) + jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + jitter.push_uint32_t(ptr) + jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + jitter.push_uint32_t(ptr) + jitter.push_uint32_t(len(argv)) + + # Add kernel user helpers + # from Documentation/arm/kernel_user_helpers.txt + + if linux_env.kuser_helper_version >= 1: + jitter.add_breakpoint( + 0xFFFF0FE0, + functools.partial(_arml__kuser_get_tls, linux_env) + ) + + if linux_env.kuser_helper_version >= 2: + jitter.add_breakpoint(0XFFFF0FC0, _arml__kuser_cmpxchg) + + if linux_env.kuser_helper_version >= 3: + jitter.add_breakpoint(0xFFFF0FA0, _arml__kuser_memory_barrier) + + 
jitter.add_breakpoint(0xffff0ffc, _arml__kuser_helper_version) diff --git a/miasm/os_dep/linux/syscall.py b/miasm/os_dep/linux/syscall.py new file mode 100644 index 00000000..1edf72c4 --- /dev/null +++ b/miasm/os_dep/linux/syscall.py @@ -0,0 +1,1040 @@ +from builtins import range +import fcntl +import functools +import logging +import struct +import termios + +from miasm.jitter.csts import EXCEPT_PRIV_INSN, EXCEPT_INT_XX + +log = logging.getLogger('syscalls') +hnd = logging.StreamHandler() +hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) +log.addHandler(hnd) +log.setLevel(logging.WARNING) + + +def _dump_struct_stat_x86_64(info): + data = struct.pack( + "QQQIIIIQQQQQQQQQQQQQ", + info.st_dev, + info.st_ino, + info.st_nlink, + info.st_mode, + info.st_uid, + info.st_gid, + 0, # 32 bit padding + info.st_rdev, + info.st_size, + info.st_blksize, + info.st_blocks, + info.st_atime, + info.st_atimensec, + info.st_mtime, + info.st_mtimensec, + info.st_ctime, + info.st_ctimensec, + 0, # unused + 0, # unused + 0, # unused + ) + return data + + +def _dump_struct_stat_arml(info): + data = struct.pack( + "QIIIIIIIIIIIIIIIIII", + info.st_dev, + 0, # pad + info.st_ino, + info.st_mode, + info.st_nlink, + info.st_uid, + info.st_gid, + info.st_rdev, + info.st_size, + info.st_blksize, + info.st_blocks, + info.st_atime, + info.st_atimensec, + info.st_mtime, + info.st_mtimensec, + info.st_ctime, + info.st_ctimensec, + 0, # unused + 0, # unused + ) + return data + + +def sys_x86_64_rt_sigaction(jitter, linux_env): + # Parse arguments + sig, act, oact, sigsetsize = jitter.syscall_args_systemv(4) + log.debug("sys_rt_sigaction(%x, %x, %x, %x)", sig, act, oact, sigsetsize) + + # Stub + if oact != 0: + # Return an empty old action + jitter.vm.set_mem(oact, b"\x00" * sigsetsize) + jitter.syscall_ret_systemv(0) + + +def sys_generic_brk(jitter, linux_env): + # Parse arguments + addr, = jitter.syscall_args_systemv(1) + log.debug("sys_brk(%d)", addr) + + # Stub + 
def sys_x86_64_newuname(jitter, linux_env):
    """Stub for the 'newuname' syscall: fill the guest 'struct utsname'

    @jitter: Jitter instance
    @linux_env: LinuxEnvironment instance providing the sys_* strings
    """
    # struct utsname {
    #     char sysname[];    /* Operating system name (e.g., "Linux") */
    #     char nodename[];   /* Name within "some implementation-defined
    #                            network" */
    #     char release[];    /* Operating system release (e.g., "2.6.28") */
    #     char version[];    /* Operating system version */
    #     char machine[];    /* Hardware identifier */
    # }

    # Parse arguments
    nameptr, = jitter.syscall_args_systemv(1)
    log.debug("sys_newuname(%x)", nameptr)

    # Stub
    info = [
        linux_env.sys_sysname,
        linux_env.sys_nodename,
        linux_env.sys_release,
        linux_env.sys_version,
        linux_env.sys_machine
    ]
    # Each field is a fixed 0x41-byte, NUL-terminated array: truncate values
    # which are too long, so that the following fields stay at their offset
    field_len = 0x41
    output = b"".join(
        elem[:field_len - 1].ljust(field_len, b"\x00")
        for elem in info
    )
    jitter.vm.set_mem(nameptr, output)
    jitter.syscall_ret_systemv(0)
def _sys_writev(jitter, linux_env, iovec_fmt):
    """Common implementation of the 'writev' stubs

    @jitter: Jitter instance
    @linux_env: LinuxEnvironment instance
    @iovec_fmt: struct format of one 'struct iovec' (iov_base, iov_len)
    """
    # Parse arguments
    fd, vec, vlen = jitter.syscall_args_systemv(3)
    log.debug("sys_writev(%d, %d, %x)", fd, vec, vlen)

    # Stub
    fdesc = linux_env.file_descriptors[fd]
    iovec_size = struct.calcsize(iovec_fmt)
    written = 0
    for iovec_num in range(vlen):
        # struct iovec {
        #     void  *iov_base;    /* Starting address */
        #     size_t iov_len;     /* Number of bytes to transfer */
        # };
        iovec = jitter.vm.get_mem(vec + iovec_num * iovec_size, iovec_size)
        iov_base, iov_len = struct.unpack(iovec_fmt, iovec)
        if iov_len == 0:
            continue
        # Buffers are raw memory, not C strings: read exactly iov_len bytes
        # instead of stopping at the first NUL byte
        fdesc.write(jitter.vm.get_mem(iov_base, iov_len))
        written += iov_len

    # writev returns the number of bytes written, not the number of vectors
    jitter.syscall_ret_systemv(written)


def sys_x86_64_writev(jitter, linux_env):
    """Stub for 'writev' syscall (x86_64: 64-bit iov_base / iov_len)"""
    _sys_writev(jitter, linux_env, "<QQ")


def sys_arml_writev(jitter, linux_env):
    """Stub for 'writev' syscall (arml: 32-bit iov_base / iov_len)"""
    _sys_writev(jitter, linux_env, "<II")
sys_arml_fstat64(jitter, linux_env): + # Parse arguments + fd, statbuf = jitter.syscall_args_systemv(2) + log.debug("sys_fstat(%d, %x)", fd, statbuf) + + # Stub + info = linux_env.fstat(fd) + data = _dump_struct_stat_arml(info) + jitter.vm.set_mem(statbuf, data) + jitter.syscall_ret_systemv(0) + + +def sys_generic_mmap(jitter, linux_env): + # Parse arguments + addr, len_, prot, flags, fd, off = jitter.syscall_args_systemv(6) + log.debug("sys_mmap(%x, %x, %x, %x, %x, %x)", addr, len_, prot, flags, fd, off) + + # Stub + addr = linux_env.mmap(addr, len_, prot & 0xFFFFFFFF, flags & 0xFFFFFFFF, + fd & 0xFFFFFFFF, off, jitter.vm) + jitter.syscall_ret_systemv(addr) + + +def sys_generic_mmap2(jitter, linux_env): + # Parse arguments + addr, len_, prot, flags, fd, off = jitter.syscall_args_systemv(6) + log.debug("sys_mmap2(%x, %x, %x, %x, %x, %x)", addr, len_, prot, flags, fd, off) + off = off * 4096 + + # Stub + addr = linux_env.mmap(addr, len_, prot & 0xFFFFFFFF, flags & 0xFFFFFFFF, + fd & 0xFFFFFFFF, off, jitter.vm) + jitter.syscall_ret_systemv(addr) + + +def sys_generic_mprotect(jitter, linux_env): + # Parse arguments + start, len_, prot = jitter.syscall_args_systemv(3) + assert jitter.vm.is_mapped(start, len_) + log.debug("sys_mprotect(%x, %x, %x)", start, len_, prot) + + # Do nothing + jitter.syscall_ret_systemv(0) + + +def sys_generic_close(jitter, linux_env): + # Parse arguments + fd, = jitter.syscall_args_systemv(1) + log.debug("sys_close(%x)", fd) + + # Stub + linux_env.close(fd) + jitter.syscall_ret_systemv(0) + + +def sys_x86_64_arch_prctl(jitter, linux_env): + # Parse arguments + code_name = { + 0x1001: "ARCH_SET_GS", + 0x1002: "ARCH_SET_FS", + 0x1003: "ARCH_GET_FS", + 0x1004: "ARCH_GET_GS", + } + code = jitter.cpu.RDI + rcode = code_name[code] + addr = jitter.cpu.RSI + log.debug("sys_arch_prctl(%s, %x)", rcode, addr) + + if code == 0x1002: + jitter.cpu.set_segm_base(jitter.cpu.FS, addr) + else: + raise RuntimeError("Not implemented") + jitter.cpu.RAX = 0 + + 
+def sys_x86_64_set_tid_address(jitter, linux_env): + # Parse arguments + tidptr = jitter.cpu.RDI + # clear_child_tid = tidptr + log.debug("sys_set_tid_address(%x)", tidptr) + + jitter.cpu.RAX = linux_env.process_tid + + +def sys_x86_64_set_robust_list(jitter, linux_env): + # Parse arguments + head = jitter.cpu.RDI + len_ = jitter.cpu.RSI + # robust_list = head + log.debug("sys_set_robust_list(%x, %x)", head, len_) + jitter.cpu.RAX = 0 + +def sys_x86_64_rt_sigprocmask(jitter, linux_env): + # Parse arguments + how = jitter.cpu.RDI + nset = jitter.cpu.RSI + oset = jitter.cpu.RDX + sigsetsize = jitter.cpu.R10 + log.debug("sys_rt_sigprocmask(%x, %x, %x, %x)", how, nset, oset, sigsetsize) + if oset != 0: + raise RuntimeError("Not implemented") + jitter.cpu.RAX = 0 + + +def sys_x86_64_prlimit64(jitter, linux_env): + # Parse arguments + pid = jitter.cpu.RDI + resource = jitter.cpu.RSI + new_rlim = jitter.cpu.RDX + if new_rlim != 0: + raise RuntimeError("Not implemented") + old_rlim = jitter.cpu.R10 + log.debug("sys_prlimit64(%x, %x, %x, %x)", pid, resource, new_rlim, + old_rlim) + + # Stub + if resource == 3: + # RLIMIT_STACK + jitter.vm.set_mem(old_rlim, + struct.pack("QQ", + 0x100000, + 0x7fffffffffffffff, # RLIM64_INFINITY + )) + else: + raise RuntimeError("Not implemented") + jitter.cpu.RAX = 0 + + +def sys_x86_64_statfs(jitter, linux_env): + # Parse arguments + pathname = jitter.cpu.RDI + buf = jitter.cpu.RSI + rpathname = jitter.get_str_ansi(pathname) + log.debug("sys_statfs(%r, %x)", rpathname, buf) + + # Stub + if not linux_env.filesystem.exists(rpathname): + jitter.cpu.RAX = -1 + else: + info = linux_env.filesystem.statfs() + raise RuntimeError("Not implemented") + + +def sys_x86_64_ioctl(jitter, linux_env): + # Parse arguments + fd, cmd, arg = jitter.syscall_args_systemv(3) + log.debug("sys_ioctl(%x, %x, %x)", fd, cmd, arg) + + info = linux_env.ioctl(fd, cmd, arg) + if info is False: + jitter.syscall_ret_systemv(-1) + else: + if cmd == termios.TCGETS: + data = 
struct.pack("BBBB", *info) + jitter.vm.set_mem(arg, data) + elif cmd == termios.TIOCGWINSZ: + data = struct.pack("HHHH", *info) + jitter.vm.set_mem(arg, data) + else: + assert data is None + jitter.syscall_ret_systemv(0) + + +def sys_arml_ioctl(jitter, linux_env): + # Parse arguments + fd, cmd, arg = jitter.syscall_args_systemv(3) + log.debug("sys_ioctl(%x, %x, %x)", fd, cmd, arg) + + info = linux_env.ioctl(fd, cmd, arg) + if info is False: + jitter.syscall_ret_systemv(-1) + else: + if cmd == termios.TCGETS: + data = struct.pack("BBBB", *info) + jitter.vm.set_mem(arg, data) + elif cmd == termios.TIOCGWINSZ: + data = struct.pack("HHHH", *info) + jitter.vm.set_mem(arg, data) + else: + assert data is None + jitter.syscall_ret_systemv(0) + +def sys_generic_open(jitter, linux_env): + # Parse arguments + filename, flags, mode = jitter.syscall_args_systemv(3) + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_open(%r, %x, %x)", rpathname, flags, mode) + # Stub + # 'mode' is ignored + jitter.syscall_ret_systemv(linux_env.open_(rpathname, flags)) + + +def sys_generic_write(jitter, linux_env): + # Parse arguments + fd, buf, count = jitter.syscall_args_systemv(3) + log.debug("sys_write(%d, %x, %x)", fd, buf, count) + + # Stub + data = jitter.vm.get_mem(buf, count) + jitter.syscall_ret_systemv(linux_env.write(fd, data)) + + +def sys_x86_64_getdents(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + dirent = jitter.cpu.RSI + count = jitter.cpu.RDX + log.debug("sys_getdents(%x, %x, %x)", fd, dirent, count) + + # Stub + def packing_callback(cur_len, d_ino, d_type, name): + # struct linux_dirent { + # unsigned long d_ino; /* Inode number */ + # unsigned long d_off; /* Offset to next linux_dirent */ + # unsigned short d_reclen; /* Length of this linux_dirent */ + # char d_name[]; /* Filename (null-terminated) */ + # /* length is actually (d_reclen - 2 - + # offsetof(struct linux_dirent, d_name)) */ + # /* + # char pad; // Zero padding byte + # char d_type; // 
File type (only since Linux + # // 2.6.4); offset is (d_reclen - 1) + # */ + # } + d_reclen = 8 * 2 + 2 + 1 + len(name) + 1 + d_off = cur_len + d_reclen + entry = struct.pack("QqH", d_ino, d_off, d_reclen) + \ + name + b"\x00" + struct.pack("B", d_type) + assert len(entry) == d_reclen + return entry + + out = linux_env.getdents(fd, count, packing_callback) + jitter.vm.set_mem(dirent, out) + jitter.cpu.RAX = len(out) + + +def sys_arml_getdents64(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.R0 + dirent = jitter.cpu.R1 + count = jitter.cpu.R2 + log.debug("sys_getdents64(%x, %x, %x)", fd, dirent, count) + + # Stub + def packing_callback(cur_len, d_ino, d_type, name): + # struct linux_dirent64 { + # ino64_t d_ino; /* 64-bit inode number */ + # off64_t d_off; /* 64-bit offset to next structure */ + # unsigned short d_reclen; /* Size of this dirent */ + # unsigned char d_type; /* File type */ + # char d_name[]; /* Filename (null-terminated) */ + # }; + d_reclen = 8 * 2 + 2 + 1 + len(name) + 1 + d_off = cur_len + d_reclen + entry = struct.pack("QqHB", d_ino, d_off, d_reclen, d_type) + \ + name + b"\x00" + assert len(entry) == d_reclen + return entry + + out = linux_env.getdents(fd, count, packing_callback) + jitter.vm.set_mem(dirent, out) + jitter.cpu.R0 = len(out) + + +def sys_x86_64_newlstat(jitter, linux_env): + # Parse arguments + filename = jitter.cpu.RDI + statbuf = jitter.cpu.RSI + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_newlstat(%s, %x)", rpathname, statbuf) + + # Stub + if not linux_env.filesystem.exists(rpathname): + # ENOENT (No such file or directory) + jitter.cpu.RAX = -1 + else: + info = linux_env.lstat(rpathname) + data = _dump_struct_stat_x86_64(info) + jitter.vm.set_mem(statbuf, data) + jitter.cpu.RAX = 0 + + +def sys_arml_lstat64(jitter, linux_env): + # Parse arguments + filename = jitter.cpu.R0 + statbuf = jitter.cpu.R1 + rpathname = jitter.get_str_ansi(filename) + log.debug("sys_newlstat(%s, %x)", rpathname, statbuf) + + 
# Stub + if not linux_env.filesystem.exists(rpathname): + # ENOENT (No such file or directory) + jitter.cpu.R0 = -1 + else: + info = linux_env.lstat(rpathname) + data = _dump_struct_stat_arml(info) + jitter.vm.set_mem(statbuf, data) + jitter.cpu.R0 = 0 + + +def sys_x86_64_lgetxattr(jitter, linux_env): + # Parse arguments + pathname = jitter.cpu.RDI + name = jitter.cpu.RSI + value = jitter.cpu.RDX + size = jitter.cpu.R10 + rpathname = jitter.get_str_ansi(pathname) + rname = jitter.get_str_ansi(name) + log.debug("sys_lgetxattr(%r, %r, %x, %x)", rpathname, rname, value, size) + + # Stub + jitter.vm.set_mem(value, b"\x00" * size) + jitter.cpu.RAX = 0 + + +def sys_x86_64_getxattr(jitter, linux_env): + # Parse arguments + pathname = jitter.cpu.RDI + name = jitter.cpu.RSI + value = jitter.cpu.RDX + size = jitter.cpu.R10 + rpathname = jitter.get_str_ansi(pathname) + rname = jitter.get_str_ansi(name) + log.debug("sys_getxattr(%r, %r, %x, %x)", rpathname, rname, value, size) + + # Stub + jitter.vm.set_mem(value, b"\x00" * size) + jitter.cpu.RAX = 0 + + +def sys_x86_64_socket(jitter, linux_env): + # Parse arguments + family = jitter.cpu.RDI + type_ = jitter.cpu.RSI + protocol = jitter.cpu.RDX + log.debug("sys_socket(%x, %x, %x)", family, type_, protocol) + + jitter.cpu.RAX = linux_env.socket(family, type_, protocol) + + +def sys_x86_64_connect(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + uservaddr = jitter.cpu.RSI + addrlen = jitter.cpu.RDX + raddr = jitter.get_str_ansi(uservaddr + 2) + log.debug("sys_connect(%x, %r, %x)", fd, raddr, addrlen) + + # Stub + # Always refuse the connexion + jitter.cpu.RAX = -1 + + +def sys_x86_64_clock_gettime(jitter, linux_env): + # Parse arguments + which_clock = jitter.cpu.RDI + tp = jitter.cpu.RSI + log.debug("sys_clock_gettime(%x, %x)", which_clock, tp) + + # Stub + value = linux_env.clock_gettime() + jitter.vm.set_mem(tp, struct.pack("Q", value)) + jitter.cpu.RAX = 0 + + +def sys_x86_64_lseek(jitter, linux_env): + # Parse 
arguments + fd = jitter.cpu.RDI + offset = jitter.cpu.RSI + whence = jitter.cpu.RDX + log.debug("sys_lseek(%d, %x, %x)", fd, offset, whence) + + # Stub + fdesc = linux_env.file_descriptors[fd] + mask = (1 << 64) - 1 + if offset > (1 << 63): + offset = - ((offset ^ mask) + 1) + + new_offset = fdesc.lseek(offset, whence) + jitter.cpu.RAX = new_offset + + +def sys_x86_64_munmap(jitter, linux_env): + # Parse arguments + addr = jitter.cpu.RDI + len_ = jitter.cpu.RSI + log.debug("sys_munmap(%x, %x)", addr, len_) + + # Do nothing + jitter.cpu.RAX = 0 + + +def sys_x86_64_readlink(jitter, linux_env): + # Parse arguments + path = jitter.cpu.RDI + buf = jitter.cpu.RSI + bufsize = jitter.cpu.RDX + rpath = jitter.get_str_ansi(path) + log.debug("sys_readlink(%r, %x, %x)", rpath, buf, bufsize) + + # Stub + link = linux_env.filesystem.readlink(rpath) + if link is None: + # Not a link + jitter.cpu.RAX = -1 + else: + data = link[:bufsize - 1] + b"\x00" + jitter.vm.set_mem(buf, data) + jitter.cpu.RAX = len(data) - 1 + +def sys_x86_64_getpid(jitter, linux_env): + # Parse arguments + log.debug("sys_getpid()") + + # Stub + jitter.cpu.RAX = linux_env.process_pid + + +def sys_x86_64_sysinfo(jitter, linux_env): + # Parse arguments + info = jitter.cpu.RDI + log.debug("sys_sysinfo(%x)", info) + + # Stub + data = struct.pack("QQQQQQQQQQHQQI", + 0x1234, # uptime + 0x2000, # loads (1 min) + 0x2000, # loads (5 min) + 0x2000, # loads (15 min) + 0x10000000, # total ram + 0x10000000, # free ram + 0x10000000, # shared memory + 0x0, # memory used by buffers + 0x0, # total swap + 0x0, # free swap + 0x1, # nb current processes + 0x0, # total high mem + 0x0, # available high mem + 0x1, # memory unit size + ) + jitter.vm.set_mem(info, data) + jitter.cpu.RAX = 0 + + +def sys_generic_geteuid(jitter, linux_env): + # Parse arguments + log.debug("sys_geteuid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_euid) + + +def sys_generic_getegid(jitter, linux_env): + # Parse arguments + 
log.debug("sys_getegid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_egid) + + +def sys_generic_getuid(jitter, linux_env): + # Parse arguments + log.debug("sys_getuid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_uid) + + +def sys_generic_getgid(jitter, linux_env): + # Parse arguments + log.debug("sys_getgid()") + + # Stub + jitter.syscall_ret_systemv(linux_env.user_gid) + + +def sys_generic_setgid(jitter, linux_env): + # Parse arguments + gid, = jitter.syscall_args_systemv(1) + log.debug("sys_setgid(%x)", gid) + + # Stub + # Denied if different + if gid != linux_env.user_gid: + jitter.syscall_ret_systemv(-1) + else: + jitter.syscall_ret_systemv(0) + + +def sys_generic_setuid(jitter, linux_env): + # Parse arguments + uid, = jitter.syscall_args_systemv(1) + log.debug("sys_setuid(%x)", uid) + + # Stub + # Denied if different + if uid != linux_env.user_uid: + jitter.syscall_ret_systemv(-1) + else: + jitter.syscall_ret_systemv(0) + + +def sys_arml_set_tls(jitter, linux_env): + # Parse arguments + ptr = jitter.cpu.R0 + log.debug("sys_set_tls(%x)", ptr) + + # Stub + linux_env.tls = ptr + jitter.cpu.R0 = 0 + + +def sys_generic_fcntl64(jitter, linux_env): + # Parse arguments + fd, cmd, arg = jitter.syscall_args_systemv(3) + log.debug("sys_fcntl(%x, %x, %x)", fd, cmd, arg) + + # Stub + fdesc = linux_env.file_descriptors[fd] + if cmd == fcntl.F_GETFL: + jitter.syscall_ret_systemv(fdesc.flags) + elif cmd == fcntl.F_SETFL: + # Ignore flag change + jitter.syscall_ret_systemv(0) + elif cmd == fcntl.F_GETFD: + jitter.syscall_ret_systemv(fdesc.flags) + elif cmd == fcntl.F_SETFD: + # Ignore flag change + jitter.syscall_ret_systemv(0) + else: + raise RuntimeError("Not implemented") + + +def sys_x86_64_pread64(jitter, linux_env): + # Parse arguments + fd = jitter.cpu.RDI + buf = jitter.cpu.RSI + count = jitter.cpu.RDX + pos = jitter.cpu.R10 + log.debug("sys_pread64(%x, %x, %x, %x)", fd, buf, count, pos) + + # Stub + fdesc = linux_env.file_descriptors[fd] + 
cur_pos = fdesc.tell() + fdesc.seek(pos) + data = fdesc.read(count) + jitter.vm.set_mem(buf, data) + fdesc.seek(cur_pos) + jitter.cpu.RAX = len(data) + + +def sys_arml_gettimeofday(jitter, linux_env): + # Parse arguments + tv = jitter.cpu.R0 + tz = jitter.cpu.R1 + log.debug("sys_gettimeofday(%x, %x)", tv, tz) + + # Stub + value = linux_env.clock_gettime() + if tv: + jitter.vm.set_mem(tv, struct.pack("II", value, 0)) + if tz: + jitter.vm.set_mem(tz, struct.pack("II", 0, 0)) + jitter.cpu.R0 = 0 + + +syscall_callbacks_x86_64 = { + 0x0: sys_generic_read, + 0x1: sys_generic_write, + 0x2: sys_generic_open, + 0x3: sys_generic_close, + 0x4: sys_x86_64_newstat, + 0x5: sys_x86_64_fstat, + 0x6: sys_x86_64_newlstat, + 0x8: sys_x86_64_lseek, + 0x9: sys_generic_mmap, + 0x10: sys_x86_64_ioctl, + 0xA: sys_generic_mprotect, + 0xB: sys_x86_64_munmap, + 0xC: sys_generic_brk, + 0xD: sys_x86_64_rt_sigaction, + 0xE: sys_x86_64_rt_sigprocmask, + 0x11: sys_x86_64_pread64, + 0x14: sys_x86_64_writev, + 0x15: sys_generic_access, + 0x27: sys_x86_64_getpid, + 0x29: sys_x86_64_socket, + 0x2A: sys_x86_64_connect, + 0x3F: sys_x86_64_newuname, + 0x48: sys_generic_fcntl64, + 0x4E: sys_x86_64_getdents, + 0x59: sys_x86_64_readlink, + 0x63: sys_x86_64_sysinfo, + 0x66: sys_generic_getuid, + 0x68: sys_generic_getgid, + 0x6B: sys_generic_geteuid, + 0x6C: sys_generic_getegid, + 0xE4: sys_x86_64_clock_gettime, + 0x89: sys_x86_64_statfs, + 0x9E: sys_x86_64_arch_prctl, + 0xBF: sys_x86_64_getxattr, + 0xC0: sys_x86_64_lgetxattr, + 0xDA: sys_x86_64_set_tid_address, + 0xE7: sys_generic_exit_group, + 0x101: sys_x86_64_openat, + 0x111: sys_x86_64_set_robust_list, + 0x12E: sys_x86_64_prlimit64, +} + + +syscall_callbacks_arml = { + + 0x3: sys_generic_read, + 0x4: sys_generic_write, + 0x5: sys_generic_open, + 0x6: sys_generic_close, + 0x2d: sys_generic_brk, + 0x21: sys_generic_access, + 0x36: sys_arml_ioctl, + 0x7a: sys_arml_newuname, + 0x7d: sys_generic_mprotect, + 0x92: sys_arml_writev, + 0xc0: sys_generic_mmap2, + 
0xc3: sys_arml_stat64, + 0xc4: sys_arml_lstat64, + 0xc5: sys_arml_fstat64, + 0xc7: sys_generic_getuid, + 0xc8: sys_generic_getgid, + 0xc9: sys_generic_geteuid, + 0xcA: sys_generic_getegid, + 0x4e: sys_arml_gettimeofday, + 0xd5: sys_generic_setuid, + 0xd6: sys_generic_setgid, + 0xd9: sys_arml_getdents64, + 0xdd: sys_generic_fcntl64, + 0xf8: sys_generic_exit_group, + + # ARM-specific ARM_NR_BASE == 0x0f0000 + 0xf0005: sys_arml_set_tls, +} + +def syscall_x86_64_exception_handler(linux_env, syscall_callbacks, jitter): + """Call to actually handle an EXCEPT_PRIV_INSN exception + In the case of an error raised by a SYSCALL, call the corresponding + syscall_callbacks + @linux_env: LinuxEnvironment_x86_64 instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + """ + # Ensure the jitter has break on a SYSCALL + cur_instr = jitter.jit.mdis.dis_instr(jitter.pc) + if cur_instr.name != "SYSCALL": + return True + + # Dispatch to SYSCALL stub + syscall_number = jitter.cpu.RAX + callback = syscall_callbacks.get(syscall_number) + if callback is None: + raise KeyError( + "No callback found for syscall number 0x%x" % syscall_number + ) + callback(jitter, linux_env) + log.debug("-> %x", jitter.cpu.RAX) + + # Clean exception and move pc to the next instruction, to let the jitter + # continue + jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_PRIV_INSN) + jitter.pc += cur_instr.l + return True + + + +def syscall_x86_32_exception_handler(linux_env, syscall_callbacks, jitter): + """Call to actually handle an EXCEPT_PRIV_INSN exception + In the case of an error raised by a SYSCALL, call the corresponding + syscall_callbacks + @linux_env: LinuxEnvironment_x86_32 instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + """ + # Ensure the jitter has break on a SYSCALL + if jitter.cpu.interrupt_num != 0x80: + return True + + # Dispatch to SYSCALL stub + syscall_number = jitter.cpu.EAX + callback = syscall_callbacks.get(syscall_number) + if 
callback is None: + raise KeyError( + "No callback found for syscall number 0x%x" % syscall_number + ) + callback(jitter, linux_env) + log.debug("-> %x", jitter.cpu.EAX) + + # Clean exception and move pc to the next instruction, to let the jitter + # continue + jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_INT_XX) + return True + + + +def syscall_arml_exception_handler(linux_env, syscall_callbacks, jitter): + """Call to actually handle an EXCEPT_PRIV_INSN exception + In the case of an error raised by a SYSCALL, call the corresponding + syscall_callbacks + @linux_env: LinuxEnvironment_arml instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + """ + # Ensure the jitter has break on a SYSCALL + if jitter.cpu.interrupt_num != 0x0: + return True + + # Dispatch to SYSCALL stub + syscall_number = jitter.cpu.R7 + callback = syscall_callbacks.get(syscall_number) + if callback is None: + raise KeyError( + "No callback found for syscall number 0x%x" % syscall_number + ) + callback(jitter, linux_env) + log.debug("-> %x", jitter.cpu.R0) + + # Clean exception and move pc to the next instruction, to let the jitter + # continue + jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_INT_XX) + return True + + + +def enable_syscall_handling(jitter, linux_env, syscall_callbacks): + """Activate handling of syscall for the current jitter instance. 
+ Syscall handlers are provided by @syscall_callbacks + @linux_env: LinuxEnvironment instance + @syscall_callbacks: syscall number -> func(jitter, linux_env) + + Example of use: + >>> linux_env = LinuxEnvironment_x86_64() + >>> enable_syscall_handling(jitter, linux_env, syscall_callbacks_x86_64) + """ + arch_name = jitter.jit.arch_name + if arch_name == "x8664": + handler = syscall_x86_64_exception_handler + handler = functools.partial(handler, linux_env, syscall_callbacks) + jitter.add_exception_handler(EXCEPT_PRIV_INSN, handler) + elif arch_name == "x8632": + handler = syscall_x86_32_exception_handler + handler = functools.partial(handler, linux_env, syscall_callbacks) + jitter.add_exception_handler(EXCEPT_INT_XX, handler) + elif arch_name == "arml": + handler = syscall_arml_exception_handler + handler = functools.partial(handler, linux_env, syscall_callbacks) + jitter.add_exception_handler(EXCEPT_INT_XX, handler) + else: + raise ValueError("No syscall handler implemented for %s" % arch_name) + diff --git a/miasm/os_dep/linux_stdlib.py b/miasm/os_dep/linux_stdlib.py new file mode 100644 index 00000000..b2836881 --- /dev/null +++ b/miasm/os_dep/linux_stdlib.py @@ -0,0 +1,213 @@ +#-*- coding:utf-8 -*- + +from __future__ import print_function +import struct +from sys import stdout + +try: + # Python3 binary stdout + stdout = stdout.buffer +except AttributeError: + pass + +from miasm.core.utils import int_to_byte, cmp_elts +from miasm.os_dep.common import heap +from miasm.os_dep.common import get_fmt_args as _get_fmt_args + + +class c_linobjs(object): + + base_addr = 0x20000000 + align_addr = 0x1000 + def __init__(self): + self.alloc_ad = self.base_addr + self.alloc_align = self.align_addr + self.heap = heap() + +linobjs = c_linobjs() + +ABORT_ADDR = 0x1337beef + +def xxx___libc_start_main(jitter): + """Basic implementation of __libc_start_main + + int __libc_start_main(int *(main) (int, char * *, char * *), int argc, + char * * ubp_av, void (*init) (void), + void 
(*fini) (void), void (*rtld_fini) (void), + void (* stack_end)); + + Note: + - init, fini, rtld_fini are ignored + - return address is forced to ABORT_ADDR, to avoid calling abort/hlt/... + - in powerpc, signature is: + + int __libc_start_main (int argc, char **argv, char **ev, ElfW (auxv_t) * + auxvec, void (*rtld_fini) (void), struct startup_info + *stinfo, char **stack_on_entry) + + """ + global ABORT_ADDR + if jitter.arch.name == "ppc32": + ret_ad, args = jitter.func_args_systemv( + ["argc", "argv", "ev", "aux_vec", "rtld_fini", "st_info", + "stack_on_entry"] + ) + + # Mimic glibc implementation + if args.stack_on_entry != 0: + argc = struct.unpack(">I", + jitter.vm.get_mem(args.stack_on_entry, 4))[0] + argv = args.stack_on_entry + 4 + envp = argv + ((argc + 1) * 4) + else: + argc = args.argc + argv = args.argv + envp = args.ev + # sda_base, main, init, fini + _, main, _, _ = struct.unpack(">IIII", + jitter.vm.get_mem(args.st_info, 4 * 4)) + + else: + ret_ad, args = jitter.func_args_systemv( + ["main", "argc", "ubp_av", "init", "fini", "rtld_fini", "stack_end"] + ) + + main = args.main + # done by __libc_init_first + size = jitter.ir_arch.pc.size // 8 + argc = args.argc + argv = args.ubp_av + envp = argv + (args.argc + 1) * size + + + # Call int main(int argc, char** argv, char** envp) + jitter.func_ret_systemv(main) + ret_ad = ABORT_ADDR + jitter.func_prepare_systemv(ret_ad, argc, argv, envp) + return True + + +def xxx_isprint(jitter): + ''' + #include + int isprint(int c); + + checks for any printable character including space. + ''' + ret_addr, args = jitter.func_args_systemv(['c']) + ret = 1 if 0x20 <= args.c & 0xFF < 0x7f else 0 + return jitter.func_ret_systemv(ret_addr, ret) + + +def xxx_memcpy(jitter): + ''' + #include + void *memcpy(void *dest, const void *src, size_t n); + + copies n bytes from memory area src to memory area dest. 
+ ''' + ret_addr, args = jitter.func_args_systemv(['dest', 'src', 'n']) + jitter.vm.set_mem(args.dest, jitter.vm.get_mem(args.src, args.n)) + return jitter.func_ret_systemv(ret_addr, args.dest) + + +def xxx_memset(jitter): + ''' + #include + void *memset(void *s, int c, size_t n); + + fills the first n bytes of the memory area pointed to by s with the constant + byte c.''' + + ret_addr, args = jitter.func_args_systemv(['dest', 'c', 'n']) + jitter.vm.set_mem(args.dest, int_to_byte(args.c & 0xFF) * args.n) + return jitter.func_ret_systemv(ret_addr, args.dest) + + +def xxx_puts(jitter): + ''' + #include + int puts(const char *s); + + writes the string s and a trailing newline to stdout. + ''' + ret_addr, args = jitter.func_args_systemv(['s']) + index = args.s + char = jitter.vm.get_mem(index, 1) + while char != b'\x00': + stdout.write(char) + index += 1 + char = jitter.vm.get_mem(index, 1) + stdout.write(b'\n') + return jitter.func_ret_systemv(ret_addr, 1) + + +def get_fmt_args(jitter, fmt, cur_arg): + return _get_fmt_args(fmt, cur_arg, jitter.get_str_ansi, jitter.get_arg_n_systemv) + + +def xxx_snprintf(jitter): + ret_addr, args = jitter.func_args_systemv(['string', 'size', 'fmt']) + cur_arg, fmt = 3, args.fmt + size = args.size if args.size else 1 + output = get_fmt_args(jitter, fmt, cur_arg) + output = output[:size - 1] + ret = len(output) + jitter.vm.set_mem(args.string, output + b'\x00') + return jitter.func_ret_systemv(ret_addr, ret) + + +def xxx_sprintf(jitter): + ret_addr, args = jitter.func_args_systemv(['string', 'fmt']) + cur_arg, fmt = 2, args.fmt + output = get_fmt_args(jitter, fmt, cur_arg) + ret = len(output) + jitter.vm.set_mem(args.string, output + b'\x00') + return jitter.func_ret_systemv(ret_addr, ret) + + +def xxx_printf(jitter): + ret_addr, args = jitter.func_args_systemv(['fmt']) + cur_arg, fmt = 1, args.fmt + output = get_fmt_args(jitter, fmt, cur_arg) + ret = len(output) + stdout.write(output) + return jitter.func_ret_systemv(ret_addr, ret) + + 
+def xxx_strcpy(jitter): + ret_ad, args = jitter.func_args_systemv(["dst", "src"]) + str_src = jitter.get_str_ansi(args.src) + b'\x00' + jitter.vm.set_mem(args.dst, str_src) + jitter.func_ret_systemv(ret_ad, args.dst) + + +def xxx_strlen(jitter): + ret_ad, args = jitter.func_args_systemv(["src"]) + str_src = jitter.get_str_ansi(args.src) + jitter.func_ret_systemv(ret_ad, len(str_src)) + + +def xxx_malloc(jitter): + ret_ad, args = jitter.func_args_systemv(["msize"]) + addr = linobjs.heap.alloc(jitter, args.msize) + jitter.func_ret_systemv(ret_ad, addr) + + +def xxx_free(jitter): + ret_ad, args = jitter.func_args_systemv(["ptr"]) + jitter.func_ret_systemv(ret_ad, 0) + + +def xxx_strcmp(jitter): + ret_ad, args = jitter.func_args_systemv(["ptr_str1", "ptr_str2"]) + s1 = jitter.get_str_ansi(args.ptr_str1) + s2 = jitter.get_str_ansi(args.ptr_str2) + jitter.func_ret_systemv(ret_ad, cmp_elts(s1, s2)) + + +def xxx_strncmp(jitter): + ret_ad, args = jitter.func_args_systemv(["ptr_str1", "ptr_str2", "size"]) + s1 = jitter.get_str_ansi(args.ptr_str1, args.size) + s2 = jitter.get_str_ansi(args.ptr_str2, args.size) + jitter.func_ret_systemv(ret_ad, cmp_elts(s1, s2)) diff --git a/miasm/os_dep/win_32_structs.py b/miasm/os_dep/win_32_structs.py new file mode 100644 index 00000000..fc9c62ea --- /dev/null +++ b/miasm/os_dep/win_32_structs.py @@ -0,0 +1,231 @@ +from miasm.core.types import MemStruct, Num, Ptr, Str, \ + Array, RawStruct, Union, \ + BitField, Self, Void, Bits, \ + set_allocator, MemUnion, Struct + + +class UnicodeString(MemStruct): + fields = [ + ("length", Num("H")), + ("maxlength", Num("H")), + ("data", Ptr(" +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +from past.builtins import cmp +import struct +import os +import stat +import time +import string +import logging +from zlib import crc32 +from io import StringIO +import time +import datetime + +from future.utils import PY3, viewitems + +try: + from Crypto.Hash import MD5, SHA +except ImportError: + print("cannot find crypto, skipping") + +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE, PAGE_EXEC +from miasm.core.utils import pck16, pck32, hexdump, whoami +from miasm.os_dep.common import heap, windows_to_sbpath +from miasm.os_dep.common import set_str_unic, set_str_ansi +from miasm.os_dep.common import get_fmt_args as _get_fmt_args +from miasm.os_dep.win_api_x86_32_seh import tib_address + +log = logging.getLogger("win_api_x86_32") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +DATE_1601_TO_1970 = 116444736000000000 + +MAX_PATH = 260 + + +""" +typedef struct tagPROCESSENTRY32 { + DWORD dwSize; + DWORD cntUsage; + DWORD th32ProcessID; + ULONG_PTR th32DefaultHeapID; + DWORD th32ModuleID; + DWORD cntThreads; + DWORD th32ParentProcessID; + LONG pcPriClassBase; + DWORD dwFlags; + TCHAR szExeFile[MAX_PATH]; +} PROCESSENTRY32, *PPROCESSENTRY32; +""" + + +ACCESS_DICT = {0x0: 0, + 0x1: 0, + 0x2: PAGE_READ, + 0x4: PAGE_READ | PAGE_WRITE, + 0x10: PAGE_EXEC, + 0x20: PAGE_EXEC | PAGE_READ, + 0x40: PAGE_EXEC | PAGE_READ | PAGE_WRITE, + 0x80: PAGE_EXEC | PAGE_READ | PAGE_WRITE, 
+ # 0x80: PAGE_EXECUTE_WRITECOPY + 0x100: 0 + } + +ACCESS_DICT_INV = dict((x[1], x[0]) for x in viewitems(ACCESS_DICT)) + + +class whandle(object): + + def __init__(self, name, info): + self.name = name + self.info = info + + def __repr__(self): + return '<%r %r %r>' % (self.__class__.__name__, self.name, self.info) + + +class handle_generator(object): + + def __init__(self): + self.offset = 600 + self.all_handles = {} + + def add(self, name, info=None): + self.offset += 1 + h = whandle(name, info) + self.all_handles[self.offset] = h + + log.debug(repr(self)) + return self.offset + + def __repr__(self): + out = '<%r\n' % self.__class__.__name__ + ks = list(self.all_handles) + ks.sort() + + for k in ks: + out += " %r %r\n" % (k, self.all_handles[k]) + out += '>' + return out + + def __contains__(self, e): + return e in self.all_handles + + def __getitem__(self, item): + return self.all_handles.__getitem__(item) + + def __delitem__(self, item): + self.all_handles.__delitem__(item) + + +class c_winobjs(object): + + def __init__(self): + self.alloc_ad = 0x20000000 + self.alloc_align = 0x1000 + self.heap = heap() + self.handle_toolhelpsnapshot = 0xaaaa00 + self.toolhelpsnapshot_info = {} + self.handle_curprocess = 0xaaaa01 + self.dbg_present = 0 + self.tickcount = 0 + self.dw_pid_dummy1 = 0x111 + self.dw_pid_explorer = 0x222 + self.dw_pid_dummy2 = 0x333 + self.dw_pid_cur = 0x444 + self.module_fname_nux = None + self.module_name = b"test.exe" + self.module_path = b"c:\\mydir\\" + self.module_name + self.hcurmodule = None + self.module_filesize = None + self.getversion = 0x0A280105 + self.getforegroundwindow = 0x333333 + self.cryptcontext_hwnd = 0x44400 + self.cryptcontext_bnum = 0x44000 + self.cryptcontext_num = 0 + self.cryptcontext = {} + self.phhash_crypt_md5 = 0x55555 + self.files_hwnd = {} + self.windowlong_dw = 0x77700 + self.module_cur_hwnd = 0x88800 + self.module_file_nul = 0x999000 + self.runtime_dll = None + self.current_pe = None + self.tls_index = 0xf + 
self.tls_values = {} + self.handle_pool = handle_generator() + self.handle_mapped = {} + self.hkey_handles = { + 0x80000001: b"hkey_current_user", + 0x80000002: b"hkey_local_machine" + } + self.cur_dir = b"c:\\tmp" + + self.nt_mdl = {} + self.nt_mdl_ad = None + self.nt_mdl_cur = 0 + self.win_event_num = 0x13370 + self.cryptdll_md5_h = {} + + self.lastwin32error = 0 + self.mutex = {} + self.env_variables = {} + self.events_pool = {} + self.find_data = None + + self.current_datetime = datetime.datetime( + year=2017, month=8, day=21, + hour=13, minute=37, + second=11, microsecond=123456 + ) + +winobjs = c_winobjs() + + +process_list = [ + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_dummy1, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + b"dummy1.exe" # TCHAR szExeFile[MAX_PATH]; + ], + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_explorer, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + 4, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + b"explorer.exe" # TCHAR szExeFile[MAX_PATH]; + ], + + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_dummy2, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; + 0xbeef, # LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + b"dummy2.exe" # TCHAR szExeFile[MAX_PATH]; + ], + + [ + 0x40, # DWORD dwSize; + 0, # DWORD cntUsage; + winobjs.dw_pid_cur, # DWORD th32ProcessID; + 0x11111111, # ULONG_PTR th32DefaultHeapID; + 0x11111112, # DWORD th32ModuleID; + 1, # DWORD cntThreads; + winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; + 0xbeef, 
# LONG pcPriClassBase; + 0x0, # DWORD dwFlags; + winobjs.module_name # TCHAR szExeFile[MAX_PATH]; + ], + + +] + + +class hobj(object): + pass + + +class mdl(object): + + def __init__(self, ad, l): + self.ad = ad + self.l = l + + def __bytes__(self): + return struct.pack('LL', self.ad, self.l) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + +def kernel32_HeapAlloc(jitter): + ret_ad, args = jitter.func_args_stdcall(["heap", "flags", "size"]) + alloc_addr = winobjs.heap.alloc(jitter, args.size) + jitter.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_HeapFree(jitter): + ret_ad, _ = jitter.func_args_stdcall(["heap", "flags", "pmem"]) + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GlobalAlloc(jitter): + ret_ad, args = jitter.func_args_stdcall(["uflags", "msize"]) + alloc_addr = winobjs.heap.alloc(jitter, args.msize) + jitter.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_LocalFree(jitter): + ret_ad, _ = jitter.func_args_stdcall(["lpvoid"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_LocalAlloc(jitter): + ret_ad, args = jitter.func_args_stdcall(["uflags", "msize"]) + alloc_addr = winobjs.heap.alloc(jitter, args.msize) + jitter.func_ret_stdcall(ret_ad, alloc_addr) + +def msvcrt_new(jitter): + ret_ad, args = jitter.func_args_cdecl(["size"]) + alloc_addr = winobjs.heap.alloc(jitter, args.size) + jitter.func_ret_cdecl(ret_ad, alloc_addr) + +globals()['msvcrt_??2@YAPAXI@Z'] = msvcrt_new + +def msvcrt_delete(jitter): + ret_ad, args = jitter.func_args_cdecl(["ptr"]) + jitter.func_ret_cdecl(ret_ad, 0) + +globals()['msvcrt_??3@YAXPAX@Z'] = msvcrt_delete + +def kernel32_GlobalFree(jitter): + ret_ad, _ = jitter.func_args_stdcall(["addr"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_IsDebuggerPresent(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, winobjs.dbg_present) + + +def kernel32_CreateToolhelp32Snapshot(jitter): + ret_ad, _ = 
jitter.func_args_stdcall(["dwflags", "th32processid"]) + jitter.func_ret_stdcall(ret_ad, winobjs.handle_toolhelpsnapshot) + + +def kernel32_GetCurrentProcess(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, winobjs.handle_curprocess) + + +def kernel32_GetCurrentProcessId(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, winobjs.dw_pid_cur) + + +def kernel32_Process32First(jitter): + ret_ad, args = jitter.func_args_stdcall(["s_handle", "ad_pentry"]) + + pentry = struct.pack( + 'IIIIIIIII', *process_list[0][:-1] + ) + process_list[0][-1] + jitter.vm.set_mem(args.ad_pentry, pentry) + winobjs.toolhelpsnapshot_info[args.s_handle] = 0 + + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_Process32Next(jitter): + ret_ad, args = jitter.func_args_stdcall(["s_handle", "ad_pentry"]) + + winobjs.toolhelpsnapshot_info[args.s_handle] += 1 + if winobjs.toolhelpsnapshot_info[args.s_handle] >= len(process_list): + ret = 0 + else: + ret = 1 + n = winobjs.toolhelpsnapshot_info[args.s_handle] + pentry = struct.pack( + 'IIIIIIIII', *process_list[n][:-1]) + process_list[n][-1] + jitter.vm.set_mem(args.ad_pentry, pentry) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetTickCount(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + winobjs.tickcount += 1 + jitter.func_ret_stdcall(ret_ad, winobjs.tickcount) + + +def kernel32_GetVersion(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, winobjs.getversion) + + +def kernel32_GetVersionEx(jitter, str_size, set_str): + ret_ad, args = jitter.func_args_stdcall(["ptr_struct"]) + + size = jitter.vm.get_u32(args.ptr_struct) + if size in [0x14+str_size, 0x1c+str_size]: + tmp = struct.pack( + "IIIII%dsHHHBB" % str_size, + 0x114, # struct size + 0x5, # maj vers + 0x2, # min vers + 0xa28, # build nbr + 0x2, # platform id + set_str("Service pack 4"), + 3, # wServicePackMajor + 0, # wServicePackMinor + 0x100, # wSuiteMask + 1, # 
wProductType + 0 # wReserved + ) + tmp = tmp[:size] + jitter.vm.set_mem(args.ptr_struct, tmp) + ret = 1 + else: + ret = 0 + jitter.func_ret_stdcall(ret_ad, ret) + + +kernel32_GetVersionExA = lambda jitter: kernel32_GetVersionEx(jitter, 128, + set_str_ansi) +kernel32_GetVersionExW = lambda jitter: kernel32_GetVersionEx(jitter, 256, + set_str_unic) + + +def kernel32_GetPriorityClass(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hwnd"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_SetPriorityClass(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hwnd", "dwpclass"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_CloseHandle(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hwnd"]) + jitter.func_ret_stdcall(ret_ad, 1) + + +def user32_GetForegroundWindow(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, winobjs.getforegroundwindow) + + +def user32_FindWindowA(jitter): + ret_ad, args = jitter.func_args_stdcall(["pclassname", "pwindowname"]) + if args.pclassname: + classname = jitter.get_str_ansi(args.pclassname) + log.info("FindWindowA classname %s", classname) + if args.pwindowname: + windowname = jitter.get_str_ansi(args.pwindowname) + log.info("FindWindowA windowname %s", windowname) + jitter.func_ret_stdcall(ret_ad, 0) + + +def user32_GetTopWindow(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hwnd"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def user32_BlockInput(jitter): + ret_ad, _ = jitter.func_args_stdcall(["blockit"]) + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptAcquireContext(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["phprov", "pszcontainer", + "pszprovider", "dwprovtype", + "dwflags"]) + prov = get_str(args.pszprovider) if args.pszprovider else "NONE" + log.debug('prov: %r', prov) + jitter.vm.set_u32(args.phprov, winobjs.cryptcontext_hwnd) + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptAcquireContextA(jitter): + 
advapi32_CryptAcquireContext(jitter, whoami(), jitter.get_str_ansi) + + +def advapi32_CryptAcquireContextW(jitter): + advapi32_CryptAcquireContext(jitter, whoami(), jitter.get_str_unic) + + +def advapi32_CryptCreateHash(jitter): + ret_ad, args = jitter.func_args_stdcall(["hprov", "algid", "hkey", + "dwflags", "phhash"]) + + winobjs.cryptcontext_num += 1 + + if args.algid == 0x00008003: + log.debug('algo is MD5') + jitter.vm.set_u32( + args.phhash, + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num + ) + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num] = hobj() + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num].h = MD5.new() + elif args.algid == 0x00008004: + log.debug('algo is SHA1') + jitter.vm.set_u32( + args.phhash, + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num + ) + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num] = hobj() + winobjs.cryptcontext[ + winobjs.cryptcontext_bnum + winobjs.cryptcontext_num].h = SHA.new() + else: + raise ValueError('un impl algo1') + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptHashData(jitter): + ret_ad, args = jitter.func_args_stdcall(["hhash", "pbdata", "dwdatalen", + "dwflags"]) + + if not args.hhash in winobjs.cryptcontext: + raise ValueError("unknown crypt context") + + data = jitter.vm.get_mem(args.pbdata, args.dwdatalen) + log.debug('will hash %X', args.dwdatalen) + log.debug(repr(data[:10]) + "...") + winobjs.cryptcontext[args.hhash].h.update(data) + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptGetHashParam(jitter): + ret_ad, args = jitter.func_args_stdcall(["hhash", "param", "pbdata", + "dwdatalen", "dwflags"]) + + if not args.hhash in winobjs.cryptcontext: + raise ValueError("unknown crypt context") + + if args.param == 2: + # XXX todo: save h state? 
+ h = winobjs.cryptcontext[args.hhash].h.digest() + else: + raise ValueError('not impl', args.param) + jitter.vm.set_mem(args.pbdata, h) + jitter.vm.set_u32(args.dwdatalen, len(h)) + + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptReleaseContext(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hhash", "flags"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def advapi32_CryptDeriveKey(jitter): + ret_ad, args = jitter.func_args_stdcall(["hprov", "algid", "hbasedata", + "dwflags", "phkey"]) + + if args.algid == 0x6801: + log.debug('using DES') + else: + raise ValueError('un impl algo2') + h = winobjs.cryptcontext[args.hbasedata].h.digest() + log.debug('hash %r', h) + winobjs.cryptcontext[args.hbasedata].h_result = h + jitter.vm.set_u32(args.phkey, args.hbasedata) + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptDestroyHash(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hhash"]) + jitter.func_ret_stdcall(ret_ad, 1) + + +def advapi32_CryptDecrypt(jitter): + # ret_ad, _ = jitter.func_args_stdcall(["hkey", "hhash", "final", + # "dwflags", "pbdata", + # "pdwdatalen"]) + raise ValueError("Not implemented") + # jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_CreateFile(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["lpfilename", "access", + "dwsharedmode", + "lpsecurityattr", + "dwcreationdisposition", + "dwflagsandattr", + "htemplatefile"]) + if args.lpfilename == 0: + jitter.func_ret_stdcall(ret_ad, 0xffffffff) + return + + fname = get_str(args.lpfilename) + log.info('CreateFile fname %s', fname) + ret = 0xffffffff + + log.debug("%r %r", fname.lower(), winobjs.module_path.lower()) + is_original_file = fname.lower() == winobjs.module_path.lower() + + if fname.upper() in [r"\\.\SICE", r"\\.\NTICE", r"\\.\SIWVID", r'\\.\SIWDEBUG']: + pass + elif fname.upper() in ['NUL']: + ret = winobjs.module_cur_hwnd + else: + # sandox path + sb_fname = windows_to_sbpath(fname) + if args.access & 0x80000000 or args.access == 1: + # read + 
if args.dwcreationdisposition == 2: + # create_always + if os.access(sb_fname, os.R_OK): + # but file exist + pass + else: + raise NotImplementedError("Untested case") # to test + # h = open(sb_fname, 'rb+') + elif args.dwcreationdisposition == 3: + # open_existing + if os.access(sb_fname, os.R_OK): + s = os.stat(sb_fname) + if stat.S_ISDIR(s.st_mode): + ret = winobjs.handle_pool.add(sb_fname, 0x1337) + else: + h = open(sb_fname, 'r+b') + ret = winobjs.handle_pool.add(sb_fname, h) + else: + log.warning("FILE %r DOES NOT EXIST!", fname) + elif args.dwcreationdisposition == 1: + # create new + if os.access(sb_fname, os.R_OK): + # file exist + # ret = 80 + winobjs.lastwin32error = 80 + else: + # first create an empty file + open(sb_fname, 'w').close() + # then open + h = open(sb_fname, 'r+b') + ret = winobjs.handle_pool.add(sb_fname, h) + elif args.dwcreationdisposition == 4: + # open_always + if os.access(sb_fname, os.R_OK): + s = os.stat(sb_fname) + if stat.S_ISDIR(s.st_mode): + ret = winobjs.handle_pool.add(sb_fname, 0x1337) + else: + h = open(sb_fname, 'r+b') + ret = winobjs.handle_pool.add(sb_fname, h) + else: + raise NotImplementedError("Untested case") + else: + raise NotImplementedError("Untested case") + elif args.access & 0x40000000: + # write + if args.dwcreationdisposition == 3: + # open existing + if is_original_file: + # cannot open self in write mode! 
+ pass + elif os.access(sb_fname, os.R_OK): + s = os.stat(sb_fname) + if stat.S_ISDIR(s.st_mode): + # open dir + ret = winobjs.handle_pool.add(sb_fname, 0x1337) + else: + h = open(sb_fname, 'r+b') + ret = winobjs.handle_pool.add(sb_fname, h) + else: + raise NotImplementedError("Untested case") # to test + elif args.dwcreationdisposition == 5: + # truncate_existing + if is_original_file: + pass + else: + raise NotImplementedError("Untested case") # to test + else: + # raise NotImplementedError("Untested case") # to test + h = open(sb_fname, 'w') + ret = winobjs.handle_pool.add(sb_fname, h) + else: + raise NotImplementedError("Untested case") + + # h = open(sb_fname, 'rb+') + # ret = winobjs.handle_pool.add(sb_fname, h) + log.debug('CreateFile ret %x', ret) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_CreateFileA(jitter): + kernel32_CreateFile(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_CreateFileW(jitter): + kernel32_CreateFile(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_ReadFile(jitter): + ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpbuffer", + "nnumberofbytestoread", + "lpnumberofbytesread", + "lpoverlapped"]) + if args.hwnd == winobjs.module_cur_hwnd: + pass + elif args.hwnd in winobjs.handle_pool: + pass + else: + raise ValueError('unknown hwnd!') + + data = None + if args.hwnd in winobjs.files_hwnd: + data = winobjs.files_hwnd[ + winobjs.module_cur_hwnd].read(args.nnumberofbytestoread) + elif args.hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[args.hwnd] + data = wh.info.read(args.nnumberofbytestoread) + else: + raise ValueError('unknown filename') + + if data is not None: + if (args.lpnumberofbytesread): + jitter.vm.set_u32(args.lpnumberofbytesread, len(data)) + jitter.vm.set_mem(args.lpbuffer, data) + + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetFileSize(jitter): + ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpfilesizehight"]) + + if args.hwnd == winobjs.module_cur_hwnd: + ret = 
len(open(winobjs.module_fname_nux, "rb").read()) + elif args.hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[args.hwnd] + ret = len(open(wh.name, "rb").read()) + else: + raise ValueError('unknown hwnd!') + + if args.lpfilesizehight != 0: + jitter.vm.set_u32(args.lpfilesizehight, ret) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetFileSizeEx(jitter): + ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpfilesizehight"]) + + if args.hwnd == winobjs.module_cur_hwnd: + l = len(open(winobjs.module_fname_nux, "rb").read()) + elif args.hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[args.hwnd] + l = len(open(wh.name, "rb").read()) + else: + raise ValueError('unknown hwnd!') + + if args.lpfilesizehight == 0: + raise NotImplementedError("Untested case") + jitter.vm.set_mem(args.lpfilesizehight, pck32( + l & 0xffffffff) + pck32((l >> 32) & 0xffffffff)) + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_FlushInstructionCache(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hprocess", "lpbasead", "dwsize"]) + jitter.func_ret_stdcall(ret_ad, 0x1337) + + +def kernel32_VirtualProtect(jitter): + ret_ad, args = jitter.func_args_stdcall(['lpvoid', 'dwsize', + 'flnewprotect', + 'lpfloldprotect']) + # XXX mask hpart + flnewprotect = args.flnewprotect & 0xFFF + if not flnewprotect in ACCESS_DICT: + raise ValueError('unknown access dw!') + + if args.lpfloldprotect: + old = jitter.vm.get_mem_access(args.lpvoid) + jitter.vm.set_u32(args.lpfloldprotect, ACCESS_DICT_INV[old]) + + for addr in jitter.vm.get_all_memory(): + # Multi-page + if args.lpvoid <= addr < args.lpvoid + args.dwsize: + jitter.vm.set_mem_access(addr, ACCESS_DICT[flnewprotect]) + + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_VirtualAlloc(jitter): + ret_ad, args = jitter.func_args_stdcall(['lpvoid', 'dwsize', + 'alloc_type', 'flprotect']) + + + if not args.flprotect in ACCESS_DICT: + raise ValueError('unknown access dw!') + + if args.lpvoid == 0: + alloc_addr = 
winobjs.heap.next_addr(args.dwsize) + jitter.vm.add_memory_page( + alloc_addr, ACCESS_DICT[args.flprotect], "\x00" * args.dwsize, + "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) + else: + all_mem = jitter.vm.get_all_memory() + if args.lpvoid in all_mem: + alloc_addr = args.lpvoid + jitter.vm.set_mem_access(args.lpvoid, ACCESS_DICT[args.flprotect]) + else: + alloc_addr = winobjs.heap.next_addr(args.dwsize) + # alloc_addr = args.lpvoid + jitter.vm.add_memory_page( + alloc_addr, ACCESS_DICT[args.flprotect], "\x00" * args.dwsize, + "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) + + log.info('VirtualAlloc addr: 0x%x', alloc_addr) + jitter.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_VirtualFree(jitter): + ret_ad, _ = jitter.func_args_stdcall(["lpvoid", "dwsize", "alloc_type"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def user32_GetWindowLongA(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hwnd", "nindex"]) + jitter.func_ret_stdcall(ret_ad, winobjs.windowlong_dw) + + +def user32_SetWindowLongA(jitter): + ret_ad, _ = jitter.func_args_stdcall(["hwnd", "nindex", "newlong"]) + jitter.func_ret_stdcall(ret_ad, winobjs.windowlong_dw) + + +def kernel32_GetModuleFileName(jitter, funcname, set_str): + ret_ad, args = jitter.func_args_stdcall(["hmodule", "lpfilename", "nsize"]) + + if args.hmodule in [0, winobjs.hcurmodule]: + p = winobjs.module_path[:] + elif (winobjs.runtime_dll and + args.hmodule in viewvalues(winobjs.runtime_dll.name2off)): + name_inv = dict( + [ + (x[1], x[0]) + for x in viewitems(winobjs.runtime_dll.name2off) + ] + ) + p = name_inv[args.hmodule] + else: + log.warning(('Unknown module 0x%x.' 
+ + 'Set winobjs.hcurmodule and retry'), args.hmodule) + p = None + + if p is None: + l = 0 + elif args.nsize < len(p): + p = p[:args.nsize] + l = len(p) + else: + l = len(p) + + if p: + set_str(args.lpfilename, p) + + jitter.func_ret_stdcall(ret_ad, l) + + +def kernel32_GetModuleFileNameA(jitter): + kernel32_GetModuleFileName(jitter, whoami(), jitter.set_str_ansi) + + +def kernel32_GetModuleFileNameW(jitter): + kernel32_GetModuleFileName(jitter, whoami(), jitter.set_str_unic) + + +def kernel32_CreateMutex(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["mutexattr", "initowner", + "lpname"]) + + if args.lpname: + name = get_str(args.lpname) + log.info("CreateMutex %r", name) + else: + name = None + if args.initowner: + if name in winobjs.mutex: + raise NotImplementedError("Untested case") + # ret = 0 + else: + winobjs.mutex[name] = id(name) + ret = winobjs.mutex[name] + else: + if name in winobjs.mutex: + raise NotImplementedError("Untested case") + # ret = 0 + else: + winobjs.mutex[name] = id(name) + ret = winobjs.mutex[name] + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_CreateMutexA(jitter): + kernel32_CreateMutex(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_CreateMutexW(jitter): + kernel32_CreateMutex(jitter, whoami(), jitter.get_str_unic) + + +def shell32_SHGetSpecialFolderLocation(jitter): + ret_ad, args = jitter.func_args_stdcall(["hwndowner", "nfolder", "ppidl"]) + jitter.vm.set_u32(args.ppidl, args.nfolder) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_SHGetPathFromIDList(jitter, funcname, set_str): + ret_ad, args = jitter.func_args_stdcall(["pidl", "ppath"]) + + if args.pidl == 7: # CSIDL_STARTUP: + s = "c:\\doc\\user\\startmenu\\programs\\startup" + set_str(args.ppath, s) + else: + raise ValueError('pidl not implemented', args.pidl) + jitter.func_ret_stdcall(ret_ad, 1) + + +def shell32_SHGetPathFromIDListW(jitter): + kernel32_SHGetPathFromIDList(jitter, whoami(), jitter.set_str_unic) + + +def 
shell32_SHGetPathFromIDListA(jitter): + kernel32_SHGetPathFromIDList(jitter, whoami(), jitter.set_str_ansi) + + +def kernel32_GetLastError(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, winobjs.lastwin32error) + + +def kernel32_SetLastError(jitter): + ret_ad, args = jitter.func_args_stdcall(["errcode"]) + # lasterr addr + # ad = tib_address + 0x34 + # jitter.vm.set_mem(ad, pck32(args.errcode)) + winobjs.lastwin32error = args.errcode + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_RestoreLastError(jitter): + kernel32_SetLastError(jitter) + + +def kernel32_LoadLibrary(jitter, get_str): + ret_ad, args = jitter.func_args_stdcall(["dllname"]) + + libname = get_str(args.dllname, 0x100) + ret = winobjs.runtime_dll.lib_get_add_base(libname) + log.info("Loading %r ret 0x%x", libname, ret) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_LoadLibraryA(jitter): + kernel32_LoadLibrary(jitter, jitter.get_str_ansi) + + +def kernel32_LoadLibraryW(jitter): + kernel32_LoadLibrary(jitter, jitter.get_str_unic) + + +def kernel32_LoadLibraryEx(jitter, get_str): + ret_ad, args = jitter.func_args_stdcall(["dllname", "hfile", "flags"]) + + if args.hfile != 0: + raise NotImplementedError("Untested case") + libname = get_str(args.dllname, 0x100) + ret = winobjs.runtime_dll.lib_get_add_base(libname) + log.info("Loading %r ret 0x%x", libname, ret) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_LoadLibraryExA(jitter): + kernel32_LoadLibraryEx(jitter, jitter.get_str_ansi) + + +def kernel32_LoadLibraryExW(jitter): + kernel32_LoadLibraryEx(jitter, jitter.get_str_unic) + + +def kernel32_GetProcAddress(jitter): + ret_ad, args = jitter.func_args_stdcall(["libbase", "fname"]) + fname = args.fname + if fname >= 0x10000: + fname = jitter.get_str_ansi(fname, 0x100) + if not fname: + fname = None + if fname is not None: + ad = winobjs.runtime_dll.lib_get_add_func(args.libbase, fname) + else: + ad = 0 + log.info("GetProcAddress %r %r ret 0x%x", 
args.libbase, fname, ad) + jitter.add_breakpoint(ad, jitter.handle_lib) + jitter.func_ret_stdcall(ret_ad, ad) + + +def kernel32_GetModuleHandle(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["dllname"]) + + if args.dllname: + libname = get_str(args.dllname) + if libname: + ret = winobjs.runtime_dll.lib_get_add_base(libname) + else: + log.warning('unknown module!') + ret = 0 + log.info("GetModuleHandle %r ret 0x%x", libname, ret) + else: + ret = winobjs.current_pe.NThdr.ImageBase + log.info("GetModuleHandle default ret 0x%x", ret) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetModuleHandleA(jitter): + kernel32_GetModuleHandle(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_GetModuleHandleW(jitter): + kernel32_GetModuleHandle(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_VirtualLock(jitter): + ret_ad, _ = jitter.func_args_stdcall(["lpaddress", "dwsize"]) + jitter.func_ret_stdcall(ret_ad, 1) + + +class systeminfo(object): + oemId = 0 + dwPageSize = 0x1000 + lpMinimumApplicationAddress = 0x10000 + lpMaximumApplicationAddress = 0x7ffeffff + dwActiveProcessorMask = 0x1 + numberOfProcessors = 0x1 + ProcessorsType = 586 + dwAllocationgranularity = 0x10000 + wProcessorLevel = 0x6 + ProcessorRevision = 0xf0b + + def pack(self): + return struct.pack('IIIIIIIIHH', + self.oemId, + self.dwPageSize, + self.lpMinimumApplicationAddress, + self.lpMaximumApplicationAddress, + self.dwActiveProcessorMask, + self.numberOfProcessors, + self.ProcessorsType, + self.dwAllocationgranularity, + self.wProcessorLevel, + self.ProcessorRevision) + + +def kernel32_GetSystemInfo(jitter): + ret_ad, args = jitter.func_args_stdcall(["sys_ptr"]) + sysinfo = systeminfo() + jitter.vm.set_mem(args.sys_ptr, sysinfo.pack()) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_IsWow64Process(jitter): + ret_ad, args = jitter.func_args_stdcall(["process", "bool_ptr"]) + jitter.vm.set_u32(args.bool_ptr, 0) + jitter.func_ret_stdcall(ret_ad, 1) + + +def 
kernel32_GetCommandLine(jitter, set_str): + ret_ad, _ = jitter.func_args_stdcall(0) + alloc_addr = winobjs.heap.alloc(jitter, 0x1000) + s = set_str('"%s"' % winobjs.module_path) + jitter.vm.set_mem(alloc_addr, s) + jitter.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_GetCommandLineA(jitter): + kernel32_GetCommandLine(jitter, set_str_ansi) + + +def kernel32_GetCommandLineW(jitter): + kernel32_GetCommandLine(jitter, set_str_unic) + + +def shell32_CommandLineToArgvW(jitter): + ret_ad, args = jitter.func_args_stdcall(["pcmd", "pnumargs"]) + cmd = jitter.get_str_unic(args.pcmd) + log.info("CommandLineToArgv %r", cmd) + tks = cmd.split(' ') + addr = winobjs.heap.alloc(jitter, len(cmd) * 2 + 4 * len(tks)) + addr_ret = winobjs.heap.alloc(jitter, 4 * (len(tks) + 1)) + o = 0 + for i, t in enumerate(tks): + jitter.set_str_unic(addr + o, t) + jitter.vm.set_u32(addr_ret + 4 * i, addr + o) + o += len(t)*2 + 2 + + jitter.vm.set_u32(addr_ret + 4 * i, 0) + jitter.vm.set_u32(args.pnumargs, len(tks)) + jitter.func_ret_stdcall(ret_ad, addr_ret) + + +def cryptdll_MD5Init(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad_ctx"]) + index = len(winobjs.cryptdll_md5_h) + h = MD5.new() + winobjs.cryptdll_md5_h[index] = h + + jitter.vm.set_u32(args.ad_ctx, index) + jitter.func_ret_stdcall(ret_ad, 0) + + +def cryptdll_MD5Update(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad_ctx", "ad_input", "inlen"]) + + index = jitter.vm.get_u32(args.ad_ctx) + if not index in winobjs.cryptdll_md5_h: + raise ValueError('unknown h context', index) + + data = jitter.vm.get_mem(args.ad_input, args.inlen) + winobjs.cryptdll_md5_h[index].update(data) + log.debug(hexdump(data)) + + jitter.func_ret_stdcall(ret_ad, 0) + + +def cryptdll_MD5Final(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad_ctx"]) + + index = jitter.vm.get_u32(args.ad_ctx) + if not index in winobjs.cryptdll_md5_h: + raise ValueError('unknown h context', index) + h = winobjs.cryptdll_md5_h[index].digest() + 
jitter.vm.set_mem(args.ad_ctx + 88, h) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlInitAnsiString(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad_ctx", "ad_str"]) + + s = jitter.get_str_ansi(args.ad_str) + l = len(s) + jitter.vm.set_mem(args.ad_ctx, + pck16(l) + pck16(l + 1) + pck32(args.ad_str)) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlHashUnicodeString(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad_ctxu", "case_i", "h_id", + "phout"]) + + if args.h_id != 1: + raise ValueError('unk hash unicode', args.h_id) + + l1, l2, ptra = struct.unpack('HHL', jitter.vm.get_mem(args.ad_ctxu, 8)) + s = jitter.vm.get_mem(ptra, l1) + s = s[:-1] + hv = 0 + + if args.case_i: + s = s.lower() + for c in s: + hv = ((65599 * hv) + ord(c)) & 0xffffffff + jitter.vm.set_u32(args.phout, hv) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_RtlMoveMemory(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad_dst", "ad_src", "m_len"]) + data = jitter.vm.get_mem(args.ad_src, args.m_len) + jitter.vm.set_mem(args.ad_dst, data) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlAnsiCharToUnicodeChar(jitter): + ret_ad, args = jitter.func_args_stdcall(['ad_ad_ch']) + ad_ch = jitter.vm.get_u32(args.ad_ad_ch) + ch = ord(jitter.vm.get_mem(ad_ch, 1)) + jitter.vm.set_u32(args.ad_ad_ch, ad_ch + 1) + jitter.func_ret_stdcall(ret_ad, ch) + + +def ntdll_RtlFindCharInUnicodeString(jitter): + ret_ad, args = jitter.func_args_stdcall(["flags", "main_str_ad", + "search_chars_ad", "pos_ad"]) + + if args.flags != 0: + raise ValueError('unk flags') + + ml1, ml2, mptra = struct.unpack('HHL', + jitter.vm.get_mem(args.main_str_ad, 8)) + sl1, sl2, sptra = struct.unpack( + 'HHL', jitter.vm.get_mem(args.search_chars_ad, 8)) + main_data = jitter.vm.get_mem(mptra, ml1)[:-1] + search_data = jitter.vm.get_mem(sptra, sl1)[:-1] + + pos = None + for i, c in enumerate(main_data): + for s in search_data: + if s == c: + pos = i + break + if pos: + break + if pos is None: + ret = 
0xC0000225 + jitter.vm.set_u32(args.pos_ad, 0) + else: + ret = 0 + jitter.vm.set_u32(args.pos_ad, pos) + + jitter.func_ret_stdcall(ret_ad, ret) + + +def ntdll_RtlComputeCrc32(jitter): + ret_ad, args = jitter.func_args_stdcall(["dwinit", "pdata", "ilen"]) + data = jitter.vm.get_mem(args.pdata, args.ilen) + crc_r = crc32(data, args.dwinit) + jitter.func_ret_stdcall(ret_ad, crc_r) + + +def ntdll_RtlExtendedIntegerMultiply(jitter): + ret_ad, args = jitter.func_args_stdcall(['multiplicand_low', + 'multiplicand_high', + 'multiplier']) + a = (args.multiplicand_high << 32) + args.multiplicand_low + a = a * args.multiplier + jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlLargeIntegerAdd(jitter): + ret_ad, args = jitter.func_args_stdcall(['a_low', 'a_high', + 'b_low', 'b_high']) + a = (args.a_high << 32) + args.a_low + (args.b_high << 32) + args.b_low + jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlLargeIntegerShiftRight(jitter): + ret_ad, args = jitter.func_args_stdcall(['a_low', 'a_high', 's_count']) + a = ((args.a_high << 32) + args.a_low) >> args.s_count + jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlEnlargedUnsignedMultiply(jitter): + ret_ad, args = jitter.func_args_stdcall(['a', 'b']) + a = args.a * args.b + jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlLargeIntegerSubtract(jitter): + ret_ad, args = jitter.func_args_stdcall(['a_low', 'a_high', + 'b_low', 'b_high']) + a = (args.a_high << 32) + args.a_low - (args.b_high << 32) + args.b_low + jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) + + +def ntdll_RtlCompareMemory(jitter): + ret_ad, args = jitter.func_args_stdcall(['ad1', 'ad2', 'm_len']) + data1 = jitter.vm.get_mem(args.ad1, args.m_len) + data2 = jitter.vm.get_mem(args.ad2, args.m_len) + + i = 0 + while data1[i] == data2[i]: + i += 1 + if i >= args.m_len: + break + + 
jitter.func_ret_stdcall(ret_ad, i) + + +def user32_GetMessagePos(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, 0x00110022) + + +def kernel32_Sleep(jitter): + ret_ad, _ = jitter.func_args_stdcall(['t']) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_ZwUnmapViewOfSection(jitter): + ret_ad, _ = jitter.func_args_stdcall(['h', 'ad']) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_IsBadReadPtr(jitter): + ret_ad, _ = jitter.func_args_stdcall(['lp', 'ucb']) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_KeInitializeEvent(jitter): + ret_ad, args = jitter.func_args_stdcall(['my_event', 'my_type', + 'my_state']) + jitter.vm.set_u32(args.my_event, winobjs.win_event_num) + winobjs.win_event_num += 1 + + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_RtlGetVersion(jitter): + ret_ad, args = jitter.func_args_stdcall(['ptr_version']) + + s = struct.pack("IIIII", + 0x114, # struct size + 0x5, # maj vers + 0x2, # min vers + 0x666, # build nbr + 0x2, # platform id + ) + jitter.set_str_unic("Service pack 4") + + jitter.vm.set_mem(args.ptr_version, s) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_RtlVerifyVersionInfo(jitter): + ret_ad, args = jitter.func_args_stdcall(['ptr_version']) + + s = jitter.vm.get_mem(args.ptr_version, 0x5 * 4) + s_size, s_majv, s_minv, s_buildn, s_platform = struct.unpack('IIIII', s) + raise NotImplementedError("Untested case") + # jitter.vm.set_mem(args.ptr_version, s) + # jitter.func_ret_stdcall(ret_ad, 0) + + +def hal_ExAcquireFastMutex(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, 0) + + +def mdl2ad(n): + return winobjs.nt_mdl_ad + 0x10 * n + + +def ad2mdl(ad): + return ((ad - winobjs.nt_mdl_ad) & 0xFFFFFFFF) // 0x10 + + +def ntoskrnl_IoAllocateMdl(jitter): + ret_ad, args = jitter.func_args_stdcall(["v_addr", "l", "second_buf", + "chargequota", "pirp"]) + m = mdl(args.v_addr, args.l) + winobjs.nt_mdl[winobjs.nt_mdl_cur] = m + 
jitter.vm.set_mem(mdl2ad(winobjs.nt_mdl_cur), bytes(m)) + jitter.func_ret_stdcall(ret_ad, mdl2ad(winobjs.nt_mdl_cur)) + winobjs.nt_mdl_cur += 1 + + +def ntoskrnl_MmProbeAndLockPages(jitter): + ret_ad, args = jitter.func_args_stdcall(["p_mdl", "access_mode", "op"]) + + if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(args.p_mdl)) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_MmMapLockedPagesSpecifyCache(jitter): + ret_ad, args = jitter.func_args_stdcall(["p_mdl", "access_mode", + "cache_type", "base_ad", + "bugcheckonfailure", + "priority"]) + if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(args.p_mdl)) + + jitter.func_ret_stdcall(ret_ad, winobjs.nt_mdl[ad2mdl(args.p_mdl)].ad) + + +def ntoskrnl_MmProtectMdlSystemAddress(jitter): + ret_ad, args = jitter.func_args_stdcall(["p_mdl", "prot"]) + if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(args.p_mdl)) + + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_MmUnlockPages(jitter): + ret_ad, args = jitter.func_args_stdcall(['p_mdl']) + if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(args.p_mdl)) + + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_IoFreeMdl(jitter): + ret_ad, args = jitter.func_args_stdcall(['p_mdl']) + if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: + raise ValueError('unk mdl', hex(args.p_mdl)) + del(winobjs.nt_mdl[ad2mdl(args.p_mdl)]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def hal_ExReleaseFastMutex(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_RtlQueryRegistryValues(jitter): + ret_ad, args = jitter.func_args_stdcall(["relativeto", "path", + "querytable", + "context", + "environ"]) + # path = get_str_unic(jitter, args.path) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntoskrnl_ExAllocatePoolWithTagPriority(jitter): + ret_ad, args = jitter.func_args_stdcall(["pool_type", + "nbr_of_bytes", + "tag", 
"priority"]) + alloc_addr = winobjs.heap.next_addr(args.nbr_of_bytes) + jitter.vm.add_memory_page( + alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * args.nbr_of_bytes, + "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) + + jitter.func_ret_stdcall(ret_ad, alloc_addr) + + +def my_lstrcmp(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2"]) + s1 = get_str(args.ptr_str1) + s2 = get_str(args.ptr_str2) + log.info("Compare %r with %r", s1, s2) + jitter.func_ret_stdcall(ret_ad, cmp(s1, s2)) + +def msvcrt_wcscmp(jitter): + ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"]) + s1 = jitter.get_str_unic(args.ptr_str1) + s2 = jitter.get_str_unic(args.ptr_str2) + log.debug("%s('%s','%s')" % (whoami(), s1, s2)) + jitter.func_ret_cdecl(ret_ad, cmp(s1, s2)) + +def msvcrt__wcsicmp(jitter): + ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"]) + s1 = jitter.get_str_unic(args.ptr_str1) + s2 = jitter.get_str_unic(args.ptr_str2) + log.debug("%s('%s','%s')" % (whoami(), s1, s2)) + jitter.func_ret_cdecl(ret_ad, cmp(s1.lower(), s2.lower())) + +def msvcrt__wcsnicmp(jitter): + ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2", "count"]) + s1 = jitter.get_str_unic(args.ptr_str1) + s2 = jitter.get_str_unic(args.ptr_str2) + log.debug("%s('%s','%s',%d)" % (whoami(), s1, s2, args.count)) + jitter.func_ret_cdecl(ret_ad, cmp(s1.lower()[:args.count], s2.lower()[:args.count])) + +def msvcrt_wcsncpy(jitter): + ret_ad, args = jitter.func_args_cdecl(["dst", "src", "n"]) + src = jitter.get_str_unic(args.src) + dst = src[:args.n] + dst += "\x00\x00" * (args.n-len(dst)+1) + jitter.vm.set_mem(args.dst, dst) + jitter.func_ret_cdecl(ret_ad, args.dst) + +def kernel32_lstrcmpA(jitter): + my_lstrcmp(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_lstrcmpiA(jitter): + my_lstrcmp(jitter, whoami(), lambda x: jitter.get_str_ansi(x).lower()) + + +def kernel32_lstrcmpW(jitter): + my_lstrcmp(jitter, whoami(), jitter.get_str_unic) + + +def 
kernel32_lstrcmpiW(jitter): + my_lstrcmp(jitter, whoami(), lambda x: jitter.get_str_unic(x).lower()) + + +def kernel32_lstrcmpi(jitter): + my_lstrcmp(jitter, whoami(), lambda x: jitter.get_str_ansi(x).lower()) + + +def my_strcpy(jitter, funcname, get_str, set_str): + ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2"]) + s2 = get_str(args.ptr_str2) + set_str(args.ptr_str1, s2) + log.info("Copy '%r'", s2) + jitter.func_ret_stdcall(ret_ad, args.ptr_str1) + + +def kernel32_lstrcpyW(jitter): + my_strcpy(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) + + +def kernel32_lstrcpyA(jitter): + my_strcpy(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) + + +def kernel32_lstrcpy(jitter): + my_strcpy(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) + +def msvcrt__mbscpy(jitter): + ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"]) + s2 = jitter.get_str_unic(args.ptr_str2) + jitter.set_str_unic(args.ptr_str1, s2) + jitter.func_ret_cdecl(ret_ad, args.ptr_str1) + +def msvcrt_wcscpy(jitter): + return msvcrt__mbscpy(jitter) + + +def kernel32_lstrcpyn(jitter): + ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2", + "mlen"]) + s2 = jitter.get_str_ansi(args.ptr_str2) + if len(s2) >= args.mlen: + s2 = s2[:args.mlen - 1] + log.info("Copy '%r'", s2) + jitter.set_str_ansi(args.ptr_str1, s2) + jitter.func_ret_stdcall(ret_ad, args.ptr_str1) + + +def my_strlen(jitter, funcname, get_str, mylen): + ret_ad, args = jitter.func_args_stdcall(["src"]) + src = get_str(args.src) + length = mylen(src) + log.info("Len of '%r' -> 0x%x", src, length) + jitter.func_ret_stdcall(ret_ad, length) + + +def kernel32_lstrlenA(jitter): + my_strlen(jitter, whoami(), jitter.get_str_ansi, len) + + +def kernel32_lstrlenW(jitter): + my_strlen(jitter, whoami(), jitter.get_str_unic, len) + + +def kernel32_lstrlen(jitter): + my_strlen(jitter, whoami(), jitter.get_str_ansi, len) + + +def my_lstrcat(jitter, funcname, get_str, set_str): + ret_ad, args = 
jitter.func_args_stdcall(['ptr_str1', 'ptr_str2']) + s1 = get_str(args.ptr_str1) + s2 = get_str(args.ptr_str2) + set_str(args.ptr_str1, s1 + s2) + jitter.func_ret_stdcall(ret_ad, args.ptr_str1) + + +def kernel32_lstrcatA(jitter): + my_lstrcat(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) + + +def kernel32_lstrcatW(jitter): + my_lstrcat(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) + + +def kernel32_GetUserGeoID(jitter): + ret_ad, args = jitter.func_args_stdcall(["geoclass"]) + if args.geoclass == 14: + ret = 12345678 + elif args.geoclass == 16: + ret = 55667788 + else: + raise ValueError('unknown geolcass') + jitter.func_ret_stdcall(ret_ad, ret) + + +def my_GetVolumeInformation(jitter, funcname, get_str, set_str): + ret_ad, args = jitter.func_args_stdcall(["lprootpathname", + "lpvolumenamebuffer", + "nvolumenamesize", + "lpvolumeserialnumber", + "lpmaximumcomponentlength", + "lpfilesystemflags", + "lpfilesystemnamebuffer", + "nfilesystemnamesize"]) + if args.lprootpathname: + s = get_str(args.lprootpathname) + log.info('GetVolumeInformation %r', s) + + + if args.lpvolumenamebuffer: + s = "volumename" + s = s[:args.nvolumenamesize] + set_str(args.lpvolumenamebuffer, s) + + if args.lpvolumeserialnumber: + jitter.vm.set_u32(args.lpvolumeserialnumber, 11111111) + if args.lpmaximumcomponentlength: + jitter.vm.set_u32(args.lpmaximumcomponentlength, 0xff) + if args.lpfilesystemflags: + jitter.vm.set_u32(args.lpfilesystemflags, 22222222) + + if args.lpfilesystemnamebuffer: + s = "filesystemname" + s = s[:args.nfilesystemnamesize] + set_str(args.lpfilesystemnamebuffer, s) + + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetVolumeInformationA(jitter): + my_GetVolumeInformation( + jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) + + +def kernel32_GetVolumeInformationW(jitter): + my_GetVolumeInformation(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) + + +def kernel32_MultiByteToWideChar(jitter): + ret_ad, args = 
jitter.func_args_stdcall(["codepage", "dwflags", + "lpmultibytestr", + "cbmultibyte", + "lpwidecharstr", + "cchwidechar"]) + src = jitter.get_str_ansi(args.lpmultibytestr) + '\x00' + l = len(src) + + src = "\x00".join(list(src)) + jitter.vm.set_mem(args.lpwidecharstr, src) + jitter.func_ret_stdcall(ret_ad, l) + + +def my_GetEnvironmentVariable(jitter, funcname, get_str, set_str, mylen): + ret_ad, args = jitter.func_args_stdcall(["lpname", "lpbuffer", + "nsize"]) + + s = get_str(args.lpname) + log.info('GetEnvironmentVariable %r', s) + if s in winobjs.env_variables: + v = winobjs.env_variables[s] + else: + log.warning('WARNING unknown env variable %r', s) + v = "" + set_str(args.lpbuffer, v) + jitter.func_ret_stdcall(ret_ad, mylen(v)) + + +def kernel32_GetEnvironmentVariableA(jitter): + my_GetEnvironmentVariable(jitter, whoami(), + jitter.get_str_ansi, + jitter.set_str_ansi, + len) + + +def kernel32_GetEnvironmentVariableW(jitter): + my_GetEnvironmentVariable(jitter, whoami(), + jitter.get_str_unic, + jitter.set_str_ansi, + len) + + +def my_GetSystemDirectory(jitter, funcname, set_str): + ret_ad, args = jitter.func_args_stdcall(["lpbuffer", "usize"]) + s = "c:\\windows\\system32" + l = len(s) + set_str(args.lpbuffer, s) + jitter.func_ret_stdcall(ret_ad, l) + + + +def kernel32_GetSystemDirectoryA(jitter): + my_GetSystemDirectory(jitter, whoami(), jitter.set_str_ansi) + + +def kernel32_GetSystemDirectoryW(jitter): + my_GetSystemDirectory(jitter, whoami(), jitter.set_str_unic) + + +def my_CreateDirectory(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(['lppath', 'secattrib']) + # path = get_str(jitter, args.lppath) + jitter.func_ret_stdcall(ret_ad, 0x1337) + + +def kernel32_CreateDirectoryW(jitter): + my_CreateDirectory(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_CreateDirectoryA(jitter): + my_CreateDirectory(jitter, whoami(), jitter.get_str_ansi) + + + +def my_CreateEvent(jitter, funcname, get_str): + ret_ad, args = 
jitter.func_args_stdcall(["lpeventattributes", + "bmanualreset", + "binitialstate", + "lpname"]) + s = get_str(args.lpname) if args.lpname else None + if not s in winobjs.events_pool: + winobjs.events_pool[s] = (args.bmanualreset, args.binitialstate) + else: + log.warning('WARNING: known event') + jitter.func_ret_stdcall(ret_ad, id(s)) + + +def kernel32_CreateEventA(jitter): + my_CreateEvent(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_CreateEventW(jitter): + my_CreateEvent(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_WaitForSingleObject(jitter): + ret_ad, args = jitter.func_args_stdcall(['handle', 'dwms']) + + t_start = time.time() * 1000 + found = False + while True: + if args.dwms and args.dwms + t_start > time.time() * 1000: + ret = 0x102 + break + for key, value in viewitems(winobjs.events_pool): + if key != args.handle: + continue + found = True + if value[1] == 1: + ret = 0 + break + if not found: + log.warning('unknown handle') + ret = 0xffffffff + break + time.sleep(0.1) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_SetFileAttributesA(jitter): + ret_ad, args = jitter.func_args_stdcall(["lpfilename", + "dwfileattributes"]) + if args.lpfilename: + # fname = get_str_ansi(jitter, args.lpfilename) + ret = 1 + else: + ret = 0 + jitter.vm.set_u32(tib_address + 0x34, 3) + + jitter.func_ret_stdcall(ret_ad, ret) + + +def ntdll_RtlMoveMemory(jitter): + ret_ad, args = jitter.func_args_stdcall(["dst", "src", "l"]) + s = jitter.vm.get_mem(args.src, args.l) + jitter.vm.set_mem(args.dst, s) + jitter.func_ret_stdcall(ret_ad, 1) + + +def ntdll_ZwQuerySystemInformation(jitter): + ret_ad, args = jitter.func_args_stdcall(["systeminformationclass", + "systeminformation", + "systeminformationl", + "returnl"]) + if args.systeminformationclass == 2: + # SYSTEM_PERFORMANCE_INFORMATION + o = struct.pack('II', 0x22222222, 0x33333333) + o += "\x00" * args.systeminformationl + o = o[:args.systeminformationl] + jitter.vm.set_mem(args.systeminformation, 
o) + else: + raise ValueError('unknown sysinfo class', + args.systeminformationclass) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_ZwProtectVirtualMemory(jitter): + ret_ad, args = jitter.func_args_stdcall(["handle", "lppvoid", + "pdwsize", + "flnewprotect", + "lpfloldprotect"]) + + ad = jitter.vm.get_u32(args.lppvoid) + # dwsize = upck32(jitter.vm.get_mem(args.pdwsize, 4)) + # XXX mask hpart + flnewprotect = args.flnewprotect & 0xFFF + + if not flnewprotect in ACCESS_DICT: + raise ValueError('unknown access dw!') + jitter.vm.set_mem_access(ad, ACCESS_DICT[flnewprotect]) + + # XXX todo real old protect + jitter.vm.set_u32(args.lpfloldprotect, 0x40) + + jitter.func_ret_stdcall(ret_ad, 1) + + +def ntdll_ZwAllocateVirtualMemory(jitter): + ret_ad, args = jitter.func_args_stdcall(["handle", "lppvoid", + "zerobits", "pdwsize", + "alloc_type", + "flprotect"]) + + # ad = upck32(jitter.vm.get_mem(args.lppvoid, 4)) + dwsize = jitter.vm.get_u32(args.pdwsize) + + if not args.flprotect in ACCESS_DICT: + raise ValueError('unknown access dw!') + + alloc_addr = winobjs.heap.next_addr(dwsize) + jitter.vm.add_memory_page( + alloc_addr, ACCESS_DICT[args.flprotect], "\x00" * dwsize, + "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) + jitter.vm.set_u32(args.lppvoid, alloc_addr) + + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_ZwFreeVirtualMemory(jitter): + ret_ad, args = jitter.func_args_stdcall(["handle", "lppvoid", + "pdwsize", "alloc_type"]) + # ad = upck32(jitter.vm.get_mem(args.lppvoid, 4)) + # dwsize = upck32(jitter.vm.get_mem(args.pdwsize, 4)) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlInitString(jitter): + ret_ad, args = jitter.func_args_stdcall(["pstring", "source"]) + s = jitter.get_str_ansi(args.source) + l = len(s) + 1 + o = struct.pack('HHI', l, l, args.source) + jitter.vm.set_mem(args.pstring, o) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlAnsiStringToUnicodeString(jitter): + ret_ad, args = jitter.func_args_stdcall(["dst", "src", 
"alloc_str"]) + + l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.src, 0x8)) + s = jitter.get_str_ansi(p_src) + s = ("\x00".join(s + "\x00")) + l = len(s) + 1 + if args.alloc_str: + alloc_addr = winobjs.heap.next_addr(l) + jitter.vm.add_memory_page( + alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * l, + "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) + else: + alloc_addr = p_src + jitter.vm.set_mem(alloc_addr, s) + o = struct.pack('HHI', l, l, alloc_addr) + jitter.vm.set_mem(args.dst, o) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_LdrLoadDll(jitter): + ret_ad, args = jitter.func_args_stdcall(["path", "flags", + "modname", "modhandle"]) + + l1, l2, p_src = struct.unpack('HHI', + jitter.vm.get_mem(args.modname, 0x8)) + s = jitter.get_str_unic(p_src) + libname = s.lower() + + ad = winobjs.runtime_dll.lib_get_add_base(libname) + jitter.vm.set_u32(args.modhandle, ad) + + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_RtlFreeUnicodeString(jitter): + ret_ad, args = jitter.func_args_stdcall(['src']) + # l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.src, 0x8)) + # s = get_str_unic(jitter, p_src) + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_LdrGetProcedureAddress(jitter): + ret_ad, args = jitter.func_args_stdcall(["libbase", "pfname", + "opt", "p_ad"]) + + l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.pfname, 0x8)) + fname = jitter.get_str_ansi(p_src) + + ad = winobjs.runtime_dll.lib_get_add_func(args.libbase, fname) + jitter.add_breakpoint(ad, jitter.handle_lib) + + jitter.vm.set_u32(args.p_ad, ad) + + jitter.func_ret_stdcall(ret_ad, 0) + + +def ntdll_memset(jitter): + ret_ad, args = jitter.func_args_cdecl(['addr', 'c', 'size']) + jitter.vm.set_mem(args.addr, int_to_byte(args.c) * args.size) + jitter.func_ret_cdecl(ret_ad, args.addr) + + +def msvcrt_memset(jitter): + ret_ad, args = jitter.func_args_cdecl(['addr', 'c', 'size']) + jitter.vm.set_mem(args.addr, int_to_byte(args.c) * args.size) + jitter.func_ret_cdecl(ret_ad, 
args.addr) + +def msvcrt_strrchr(jitter): + ret_ad, args = jitter.func_args_cdecl(['pstr','c']) + s = jitter.get_str_ansi(args.pstr) + c = int_to_byte(args.c) + ret = args.pstr + s.rfind(c) + log.info("strrchr(%x '%s','%s') = %x" % (args.pstr,s,c,ret)) + jitter.func_ret_cdecl(ret_ad, ret) + +def msvcrt_wcsrchr(jitter): + ret_ad, args = jitter.func_args_cdecl(['pstr','c']) + s = jitter.get_str_unic(args.pstr) + c = int_to_byte(args.c) + ret = args.pstr + (s.rfind(c)*2) + log.info("wcsrchr(%x '%s',%s) = %x" % (args.pstr,s,c,ret)) + jitter.func_ret_cdecl(ret_ad, ret) + +def msvcrt_memcpy(jitter): + ret_ad, args = jitter.func_args_cdecl(['dst', 'src', 'size']) + s = jitter.vm.get_mem(args.src, args.size) + jitter.vm.set_mem(args.dst, s) + jitter.func_ret_cdecl(ret_ad, args.dst) + +def msvcrt_realloc(jitter): + ret_ad,args = jitter.func_args_cdecl(['ptr','new_size']) + if args.ptr == 0: + addr = winobjs.heap.alloc(jitter, args.new_size) + else: + addr = winobjs.heap.alloc(jitter, args.new_size) + size = winobjs.heap.get_size(jitter.vm, args.ptr) + data = jitter.vm.get_mem(args.ptr, size) + jitter.vm.set_mem(addr, data) + jitter.func_ret_cdecl(ret_ad, addr) + +def msvcrt_memcmp(jitter): + ret_ad, args = jitter.func_args_cdecl(['ps1', 'ps2', 'size']) + s1 = jitter.vm.get_mem(args.ps1, args.size) + s2 = jitter.vm.get_mem(args.ps2, args.size) + ret = cmp(s1, s2) + jitter.func_ret_cdecl(ret_ad, ret) + + +def shlwapi_PathFindExtensionA(jitter): + ret_ad, args = jitter.func_args_stdcall(['path_ad']) + path = jitter.get_str_ansi(args.path_ad) + i = path.rfind('.') + if i == -1: + i = args.path_ad + len(path) + else: + i = args.path_ad + i + jitter.func_ret_stdcall(ret_ad, i) + + +def shlwapi_PathRemoveFileSpecW(jitter): + ret_ad, args = jitter.func_args_stdcall(['path_ad']) + path = jitter.get_str_unic(args.path_ad) + i = path.rfind('\\') + if i == -1: + i = 0 + jitter.vm.set_mem(args.path_ad + i * 2, "\x00\x00") + path = jitter.get_str_unic(args.path_ad) + 
jitter.func_ret_stdcall(ret_ad, 1) + + +def shlwapi_PathIsPrefixW(jitter): + ret_ad, args = jitter.func_args_stdcall(['ptr_prefix', 'ptr_path']) + prefix = jitter.get_str_unic(args.ptr_prefix) + path = jitter.get_str_unic(args.ptr_path) + + if path.startswith(prefix): + ret = 1 + else: + ret = 0 + jitter.func_ret_stdcall(ret_ad, ret) + + +def shlwapi_PathIsDirectoryW(jitter): + ret_ad, args = jitter.func_args_stdcall(['ptr_path']) + fname = jitter.get_str_unic(args.ptr_path) + + sb_fname = windows_to_sbpath(fname) + + s = os.stat(sb_fname) + ret = 0 + if stat.S_ISDIR(s.st_mode): + ret = 1 + + jitter.func_ret_cdecl(ret_ad, ret) + + +def shlwapi_PathIsFileSpec(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(['path_ad']) + path = get_str(args.path_ad) + if path.find(':') != -1 and path.find('\\') != -1: + ret = 0 + else: + ret = 1 + + jitter.func_ret_stdcall(ret_ad, ret) + + +def shlwapi_PathGetDriveNumber(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(['path_ad']) + path = get_str(args.path_ad) + l = ord(path[0].upper()) - ord('A') + if 0 <= l <= 25: + ret = l + else: + ret = -1 + + jitter.func_ret_stdcall(ret_ad, ret) + + +def shlwapi_PathGetDriveNumberA(jitter): + shlwapi_PathGetDriveNumber(jitter, whoami(), jitter.get_str_ansi) + + +def shlwapi_PathGetDriveNumberW(jitter): + shlwapi_PathGetDriveNumber(jitter, whoami(), jitter.get_str_unic) + + +def shlwapi_PathIsFileSpecA(jitter): + shlwapi_PathIsFileSpec(jitter, whoami(), jitter.get_str_ansi) + + +def shlwapi_PathIsFileSpecW(jitter): + shlwapi_PathIsFileSpec(jitter, whoami(), jitter.get_str_unic) + + +def shlwapi_StrToIntA(jitter): + ret_ad, args = jitter.func_args_stdcall(['i_str_ad']) + i_str = jitter.get_str_ansi(args.i_str_ad) + try: + i = int(i_str) + except: + log.warning('WARNING cannot convert int') + i = 0 + + jitter.func_ret_stdcall(ret_ad, i) + + +def shlwapi_StrToInt64Ex(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(['pstr', 'flags', 
'pret']) + i_str = get_str(args.pstr) + + if args.flags == 0: + r = int(i_str) + elif args.flags == 1: + r = int(i_str, 16) + else: + raise ValueError('cannot decode int') + + jitter.vm.set_mem(args.pret, struct.pack('q', r)) + jitter.func_ret_stdcall(ret_ad, 1) + + +def shlwapi_StrToInt64ExA(jitter): + shlwapi_StrToInt64Ex(jitter, whoami(), jitter.get_str_ansi) + + +def shlwapi_StrToInt64ExW(jitter): + shlwapi_StrToInt64Ex(jitter, whoami(), jitter.get_str_unic) + + +def user32_IsCharAlpha(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["c"]) + try: + c = int_to_byte(args.c) + except: + log.error('bad char %r', args.c) + c = "\x00" + if c.isalpha(jitter): + ret = 1 + else: + ret = 0 + jitter.func_ret_stdcall(ret_ad, ret) + + +def user32_IsCharAlphaA(jitter): + user32_IsCharAlpha(jitter, whoami(), jitter.get_str_ansi) + + +def user32_IsCharAlphaW(jitter): + user32_IsCharAlpha(jitter, whoami(), jitter.get_str_unic) + + +def user32_IsCharAlphaNumericA(jitter): + ret_ad, args = jitter.func_args_stdcall(["c"]) + c = int_to_byte(args.c) + if c.isalnum(jitter): + ret = 1 + else: + ret = 0 + jitter.func_ret_stdcall(ret_ad, ret) + +def get_fmt_args(jitter, fmt, cur_arg, get_str): + return _get_fmt_args(fmt, cur_arg, get_str, jitter.get_arg_n_cdecl) + +def msvcrt_sprintf_str(jitter, get_str): + ret_ad, args = jitter.func_args_cdecl(['string', 'fmt']) + cur_arg, fmt = 2, args.fmt + return ret_ad, args, get_fmt_args(jitter, fmt, cur_arg, get_str) + +def msvcrt_sprintf(jitter): + ret_ad, args, output = msvcrt_sprintf_str(jitter, jitter.get_str_ansi) + ret = len(output) + log.info("sprintf() = '%s'" % (output)) + jitter.vm.set_mem(args.string, output + b'\x00') + return jitter.func_ret_cdecl(ret_ad, ret) + +def msvcrt_swprintf(jitter): + ret_ad, args = jitter.func_args_cdecl(['string', 'fmt']) + cur_arg, fmt = 2, args.fmt + output = get_fmt_args(jitter, fmt, cur_arg, jitter.get_str_unic) + ret = len(output) + log.info("swprintf('%s') = '%s'" % 
(jitter.get_str_unic(args.fmt), output)) + jitter.vm.set_mem(args.string, output.encode("utf-16le") + b'\x00\x00') + return jitter.func_ret_cdecl(ret_ad, ret) + +def msvcrt_fprintf(jitter): + ret_addr, args = jitter.func_args_cdecl(['file', 'fmt']) + cur_arg, fmt = 2, args.fmt + output = get_fmt_args(jitter, fmt, cur_arg, jitter.get_str_ansi) + ret = len(output) + log.info("fprintf(%x, '%s') = '%s'" % (args.file, jitter.get_str_ansi(args.fmt), output)) + + fd = jitter.vm.get_u32(args.file + 0x10) + if not fd in winobjs.handle_pool: + raise NotImplementedError("Untested case") + winobjs.handle_pool[fd].info.write(output) + + return jitter.func_ret_cdecl(ret_addr, ret) + +def shlwapi_StrCmpNIA(jitter): + ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2", + "nchar"]) + s1 = jitter.get_str_ansi(args.ptr_str1).lower() + s2 = jitter.get_str_ansi(args.ptr_str2).lower() + s1 = s1[:args.nchar] + s2 = s2[:args.nchar] + jitter.func_ret_stdcall(ret_ad, cmp(s1, s2)) + + +def advapi32_RegCreateKeyW(jitter): + ret_ad, args = jitter.func_args_stdcall(["hkey", "subkey", + "phandle"]) + s_subkey = jitter.get_str_unic(args.subkey).lower() if args.subkey else "" + + ret_hkey = 0 + ret = 2 + if args.hkey in winobjs.hkey_handles: + ret = 0 + if s_subkey: + ret_hkey = hash(s_subkey) & 0xffffffff + winobjs.hkey_handles[ret_hkey] = s_subkey + else: + ret_hkey = args.hkey + + log.info("RegCreateKeyW(%x, '%s') = (%x,%d)" % (args.hkey, s_subkey, ret_hkey, ret)) + jitter.vm.set_u32(args.phandle, ret_hkey) + + jitter.func_ret_stdcall(ret_ad, ret) + +def kernel32_GetCurrentDirectoryA(jitter): + ret_ad, args = jitter.func_args_stdcall(["size","buf"]) + dir_ = winobjs.cur_dir + log.debug("GetCurrentDirectory() = '%s'" % dir_) + jitter.vm.set_mem(args.buf, dir_[:args.size-1] + b"\x00") + ret = len(dir_) + if args.size <= len(dir_): + ret += 1 + jitter.func_ret_stdcall(ret_ad, ret) + +def advapi32_RegOpenKeyEx(jitter, funcname, get_str): + ret_ad, args = 
jitter.func_args_stdcall(["hkey", "subkey", + "reserved", "access", + "phandle"]) + s_subkey = get_str(args.subkey).lower() if args.subkey else "" + + ret_hkey = 0 + ret = 2 + if args.hkey in winobjs.hkey_handles: + if s_subkey: + h = hash(s_subkey) & 0xffffffff + if h in winobjs.hkey_handles: + ret_hkey = h + ret = 0 + else: + log.error('unknown skey') + + jitter.vm.set_u32(args.phandle, ret_hkey) + + jitter.func_ret_stdcall(ret_ad, ret) + + +def advapi32_RegOpenKeyExA(jitter): + advapi32_RegOpenKeyEx(jitter, whoami(), jitter.get_str_ansi) + + +def advapi32_RegOpenKeyExW(jitter): + advapi32_RegOpenKeyEx(jitter, whoami(), jitter.get_str_unic) + + +def advapi32_RegSetValue(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["hkey", "psubkey", + "valuetype", "pvalue", + "vlen"]) + if args.psubkey: + log.info("Subkey %s", get_str(args.psubkey)) + if args.pvalue: + log.info("Value %s", get_str(args.pvalue)) + jitter.func_ret_stdcall(ret_ad, 0) + +def advapi32_RegSetValueEx(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["hkey", "lpvaluename", + "reserved", "dwtype", + "lpdata", "cbData"]) + hkey = winobjs.hkey_handles.get(args.hkey, "unknown HKEY") + value_name = get_str(args.lpvaluename) if args.lpvaluename else "" + data = get_str(args.lpdata) if args.lpdata else "" + log.info("%s('%s','%s'='%s',%x)" % (funcname, hkey, value_name, data, args.dwtype)) + jitter.func_ret_stdcall(ret_ad, 0) + +def advapi32_RegCloseKey(jitter): + ret_ad, args = jitter.func_args_stdcall(["hkey"]) + del winobjs.hkey_handles[args.hkey] + log.info("RegCloseKey(%x)" % args.hkey) + jitter.func_ret_stdcall(ret_ad, 0) + +def advapi32_RegSetValueExA(jitter): + advapi32_RegSetValueEx(jitter, whoami(), jitter.get_str_ansi) + + +def advapi32_RegSetValueExW(jitter): + advapi32_RegOpenKeyEx(jitter, whoami(), jitter.get_str_unic) + + +def advapi32_RegSetValueA(jitter): + advapi32_RegSetValue(jitter, whoami(), jitter.get_str_ansi) + + +def 
advapi32_RegSetValueW(jitter): + advapi32_RegSetValue(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_GetThreadLocale(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, 0x40c) + +def kernel32_SetCurrentDirectory(jitter, get_str): + ret_ad, args = jitter.func_args_stdcall(['dir']) + dir_ = get_str(args.dir) + log.debug("SetCurrentDirectory('%s') = 1" % dir_) + winobjs.cur_dir = dir_ + jitter.func_ret_stdcall(ret_ad, 1) + +def kernel32_SetCurrentDirectoryW(jitter): + return kernel32_SetCurrentDirectory(jitter, jitter.get_str_unic) + +def kernel32_SetCurrentDirectoryA(jitter): + return kernel32_SetCurrentDirectory(jitter, jitter.get_str_ansi) + +def msvcrt_wcscat(jitter): + ret_ad, args = jitter.func_args_cdecl(['ptr_str1', 'ptr_str2']) + s1 = jitter.get_str_unic(args.ptr_str1) + s2 = jitter.get_str_unic(args.ptr_str2) + log.info("strcat('%s','%s')" % (s1,s2)) + jitter.vm.set_mem(args.ptr_str1, (s1 + s2).encode("utf-16le") + "\x00\x00") + jitter.func_ret_cdecl(ret_ad, args.ptr_str1) + + +def kernel32_GetLocaleInfo(jitter, funcname, set_str): + ret_ad, args = jitter.func_args_stdcall(["localeid", "lctype", + "lplcdata", "cchdata"]) + + buf = None + ret = 0 + if args.localeid == 0x40c: + if args.lctype == 0x3: + buf = "ENGLISH" + buf = buf[:args.cchdata - 1] + set_str(args.lplcdata, buf) + ret = len(buf) + else: + raise ValueError('unimpl localeid') + + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetLocaleInfoA(jitter): + kernel32_GetLocaleInfo(jitter, whoami(), jitter.set_str_ansi) + + +def kernel32_GetLocaleInfoW(jitter): + kernel32_GetLocaleInfo(jitter, whoami(), jitter.set_str_unic) + + +def kernel32_TlsAlloc(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + winobjs.tls_index += 1 + jitter.func_ret_stdcall(ret_ad, winobjs.tls_index) + + +def kernel32_TlsFree(jitter): + ret_ad, _ = jitter.func_args_stdcall(["tlsindex"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_TlsSetValue(jitter): + ret_ad, args = 
jitter.func_args_stdcall(["tlsindex", "tlsvalue"]) + winobjs.tls_values[args.tlsindex] = args.tlsvalue + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_TlsGetValue(jitter): + ret_ad, args = jitter.func_args_stdcall(["tlsindex"]) + if not args.tlsindex in winobjs.tls_values: + raise ValueError("unknown tls val", repr(args.tlsindex)) + jitter.func_ret_stdcall(ret_ad, winobjs.tls_values[args.tlsindex]) + + +def user32_GetKeyboardType(jitter): + ret_ad, args = jitter.func_args_stdcall(["typeflag"]) + + ret = 0 + if args.typeflag == 0: + ret = 4 + else: + raise ValueError('unimpl keyboard type') + + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetStartupInfo(jitter, funcname, set_str): + ret_ad, args = jitter.func_args_stdcall(["ptr"]) + + s = "\x00" * 0x2c + "\x81\x00\x00\x00" + "\x0a" + + jitter.vm.set_mem(args.ptr, s) + jitter.func_ret_stdcall(ret_ad, args.ptr) + + +def kernel32_GetStartupInfoA(jitter): + kernel32_GetStartupInfo(jitter, whoami(), jitter.set_str_ansi) + + +def kernel32_GetStartupInfoW(jitter): + kernel32_GetStartupInfo(jitter, whoami(), jitter.set_str_unic) + + +def kernel32_GetCurrentThreadId(jitter): + ret_ad, _ = jitter.func_args_stdcall(0) + jitter.func_ret_stdcall(ret_ad, 0x113377) + + +def kernel32_InitializeCriticalSection(jitter): + ret_ad, _ = jitter.func_args_stdcall(["lpcritic"]) + jitter.func_ret_stdcall(ret_ad, 0) + + +def user32_GetSystemMetrics(jitter): + ret_ad, args = jitter.func_args_stdcall(["nindex"]) + + ret = 0 + if args.nindex in [0x2a, 0x4a]: + ret = 0 + else: + raise ValueError('unimpl index') + jitter.func_ret_stdcall(ret_ad, ret) + + +def wsock32_WSAStartup(jitter): + ret_ad, args = jitter.func_args_stdcall(["version", "pwsadata"]) + jitter.vm.set_mem(args.pwsadata, "\x01\x01\x02\x02WinSock 2.0\x00") + jitter.func_ret_stdcall(ret_ad, 0) + + +def get_current_filetime(): + """ + Get current filetime + https://msdn.microsoft.com/en-us/library/ms724228 + """ + curtime = winobjs.current_datetime + unixtime = 
int(time.mktime(curtime.timetuple())) + filetime = (int(unixtime * 1000000 + curtime.microsecond) * 10 + + DATE_1601_TO_1970) + return filetime + + +def unixtime_to_filetime(unixtime): + """ + Convert unixtime to filetime + https://msdn.microsoft.com/en-us/library/ms724228 + """ + return (unixtime * 10000000) + DATE_1601_TO_1970 + + +def filetime_to_unixtime(filetime): + """ + Convert filetime to unixtime + # https://msdn.microsoft.com/en-us/library/ms724228 + """ + return int((filetime - DATE_1601_TO_1970) // 10000000) + + +def datetime_to_systemtime(curtime): + + s = struct.pack('HHHHHHHH', + curtime.year, # year + curtime.month, # month + curtime.weekday(), # dayofweek + curtime.day, # day + curtime.hour, # hour + curtime.minute , # minutes + curtime.second, # seconds + int(curtime.microsecond // 1000), # millisec + ) + return s + + +def kernel32_GetSystemTimeAsFileTime(jitter): + ret_ad, args = jitter.func_args_stdcall(["lpSystemTimeAsFileTime"]) + + current_filetime = get_current_filetime() + filetime = struct.pack('II', + current_filetime & 0xffffffff, + (current_filetime>>32) & 0xffffffff) + + jitter.vm.set_mem(args.lpSystemTimeAsFileTime, filetime) + jitter.func_ret_stdcall(ret_ad, 0) + + +def kernel32_GetLocalTime(jitter): + ret_ad, args = jitter.func_args_stdcall(["lpsystemtime"]) + systemtime = datetime_to_systemtime(winobjs.current_datetime) + jitter.vm.set_mem(args.lpsystemtime, systemtime) + jitter.func_ret_stdcall(ret_ad, args.lpsystemtime) + + +def kernel32_GetSystemTime(jitter): + ret_ad, args = jitter.func_args_stdcall(["lpsystemtime"]) + systemtime = datetime_to_systemtime(winobjs.current_datetime) + jitter.vm.set_mem(args.lpsystemtime, systemtime) + jitter.func_ret_stdcall(ret_ad, args.lpsystemtime) + + +def kernel32_CreateFileMapping(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["hfile", "lpattr", "flprotect", + "dwmaximumsizehigh", + "dwmaximumsizelow", "lpname"]) + + if args.hfile == 0xffffffff: + # Create null 
mapping + if args.dwmaximumsizehigh: + raise NotImplementedError("Untested case") + hmap = StringIO("\x00" * args.dwmaximumsizelow) + hmap_handle = winobjs.handle_pool.add('filemem', hmap) + + ret = winobjs.handle_pool.add('filemapping', hmap_handle) + else: + if not args.hfile in winobjs.handle_pool: + raise ValueError('unknown handle') + ret = winobjs.handle_pool.add('filemapping', args.hfile) + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_CreateFileMappingA(jitter): + kernel32_CreateFileMapping(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_CreateFileMappingW(jitter): + kernel32_CreateFileMapping(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_MapViewOfFile(jitter): + ret_ad, args = jitter.func_args_stdcall(["hfile", "flprotect", + "dwfileoffsethigh", + "dwfileoffsetlow", + "length"]) + + if not args.hfile in winobjs.handle_pool: + raise ValueError('unknown handle') + hmap = winobjs.handle_pool[args.hfile] + if not hmap.info in winobjs.handle_pool: + raise ValueError('unknown file handle') + + hfile_o = winobjs.handle_pool[hmap.info] + fd = hfile_o.info + fd.seek((args.dwfileoffsethigh << 32) | args.dwfileoffsetlow) + data = fd.read(args.length) if args.length else fd.read() + length = len(data) + + log.debug('MapViewOfFile len: %x', len(data)) + + if not args.flprotect in ACCESS_DICT: + raise ValueError('unknown access dw!') + + alloc_addr = winobjs.heap.alloc(jitter, len(data)) + jitter.vm.set_mem(alloc_addr, data) + + winobjs.handle_mapped[alloc_addr] = (hfile_o, args.dwfileoffsethigh, + args.dwfileoffsetlow, length) + + jitter.func_ret_stdcall(ret_ad, alloc_addr) + + +def kernel32_UnmapViewOfFile(jitter): + ret_ad, args = jitter.func_args_stdcall(['ad']) + + if not args.ad in winobjs.handle_mapped: + raise NotImplementedError("Untested case") + """ + hfile_o, dwfileoffsethigh, dwfileoffsetlow, length = winobjs.handle_mapped[ad] + off = (dwfileoffsethigh<<32) | dwfileoffsetlow + s = jitter.vm.get_mem(ad, length) + 
hfile_o.info.seek(off) + hfile_o.info.write(s) + hfile_o.info.close() + """ + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetDriveType(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(['pathname']) + + p = get_str(args.pathname) + p = p.upper() + + log.debug('Drive: %r', p) + + ret = 0 + if p[0] == "C": + ret = 3 + + jitter.func_ret_stdcall(ret_ad, ret) + + +def kernel32_GetDriveTypeA(jitter): + kernel32_GetDriveType(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_GetDriveTypeW(jitter): + kernel32_GetDriveType(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_GetDiskFreeSpace(jitter, funcname, get_str): + ret_ad, args = jitter.func_args_stdcall(["lprootpathname", + "lpsectorpercluster", + "lpbytespersector", + "lpnumberoffreeclusters", + "lptotalnumberofclusters"]) + jitter.vm.set_u32(args.lpsectorpercluster, 8) + jitter.vm.set_u32(args.lpbytespersector, 0x200) + jitter.vm.set_u32(args.lpnumberoffreeclusters, 0x222222) + jitter.vm.set_u32(args.lptotalnumberofclusters, 0x333333) + jitter.func_ret_stdcall(ret_ad, 1) + + +def kernel32_GetDiskFreeSpaceA(jitter): + kernel32_GetDiskFreeSpace(jitter, whoami(), jitter.get_str_ansi) + + +def kernel32_GetDiskFreeSpaceW(jitter): + kernel32_GetDiskFreeSpace(jitter, whoami(), jitter.get_str_unic) + + +def kernel32_VirtualQuery(jitter): + ret_ad, args = jitter.func_args_stdcall(["ad", "lpbuffer", "dwl"]) + + all_mem = jitter.vm.get_all_memory() + found = None + for basead, m in viewitems(all_mem): + if basead <= args.ad < basead + m['size']: + found = args.ad, m + break + if not found: + raise ValueError('cannot find mem', hex(args.ad)) + + if args.dwl != 0x1c: + raise ValueError('strange mem len', hex(args.dwl)) + s = struct.pack('IIIIIII', + args.ad, + basead, + ACCESS_DICT_INV[m['access']], + m['size'], + 0x1000, + ACCESS_DICT_INV[m['access']], + 0x01000000) + jitter.vm.set_mem(args.lpbuffer, s) + jitter.func_ret_stdcall(ret_ad, args.dwl) + + +def 
kernel32_GetProcessAffinityMask(jitter): + ret_ad, args = jitter.func_args_stdcall(["hprocess", + "procaffmask", + "systemaffmask"]) + jitter.vm.set_u32(args.procaffmask, 1) + jitter.vm.set_u32(args.systemaffmask, 1) + jitter.func_ret_stdcall(ret_ad, 1) + + +def msvcrt_rand(jitter): + ret_ad, _ = jitter.func_args_cdecl(0) + jitter.func_ret_stdcall(ret_ad, 0x666) + +def msvcrt_srand(jitter): + ret_ad, _ = jitter.func_args_cdecl(['seed']) + jitter.func_ret_stdcall(ret_ad, 0) + +def msvcrt_wcslen(jitter): + ret_ad, args = jitter.func_args_cdecl(["pwstr"]) + s = jitter.get_str_unic(args.pwstr) + jitter.func_ret_cdecl(ret_ad, len(s)) + +def kernel32_SetFilePointer(jitter): + ret_ad, args = jitter.func_args_stdcall(["hwnd", "dinstance", + "p_dinstance_high", + "movemethod"]) + + if args.hwnd == winobjs.module_cur_hwnd: + pass + elif args.hwnd in winobjs.handle_pool: + pass + else: + raise ValueError('unknown hwnd!') + + # data = None + if args.hwnd in winobjs.files_hwnd: + winobjs.files_hwnd[winobjs.module_cur_hwnd].seek(args.dinstance, args.movemethod) + elif args.hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[args.hwnd] + wh.info.seek(args.dinstance, args.movemethod) + else: + raise ValueError('unknown filename') + jitter.func_ret_stdcall(ret_ad, args.dinstance) + + +def kernel32_SetFilePointerEx(jitter): + ret_ad, args = jitter.func_args_stdcall(["hwnd", "dinstance_l", + "dinstance_h", + "pnewfileptr", + "movemethod"]) + dinstance = args.dinstance_l | (args.dinstance_h << 32) + if dinstance: + raise ValueError('Not implemented') + if args.pnewfileptr: + raise ValueError('Not implemented') + if args.hwnd == winobjs.module_cur_hwnd: + pass + elif args.hwnd in winobjs.handle_pool: + pass + else: + raise ValueError('unknown hwnd!') + + # data = None + if args.hwnd in winobjs.files_hwnd: + winobjs.files_hwnd[winobjs.module_cur_hwnd].seek(dinstance, args.movemethod) + elif args.hwnd in winobjs.handle_pool: + wh = winobjs.handle_pool[args.hwnd] + 
def user32_IsCharUpperA(jitter):
    """
    Emulate user32!IsCharUpperA for ASCII characters.

    Returns 1 when the character is in 'A'..'Z', 0 otherwise.

    @jitter: jitter instance
    """
    ret_ad, args = jitter.func_args_stdcall(["c"])
    # NOTE(review): the argument is a CHAR delivered in a 32-bit stack slot;
    # only the low byte is examined here - confirm against callers.
    char = args.c & 0xFF
    # Explicit range test: a bare "bit 0x20 clear" check also matched
    # non-letters such as '@' or control characters.
    ret = 1 if 0x41 <= char <= 0x5A else 0
    jitter.func_ret_stdcall(ret_ad, ret)


def user32_IsCharLowerA(jitter):
    """
    Emulate user32!IsCharLowerA for ASCII characters.

    Returns 1 when the character is in 'a'..'z', 0 otherwise.

    @jitter: jitter instance
    """
    ret_ad, args = jitter.func_args_stdcall(["c"])
    # NOTE(review): only the low byte of the 32-bit argument is examined.
    char = args.c & 0xFF
    # Explicit range test: a bare "bit 0x20 set" check also matched digits
    # and punctuation.
    ret = 1 if 0x61 <= char <= 0x7A else 0
    jitter.func_ret_stdcall(ret_ad, ret)
def msvcrt_fread(jitter):
    """
    Emulate msvcrt!fread.

    The file handle is read from offset 0x10 of the guest FILE structure,
    up to size * nmemb bytes are read from the matching handle_pool entry
    and copied to the guest buffer. The return value is the number of
    complete items actually read.

    @jitter: jitter instance
    """
    ret_ad, args = jitter.func_args_cdecl(["buf", "size", "nmemb", "stream"])
    fd = jitter.vm.get_u32(args.stream + 0x10)
    if fd not in winobjs.handle_pool:
        raise NotImplementedError("Untested case")

    data = winobjs.handle_pool[fd].info.read(args.size * args.nmemb)
    jitter.vm.set_mem(args.buf, data)

    # Report what was really read: a short read (EOF) must not look like a
    # full one, otherwise guest read loops never terminate. A zero item
    # size yields zero items and avoids a division by zero.
    items_read = len(data) // args.size if args.size else 0
    jitter.func_ret_cdecl(ret_ad, items_read)
def kernel32_myGetTempPath(jitter, set_str):
    """
    Emulate GetTempPath for the ANSI / Unicode front-ends.

    The fixed path 'c:\\temp\\' is written with @set_str when the guest
    buffer can hold it plus its terminator. The return value is the path
    length on success, or the required buffer size (length + 1) when the
    buffer is too small.

    @jitter: jitter instance
    @set_str: callable (address, string) writing a string to guest memory
    """
    ret_ad, args = jitter.func_args_stdcall(["l", "buf"])
    temp_path = 'c:\\temp\\'
    if len(temp_path) < args.l:
        set_str(args.buf, temp_path)
        ret = len(temp_path)
    else:
        # Nothing was written: return a value greater than the supplied
        # length so the caller can tell failure from success, including
        # when the buffer is exactly one character short.
        ret = len(temp_path) + 1
    jitter.func_ret_stdcall(ret_ad, ret)
class win32_find_data(object):
    """
    Minimal find-data record serialised by toStruct().

    Any class-level default below can be overridden through keyword
    arguments to the constructor.
    """

    # Defaults for the numeric header fields packed by toStruct()
    fileattrib = 0
    creationtime = 0
    lastaccesstime = 0
    lastwritetime = 0
    filesizehigh = 0
    filesizelow = 0
    dwreserved0 = 0
    dwreserved1 = 0x1337beef
    # Names, padded / truncated to fixed-size fields by toStruct()
    cfilename = ""
    alternamefilename = ""

    def __init__(self, **kargs):
        for k, v in viewitems(kargs):
            setattr(self, k, v)

    @staticmethod
    def _fixed_field(value, size):
        """Return @value as bytes, NUL-padded / truncated to exactly @size."""
        if not isinstance(value, bytes):
            # NOTE(review): latin-1 is used as the single-byte encoding for
            # the ANSI structure; confirm it matches the intended code page.
            value = value.encode("latin-1", "replace")
        return (value + b'\x00' * size)[:size]

    def toStruct(self):
        """
        Serialise the record: packed numeric header followed by the
        MAX_PATH-byte file name and the 14-byte alternate file name.
        """
        s = struct.pack('=IQQQIIII',
                        self.fileattrib,
                        self.creationtime,
                        self.lastaccesstime,
                        self.lastwritetime,
                        self.filesizehigh,
                        self.filesizelow,
                        self.dwreserved0,
                        self.dwreserved1)
        # struct.pack() returns bytes; the names must be bytes as well or
        # the concatenation raises TypeError on Python 3.
        s += self._fixed_field(self.cfilename, MAX_PATH)
        s += self._fixed_field(self.alternamefilename, 14)
        return s
# Digit alphabet for int2base: '0'-'9' then 'a'-'z' (bases up to 36)
digs = string.digits + string.ascii_lowercase


def int2base(x, base):
    """
    Return the representation of the integer @x in base @base.

    @x: integer to convert (may be negative)
    @base: target base; each digit is looked up in `digs`
    """
    if x == 0:
        return '0'
    negative = x < 0
    x = abs(x)
    digits = []
    while x:
        # divmod keeps x an integer on Python 2 and 3; true division would
        # turn it into a float and break the digit lookup.
        x, remainder = divmod(x, base)
        digits.append(digs[remainder])
    if negative:
        digits.append('-')
    digits.reverse()
    return ''.join(digits)
msvcrt_fopen(jitter): + msvcrt_myfopen(jitter, jitter.get_str_ansi) + + +def msvcrt_strlen(jitter): + ret_ad, args = jitter.func_args_cdecl(["src"]) + + s = jitter.get_str_ansi(args.src) + jitter.func_ret_cdecl(ret_ad, len(s)) diff --git a/miasm/os_dep/win_api_x86_32_seh.py b/miasm/os_dep/win_api_x86_32_seh.py new file mode 100644 index 00000000..90a68eec --- /dev/null +++ b/miasm/os_dep/win_api_x86_32_seh.py @@ -0,0 +1,695 @@ +#-*- coding:utf-8 -*- + +# +# Copyright (C) 2011 EADS France, Fabrice Desclaux +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# Constants Windows
EXCEPTION_BREAKPOINT = 0x80000003
EXCEPTION_ACCESS_VIOLATION = 0xc0000005
EXCEPTION_INT_DIVIDE_BY_ZERO = 0xc0000094
EXCEPTION_PRIV_INSTRUCTION = 0xc0000096
EXCEPTION_ILLEGAL_INSTRUCTION = 0xc000001d


log = logging.getLogger("seh_helper")
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
log.addHandler(console_handler)
log.setLevel(logging.INFO)

# fs:[0] Page (TIB)
tib_address = 0x7ff70000
PEB_AD = 0x7ffdf000
LDR_AD = 0x340000
DEFAULT_SEH = 0x7ffff000

MAX_MODULES = 0x40

peb_address = PEB_AD
peb_ldr_data_offset = 0x1ea0
peb_ldr_data_address = LDR_AD + peb_ldr_data_offset


modules_list_offset = 0x1f00

InInitializationOrderModuleList_offset = 0x1ee0
InInitializationOrderModuleList_address = LDR_AD + \
    InInitializationOrderModuleList_offset

InLoadOrderModuleList_offset = 0x1ee0 + \
    MAX_MODULES * 0x1000
InLoadOrderModuleList_address = LDR_AD + \
    InLoadOrderModuleList_offset


process_environment_address = 0x10000
process_parameters_address = 0x200000

return_from_exception = 0x6eadbeef


# Module name -> PE instance. This must be a mapping: it is iterated with
# viewitems() when the module chain is built, which a list does not support.
name2module = {}
main_pe = None
main_pe_name = b"c:\\xxx\\toto.exe"

# Upper bound used when walking the SEH chain
MAX_SEH = 5
NT_TIB.get_offset("StackBase"), + "TEB.NtTib.ExceptionList" + ) + jitter.vm.add_memory_page( + teb_address + NT_TIB.get_offset("Self"), + PAGE_READ | PAGE_WRITE, + b"\x00" * (NT_TIB.sizeof() - NT_TIB.get_offset("Self")), + "TEB.NtTib.Self" + ) + jitter.vm.add_memory_page( + teb_address + TEB.get_offset("ProcessEnvironmentBlock"), + PAGE_READ | PAGE_WRITE, + b"\x00" * ( + TEB.get_offset("LastErrorValue") - + TEB.get_offset("ProcessEnvironmentBlock") + ), + "TEB.ProcessEnvironmentBlock" + ) + Teb = TEB(jitter.vm, teb_address) + Teb.NtTib.ExceptionList = DEFAULT_SEH + Teb.NtTib.Self = teb_address + Teb.ProcessEnvironmentBlock = peb_address + +def build_peb(jitter, peb_address): + """ + Build PEB information using following structure: + + @jitter: jitter instance + @peb_address: the PEB address + """ + + if main_pe: + offset, length = peb_address + 8, 4 + else: + offset, length = peb_address + 0xC, 0 + length += 4 + + jitter.vm.add_memory_page( + offset, + PAGE_READ | PAGE_WRITE, + b"\x00" * length, + "PEB" + ) + + Peb = PEB(jitter.vm, peb_address) + if main_pe: + Peb.ImageBaseAddress = main_pe.NThdr.ImageBase + Peb.Ldr = peb_ldr_data_address + + +def build_ldr_data(jitter, modules_info): + """ + Build Loader information using following structure: + + +0x000 Length : Uint4B + +0x004 Initialized : UChar + +0x008 SsHandle : Ptr32 Void + +0x00c InLoadOrderModuleList : _LIST_ENTRY + +0x014 InMemoryOrderModuleList : _LIST_ENTRY + +0x01C InInitializationOrderModuleList : _LIST_ENTRY + # dummy dll base + +0x024 DllBase : Ptr32 Void + + @jitter: jitter instance + @modules_info: LoadedModules instance + + """ + # ldr offset pad + offset = 0xC + addr = LDR_AD + peb_ldr_data_offset + ldrdata = PEB_LDR_DATA(jitter.vm, addr) + + main_pe = modules_info.name2module.get(main_pe_name, None) + ntdll_pe = modules_info.name2module.get("ntdll.dll", None) + + + size = 0 + if main_pe: + size += ListEntry.sizeof() * 2 + main_addr_entry = modules_info.module2entry[main_pe] + if ntdll_pe: + 
def create_modules_chain(jitter, name2module):
    """
    Create the modules entries. Those modules are not linked in this function.

    For every known module, a partial LdrDataEntry is mapped at its own
    0x1000-spaced address, with the UTF-16LE base name stored at
    entry + 0x500 and the UTF-16LE path at entry + 0x600.

    @jitter: jitter instance
    @name2module: dict containing association between name and its pe instance

    Returns the LoadedModules instance describing the created entries.
    """

    modules_info = LoadedModules()
    base_addr = LDR_AD + modules_list_offset  # XXXX
    offset_name = 0x500
    offset_path = 0x600

    for i, (fname, pe_obj) in enumerate(viewitems(name2module), 1):
        if pe_obj is None:
            log.warning("Unknown module: omitted from link list (%r)",
                        fname)
            continue
        addr = base_addr + i * 0x1000
        bpath = fname.replace('/', '\\')
        bname_str = os.path.split(fname)[1].lower()
        bname_unicode = bname_str.encode("utf-16le")
        bpath_unicode = bpath.encode('utf-16le')
        log.info("Add module %x %r", pe_obj.NThdr.ImageBase, bname_str)

        modules_info.add(bname_str, pe_obj, addr)

        # Allocate a partial LdrDataEntry (0-Flags)
        jitter.vm.add_memory_page(
            addr,
            PAGE_READ | PAGE_WRITE,
            b"\x00" * LdrDataEntry.get_offset("Flags"),
            "Module info %r" % bname_str
        )

        LdrEntry = LdrDataEntry(jitter.vm, addr)

        LdrEntry.DllBase = pe_obj.NThdr.ImageBase
        LdrEntry.EntryPoint = pe_obj.Opthdr.AddressOfEntryPoint
        LdrEntry.SizeOfImage = pe_obj.NThdr.sizeofimage
        # FullDllName points at the path buffer, so its lengths must be
        # those of the path, not of the base name.
        LdrEntry.FullDllName.length = len(bpath_unicode)
        LdrEntry.FullDllName.maxlength = len(bpath_unicode) + 2
        LdrEntry.FullDllName.data = addr + offset_path
        LdrEntry.BaseDllName.length = len(bname_unicode)
        LdrEntry.BaseDllName.maxlength = len(bname_unicode) + 2
        LdrEntry.BaseDllName.data = addr + offset_name

        jitter.vm.add_memory_page(
            addr + offset_name,
            PAGE_READ | PAGE_WRITE,
            bname_unicode + b"\x00" * 2,
            "Module name %r" % bname_str
        )

        jitter.vm.add_memory_page(
            addr + offset_path,
            PAGE_READ | PAGE_WRITE,
            bpath_unicode + b"\x00" * 2,
            "Module path %r" % bname_str
        )

    return modules_info
def fix_InLoadOrderModuleList(jitter, modules_info):
    """Fix InLoadOrderModuleList double link list. First module is the main pe,
    then ntdll, kernel32.

    When one of those three modules is missing, the modules are linked in
    their registration order instead and a warning is logged.

    @jitter: the jitter instance
    @modules_info: the LoadedModules instance
    """

    log.debug("Fix InLoadOrderModuleList")
    main_pe = modules_info.name2module.get(main_pe_name, None)
    kernel32_pe = modules_info.name2module.get("kernel32.dll", None)
    ntdll_pe = modules_info.name2module.get("ntdll.dll", None)
    special_modules = [main_pe, kernel32_pe, ntdll_pe]
    if not all(special_modules):
        # Logger.warn is a deprecated alias; use Logger.warning
        log.warning(
            'No main pe, ldr data will be unconsistant %r', special_modules)
        loaded_modules = modules_info.modules
    else:
        others = [module for module in modules_info.modules
                  if module not in special_modules]
        loaded_modules = [main_pe, ntdll_pe, kernel32_pe] + others

    # 0x0: offset of the InLoadOrder links inside each module entry
    set_link_list_entry(jitter, loaded_modules, modules_info, 0x0)
def fix_InInitializationOrderModuleList(jitter, modules_info):
    """Fix InInitializationOrderModuleList double link list. First module is the
    ntdll, then kernel32.

    The main pe is left out of this list. When the main pe, kernel32 or
    ntdll is missing, the modules are linked in their registration order
    instead and a warning is logged.

    @jitter: the jitter instance
    @modules_info: the LoadedModules instance

    """

    log.debug("Fix InInitializationOrderModuleList")
    main_pe = modules_info.name2module.get(main_pe_name, None)
    kernel32_pe = modules_info.name2module.get("kernel32.dll", None)
    ntdll_pe = modules_info.name2module.get("ntdll.dll", None)
    special_modules = [main_pe, kernel32_pe, ntdll_pe]
    if not all(special_modules):
        # Logger.warn is a deprecated alias; use Logger.warning
        log.warning('No main pe, ldr data will be unconsistant')
        loaded_modules = modules_info.modules
    else:
        others = [module for module in modules_info.modules
                  if module not in special_modules]
        loaded_modules = [ntdll_pe, kernel32_pe] + others

    # 0x10: offset of the InInitializationOrder links inside each entry
    set_link_list_entry(jitter, loaded_modules, modules_info, 0x10)
process_environment_address, + PAGE_READ | PAGE_WRITE, + env_unicode, + "Process environment" + ) + jitter.vm.set_mem(process_environment_address, env_unicode) + + +def add_process_parameters(jitter): + """ + Build a process parameters structure + @jitter: jitter instance + """ + + o = b"" + o += pck32(0x1000) # size + o += b"E" * (0x48 - len(o)) + o += pck32(process_environment_address) + jitter.vm.add_memory_page( + process_parameters_address, + PAGE_READ | PAGE_WRITE, + o, "Process parameters" + ) + + +# http://blog.fireeye.com/research/2010/08/download_exec_notes.html +seh_count = 0 + + +def init_seh(jitter): + """ + Build the modules entries and create double links + @jitter: jitter instance + """ + + global seh_count + seh_count = 0 + tib_ad = jitter.cpu.get_segm_base(jitter.cpu.FS) + build_teb(jitter, tib_ad) + build_peb(jitter, peb_address) + + modules_info = create_modules_chain(jitter, name2module) + fix_InLoadOrderModuleList(jitter, modules_info) + fix_InMemoryOrderModuleList(jitter, modules_info) + fix_InInitializationOrderModuleList(jitter, modules_info) + + build_ldr_data(jitter, modules_info) + add_process_env(jitter) + add_process_parameters(jitter) + + + +def regs2ctxt(jitter, context_address): + """ + Build x86_32 cpu context for exception handling + @jitter: jitload instance + """ + + ctxt = ContextException(jitter.vm, context_address) + ctxt.memset(b"\x00") + # ContextFlags + # XXX + + # DRX + ctxt.dr0 = 0 + ctxt.dr1 = 0 + ctxt.dr2 = 0 + ctxt.dr3 = 0 + ctxt.dr4 = 0 + ctxt.dr5 = 0 + + # Float context + # XXX + + # Segment selectors + ctxt.gs = jitter.cpu.GS + ctxt.fs = jitter.cpu.FS + ctxt.es = jitter.cpu.ES + ctxt.ds = jitter.cpu.DS + + # Gpregs + ctxt.edi = jitter.cpu.EDI + ctxt.esi = jitter.cpu.ESI + ctxt.ebx = jitter.cpu.EBX + ctxt.edx = jitter.cpu.EDX + ctxt.ecx = jitter.cpu.ECX + ctxt.eax = jitter.cpu.EAX + ctxt.ebp = jitter.cpu.EBP + ctxt.eip = jitter.cpu.EIP + + # CS + ctxt.cs = jitter.cpu.CS + + # Eflags + # XXX TODO real eflag + + # ESP 
+ ctxt.esp = jitter.cpu.ESP + + # SS + ctxt.ss = jitter.cpu.SS + + +def ctxt2regs(jitter, ctxt_ptr): + """ + Restore x86_32 registers from an exception context + @ctxt: the serialized context + @jitter: jitload instance + """ + + ctxt = ContextException(jitter.vm, ctxt_ptr) + + # Selectors + jitter.cpu.GS = ctxt.gs + jitter.cpu.FS = ctxt.fs + jitter.cpu.ES = ctxt.es + jitter.cpu.DS = ctxt.ds + + # Gpregs + jitter.cpu.EDI = ctxt.edi + jitter.cpu.ESI = ctxt.esi + jitter.cpu.EBX = ctxt.ebx + jitter.cpu.EDX = ctxt.edx + jitter.cpu.ECX = ctxt.ecx + jitter.cpu.EAX = ctxt.eax + jitter.cpu.EBP = ctxt.ebp + jitter.cpu.EIP = ctxt.eip + + # CS + jitter.cpu.CS = ctxt.cs + + # Eflag + # XXX TODO + + # ESP + jitter.cpu.ESP = ctxt.esp + # SS + jitter.cpu.SS = ctxt.ss + + +def fake_seh_handler(jitter, except_code, previous_seh=None): + """ + Create an exception context + @jitter: jitter instance + @except_code: x86 exception code + @previous_seh: (optional) last SEH address when multiple SEH are used + """ + global seh_count + log.warning('Exception at %x %r', jitter.cpu.EIP, seh_count) + seh_count += 1 + + # Get space on stack for exception handling + new_ESP = jitter.cpu.ESP - 0x3c8 + exception_base_address = new_ESP + exception_record_address = exception_base_address + 0xe8 + context_address = exception_base_address + 0xfc + fake_seh_address = exception_base_address + 0x14 + + # Save a CONTEXT + regs2ctxt(jitter, context_address) + jitter.cpu.ESP = new_ESP + + # Get current seh (fs:[0]) + tib = NT_TIB(jitter.vm, tib_address) + seh = tib.ExceptionList.deref + if previous_seh: + # Recursive SEH + while seh.get_addr() != previous_seh: + seh = seh.Next.deref + seh = seh.Next.deref + + log.info( + 'seh_ptr %x { old_seh %r eh %r} ctx_addr %x', + seh.get_addr(), + seh.Next, + seh.Handler, + context_address + ) + + # Write exception_record + except_record = EXCEPTION_RECORD(jitter.vm, exception_record_address) + except_record.memset(b"\x00") + except_record.ExceptionCode = except_code 
def dump_seh(jitter):
    """
    Walk and dump the SEH entries

    The walk starts at the ExceptionList pointer of the TIB located at
    tib_address and stops on a null / unmapped pointer, or once more than
    MAX_SEH entries have been followed.

    @jitter: jitter instance
    """
    log.info('Dump_seh. Tib_address: %x', tib_address)
    cur_seh_ptr = NT_TIB(jitter.vm, tib_address).ExceptionList
    loop = 0
    while cur_seh_ptr and jitter.vm.is_mapped(cur_seh_ptr.val,
                                              len(cur_seh_ptr)):
        if loop > MAX_SEH:
            # Logger.warn is a deprecated alias; use Logger.warning
            log.warning("Too many seh, quit")
            return
        err = cur_seh_ptr.deref
        # Indent each record one tab deeper than the previous one
        log.info('\t' * (loop + 1) + 'seh_ptr: %x { prev_seh: %r eh %r }',
                 err.get_addr(), err.Next, err.Handler)
        cur_seh_ptr = err.Next
        loop += 1
ctxt2regs(jitter, context_address) + + # Rebuild SEH (remove fake SEH) + tib = NT_TIB(jitter.vm, tib_address) + seh = tib.ExceptionList.deref + log.info('Old seh: %x New seh: %x', seh.get_addr(), seh.Next.val) + tib.ExceptionList.val = seh.Next.val + dump_seh(jitter) + + # Handle returned values + if status == 0x0: + # ExceptionContinueExecution + log.info('SEH continue') + jitter.pc = jitter.cpu.EIP + log.info('Context::Eip: %x', jitter.pc) + + elif status == 1: + # ExceptionContinueSearch + log.info("Delegate to the next SEH handler") + # exception_base_address: context_address - 0xfc + # -> exception_record_address: exception_base_address + 0xe8 + exception_record = EXCEPTION_RECORD(jitter.vm, + context_address - 0xfc + 0xe8) + + pc = fake_seh_handler(jitter, exception_record.ExceptionCode, + seh_address) + jitter.pc = pc + + else: + # https://msdn.microsoft.com/en-us/library/aa260344%28v=vs.60%29.aspx + # But the type _EXCEPTION_DISPOSITION may take 2 others values: + # - ExceptionNestedException = 2 + # - ExceptionCollidedUnwind = 3 + raise ValueError("Valid values are ExceptionContinueExecution and " + "ExceptionContinueSearch") + + # Jitter's breakpoint compliant + return True diff --git a/miasm2/__init__.py b/miasm2/__init__.py deleted file mode 100644 index b7dbe3b4..00000000 --- a/miasm2/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Reverse engineering framework in Python" diff --git a/miasm2/analysis/__init__.py b/miasm2/analysis/__init__.py deleted file mode 100644 index 5abdd3a3..00000000 --- a/miasm2/analysis/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"High-level tools for binary analysis" diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py deleted file mode 100644 index ee733d79..00000000 --- a/miasm2/analysis/binary.py +++ /dev/null @@ -1,236 +0,0 @@ -import logging -import warnings - -from miasm2.core.bin_stream import bin_stream_str, bin_stream_elf, bin_stream_pe -from miasm2.jitter.csts import PAGE_READ -from miasm2.core.locationdb 
import LocationDB - - -log = logging.getLogger("binary") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.ERROR) - - -# Container -## Exceptions -class ContainerSignatureException(Exception): - "The container does not match the current container signature" - - -class ContainerParsingException(Exception): - "Error during container parsing" - - -## Parent class -class Container(object): - """Container abstraction layer - - This class aims to offer a common interface for abstracting container - such as PE or ELF. - """ - - available_container = [] # Available container formats - fallback_container = None # Fallback container format - - @classmethod - def from_string(cls, data, *args, **kwargs): - """Instantiate a container and parse the binary - @data: str containing the binary - """ - log.info('Load binary') - # Try each available format - for container_type in cls.available_container: - try: - return container_type(data, *args, **kwargs) - except ContainerSignatureException: - continue - except ContainerParsingException as error: - log.error(error) - - # Fallback mode - log.warning('Fallback to string input') - return cls.fallback_container(data, *args, **kwargs) - - @classmethod - def register_container(cls, container): - "Add a Container format" - cls.available_container.append(container) - - @classmethod - def register_fallback(cls, container): - "Set the Container fallback format" - cls.fallback_container = container - - @classmethod - def from_stream(cls, stream, *args, **kwargs): - """Instantiate a container and parse the binary - @stream: stream to use as binary - @vm: (optional) VmMngr instance to link with the executable - @addr: (optional) Base address of the parsed binary. 
If set, - force the unknown format - """ - return Container.from_string(stream.read(), *args, **kwargs) - - def parse(self, data, *args, **kwargs): - """Launch parsing of @data - @data: str containing the binary - """ - raise NotImplementedError("Abstract method") - - def __init__(self, data, loc_db=None, **kwargs): - "Alias for 'parse'" - # Init attributes - self._executable = None - self._bin_stream = None - self._entry_point = None - self._arch = None - if loc_db is None: - self._loc_db = LocationDB() - else: - self._loc_db = loc_db - - # Launch parsing - self.parse(data, **kwargs) - - @property - def bin_stream(self): - "Return the BinStream instance corresponding to container content" - return self._bin_stream - - @property - def executable(self): - "Return the abstract instance standing for parsed executable" - return self._executable - - @property - def entry_point(self): - "Return the detected entry_point" - return self._entry_point - - @property - def arch(self): - "Return the guessed architecture" - return self._arch - - @property - def loc_db(self): - "LocationDB instance preloaded with container symbols (if any)" - return self._loc_db - - @property - def symbol_pool(self): - "[DEPRECATED API]" - warnings.warn("Deprecated API: use 'loc_db'") - return self.loc_db - -## Format dependent classes -class ContainerPE(Container): - "Container abstraction for PE" - - def parse(self, data, vm=None, **kwargs): - from miasm2.jitter.loader.pe import vm_load_pe, guess_arch - from elfesteem import pe_init - - # Parse signature - if not data.startswith(b'MZ'): - raise ContainerSignatureException() - - # Build executable instance - try: - if vm is not None: - self._executable = vm_load_pe(vm, data) - else: - self._executable = pe_init.PE(data) - except Exception as error: - raise ContainerParsingException('Cannot read PE: %s' % error) - - # Check instance validity - if not self._executable.isPE() or \ - self._executable.NTsig.signature_value != 0x4550: - raise 
ContainerSignatureException() - - # Guess the architecture - self._arch = guess_arch(self._executable) - - # Build the bin_stream instance and set the entry point - try: - self._bin_stream = bin_stream_pe(self._executable) - ep_detected = self._executable.Opthdr.AddressOfEntryPoint - self._entry_point = self._executable.rva2virt(ep_detected) - except Exception as error: - raise ContainerParsingException('Cannot read PE: %s' % error) - - -class ContainerELF(Container): - "Container abstraction for ELF" - - def parse(self, data, vm=None, addr=0, apply_reloc=False, **kwargs): - """Load an ELF from @data - @data: bytes containing the ELF bytes - @vm (optional): VmMngr instance. If set, load the ELF in virtual memory - @addr (optional): base address the ELF in virtual memory - @apply_reloc (optional): if set, apply relocation during ELF loading - - @addr and @apply_reloc are only meaningful in the context of a - non-empty @vm - """ - from miasm2.jitter.loader.elf import vm_load_elf, guess_arch, \ - fill_loc_db_with_symbols - from elfesteem import elf_init - - # Parse signature - if not data.startswith(b'\x7fELF'): - raise ContainerSignatureException() - - # Build executable instance - try: - if vm is not None: - self._executable = vm_load_elf( - vm, - data, - loc_db=self.loc_db, - base_addr=addr, - apply_reloc=apply_reloc - ) - else: - self._executable = elf_init.ELF(data) - except Exception as error: - raise ContainerParsingException('Cannot read ELF: %s' % error) - - # Guess the architecture - self._arch = guess_arch(self._executable) - - # Build the bin_stream instance and set the entry point - try: - self._bin_stream = bin_stream_elf(self._executable) - self._entry_point = self._executable.Ehdr.entry + addr - except Exception as error: - raise ContainerParsingException('Cannot read ELF: %s' % error) - - if vm is None: - # Add known symbols (vm_load_elf already does it) - fill_loc_db_with_symbols(self._executable, self.loc_db, addr) - - - -class 
ContainerUnknown(Container): - "Container abstraction for unknown format" - - def parse(self, data, vm=None, addr=0, **kwargs): - self._bin_stream = bin_stream_str(data, base_address=addr) - if vm is not None: - vm.add_memory_page( - addr, - PAGE_READ, - data - ) - self._executable = None - self._entry_point = 0 - - -## Register containers -Container.register_container(ContainerPE) -Container.register_container(ContainerELF) -Container.register_fallback(ContainerUnknown) diff --git a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py deleted file mode 100644 index 25d66318..00000000 --- a/miasm2/analysis/cst_propag.py +++ /dev/null @@ -1,185 +0,0 @@ -import logging - -from future.utils import viewitems - -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.expression.expression import ExprMem -from miasm2.expression.expression_helper import possible_values -from miasm2.expression.simplifications import expr_simp -from miasm2.ir.ir import IRBlock, AssignBlock - -LOG_CST_PROPAG = logging.getLogger("cst_propag") -CONSOLE_HANDLER = logging.StreamHandler() -CONSOLE_HANDLER.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -LOG_CST_PROPAG.addHandler(CONSOLE_HANDLER) -LOG_CST_PROPAG.setLevel(logging.WARNING) - - -class SymbExecState(SymbolicExecutionEngine): - """ - State manager for SymbolicExecution - """ - def __init__(self, ir_arch, ircfg, state): - super(SymbExecState, self).__init__(ir_arch, {}) - self.set_state(state) - - -def add_state(ircfg, todo, states, addr, state): - """ - Add or merge the computed @state for the block at @addr. Update @todo - @todo: modified block set - @states: dictionary linking a label to its entering state. 
- @addr: address of the considered block - @state: computed state - """ - addr = ircfg.get_loc_key(addr) - todo.add(addr) - if addr not in states: - states[addr] = state - else: - states[addr] = states[addr].merge(state) - - -def is_expr_cst(ir_arch, expr): - """Return true if @expr is only composed of ExprInt and init_regs - @ir_arch: IR instance - @expr: Expression to test""" - - elements = expr.get_r(mem_read=True) - for element in elements: - if element.is_mem(): - continue - if element.is_id() and element in ir_arch.arch.regs.all_regs_ids_init: - continue - if element.is_int(): - continue - return False - # Expr is a constant - return True - - -class SymbExecStateFix(SymbolicExecutionEngine): - """ - Emul blocks and replace expressions with their corresponding constant if - any. - - """ - # Function used to test if an Expression is considered as a constant - is_expr_cst = lambda _, ir_arch, expr: is_expr_cst(ir_arch, expr) - - def __init__(self, ir_arch, ircfg, state, cst_propag_link): - self.ircfg = ircfg - super(SymbExecStateFix, self).__init__(ir_arch, {}) - self.set_state(state) - self.cst_propag_link = cst_propag_link - - def propag_expr_cst(self, expr): - """Propagate constant expressions in @expr - @expr: Expression to update""" - elements = expr.get_r(mem_read=True) - to_propag = {} - for element in elements: - # Only ExprId can be safely propagated - if not element.is_id(): - continue - value = self.eval_expr(element) - if self.is_expr_cst(self.ir_arch, value): - to_propag[element] = value - return expr_simp(expr.replace_expr(to_propag)) - - def eval_updt_irblock(self, irb, step=False): - """ - Symbolic execution of the @irb on the current state - @irb: IRBlock instance - @step: display intermediate steps - """ - assignblks = [] - for index, assignblk in enumerate(irb): - new_assignblk = {} - links = {} - for dst, src in viewitems(assignblk): - src = self.propag_expr_cst(src) - if dst.is_mem(): - ptr = dst.ptr - ptr = self.propag_expr_cst(ptr) - dst = 
ExprMem(ptr, dst.size) - new_assignblk[dst] = src - - if assignblk.instr is not None: - for arg in assignblk.instr.args: - new_arg = self.propag_expr_cst(arg) - links[new_arg] = arg - self.cst_propag_link[(irb.loc_key, index)] = links - - self.eval_updt_assignblk(assignblk) - assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) - - -def compute_cst_propagation_states(ir_arch, ircfg, init_addr, init_infos): - """ - Propagate "constant expressions" in a function. - The attribute "constant expression" is true if the expression is based on - constants or "init" regs values. - - @ir_arch: IntermediateRepresentation instance - @init_addr: analysis start address - @init_infos: dictionary linking expressions to their values at @init_addr - """ - - done = set() - state = SymbExecState.StateEngine(init_infos) - lbl = ircfg.get_loc_key(init_addr) - todo = set([lbl]) - states = {lbl: state} - - while todo: - if not todo: - break - lbl = todo.pop() - state = states[lbl] - if (lbl, state) in done: - continue - done.add((lbl, state)) - if lbl not in ircfg.blocks: - continue - - symbexec_engine = SymbExecState(ir_arch, ircfg, state) - addr = symbexec_engine.run_block_at(ircfg, lbl) - symbexec_engine.del_mem_above_stack(ir_arch.sp) - - for dst in possible_values(addr): - value = dst.value - if value.is_mem(): - LOG_CST_PROPAG.warning('Bad destination: %s', value) - continue - elif value.is_int(): - value = ircfg.get_loc_key(value) - add_state( - ircfg, todo, states, value, - symbexec_engine.get_state() - ) - - return states - - -def propagate_cst_expr(ir_arch, ircfg, addr, init_infos): - """ - Propagate "constant expressions" in a @ir_arch. - The attribute "constant expression" is true if the expression is based on - constants or "init" regs values. 
- - @ir_arch: IntermediateRepresentation instance - @addr: analysis start address - @init_infos: dictionary linking expressions to their values at @init_addr - - Returns a mapping between replaced Expression and their new values. - """ - states = compute_cst_propagation_states(ir_arch, ircfg, addr, init_infos) - cst_propag_link = {} - for lbl, state in viewitems(states): - if lbl not in ircfg.blocks: - continue - symbexec = SymbExecStateFix(ir_arch, ircfg, state, cst_propag_link) - symbexec.eval_updt_irblock(ircfg.blocks[lbl]) - return cst_propag_link diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py deleted file mode 100644 index bd073fcb..00000000 --- a/miasm2/analysis/data_analysis.py +++ /dev/null @@ -1,204 +0,0 @@ -from __future__ import print_function - -from future.utils import viewitems - -from builtins import object -from functools import cmp_to_key -from miasm2.expression.expression \ - import get_expr_mem, get_list_rw, ExprId, ExprInt, \ - compare_exprs -from miasm2.ir.symbexec import SymbolicExecutionEngine - - -def get_node_name(label, i, n): - n_name = (label, i, n) - return n_name - - -def intra_block_flow_raw(ir_arch, ircfg, flow_graph, irb, in_nodes, out_nodes): - """ - Create data flow for an irbloc using raw IR expressions - """ - current_nodes = {} - for i, assignblk in enumerate(irb): - dict_rw = assignblk.get_rw(cst_read=True) - current_nodes.update(out_nodes) - - # gen mem arg to mem node links - all_mems = set() - for node_w, nodes_r in viewitems(dict_rw): - for n in nodes_r.union([node_w]): - all_mems.update(get_expr_mem(n)) - if not all_mems: - continue - - for n in all_mems: - node_n_w = get_node_name(irb.loc_key, i, n) - if not n in nodes_r: - continue - o_r = n.ptr.get_r(mem_read=False, cst_read=True) - for n_r in o_r: - if n_r in current_nodes: - node_n_r = current_nodes[n_r] - else: - node_n_r = get_node_name(irb.loc_key, i, n_r) - current_nodes[n_r] = node_n_r - in_nodes[n_r] = node_n_r - 
flow_graph.add_uniq_edge(node_n_r, node_n_w) - - # gen data flow links - for node_w, nodes_r in viewitems(dict_rw): - for n_r in nodes_r: - if n_r in current_nodes: - node_n_r = current_nodes[n_r] - else: - node_n_r = get_node_name(irb.loc_key, i, n_r) - current_nodes[n_r] = node_n_r - in_nodes[n_r] = node_n_r - - flow_graph.add_node(node_n_r) - - node_n_w = get_node_name(irb.loc_key, i + 1, node_w) - out_nodes[node_w] = node_n_w - - flow_graph.add_node(node_n_w) - flow_graph.add_uniq_edge(node_n_r, node_n_w) - - - -def inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, todo, link_exec_to_data): - lbl, current_nodes, exec_nodes = todo - current_nodes = dict(current_nodes) - - # link current nodes to bloc in_nodes - if not lbl in ircfg.blocks: - print("cannot find bloc!!", lbl) - return set() - irb = ircfg.blocks[lbl] - to_del = set() - for n_r, node_n_r in viewitems(irb_in_nodes[irb.loc_key]): - if not n_r in current_nodes: - continue - flow_graph.add_uniq_edge(current_nodes[n_r], node_n_r) - to_del.add(n_r) - - # if link exec to data, all nodes depends on exec nodes - if link_exec_to_data: - for n_x_r in exec_nodes: - for n_r, node_n_r in viewitems(irb_in_nodes[irb.loc_key]): - if not n_x_r in current_nodes: - continue - if isinstance(n_r, ExprInt): - continue - flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r) - - # update current nodes using bloc out_nodes - for n_w, node_n_w in viewitems(irb_out_nodes[irb.loc_key]): - current_nodes[n_w] = node_n_w - - # get nodes involved in exec flow - x_nodes = tuple(sorted(irb.dst.get_r(), key=cmp_to_key(compare_exprs))) - - todo = set() - for lbl_dst in ircfg.successors(irb.loc_key): - todo.add((lbl_dst, tuple(viewitems(current_nodes)), x_nodes)) - - return todo - - -def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): - - # first fix IN/OUT - # If a son read a node which in not in OUT, add it - todo = set(ir_arch.blocks.keys()) - while todo: - lbl = todo.pop() - irb 
= ir_arch.blocks[lbl] - for lbl_son in ir_arch.graph.successors(irb.loc_key): - if not lbl_son in ir_arch.blocks: - print("cannot find bloc!!", lbl) - continue - irb_son = ir_arch.blocks[lbl_son] - for n_r in irb_in_nodes[irb_son.loc_key]: - if n_r in irb_out_nodes[irb.loc_key]: - continue - if not isinstance(n_r, ExprId): - continue - - node_n_w = irb.loc_key, len(irb), n_r - irb_out_nodes[irb.loc_key][n_r] = node_n_w - if not n_r in irb_in_nodes[irb.loc_key]: - irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r - node_n_r = irb_in_nodes[irb.loc_key][n_r] - for lbl_p in ir_arch.graph.predecessors(irb.loc_key): - todo.add(lbl_p) - - flow_graph.add_uniq_edge(node_n_r, node_n_w) - - -def inter_block_flow(ir_arch, ircfg, flow_graph, irb_0, irb_in_nodes, irb_out_nodes, link_exec_to_data=True): - - todo = set() - done = set() - todo.add((irb_0, (), ())) - - while todo: - state = todo.pop() - if state in done: - continue - done.add(state) - out = inter_block_flow_link(ir_arch, ircfg, flow_graph, irb_in_nodes, irb_out_nodes, state, link_exec_to_data) - todo.update(out) - - -class symb_exec_func(object): - - """ - This algorithm will do symbolic execution on a function, trying to propagate - states between basic blocks in order to extract inter-blocs dataflow. The - algorithm tries to merge states from blocks with multiple parents. - - There is no real magic here, loops and complex merging will certainly fail. - """ - - def __init__(self, ir_arch): - self.todo = set() - self.stateby_ad = {} - self.cpt = {} - self.states_var_done = set() - self.states_done = set() - self.total_done = 0 - self.ir_arch = ir_arch - - def add_state(self, parent, ad, state): - variables = dict(state.symbols) - - # get bloc dead, and remove from state - b = self.ir_arch.get_block(ad) - if b is None: - raise ValueError("unknown bloc! 
%s" % ad) - s = parent, ad, tuple(sorted(viewitems(variables))) - self.todo.add(s) - - def get_next_state(self): - state = self.todo.pop() - return state - - def do_step(self): - if len(self.todo) == 0: - return None - if self.total_done > 600: - print("symbexec watchdog!") - return None - self.total_done += 1 - print('CPT', self.total_done) - while self.todo: - state = self.get_next_state() - parent, ad, s = state - self.states_done.add(state) - self.states_var_done.add(state) - - sb = SymbolicExecutionEngine(self.ir_arch, dict(s)) - - return parent, ad, sb - return None diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py deleted file mode 100644 index 3874b21b..00000000 --- a/miasm2/analysis/data_flow.py +++ /dev/null @@ -1,1579 +0,0 @@ -"""Data flow analysis based on miasm intermediate representation""" -from builtins import range -from collections import namedtuple -from future.utils import viewitems, viewvalues -from miasm2.core.utils import encode_hex -from miasm2.core.graph import DiGraph -from miasm2.ir.ir import AssignBlock, IRBlock -from miasm2.expression.expression import ExprLoc, ExprMem, ExprId, ExprInt,\ - ExprAssign, ExprOp -from miasm2.expression.simplifications import expr_simp -from miasm2.core.interval import interval -from miasm2.expression.expression_helper import possible_values -from miasm2.analysis.ssa import get_phi_sources_parent_block, \ - irblock_has_phi - - -class ReachingDefinitions(dict): - """ - Computes for each assignblock the set of reaching definitions. - Example: - IR block: - lbl0: - 0 A = 1 - B = 3 - 1 B = 2 - 2 A = A + B + 4 - - Reach definition of lbl0: - (lbl0, 0) => {} - (lbl0, 1) => {A: {(lbl0, 0)}, B: {(lbl0, 0)}} - (lbl0, 2) => {A: {(lbl0, 0)}, B: {(lbl0, 1)}} - (lbl0, 3) => {A: {(lbl0, 2)}, B: {(lbl0, 1)}} - - Source set 'REACHES' in: Kennedy, K. (1979). - A survey of data flow analysis techniques. - IBM Thomas J. 
Watson Research Division, Algorithm MK - - This class is usable as a dictionary whose structure is - { (block, index): { lvalue: set((block, index)) } } - """ - - ircfg = None - - def __init__(self, ircfg): - super(ReachingDefinitions, self).__init__() - self.ircfg = ircfg - self.compute() - - def get_definitions(self, block_lbl, assignblk_index): - """Returns the dict { lvalue: set((def_block_lbl, def_index)) } - associated with self.ircfg.@block.assignblks[@assignblk_index] - or {} if it is not yet computed - """ - return self.get((block_lbl, assignblk_index), {}) - - def compute(self): - """This is the main fixpoint""" - modified = True - while modified: - modified = False - for block in viewvalues(self.ircfg.blocks): - modified |= self.process_block(block) - - def process_block(self, block): - """ - Fetch reach definitions from predecessors and propagate it to - the assignblk in block @block. - """ - predecessor_state = {} - for pred_lbl in self.ircfg.predecessors(block.loc_key): - pred = self.ircfg.blocks[pred_lbl] - for lval, definitions in viewitems(self.get_definitions(pred_lbl, len(pred))): - predecessor_state.setdefault(lval, set()).update(definitions) - - modified = self.get((block.loc_key, 0)) != predecessor_state - if not modified: - return False - self[(block.loc_key, 0)] = predecessor_state - - for index in range(len(block)): - modified |= self.process_assignblock(block, index) - return modified - - def process_assignblock(self, block, assignblk_index): - """ - Updates the reach definitions with values defined at - assignblock @assignblk_index in block @block. - NB: the effect of assignblock @assignblk_index in stored at index - (@block, @assignblk_index + 1). 
- """ - - assignblk = block[assignblk_index] - defs = self.get_definitions(block.loc_key, assignblk_index).copy() - for lval in assignblk: - defs.update({lval: set([(block.loc_key, assignblk_index)])}) - - modified = self.get((block.loc_key, assignblk_index + 1)) != defs - if modified: - self[(block.loc_key, assignblk_index + 1)] = defs - - return modified - -ATTR_DEP = {"color" : "black", - "_type" : "data"} - -AssignblkNode = namedtuple('AssignblkNode', ['label', 'index', 'var']) - - -class DiGraphDefUse(DiGraph): - """Representation of a Use-Definition graph as defined by - Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. Watson Research Division. - Example: - IR block: - lbl0: - 0 A = 1 - B = 3 - 1 B = 2 - 2 A = A + B + 4 - - Def use analysis: - (lbl0, 0, A) => {(lbl0, 2, A)} - (lbl0, 0, B) => {} - (lbl0, 1, B) => {(lbl0, 2, A)} - (lbl0, 2, A) => {} - - """ - - - def __init__(self, reaching_defs, - deref_mem=False, *args, **kwargs): - """Instantiate a DiGraph - @blocks: IR blocks - """ - self._edge_attr = {} - - # For dot display - self._filter_node = None - self._dot_offset = None - self._blocks = reaching_defs.ircfg.blocks - - super(DiGraphDefUse, self).__init__(*args, **kwargs) - self._compute_def_use(reaching_defs, - deref_mem=deref_mem) - - def edge_attr(self, src, dst): - """ - Return a dictionary of attributes for the edge between @src and @dst - @src: the source node of the edge - @dst: the destination node of the edge - """ - return self._edge_attr[(src, dst)] - - def _compute_def_use(self, reaching_defs, - deref_mem=False): - for block in viewvalues(self._blocks): - self._compute_def_use_block(block, - reaching_defs, - deref_mem=deref_mem) - - def _compute_def_use_block(self, block, reaching_defs, deref_mem=False): - for index, assignblk in enumerate(block): - assignblk_reaching_defs = reaching_defs.get_definitions(block.loc_key, index) - for lval, expr in viewitems(assignblk): - self.add_node(AssignblkNode(block.loc_key, 
index, lval)) - - read_vars = expr.get_r(mem_read=deref_mem) - if deref_mem and lval.is_mem(): - read_vars.update(lval.ptr.get_r(mem_read=deref_mem)) - for read_var in read_vars: - for reach in assignblk_reaching_defs.get(read_var, set()): - self.add_data_edge(AssignblkNode(reach[0], reach[1], read_var), - AssignblkNode(block.loc_key, index, lval)) - - def del_edge(self, src, dst): - super(DiGraphDefUse, self).del_edge(src, dst) - del self._edge_attr[(src, dst)] - - def add_uniq_labeled_edge(self, src, dst, edge_label): - """Adds the edge (@src, @dst) with label @edge_label. - if edge (@src, @dst) already exists, the previous label is overridden - """ - self.add_uniq_edge(src, dst) - self._edge_attr[(src, dst)] = edge_label - - def add_data_edge(self, src, dst): - """Adds an edge representing a data dependencie - and sets the label accordingly""" - self.add_uniq_labeled_edge(src, dst, ATTR_DEP) - - def node2lines(self, node): - lbl, index, reg = node - yield self.DotCellDescription(text="%s (%s)" % (lbl, index), - attr={'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey'}) - src = self._blocks[lbl][index][reg] - line = "%s = %s" % (reg, src) - yield self.DotCellDescription(text=line, attr={}) - yield self.DotCellDescription(text="", attr={}) - - -def dead_simp_useful_assignblks(irarch, defuse, reaching_defs): - """Mark useful statements using previous reach analysis and defuse - - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. 
Watson Research Division, Algorithm MK - - Return a set of triplets (block, assignblk number, lvalue) of - useful definitions - PRE: compute_reach(self) - - """ - ircfg = reaching_defs.ircfg - useful = set() - - for block_lbl, block in viewitems(ircfg.blocks): - successors = ircfg.successors(block_lbl) - for successor in successors: - if successor not in ircfg.blocks: - keep_all_definitions = True - break - else: - keep_all_definitions = False - - # Block has a nonexistent successor or is a leaf - if keep_all_definitions or (len(successors) == 0): - valid_definitions = reaching_defs.get_definitions(block_lbl, - len(block)) - for lval, definitions in viewitems(valid_definitions): - if lval in irarch.get_out_regs(block) or keep_all_definitions: - for definition in definitions: - useful.add(AssignblkNode(definition[0], definition[1], lval)) - - # Force keeping of specific cases - for index, assignblk in enumerate(block): - for lval, rval in viewitems(assignblk): - if (lval.is_mem() or - irarch.IRDst == lval or - lval.is_id("exception_flags") or - rval.is_function_call()): - useful.add(AssignblkNode(block_lbl, index, lval)) - - # Useful nodes dependencies - for node in useful: - for parent in defuse.reachable_parents(node): - yield parent - - -def dead_simp(irarch, ircfg): - """ - Remove useless assignments. - - This function is used to analyse relation of a * complete function * - This means the blocks under study represent a solid full function graph. - - Source : Kennedy, K. (1979). A survey of data flow analysis techniques. - IBM Thomas J. 
Watson Research Division, page 43 - - @ircfg: IntermediateRepresentation instance - """ - - modified = False - reaching_defs = ReachingDefinitions(ircfg) - defuse = DiGraphDefUse(reaching_defs, deref_mem=True) - useful = set(dead_simp_useful_assignblks(irarch, defuse, reaching_defs)) - for block in list(viewvalues(ircfg.blocks)): - irs = [] - for idx, assignblk in enumerate(block): - new_assignblk = dict(assignblk) - for lval in assignblk: - if AssignblkNode(block.loc_key, idx, lval) not in useful: - del new_assignblk[lval] - modified = True - irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ircfg.blocks[block.loc_key] = IRBlock(block.loc_key, irs) - return modified - - -def _test_merge_next_block(ircfg, loc_key): - """ - Test if the irblock at @loc_key can be merge with its son - @ircfg: IRCFG instance - @loc_key: LocKey instance of the candidate parent irblock - """ - - if loc_key not in ircfg.blocks: - return None - sons = ircfg.successors(loc_key) - if len(sons) != 1: - return None - son = list(sons)[0] - if ircfg.predecessors(son) != [loc_key]: - return None - if son not in ircfg.blocks: - return None - - return son - - -def _do_merge_blocks(ircfg, loc_key, son_loc_key): - """ - Merge two irblocks at @loc_key and @son_loc_key - - @ircfg: DiGrpahIR - @loc_key: LocKey instance of the parent IRBlock - @loc_key: LocKey instance of the son IRBlock - """ - - assignblks = [] - for assignblk in ircfg.blocks[loc_key]: - if ircfg.IRDst not in assignblk: - assignblks.append(assignblk) - continue - affs = {} - for dst, src in viewitems(assignblk): - if dst != ircfg.IRDst: - affs[dst] = src - if affs: - assignblks.append(AssignBlock(affs, assignblk.instr)) - - assignblks += ircfg.blocks[son_loc_key].assignblks - new_block = IRBlock(loc_key, assignblks) - - ircfg.discard_edge(loc_key, son_loc_key) - - for son_successor in ircfg.successors(son_loc_key): - ircfg.add_uniq_edge(loc_key, son_successor) - ircfg.discard_edge(son_loc_key, son_successor) - del 
ircfg.blocks[son_loc_key] - ircfg.del_node(son_loc_key) - ircfg.blocks[loc_key] = new_block - - -def _test_jmp_only(ircfg, loc_key, heads): - """ - If irblock at @loc_key sets only IRDst to an ExprLoc, return the - corresponding loc_key target. - Avoid creating predecssors for heads LocKeys - None in other cases. - - @ircfg: IRCFG instance - @loc_key: LocKey instance of the candidate irblock - @heads: LocKey heads of the graph - - """ - - if loc_key not in ircfg.blocks: - return None - irblock = ircfg.blocks[loc_key] - if len(irblock.assignblks) != 1: - return None - items = list(viewitems(dict(irblock.assignblks[0]))) - if len(items) != 1: - return None - if len(ircfg.successors(loc_key)) != 1: - return None - # Don't create predecessors on heads - dst, src = items[0] - assert dst.is_id("IRDst") - if not src.is_loc(): - return None - dst = src.loc_key - if loc_key in heads: - predecessors = set(ircfg.predecessors(dst)) - predecessors.difference_update(set([loc_key])) - if predecessors: - return None - return dst - - -def _relink_block_node(ircfg, loc_key, son_loc_key, replace_dct): - """ - Link loc_key's parents to parents directly to son_loc_key - """ - for parent in set(ircfg.predecessors(loc_key)): - parent_block = ircfg.blocks.get(parent, None) - if parent_block is None: - continue - - new_block = parent_block.modify_exprs( - lambda expr:expr.replace_expr(replace_dct), - lambda expr:expr.replace_expr(replace_dct) - ) - - # Link parent to new dst - ircfg.add_uniq_edge(parent, son_loc_key) - - # Unlink block - ircfg.blocks[new_block.loc_key] = new_block - ircfg.del_node(loc_key) - - -def _remove_to_son(ircfg, loc_key, son_loc_key): - """ - Merge irblocks; The final block has the @son_loc_key loc_key - Update references - - Condition: - - irblock at @loc_key is a pure jump block - - @loc_key is not an entry point (can be removed) - - @irblock: IRCFG instance - @loc_key: LocKey instance of the parent irblock - @son_loc_key: LocKey instance of the son irblock - """ 
- - # Ircfg loop => don't mess - if loc_key == son_loc_key: - return False - - # Unlink block destinations - ircfg.del_edge(loc_key, son_loc_key) - - replace_dct = { - ExprLoc(loc_key, ircfg.IRDst.size):ExprLoc(son_loc_key, ircfg.IRDst.size) - } - - _relink_block_node(ircfg, loc_key, son_loc_key, replace_dct) - - ircfg.del_node(loc_key) - del ircfg.blocks[loc_key] - - return True - - -def _remove_to_parent(ircfg, loc_key, son_loc_key): - """ - Merge irblocks; The final block has the @loc_key loc_key - Update references - - Condition: - - irblock at @loc_key is a pure jump block - - @son_loc_key is not an entry point (can be removed) - - @irblock: IRCFG instance - @loc_key: LocKey instance of the parent irblock - @son_loc_key: LocKey instance of the son irblock - """ - - # Ircfg loop => don't mess - if loc_key == son_loc_key: - return False - - # Unlink block destinations - ircfg.del_edge(loc_key, son_loc_key) - - old_irblock = ircfg.blocks[son_loc_key] - new_irblock = IRBlock(loc_key, old_irblock.assignblks) - - ircfg.blocks[son_loc_key] = new_irblock - - ircfg.add_irblock(new_irblock) - - replace_dct = { - ExprLoc(son_loc_key, ircfg.IRDst.size):ExprLoc(loc_key, ircfg.IRDst.size) - } - - _relink_block_node(ircfg, son_loc_key, loc_key, replace_dct) - - - ircfg.del_node(son_loc_key) - del ircfg.blocks[son_loc_key] - - return True - - -def merge_blocks(ircfg, heads): - """ - This function modifies @ircfg to apply the following transformations: - - group an irblock with its son if the irblock has one and only one son and - this son has one and only one parent (spaghetti code). - - if an irblock is only made of an assignment to IRDst with a given label, - this irblock is dropped and its parent destination targets are - updated. The irblock must have a parent (avoid deleting the function head) - - if an irblock is a head of the graph and is only made of an assignment to - IRDst with a given label, this irblock is dropped and its son becomes the - head. 
References are fixed - - This function avoid creating predecessors on heads - - Return True if at least an irblock has been modified - - @ircfg: IRCFG instance - @heads: loc_key to keep - """ - - modified = False - todo = set(ircfg.nodes()) - while todo: - loc_key = todo.pop() - - # Test merge block - son = _test_merge_next_block(ircfg, loc_key) - if son is not None and son not in heads: - _do_merge_blocks(ircfg, loc_key, son) - todo.add(loc_key) - modified = True - continue - - # Test jmp only block - son = _test_jmp_only(ircfg, loc_key, heads) - if son is not None and loc_key not in heads: - ret = _remove_to_son(ircfg, loc_key, son) - modified |= ret - if ret: - todo.add(loc_key) - continue - - # Test head jmp only block - if (son is not None and - son not in heads and - son in ircfg.blocks): - # jmp only test done previously - ret = _remove_to_parent(ircfg, loc_key, son) - modified |= ret - if ret: - todo.add(loc_key) - continue - - - return modified - - -def remove_empty_assignblks(ircfg): - """ - Remove empty assignblks in irblocks of @ircfg - Return True if at least an irblock has been modified - - @ircfg: IRCFG instance - """ - modified = False - for loc_key, block in list(viewitems(ircfg.blocks)): - irs = [] - block_modified = False - for assignblk in block: - if len(assignblk): - irs.append(assignblk) - else: - block_modified = True - if block_modified: - new_irblock = IRBlock(loc_key, irs) - ircfg.blocks[loc_key] = new_irblock - modified = True - return modified - - -class SSADefUse(DiGraph): - """ - Generate DefUse information from SSA transformation - Links are not valid for ExprMem. 
- """ - - def add_var_def(self, node, src): - index2dst = self._links.setdefault(node.label, {}) - dst2src = index2dst.setdefault(node.index, {}) - dst2src[node.var] = src - - def add_def_node(self, def_nodes, node, src): - if node.var.is_id(): - def_nodes[node.var] = node - - def add_use_node(self, use_nodes, node, src): - sources = set() - if node.var.is_mem(): - sources.update(node.var.ptr.get_r(mem_read=True)) - sources.update(src.get_r(mem_read=True)) - for source in sources: - if not source.is_mem(): - use_nodes.setdefault(source, set()).add(node) - - def get_node_target(self, node): - return self._links[node.label][node.index][node.var] - - def set_node_target(self, node, src): - self._links[node.label][node.index][node.var] = src - - @classmethod - def from_ssa(cls, ssa): - """ - Return a DefUse DiGraph from a SSA graph - @ssa: SSADiGraph instance - """ - - graph = cls() - # First pass - # Link line to its use and def - def_nodes = {} - use_nodes = {} - graph._links = {} - for lbl in ssa.graph.nodes(): - block = ssa.graph.blocks.get(lbl, None) - if block is None: - continue - for index, assignblk in enumerate(block): - for dst, src in viewitems(assignblk): - node = AssignblkNode(lbl, index, dst) - graph.add_var_def(node, src) - graph.add_def_node(def_nodes, node, src) - graph.add_use_node(use_nodes, node, src) - - for dst, node in viewitems(def_nodes): - graph.add_node(node) - if dst not in use_nodes: - continue - for use in use_nodes[dst]: - graph.add_uniq_edge(node, use) - - return graph - - - - -def expr_test_visit(expr, test): - result = set() - expr.visit( - lambda expr: expr, - lambda expr: test(expr, result) - ) - if result: - return True - else: - return False - - -def expr_has_mem_test(expr, result): - if result: - # Don't analyse if we already found a candidate - return False - if expr.is_mem(): - result.add(expr) - return False - return True - - -def expr_has_mem(expr): - """ - Return True if expr contains at least one memory access - @expr: Expr 
instance - """ - return expr_test_visit(expr, expr_has_mem_test) - - -class PropagateThroughExprId(object): - """ - Propagate expressions though ExprId - """ - - def has_propagation_barrier(self, assignblks): - """ - Return True if propagation cannot cross the @assignblks - @assignblks: list of AssignBlock to check - """ - for assignblk in assignblks: - for dst, src in viewitems(assignblk): - if src.is_function_call(): - return True - if dst.is_mem(): - return True - return False - - def is_mem_written(self, ssa, node_a, node_b): - """ - Return True if memory is written at least once between @node_a and - @node_b - - @node: AssignblkNode representing the start position - @successor: AssignblkNode representing the end position - """ - - block_b = ssa.graph.blocks[node_b.label] - nodes_to_do = self.compute_reachable_nodes_from_a_to_b(ssa.graph, node_a.label, node_b.label) - - if node_a.label == node_b.label: - # src is dst - assert nodes_to_do == set([node_a.label]) - if self.has_propagation_barrier(block_b.assignblks[node_a.index:node_b.index]): - return True - else: - # Check everyone but node_a.label and node_b.label - for loc in nodes_to_do - set([node_a.label, node_b.label]): - block = ssa.graph.blocks[loc] - if self.has_propagation_barrier(block.assignblks): - return True - # Check node_a.label partially - block_a = ssa.graph.blocks[node_a.label] - if self.has_propagation_barrier(block_a.assignblks[node_a.index:]): - return True - if nodes_to_do.intersection(ssa.graph.successors(node_b.label)): - # There is a path from node_b.label to node_b.label => Check node_b.label fully - if self.has_propagation_barrier(block_b.assignblks): - return True - else: - # Check node_b.label partially - if self.has_propagation_barrier(block_b.assignblks[:node_b.index]): - return True - return False - - def compute_reachable_nodes_from_a_to_b(self, ssa, loc_a, loc_b): - reachables_a = set(ssa.reachable_sons(loc_a)) - reachables_b = set(ssa.reachable_parents_stop_node(loc_b, 
loc_a)) - return reachables_a.intersection(reachables_b) - - def propagation_allowed(self, ssa, to_replace, node_a, node_b): - """ - Return True if we can replace @node_a source present in @to_replace into - @node_b - - @node_a: AssignblkNode position - @node_b: AssignblkNode position - """ - if not expr_has_mem(to_replace[node_a.var]): - return True - if self.is_mem_written(ssa, node_a, node_b): - return False - return True - - - def get_var_definitions(self, ssa): - """ - Return a dictionary linking variable to its assignment location - @ssa: SSADiGraph instance - """ - ircfg = ssa.graph - def_dct = {} - for node in ircfg.nodes(): - for index, assignblk in enumerate(ircfg.blocks[node]): - for dst, src in viewitems(assignblk): - if not dst.is_id(): - continue - if dst in ssa.immutable_ids: - continue - assert dst not in def_dct - def_dct[dst] = node, index - return def_dct - - def phi_has_identical_sources(self, ssa, def_dct, var): - """ - If phi operation has identical source values, return it; else None - @ssa: SSADiGraph instance - @def_dct: dictionary linking variable to its assignment location - @var: Phi destination variable - """ - loc_key, index = def_dct[var] - sources = ssa.graph.blocks[loc_key][index][var] - assert sources.is_op('Phi') - sources_values = set() - for src in sources.args: - assert src in def_dct - loc_key, index = def_dct[src] - value = ssa.graph.blocks[loc_key][index][src] - sources_values.add(value) - if len(sources_values) != 1: - return None - return list(sources_values)[0] - - def get_candidates(self, ssa, head, max_expr_depth): - def_dct = self.get_var_definitions(ssa) - defuse = SSADefUse.from_ssa(ssa) - to_replace = {} - node_to_reg = {} - for node in defuse.nodes(): - if node.var in ssa.immutable_ids: - continue - src = defuse.get_node_target(node) - if max_expr_depth is not None and len(str(src)) > max_expr_depth: - continue - if src.is_function_call(): - continue - if node.var.is_mem(): - continue - if src.is_op('Phi'): - ret = 
self.phi_has_identical_sources(ssa, def_dct, node.var) - if ret: - to_replace[node.var] = ret - node_to_reg[node] = node.var - continue - to_replace[node.var] = src - node_to_reg[node] = node.var - return node_to_reg, to_replace, defuse - - def propagate(self, ssa, head, max_expr_depth=None): - """ - Do expression propagation - @ssa: SSADiGraph instance - @head: the head location of the graph - @max_expr_depth: the maximum allowed depth of an expression - """ - node_to_reg, to_replace, defuse = self.get_candidates(ssa, head, max_expr_depth) - modified = False - for node, reg in viewitems(node_to_reg): - for successor in defuse.successors(node): - if not self.propagation_allowed(ssa, to_replace, node, successor): - continue - - node_a = node - node_b = successor - block = ssa.graph.blocks[node_b.label] - - replace = {node_a.var: to_replace[node_a.var]} - # Replace - assignblks = list(block) - assignblk = block[node_b.index] - out = {} - for dst, src in viewitems(assignblk): - if src.is_op('Phi'): - out[dst] = src - continue - - if src.is_mem(): - ptr = src.ptr.replace_expr(replace) - new_src = ExprMem(ptr, src.size) - else: - new_src = src.replace_expr(replace) - - if dst.is_id(): - new_dst = dst - elif dst.is_mem(): - ptr = dst.ptr.replace_expr(replace) - new_dst = ExprMem(ptr, dst.size) - else: - new_dst = dst.replace_expr(replace) - if not (new_dst.is_id() or new_dst.is_mem()): - new_dst = dst - if src != new_src or dst != new_dst: - modified = True - out[new_dst] = new_src - out = AssignBlock(out, assignblk.instr) - assignblks[node_b.index] = out - new_block = IRBlock(block.loc_key, assignblks) - ssa.graph.blocks[block.loc_key] = new_block - - return modified - - - -class PropagateExprIntThroughExprId(PropagateThroughExprId): - """ - Propagate ExprInt though ExprId: classic constant propagation - This is a sub family of PropagateThroughExprId. - It reduces leaves in expressions of a program. 
- """ - - def get_candidates(self, ssa, head, max_expr_depth): - defuse = SSADefUse.from_ssa(ssa) - - to_replace = {} - node_to_reg = {} - for node in defuse.nodes(): - src = defuse.get_node_target(node) - if not src.is_int(): - continue - if src.is_function_call(): - continue - if node.var.is_mem(): - continue - to_replace[node.var] = src - node_to_reg[node] = node.var - return node_to_reg, to_replace, defuse - - def propagation_allowed(self, ssa, to_replace, node_a, node_b): - """ - Propagating ExprInt is always ok - """ - return True - - -class PropagateThroughExprMem(object): - """ - Propagate through ExprMem in very simple cases: - - if no memory write between source and target - - if source does not contain any memory reference - """ - - def propagate(self, ssa, head, max_expr_depth=None): - ircfg = ssa.graph - todo = set() - modified = False - for block in viewvalues(ircfg.blocks): - for i, assignblk in enumerate(block): - for dst, src in viewitems(assignblk): - if not dst.is_mem(): - continue - if expr_has_mem(src): - continue - todo.add((block.loc_key, i + 1, dst, src)) - ptr = dst.ptr - for size in range(8, dst.size, 8): - todo.add((block.loc_key, i + 1, ExprMem(ptr, size), src[:size])) - - while todo: - loc_key, index, mem_dst, mem_src = todo.pop() - block = ircfg.blocks[loc_key] - assignblks = list(block) - block_modified = False - for i in range(index, len(block)): - assignblk = block[i] - write_mem = False - assignblk_modified = False - out = dict(assignblk) - out_new = {} - for dst, src in viewitems(out): - if dst.is_mem(): - write_mem = True - ptr = dst.ptr.replace_expr({mem_dst:mem_src}) - dst = ExprMem(ptr, dst.size) - src = src.replace_expr({mem_dst:mem_src}) - out_new[dst] = src - if out != out_new: - assignblk_modified = True - - if assignblk_modified: - assignblks[i] = AssignBlock(out_new, assignblk.instr) - block_modified = True - if write_mem: - break - else: - # If no memory written, we may propagate to sons - # if son has only parent - for 
successor in ircfg.successors(loc_key): - predecessors = ircfg.predecessors(successor) - if len(predecessors) != 1: - continue - todo.add((successor, 0, mem_dst, mem_src)) - - if block_modified: - modified = True - new_block = IRBlock(block.loc_key, assignblks) - ircfg.blocks[block.loc_key] = new_block - return modified - - -def stack_to_reg(expr): - if expr.is_mem(): - ptr = expr.arg - SP = ir_arch_a.sp - if ptr == SP: - return ExprId("STACK.0", expr.size) - elif (ptr.is_op('+') and - len(ptr.args) == 2 and - ptr.args[0] == SP and - ptr.args[1].is_int()): - diff = int(ptr.args[1]) - assert diff % 4 == 0 - diff = (0 - diff) & 0xFFFFFFFF - return ExprId("STACK.%d" % (diff // 4), expr.size) - return False - - -def is_stack_access(ir_arch_a, expr): - if not expr.is_mem(): - return False - ptr = expr.ptr - diff = expr_simp(ptr - ir_arch_a.sp) - if not diff.is_int(): - return False - return expr - - -def visitor_get_stack_accesses(ir_arch_a, expr, stack_vars): - if is_stack_access(ir_arch_a, expr): - stack_vars.add(expr) - return expr - - -def get_stack_accesses(ir_arch_a, expr): - result = set() - expr.visit(lambda expr:visitor_get_stack_accesses(ir_arch_a, expr, result)) - return result - - -def get_interval_length(interval_in): - length = 0 - for start, stop in interval_in.intervals: - length += stop + 1 - start - return length - - -def check_expr_below_stack(ir_arch_a, expr): - """ - Return False if expr pointer is below original stack pointer - @ir_arch_a: ira instance - @expr: Expression instance - """ - ptr = expr.ptr - diff = expr_simp(ptr - ir_arch_a.sp) - if not diff.is_int(): - return True - if int(diff) == 0 or int(expr_simp(diff.msb())) == 0: - return False - return True - - -def retrieve_stack_accesses(ir_arch_a, ircfg): - """ - Walk the ssa graph and find stack based variables. 
- Return a dictionary linking stack base address to its size/name - @ir_arch_a: ira instance - @ircfg: IRCFG instance - """ - stack_vars = set() - for block in viewvalues(ircfg.blocks): - for assignblk in block: - for dst, src in viewitems(assignblk): - stack_vars.update(get_stack_accesses(ir_arch_a, dst)) - stack_vars.update(get_stack_accesses(ir_arch_a, src)) - stack_vars = [expr for expr in stack_vars if check_expr_below_stack(ir_arch_a, expr)] - - base_to_var = {} - for var in stack_vars: - base_to_var.setdefault(var.ptr, set()).add(var) - - - base_to_interval = {} - for addr, vars in viewitems(base_to_var): - var_interval = interval() - for var in vars: - offset = expr_simp(addr - ir_arch_a.sp) - if not offset.is_int(): - # skip non linear stack offset - continue - - start = int(offset) - stop = int(expr_simp(offset + ExprInt(var.size // 8, offset.size))) - mem = interval([(start, stop-1)]) - var_interval += mem - base_to_interval[addr] = var_interval - if not base_to_interval: - return {} - # Check if not intervals overlap - _, tmp = base_to_interval.popitem() - while base_to_interval: - addr, mem = base_to_interval.popitem() - assert (tmp & mem).empty - tmp += mem - - base_to_info = {} - for addr, vars in viewitems(base_to_var): - name = "var_%d" % (len(base_to_info)) - size = max([var.size for var in vars]) - base_to_info[addr] = size, name - return base_to_info - - -def fix_stack_vars(expr, base_to_info): - """ - Replace local stack accesses in expr using information in @base_to_info - @expr: Expression instance - @base_to_info: dictionary linking stack base address to its size/name - """ - if not expr.is_mem(): - return expr - ptr = expr.ptr - if ptr not in base_to_info: - return expr - size, name = base_to_info[ptr] - var = ExprId(name, size) - if size == expr.size: - return var - assert expr.size < size - return var[:expr.size] - - -def replace_mem_stack_vars(expr, base_to_info): - return expr.visit(lambda expr:fix_stack_vars(expr, base_to_info)) - - 
-def replace_stack_vars(ir_arch_a, ircfg): - """ - Try to replace stack based memory accesses by variables. - - Hypothesis: the input ircfg must have all it's accesses to stack explicitly - done through the stack register, ie every aliases on those variables is - resolved. - - WARNING: may fail - - @ir_arch_a: ira instance - @ircfg: IRCFG instance - """ - - base_to_info = retrieve_stack_accesses(ir_arch_a, ircfg) - modified = False - for block in list(viewvalues(ircfg.blocks)): - assignblks = [] - for assignblk in block: - out = {} - for dst, src in viewitems(assignblk): - new_dst = dst.visit(lambda expr:replace_mem_stack_vars(expr, base_to_info)) - new_src = src.visit(lambda expr:replace_mem_stack_vars(expr, base_to_info)) - if new_dst != dst or new_src != src: - modified |= True - - out[new_dst] = new_src - - out = AssignBlock(out, assignblk.instr) - assignblks.append(out) - new_block = IRBlock(block.loc_key, assignblks) - ircfg.blocks[block.loc_key] = new_block - return modified - - -def memlookup_test(expr, bs, is_addr_ro_variable, result): - if expr.is_mem() and expr.ptr.is_int(): - ptr = int(expr.ptr) - if is_addr_ro_variable(bs, ptr, expr.size): - result.add(expr) - return False - return True - - -def memlookup_visit(expr, bs, is_addr_ro_variable): - result = set() - expr.visit(lambda expr: expr, - lambda expr: memlookup_test(expr, bs, is_addr_ro_variable, result)) - return result - - -def get_memlookup(expr, bs, is_addr_ro_variable): - return memlookup_visit(expr, bs, is_addr_ro_variable) - - -def read_mem(bs, expr): - ptr = int(expr.ptr) - var_bytes = bs.getbytes(ptr, expr.size // 8)[::-1] - try: - value = int(encode_hex(var_bytes), 16) - except ValueError: - return expr - return ExprInt(value, expr.size) - - -def load_from_int(ir_arch, bs, is_addr_ro_variable): - """ - Replace memory read based on constant with static value - @ir_arch: ira instance - @bs: binstream instance - @is_addr_ro_variable: callback(addr, size) to test memory candidate - """ - - 
modified = False - for block in list(viewvalues(ir_arch.blocks)): - assignblks = list() - for assignblk in block: - out = {} - for dst, src in viewitems(assignblk): - # Test src - mems = get_memlookup(src, bs, is_addr_ro_variable) - src_new = src - if mems: - replace = {} - for mem in mems: - value = read_mem(bs, mem) - replace[mem] = value - src_new = src.replace_expr(replace) - if src_new != src: - modified = True - # Test dst pointer if dst is mem - if dst.is_mem(): - ptr = dst.ptr - mems = get_memlookup(ptr, bs, is_addr_ro_variable) - if mems: - replace = {} - for mem in mems: - value = read_mem(bs, mem) - replace[mem] = value - ptr_new = ptr.replace_expr(replace) - if ptr_new != ptr: - modified = True - dst = ExprMem(ptr_new, dst.size) - out[dst] = src_new - out = AssignBlock(out, assignblk.instr) - assignblks.append(out) - block = IRBlock(block.loc_key, assignblks) - ir_arch.blocks[block.loc_key] = block - return modified - - -class AssignBlockLivenessInfos(object): - """ - Description of live in / live out of an AssignBlock - """ - - __slots__ = ["gen", "kill", "var_in", "var_out", "live", "assignblk"] - - def __init__(self, assignblk, gen, kill): - self.gen = gen - self.kill = kill - self.var_in = set() - self.var_out = set() - self.live = set() - self.assignblk = assignblk - - def __str__(self): - out = [] - out.append("\tVarIn:" + ", ".join(str(x) for x in self.var_in)) - out.append("\tGen:" + ", ".join(str(x) for x in self.gen)) - out.append("\tKill:" + ", ".join(str(x) for x in self.kill)) - out.append( - '\n'.join( - "\t%s = %s" % (dst, src) - for (dst, src) in viewitems(self.assignblk) - ) - ) - out.append("\tVarOut:" + ", ".join(str(x) for x in self.var_out)) - return '\n'.join(out) - - -class IRBlockLivenessInfos(object): - """ - Description of live in / live out of an AssignBlock - """ - __slots__ = ["loc_key", "infos", "assignblks"] - - - def __init__(self, irblock): - self.loc_key = irblock.loc_key - self.infos = [] - self.assignblks = [] - for 
assignblk in irblock: - gens, kills = set(), set() - for dst, src in viewitems(assignblk): - expr = ExprAssign(dst, src) - read = expr.get_r(mem_read=True) - write = expr.get_w() - gens.update(read) - kills.update(write) - self.infos.append(AssignBlockLivenessInfos(assignblk, gens, kills)) - self.assignblks.append(assignblk) - - def __getitem__(self, index): - """Getitem on assignblks""" - return self.assignblks.__getitem__(index) - - def __str__(self): - out = [] - out.append("%s:" % self.loc_key) - for info in self.infos: - out.append(str(info)) - out.append('') - return "\n".join(out) - - -class DiGraphLiveness(DiGraph): - """ - DiGraph representing variable liveness - """ - - def __init__(self, ircfg, loc_db=None): - super(DiGraphLiveness, self).__init__() - self.ircfg = ircfg - self.loc_db = loc_db - self._blocks = {} - # Add irblocks gen/kill - for node in ircfg.nodes(): - irblock = ircfg.blocks[node] - irblockinfos = IRBlockLivenessInfos(irblock) - self.add_node(irblockinfos.loc_key) - self.blocks[irblockinfos.loc_key] = irblockinfos - for succ in ircfg.successors(node): - self.add_uniq_edge(node, succ) - for pred in ircfg.predecessors(node): - self.add_uniq_edge(pred, node) - - @property - def blocks(self): - return self._blocks - - def init_var_info(self): - """Add ircfg out regs""" - raise NotImplementedError("Abstract method") - - def node2lines(self, node): - """ - Output liveness information in dot format - """ - if self.loc_db is None: - node_name = str(node) - else: - names = self.loc_db.get_location_names(node) - if not names: - node_name = self.loc_db.pretty_str(node) - else: - node_name = "".join("%s:\n" % name for name in names) - yield self.DotCellDescription( - text="%s" % node_name, - attr={ - 'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey', - } - ) - if node not in self._blocks: - yield [self.DotCellDescription(text="NOT PRESENT", attr={})] - return - - for i, info in enumerate(self._blocks[node].infos): - var_in = "VarIn:" + ", 
".join(str(x) for x in info.var_in) - var_out = "VarOut:" + ", ".join(str(x) for x in info.var_out) - - assignmnts = ["%s = %s" % (dst, src) for (dst, src) in viewitems(info.assignblk)] - - if i == 0: - yield self.DotCellDescription( - text=var_in, - attr={ - 'bgcolor': 'green', - } - ) - - for assign in assignmnts: - yield self.DotCellDescription(text=assign, attr={}) - yield self.DotCellDescription( - text=var_out, - attr={ - 'bgcolor': 'green', - } - ) - yield self.DotCellDescription(text="", attr={}) - - def back_propagate_compute(self, block): - """ - Compute the liveness information in the @block. - @block: AssignBlockLivenessInfos instance - """ - infos = block.infos - modified = False - for i in reversed(range(len(infos))): - new_vars = set(infos[i].gen.union(infos[i].var_out.difference(infos[i].kill))) - if infos[i].var_in != new_vars: - modified = True - infos[i].var_in = new_vars - if i > 0 and infos[i - 1].var_out != set(infos[i].var_in): - modified = True - infos[i - 1].var_out = set(infos[i].var_in) - return modified - - def back_propagate_to_parent(self, todo, node, parent): - """ - Back propagate the liveness information from @node to @parent. - @node: loc_key of the source node - @parent: loc_key of the node to update - """ - parent_block = self.blocks[parent] - cur_block = self.blocks[node] - if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: - return - var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) - parent_block.infos[-1].var_out = var_info - todo.add(parent) - - def compute_liveness(self): - """ - Compute the liveness information for the digraph. 
- """ - todo = set(self.leaves()) - while todo: - node = todo.pop() - cur_block = self.blocks[node] - modified = self.back_propagate_compute(cur_block) - if not modified: - continue - # We modified parent in, propagate to parents - for pred in self.predecessors(node): - self.back_propagate_to_parent(todo, node, pred) - return True - - -class DiGraphLivenessIRA(DiGraphLiveness): - """ - DiGraph representing variable liveness for IRA - """ - - def init_var_info(self, ir_arch_a): - """Add ircfg out regs""" - - for node in self.leaves(): - irblock = self.ircfg.blocks[node] - var_out = ir_arch_a.get_out_regs(irblock) - irblock_liveness = self.blocks[node] - irblock_liveness.infos[-1].var_out = var_out - - -def discard_phi_sources(ircfg, deleted_vars): - """ - Remove phi sources in @ircfg belonging to @deleted_vars set - @ircfg: IRCFG instance in ssa form - @deleted_vars: unused phi sources - """ - for block in list(viewvalues(ircfg.blocks)): - if not block.assignblks: - continue - assignblk = block[0] - todo = {} - modified = False - for dst, src in viewitems(assignblk): - if not src.is_op('Phi'): - todo[dst] = src - continue - srcs = set(expr for expr in src.args if expr not in deleted_vars) - assert(srcs) - if len(srcs) > 1: - todo[dst] = srcs - continue - todo[dst] = srcs.pop() - modified = True - if not modified: - continue - assignblks = list(block) - assignblk = dict(assignblk) - assignblk.update(todo) - assignblk = AssignBlock(assignblk, assignblks[0].instr) - assignblks[0] = assignblk - new_irblock = IRBlock(block.loc_key, assignblks) - ircfg.blocks[block.loc_key] = new_irblock - return True - - -def get_unreachable_nodes(ircfg, edges_to_del, heads): - """ - Return the unreachable nodes starting from heads and the associated edges to - be deleted. 
- - @ircfg: IRCFG instance - @edges_to_del: edges already marked as deleted - heads: locations of graph heads - """ - todo = set(heads) - visited_nodes = set() - new_edges_to_del = set() - while todo: - node = todo.pop() - if node in visited_nodes: - continue - visited_nodes.add(node) - for successor in ircfg.successors(node): - if (node, successor) not in edges_to_del: - todo.add(successor) - all_nodes = set(ircfg.nodes()) - nodes_to_del = all_nodes.difference(visited_nodes) - for node in nodes_to_del: - for successor in ircfg.successors(node): - if successor not in nodes_to_del: - # Frontier: link from a deleted node to a living node - new_edges_to_del.add((node, successor)) - return nodes_to_del, new_edges_to_del - - -def update_phi_with_deleted_edges(ircfg, edges_to_del): - """ - Update phi which have a source present in @edges_to_del - @ssa: IRCFG instance in ssa form - @edges_to_del: edges to delete - """ - - modified = False - blocks = dict(ircfg.blocks) - for loc_src, loc_dst in edges_to_del: - block = ircfg.blocks[loc_dst] - assert block.assignblks - assignblks = list(block) - assignblk = assignblks[0] - out = {} - for dst, phi_sources in viewitems(assignblk): - if not phi_sources.is_op('Phi'): - out = assignblk - break - var_to_parents = get_phi_sources_parent_block( - ircfg, - loc_dst, - phi_sources.args - ) - to_keep = set(phi_sources.args) - for src in phi_sources.args: - parents = var_to_parents[src] - if loc_src in parents: - to_keep.discard(src) - modified = True - assert to_keep - if len(to_keep) == 1: - out[dst] = to_keep.pop() - else: - out[dst] = ExprOp('Phi', *to_keep) - assignblk = AssignBlock(out, assignblks[0].instr) - assignblks[0] = assignblk - new_irblock = IRBlock(loc_dst, assignblks) - blocks[block.loc_key] = new_irblock - - for loc_key, block in viewitems(blocks): - ircfg.blocks[loc_key] = block - return modified - - -def del_unused_edges(ircfg, heads): - """ - Delete non accessible edges in the @ircfg graph. 
- @ircfg: IRCFG instance in ssa form - @heads: location of the heads of the graph - """ - - deleted_vars = set() - modified = False - edges_to_del_1 = set() - for node in ircfg.nodes(): - successors = set(ircfg.successors(node)) - block = ircfg.blocks[node] - dst = block.dst - possible_dsts = set(solution.value for solution in possible_values(dst)) - if not all(dst.is_loc() for dst in possible_dsts): - continue - possible_dsts = set(dst.loc_key for dst in possible_dsts) - if len(possible_dsts) == len(successors): - continue - dsts_to_del = successors.difference(possible_dsts) - for dst in dsts_to_del: - edges_to_del_1.add((node, dst)) - - # Remove edges and update phi accordingly - # Two cases here: - # - edge is directly linked to a phi node - # - edge is indirect linked to a phi node - nodes_to_del, edges_to_del_2 = get_unreachable_nodes(ircfg, edges_to_del_1, heads) - modified |= update_phi_with_deleted_edges(ircfg, edges_to_del_1.union(edges_to_del_2)) - - for src, dst in edges_to_del_1.union(edges_to_del_2): - ircfg.del_edge(src, dst) - for node in nodes_to_del: - block = ircfg.blocks[node] - ircfg.del_node(node) - for assignblock in block: - for dst in assignblock: - deleted_vars.add(dst) - - if deleted_vars: - modified |= discard_phi_sources(ircfg, deleted_vars) - - return modified - - -class DiGraphLivenessSSA(DiGraphLivenessIRA): - """ - DiGraph representing variable liveness is a SSA graph - """ - def __init__(self, ircfg): - super(DiGraphLivenessSSA, self).__init__(ircfg) - - self.loc_key_to_phi_parents = {} - for irblock in viewvalues(self.blocks): - if not irblock_has_phi(irblock): - continue - out = {} - for sources in viewvalues(irblock[0]): - var_to_parents = get_phi_sources_parent_block(self, irblock.loc_key, sources.args) - for var, var_parents in viewitems(var_to_parents): - out.setdefault(var, set()).update(var_parents) - self.loc_key_to_phi_parents[irblock.loc_key] = out - - def back_propagate_to_parent(self, todo, node, parent): - parent_block 
= self.blocks[parent] - cur_block = self.blocks[node] - irblock = self.ircfg.blocks[node] - if cur_block.infos[0].var_in == parent_block.infos[-1].var_out: - return - var_info = cur_block.infos[0].var_in.union(parent_block.infos[-1].var_out) - - if irblock_has_phi(irblock): - # Remove phi special case - out = set() - phi_sources = self.loc_key_to_phi_parents[irblock.loc_key] - for var in var_info: - if var not in phi_sources: - out.add(var) - continue - if parent in phi_sources[var]: - out.add(var) - var_info = out - - parent_block.infos[-1].var_out = var_info - todo.add(parent) diff --git a/miasm2/analysis/debugging.py b/miasm2/analysis/debugging.py deleted file mode 100644 index 824b62ce..00000000 --- a/miasm2/analysis/debugging.py +++ /dev/null @@ -1,499 +0,0 @@ -from __future__ import print_function -from builtins import map -from builtins import range -import cmd -from future.utils import viewitems - -from miasm2.core.utils import hexdump -from miasm2.core.interval import interval -import miasm2.jitter.csts as csts -from miasm2.jitter.jitload import ExceptionHandle - - -class DebugBreakpoint(object): - - "Debug Breakpoint parent class" - pass - - -class DebugBreakpointSoft(DebugBreakpoint): - - "Stand for software breakpoint" - - def __init__(self, addr): - self.addr = addr - - def __str__(self): - return "Soft BP @0x%08x" % self.addr - - -class DebugBreakpointTerminate(DebugBreakpoint): - "Stand for an execution termination" - - def __init__(self, status): - self.status = status - - def __str__(self): - return "Terminate with %s" % self.status - - -class DebugBreakpointMemory(DebugBreakpoint): - - "Stand for memory breakpoint" - - type2str = {csts.BREAKPOINT_READ: "R", - csts.BREAKPOINT_WRITE: "W"} - - def __init__(self, addr, size, access_type): - self.addr = addr - self.access_type = access_type - self.size = size - - def __str__(self): - bp_type = "" - for k, v in viewitems(self.type2str): - if k & self.access_type != 0: - bp_type += v - return "Memory BP 
@0x%08x, Size 0x%08x, Type %s" % ( - self.addr, - self.size, - bp_type - ) - - @classmethod - def get_access_type(cls, read=False, write=False): - value = 0 - for k, v in viewitems(cls.type2str): - if v == "R" and read is True: - value += k - if v == "W" and write is True: - value += k - return value - - -class Debugguer(object): - - "Debugguer linked with a Jitter instance" - - def __init__(self, myjit): - "myjit : jitter instance" - self.myjit = myjit - self.bp_list = [] # DebugBreakpointSoft list - self.hw_bp_list = [] # DebugBreakpointHard list - self.mem_watched = [] # Memory areas watched - - def init_run(self, addr): - self.myjit.init_run(addr) - - def add_breakpoint(self, addr): - "Add bp @addr" - bp = DebugBreakpointSoft(addr) - func = lambda x: bp - bp.func = func - self.bp_list.append(bp) - self.myjit.add_breakpoint(addr, func) - - def init_memory_breakpoint(self): - "Set exception handler on EXCEPT_BREAKPOINT_MEMORY" - raise NotImplementedError("Not implemented") - - def add_memory_breakpoint(self, addr, size, read=False, write=False): - "add mem bp @[addr, addr + size], on read/write/both" - access_type = DebugBreakpointMemory.get_access_type(read=read, - write=write) - dbm = DebugBreakpointMemory(addr, size, access_type) - self.hw_bp_list.append(dbm) - self.myjit.vm.add_memory_breakpoint(addr, size, access_type) - - def remove_breakpoint(self, dbs): - "remove the DebugBreakpointSoft instance" - self.bp_list.remove(dbs) - self.myjit.remove_breakpoints_by_callback(dbs.func) - - def remove_breakpoint_by_addr(self, addr): - "remove breakpoints @ addr" - for bp in self.get_breakpoint_by_addr(addr): - self.remove_breakpoint(bp) - - def remove_memory_breakpoint(self, dbm): - "remove the DebugBreakpointMemory instance" - self.hw_bp_list.remove(dbm) - self.myjit.vm.remove_memory_breakpoint(dbm.addr, dbm.access_type) - - def remove_memory_breakpoint_by_addr_access(self, addr, read=False, - write=False): - "remove breakpoints @ addr" - access_type = 
DebugBreakpointMemory.get_access_type(read=read, - write=write) - for bp in self.hw_bp_list: - if bp.addr == addr and bp.access_type == access_type: - self.remove_memory_breakpoint(bp) - - def get_breakpoint_by_addr(self, addr): - ret = [] - for dbgsoft in self.bp_list: - if dbgsoft.addr == addr: - ret.append(dbgsoft) - return ret - - def get_breakpoints(self): - return self.bp_list - - def active_trace(self, mn=None, regs=None, newbloc=None): - if mn is not None: - self.myjit.jit.log_mn = mn - if regs is not None: - self.myjit.jit.log_regs = regs - if newbloc is not None: - self.myjit.jit.log_newbloc = newbloc - - def handle_exception(self, res): - if not res: - # A breakpoint has stopped the execution - return DebugBreakpointTerminate(res) - - if isinstance(res, DebugBreakpointSoft): - print("Breakpoint reached @0x%08x" % res.addr) - elif isinstance(res, ExceptionHandle): - if res == ExceptionHandle.memoryBreakpoint(): - print("Memory breakpoint reached!") - - # Remove flag - except_flag = self.myjit.vm.get_exception() - self.myjit.vm.set_exception(except_flag ^ res.except_flag) - - else: - raise NotImplementedError("Unknown Except") - else: - raise NotImplementedError("type res") - - # Repropagate res - return res - - def step(self): - "Step in jit" - - self.myjit.jit.set_options(jit_maxline=1) - # Reset all jitted blocks - self.myjit.jit.clear_jitted_blocks() - - res = self.myjit.continue_run(step=True) - self.handle_exception(res) - - self.myjit.jit.set_options(jit_maxline=50) - self.on_step() - - return res - - def run(self): - status = self.myjit.continue_run() - return self.handle_exception(status) - - def get_mem(self, addr, size=0xF): - "hexdump @addr, size" - - hexdump(self.myjit.vm.get_mem(addr, size)) - - def get_mem_raw(self, addr, size=0xF): - "hexdump @addr, size" - return self.myjit.vm.get_mem(addr, size) - - def watch_mem(self, addr, size=0xF): - self.mem_watched.append((addr, size)) - - def on_step(self): - for addr, size in self.mem_watched: - 
print("@0x%08x:" % addr) - self.get_mem(addr, size) - - def get_reg_value(self, reg_name): - return getattr(self.myjit.cpu, reg_name) - - def set_reg_value(self, reg_name, value): - - # Handle PC case - if reg_name == self.myjit.ir_arch.pc.name: - self.init_run(value) - - setattr(self.myjit.cpu, reg_name, value) - - def get_gpreg_all(self): - "Return general purposes registers" - return self.myjit.cpu.get_gpreg() - - -class DebugCmd(cmd.Cmd, object): - - "CommandLineInterpreter for Debugguer instance" - - color_g = '\033[92m' - color_e = '\033[0m' - color_b = '\033[94m' - color_r = '\033[91m' - - intro = color_g + "=== Miasm2 Debugging shell ===\nIf you need help, " - intro += "type 'help' or '?'" + color_e - prompt = color_b + "$> " + color_e - - def __init__(self, dbg): - "dbg : Debugguer" - self.dbg = dbg - super(DebugCmd, self).__init__() - - # Debug methods - - def print_breakpoints(self): - bp_list = self.dbg.bp_list - if len(bp_list) == 0: - print("No breakpoints.") - else: - for i, b in enumerate(bp_list): - print("%d\t0x%08x" % (i, b.addr)) - - def print_watchmems(self): - watch_list = self.dbg.mem_watched - if len(watch_list) == 0: - print("No memory watchpoints.") - else: - print("Num\tAddress \tSize") - for i, w in enumerate(watch_list): - addr, size = w - print("%d\t0x%08x\t0x%08x" % (i, addr, size)) - - def print_registers(self): - regs = self.dbg.get_gpreg_all() - - # Display settings - title1 = "Registers" - title2 = "Values" - max_name_len = max(map(len, list(regs) + [title1])) - - # Print value table - s = "%s%s | %s" % ( - title1, " " * (max_name_len - len(title1)), title2) - print(s) - print("-" * len(s)) - for name, value in sorted(viewitems(regs), key=lambda x: x[0]): - print( - "%s%s | %s" % ( - name, - " " * (max_name_len - len(name)), - hex(value).replace("L", "") - ) - ) - - def add_breakpoints(self, bp_addr): - for addr in bp_addr: - addr = int(addr, 0) - - good = True - for i, dbg_obj in enumerate(self.dbg.bp_list): - if dbg_obj.addr == 
addr: - good = False - break - if good is False: - print("Breakpoint 0x%08x already set (%d)" % (addr, i)) - else: - l = len(self.dbg.bp_list) - self.dbg.add_breakpoint(addr) - print("Breakpoint 0x%08x successfully added ! (%d)" % (addr, l)) - - display_mode = { - "mn": None, - "regs": None, - "newbloc": None - } - - def update_display_mode(self): - self.display_mode = { - "mn": self.dbg.myjit.jit.log_mn, - "regs": self.dbg.myjit.jit.log_regs, - "newbloc": self.dbg.myjit.jit.log_newbloc - } - - # Command line methods - def print_warning(self, s): - print(self.color_r + s + self.color_e) - - def onecmd(self, line): - cmd_translate = { - "h": "help", - "q": "exit", - "e": "exit", - "!": "exec", - "r": "run", - "i": "info", - "b": "breakpoint", - "s": "step", - "d": "dump" - } - - if len(line) >= 2 and \ - line[1] == " " and \ - line[:1] in cmd_translate: - line = cmd_translate[line[:1]] + line[1:] - - if len(line) == 1 and line in cmd_translate: - line = cmd_translate[line] - - r = super(DebugCmd, self).onecmd(line) - return r - - def can_exit(self): - return True - - def do_display(self, arg): - if arg == "": - self.help_display() - return - - args = arg.split(" ") - if args[-1].lower() not in ["on", "off"]: - self.print_warning("/!\ %s not in 'on' / 'off'" % args[-1]) - return - mode = args[-1].lower() == "on" - d = {} - for a in args[:-1]: - d[a] = mode - self.dbg.active_trace(**d) - self.update_display_mode() - - def help_display(self): - print("Enable/Disable tracing.") - print("Usage: display ... 
on|off") - print("Available modes are:") - for k in self.display_mode: - print("\t%s" % k) - print("Use 'info display' to get current values") - - def do_watchmem(self, arg): - if arg == "": - self.help_watchmem() - return - - args = arg.split(" ") - if len(args) >= 2: - size = int(args[1], 0) - else: - size = 0xF - - addr = int(args[0], 0) - - self.dbg.watch_mem(addr, size) - - def help_watchmem(self): - print("Add a memory watcher.") - print("Usage: watchmem [size]") - print("Use 'info watchmem' to get current memory watchers") - - def do_info(self, arg): - av_info = [ - "registers", - "display", - "breakpoints", - "watchmem" - ] - - if arg == "": - print("'info' must be followed by the name of an info command.") - print("List of info subcommands:") - for k in av_info: - print("\t%s" % k) - - if arg.startswith("b"): - # Breakpoint - self.print_breakpoints() - - if arg.startswith("d"): - # Display - self.update_display_mode() - for k, v in viewitems(self.display_mode): - print("%s\t\t%s" % (k, v)) - - if arg.startswith("w"): - # Watchmem - self.print_watchmems() - - if arg.startswith("r"): - # Registers - self.print_registers() - - def help_info(self): - print("Generic command for showing things about the program being") - print("debugged. 
Use 'info' without arguments to get the list of") - print("available subcommands.") - - def do_breakpoint(self, arg): - if arg == "": - self.help_breakpoint() - else: - addrs = arg.split(" ") - self.add_breakpoints(addrs) - - def help_breakpoint(self): - print("Add breakpoints to argument addresses.") - print("Example:") - print("\tbreakpoint 0x11223344") - print("\tbreakpoint 1122 0xabcd") - - def do_step(self, arg): - if arg == "": - nb = 1 - else: - nb = int(arg) - for _ in range(nb): - self.dbg.step() - - def help_step(self): - print("Step program until it reaches a different source line.") - print("Argument N means do this N times (or till program stops") - print("for another reason).") - - def do_dump(self, arg): - if arg == "": - self.help_dump() - else: - args = arg.split(" ") - if len(args) >= 2: - size = int(args[1], 0) - else: - size = 0xF - addr = int(args[0], 0) - - self.dbg.get_mem(addr, size) - - def help_dump(self): - print("Dump [size]. Dump size bytes at addr.") - - def do_run(self, _): - self.dbg.run() - - def help_run(self): - print("Launch or continue the current program") - - def do_exit(self, _): - return True - - def do_exec(self, line): - try: - print(eval(line)) - except Exception as error: - print("*** Error: %s" % error) - - def help_exec(self): - print("Exec a python command.") - print("You can also use '!' 
shortcut.") - - def help_exit(self): - print("Exit the interpreter.") - print("You can also use the Ctrl-D shortcut.") - - def help_help(self): - print("Print help") - - def postloop(self): - print('\nGoodbye !') - super(DebugCmd, self).postloop() - - do_EOF = do_exit - help_EOF = help_exit diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py deleted file mode 100644 index 4bfae67f..00000000 --- a/miasm2/analysis/depgraph.py +++ /dev/null @@ -1,651 +0,0 @@ -"""Provide dependency graph""" - -from functools import total_ordering - -from future.utils import viewitems - -from miasm2.expression.expression import ExprInt, ExprLoc, ExprAssign -from miasm2.core.graph import DiGraph -from miasm2.core.locationdb import LocationDB -from miasm2.expression.simplifications import expr_simp_explicit -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.translators import Translator -from miasm2.expression.expression_helper import possible_values - -try: - import z3 -except ImportError: - pass - -@total_ordering -class DependencyNode(object): - - """Node elements of a DependencyGraph - - A dependency node stands for the dependency on the @element at line number - @line_nb in the IRblock named @loc_key, *before* the evaluation of this - line. 
- """ - - __slots__ = ["_loc_key", "_element", "_line_nb", "_hash"] - - def __init__(self, loc_key, element, line_nb): - """Create a dependency node with: - @loc_key: LocKey instance - @element: Expr instance - @line_nb: int - """ - self._loc_key = loc_key - self._element = element - self._line_nb = line_nb - self._hash = hash( - (self._loc_key, self._element, self._line_nb)) - - def __hash__(self): - """Returns a hash of @self to uniquely identify @self""" - return self._hash - - def __eq__(self, depnode): - """Returns True if @self and @depnode are equals.""" - if not isinstance(depnode, self.__class__): - return False - return (self.loc_key == depnode.loc_key and - self.element == depnode.element and - self.line_nb == depnode.line_nb) - - def __ne__(self, depnode): - # required Python 2.7.14 - return not self == depnode - - def __lt__(self, node): - """Compares @self with @node.""" - if not isinstance(node, self.__class__): - return NotImplemented - - return ((self.loc_key, self.element, self.line_nb) < - (node.loc_key, node.element, node.line_nb)) - - def __str__(self): - """Returns a string representation of DependencyNode""" - return "<%s %s %s %s>" % (self.__class__.__name__, - self.loc_key, self.element, - self.line_nb) - - def __repr__(self): - """Returns a string representation of DependencyNode""" - return self.__str__() - - @property - def loc_key(self): - "Name of the current IRBlock" - return self._loc_key - - @property - def element(self): - "Current tracked Expr" - return self._element - - @property - def line_nb(self): - "Line in the current IRBlock" - return self._line_nb - - -class DependencyState(object): - - """ - Store intermediate depnodes states during dependencygraph analysis - """ - - def __init__(self, loc_key, pending, line_nb=None): - self.loc_key = loc_key - self.history = [loc_key] - self.pending = {k: set(v) for k, v in viewitems(pending)} - self.line_nb = line_nb - self.links = set() - - # Init lazy elements - self._graph = None - - 
def __repr__(self): - return "" % ( - self.loc_key, - self.pending, - self.links - ) - - def extend(self, loc_key): - """Return a copy of itself, with itself in history - @loc_key: LocKey instance for the new DependencyState's loc_key - """ - new_state = self.__class__(loc_key, self.pending) - new_state.links = set(self.links) - new_state.history = self.history + [loc_key] - return new_state - - def get_done_state(self): - """Returns immutable object representing current state""" - return (self.loc_key, frozenset(self.links)) - - def as_graph(self): - """Generates a Digraph of dependencies""" - graph = DiGraph() - for node_a, node_b in self.links: - if not node_b: - graph.add_node(node_a) - else: - graph.add_edge(node_a, node_b) - for parent, sons in viewitems(self.pending): - for son in sons: - graph.add_edge(parent, son) - return graph - - @property - def graph(self): - """Returns a DiGraph instance representing the DependencyGraph""" - if self._graph is None: - self._graph = self.as_graph() - return self._graph - - def remove_pendings(self, nodes): - """Remove resolved @nodes""" - for node in nodes: - del self.pending[node] - - def add_pendings(self, future_pending): - """Add @future_pending to the state""" - for node, depnodes in viewitems(future_pending): - if node not in self.pending: - self.pending[node] = depnodes - else: - self.pending[node].update(depnodes) - - def link_element(self, element, line_nb): - """Link element to its dependencies - @element: the element to link - @line_nb: the element's line - """ - - depnode = DependencyNode(self.loc_key, element, line_nb) - if not self.pending[element]: - # Create start node - self.links.add((depnode, None)) - else: - # Link element to its known dependencies - for node_son in self.pending[element]: - self.links.add((depnode, node_son)) - - def link_dependencies(self, element, line_nb, dependencies, - future_pending): - """Link unfollowed dependencies and create remaining pending elements. 
- @element: the element to link - @line_nb: the element's line - @dependencies: the element's dependencies - @future_pending: the future dependencies - """ - - depnode = DependencyNode(self.loc_key, element, line_nb) - - # Update pending, add link to unfollowed nodes - for dependency in dependencies: - if not dependency.follow: - # Add non followed dependencies to the dependency graph - parent = DependencyNode( - self.loc_key, dependency.element, line_nb) - self.links.add((parent, depnode)) - continue - # Create future pending between new dependency and the current - # element - future_pending.setdefault(dependency.element, set()).add(depnode) - - -class DependencyResult(DependencyState): - - """Container and methods for DependencyGraph results""" - - def __init__(self, ircfg, initial_state, state, inputs): - - super(DependencyResult, self).__init__(state.loc_key, state.pending) - self.initial_state = initial_state - self.history = state.history - self.pending = state.pending - self.line_nb = state.line_nb - self.inputs = inputs - self.links = state.links - self._ircfg = ircfg - - # Init lazy elements - self._has_loop = None - - @property - def unresolved(self): - """Set of nodes whose dependencies weren't found""" - return set(element for element in self.pending - if element != self._ircfg.IRDst) - - @property - def relevant_nodes(self): - """Set of nodes directly and indirectly influencing inputs""" - output = set() - for node_a, node_b in self.links: - output.add(node_a) - if node_b is not None: - output.add(node_b) - return output - - @property - def relevant_loc_keys(self): - """List of loc_keys containing nodes influencing inputs. 
- The history order is preserved.""" - # Get used loc_keys - used_loc_keys = set(depnode.loc_key for depnode in self.relevant_nodes) - - # Keep history order - output = [] - for loc_key in self.history: - if loc_key in used_loc_keys: - output.append(loc_key) - - return output - - @property - def has_loop(self): - """True iff there is at least one data dependencies cycle (regarding - the associated depgraph)""" - if self._has_loop is None: - self._has_loop = self.graph.has_loop() - return self._has_loop - - def irblock_slice(self, irb, max_line=None): - """Slice of the dependency nodes on the irblock @irb - @irb: irbloc instance - """ - - assignblks = [] - line2elements = {} - for depnode in self.relevant_nodes: - if depnode.loc_key != irb.loc_key: - continue - line2elements.setdefault(depnode.line_nb, - set()).add(depnode.element) - - for line_nb, elements in sorted(viewitems(line2elements)): - if max_line is not None and line_nb >= max_line: - break - assignmnts = {} - for element in elements: - if element in irb[line_nb]: - # constants, loc_key, ... are not in destination - assignmnts[element] = irb[line_nb][element] - assignblks.append(AssignBlock(assignmnts)) - - return IRBlock(irb.loc_key, assignblks) - - def emul(self, ir_arch, ctx=None, step=False): - """Symbolic execution of relevant nodes according to the history - Return the values of inputs nodes' elements - @ir_arch: IntermediateRepresentation instance - @ctx: (optional) Initial context as dictionary - @step: (optional) Verbose execution - Warning: The emulation is not sound if the inputs nodes depend on loop - variant. 
- """ - # Init - ctx_init = {} - if ctx is not None: - ctx_init.update(ctx) - assignblks = [] - - # Build a single assignment block according to history - last_index = len(self.relevant_loc_keys) - for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1): - if index == last_index and loc_key == self.initial_state.loc_key: - line_nb = self.initial_state.line_nb - else: - line_nb = None - assignblks += self.irblock_slice(self._ircfg.blocks[loc_key], - line_nb).assignblks - - # Eval the block - loc_db = LocationDB() - temp_loc = loc_db.get_or_create_name_location("Temp") - symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) - symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) - - # Return only inputs values (others could be wrongs) - return {element: symb_exec.symbols[element] - for element in self.inputs} - - -class DependencyResultImplicit(DependencyResult): - - """Stand for a result of a DependencyGraph with implicit option - - Provide path constraints using the z3 solver""" - # Z3 Solver instance - _solver = None - - unsat_expr = ExprAssign(ExprInt(0, 1), ExprInt(1, 1)) - - def _gen_path_constraints(self, translator, expr, expected): - """Generate path constraint from @expr. 
Handle special case with - generated loc_keys - """ - out = [] - expected = self._ircfg.loc_db.canonize_to_exprloc(expected) - expected_is_loc_key = expected.is_loc() - for consval in possible_values(expr): - value = self._ircfg.loc_db.canonize_to_exprloc(consval.value) - if expected_is_loc_key and value != expected: - continue - if not expected_is_loc_key and value.is_loc_key(): - continue - - conds = z3.And(*[translator.from_expr(cond.to_constraint()) - for cond in consval.constraints]) - if expected != value: - conds = z3.And( - conds, - translator.from_expr( - ExprAssign(value, - expected)) - ) - out.append(conds) - - if out: - conds = z3.Or(*out) - else: - # Ex: expr: lblgen1, expected: 0x1234 - # -> Avoid unconsistent solution lblgen1 = 0x1234 - conds = translator.from_expr(self.unsat_expr) - return conds - - def emul(self, ir_arch, ctx=None, step=False): - # Init - ctx_init = {} - if ctx is not None: - ctx_init.update(ctx) - solver = z3.Solver() - symb_exec = SymbolicExecutionEngine(ir_arch, ctx_init) - history = self.history[::-1] - history_size = len(history) - translator = Translator.to_language("z3") - size = self._ircfg.IRDst.size - - for hist_nb, loc_key in enumerate(history, 1): - if hist_nb == history_size and loc_key == self.initial_state.loc_key: - line_nb = self.initial_state.line_nb - else: - line_nb = None - irb = self.irblock_slice(self._ircfg.blocks[loc_key], line_nb) - - # Emul the block and get back destination - dst = symb_exec.eval_updt_irblock(irb, step=step) - - # Add constraint - if hist_nb < history_size: - next_loc_key = history[hist_nb] - expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size)) - solver.add(self._gen_path_constraints(translator, dst, expected)) - # Save the solver - self._solver = solver - - # Return only inputs values (others could be wrongs) - return { - element: symb_exec.eval_expr(element) - for element in self.inputs - } - - @property - def is_satisfiable(self): - """Return True iff the solution path admits 
at least one solution - PRE: 'emul' - """ - return self._solver.check() == z3.sat - - @property - def constraints(self): - """If satisfiable, return a valid solution as a Z3 Model instance""" - if not self.is_satisfiable: - raise ValueError("Unsatisfiable") - return self._solver.model() - - -class FollowExpr(object): - - "Stand for an element (expression, depnode, ...) to follow or not" - __slots__ = ["follow", "element"] - - def __init__(self, follow, element): - self.follow = follow - self.element = element - - def __repr__(self): - return '%s(%r, %r)' % (self.__class__.__name__, self.follow, self.element) - - @staticmethod - def to_depnodes(follow_exprs, loc_key, line): - """Build a set of FollowExpr(DependencyNode) from the @follow_exprs set - of FollowExpr - @follow_exprs: set of FollowExpr - @loc_key: LocKey instance - @line: integer - """ - dependencies = set() - for follow_expr in follow_exprs: - dependencies.add(FollowExpr(follow_expr.follow, - DependencyNode(loc_key, - follow_expr.element, - line))) - return dependencies - - @staticmethod - def extract_depnodes(follow_exprs, only_follow=False): - """Extract depnodes from a set of FollowExpr(Depnodes) - @only_follow: (optional) extract only elements to follow""" - return set(follow_expr.element - for follow_expr in follow_exprs - if not(only_follow) or follow_expr.follow) - - -class DependencyGraph(object): - - """Implementation of a dependency graph - - A dependency graph contains DependencyNode as nodes. The oriented edges - stand for a dependency. - The dependency graph is made of the lines of a group of IRblock - *explicitly* or *implicitly* involved in the equation of given element. 
- """ - - def __init__(self, ircfg, - implicit=False, apply_simp=True, follow_mem=True, - follow_call=True): - """Create a DependencyGraph linked to @ircfg - - @ircfg: IRCFG instance - @implicit: (optional) Track IRDst for each block in the resulting path - - Following arguments define filters used to generate dependencies - @apply_simp: (optional) Apply expr_simp_explicit - @follow_mem: (optional) Track memory syntactically - @follow_call: (optional) Track through "call" - """ - # Init - self._ircfg = ircfg - self._implicit = implicit - - # Create callback filters. The order is relevant. - self._cb_follow = [] - if apply_simp: - self._cb_follow.append(self._follow_simp_expr) - self._cb_follow.append(lambda exprs: self._follow_exprs(exprs, - follow_mem, - follow_call)) - self._cb_follow.append(self._follow_no_loc_key) - - @staticmethod - def _follow_simp_expr(exprs): - """Simplify expression so avoid tracking useless elements, - as: XOR EAX, EAX - """ - follow = set() - for expr in exprs: - follow.add(expr_simp_explicit(expr)) - return follow, set() - - @staticmethod - def get_expr(expr, follow, nofollow): - """Update @follow/@nofollow according to insteresting nodes - Returns same expression (non modifier visitor). - - @expr: expression to handle - @follow: set of nodes to follow - @nofollow: set of nodes not to follow - """ - if expr.is_id(): - follow.add(expr) - elif expr.is_int(): - nofollow.add(expr) - elif expr.is_mem(): - follow.add(expr) - return expr - - @staticmethod - def follow_expr(expr, _, nofollow, follow_mem=False, follow_call=False): - """Returns True if we must visit sub expressions. 
- @expr: expression to browse - @follow: set of nodes to follow - @nofollow: set of nodes not to follow - @follow_mem: force the visit of memory sub expressions - @follow_call: force the visit of call sub expressions - """ - if not follow_mem and expr.is_mem(): - nofollow.add(expr) - return False - if not follow_call and expr.is_function_call(): - nofollow.add(expr) - return False - return True - - @classmethod - def _follow_exprs(cls, exprs, follow_mem=False, follow_call=False): - """Extracts subnodes from exprs and returns followed/non followed - expressions according to @follow_mem/@follow_call - - """ - follow, nofollow = set(), set() - for expr in exprs: - expr.visit(lambda x: cls.get_expr(x, follow, nofollow), - lambda x: cls.follow_expr(x, follow, nofollow, - follow_mem, follow_call)) - return follow, nofollow - - @staticmethod - def _follow_no_loc_key(exprs): - """Do not follow loc_keys""" - follow = set() - for expr in exprs: - if expr.is_int() or expr.is_loc(): - continue - follow.add(expr) - - return follow, set() - - def _follow_apply_cb(self, expr): - """Apply callback functions to @expr - @expr : FollowExpr instance""" - follow = set([expr]) - nofollow = set() - - for callback in self._cb_follow: - follow, nofollow_tmp = callback(follow) - nofollow.update(nofollow_tmp) - - out = set(FollowExpr(True, expr) for expr in follow) - out.update(set(FollowExpr(False, expr) for expr in nofollow)) - return out - - def _track_exprs(self, state, assignblk, line_nb): - """Track pending expression in an assignblock""" - future_pending = {} - node_resolved = set() - for dst, src in viewitems(assignblk): - # Only track pending - if dst not in state.pending: - continue - # Track IRDst in implicit mode only - if dst == self._ircfg.IRDst and not self._implicit: - continue - assert dst not in node_resolved - node_resolved.add(dst) - dependencies = self._follow_apply_cb(src) - - state.link_element(dst, line_nb) - state.link_dependencies(dst, line_nb, - dependencies, 
future_pending) - - # Update pending nodes - state.remove_pendings(node_resolved) - state.add_pendings(future_pending) - - def _compute_intrablock(self, state): - """Follow dependencies tracked in @state in the current irbloc - @state: instance of DependencyState""" - - irb = self._ircfg.blocks[state.loc_key] - line_nb = len(irb) if state.line_nb is None else state.line_nb - - for cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): - self._track_exprs(state, assignblk, cur_line_nb) - - def get(self, loc_key, elements, line_nb, heads): - """Compute the dependencies of @elements at line number @line_nb in - the block named @loc_key in the current IRCFG, before the execution of - this line. Dependency check stop if one of @heads is reached - @loc_key: LocKey instance - @element: set of Expr instances - @line_nb: int - @heads: set of LocKey instances - Return an iterator on DiGraph(DependencyNode) - """ - # Init the algorithm - inputs = {element: set() for element in elements} - initial_state = DependencyState(loc_key, inputs, line_nb) - todo = set([initial_state]) - done = set() - dpResultcls = DependencyResultImplicit if self._implicit else DependencyResult - - while todo: - state = todo.pop() - self._compute_intrablock(state) - done_state = state.get_done_state() - if done_state in done: - continue - done.add(done_state) - if (not state.pending or - state.loc_key in heads or - not self._ircfg.predecessors(state.loc_key)): - yield dpResultcls(self._ircfg, initial_state, state, elements) - if not state.pending: - continue - - if self._implicit: - # Force IRDst to be tracked, except in the input block - state.pending[self._ircfg.IRDst] = set() - - # Propagate state to parents - for pred in self._ircfg.predecessors_iter(state.loc_key): - todo.add(state.extend(pred)) - - def get_from_depnodes(self, depnodes, heads): - """Alias for the get() method. Use the attributes of @depnodes as - argument. 
- PRE: Loc_Keys and lines of depnodes have to be equals - @depnodes: set of DependencyNode instances - @heads: set of LocKey instances - """ - lead = list(depnodes)[0] - elements = set(depnode.element for depnode in depnodes) - return self.get(lead.loc_key, elements, lead.line_nb, heads) diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py deleted file mode 100644 index 36e120b6..00000000 --- a/miasm2/analysis/disasm_cb.py +++ /dev/null @@ -1,128 +0,0 @@ -#-*- coding:utf-8 -*- - -from __future__ import print_function - -from future.utils import viewvalues - -from miasm2.expression.expression import ExprInt, ExprId, ExprMem, match_expr -from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmblock import AsmConstraintNext, AsmConstraintTo -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import upck32 - - -def get_ira(mnemo, attrib): - arch = mnemo.name, attrib - if arch == ("arm", "arm"): - from miasm2.arch.arm.ira import ir_a_arm_base as ira - elif arch == ("x86", 32): - from miasm2.arch.x86.ira import ir_a_x86_32 as ira - elif arch == ("x86", 64): - from miasm2.arch.x86.ira import ir_a_x86_64 as ira - else: - raise ValueError('unknown architecture: %s' % mnemo.name) - return ira - - -def arm_guess_subcall( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): - ira = get_ira(mnemo, attrib) - - sp = LocationDB() - ir_arch = ira(sp) - ircfg = ira.new_ircfg() - print('###') - print(cur_bloc) - ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - - to_add = set() - for irblock in viewvalues(ircfg.blocks): - pc_val = None - lr_val = None - for exprs in irblock: - for e in exprs: - if e.dst == ir_arch.pc: - pc_val = e.src - if e.dst == mnemo.regs.LR: - lr_val = e.src - if pc_val is None or lr_val is None: - continue - if not isinstance(lr_val, ExprInt): - continue - - l = cur_bloc.lines[-1] - if lr_val.arg != l.offset + l.l: - continue - l = loc_db.get_or_create_offset_location(int(lr_val)) - c = 
AsmConstraintNext(l) - - to_add.add(c) - offsets_to_dis.add(int(lr_val)) - - for c in to_add: - cur_bloc.addto(c) - - -def arm_guess_jump_table( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): - ira = get_ira(mnemo, attrib) - - jra = ExprId('jra') - jrb = ExprId('jrb') - - sp = LocationDB() - ir_arch = ira(sp) - ircfg = ira.new_ircfg() - ir_arch.add_asmblock_to_ircfg(cur_bloc, ircfg) - - for irblock in viewvalues(ircfg.blocks): - pc_val = None - for exprs in irblock: - for e in exprs: - if e.dst == ir_arch.pc: - pc_val = e.src - if pc_val is None: - continue - if not isinstance(pc_val, ExprMem): - continue - assert(pc_val.size == 32) - print(pc_val) - ad = pc_val.arg - ad = expr_simp(ad) - print(ad) - res = match_expr(ad, jra + jrb, set([jra, jrb])) - if res is False: - raise NotImplementedError('not fully functional') - print(res) - if not isinstance(res[jrb], ExprInt): - raise NotImplementedError('not fully functional') - base_ad = int(res[jrb]) - print(base_ad) - addrs = set() - i = -1 - max_table_entry = 10000 - max_diff_addr = 0x100000 # heuristic - while i < max_table_entry: - i += 1 - try: - ad = upck32(pool_bin.getbytes(base_ad + 4 * i, 4)) - except: - break - if abs(ad - base_ad) > max_diff_addr: - break - addrs.add(ad) - print([hex(x) for x in addrs]) - - for ad in addrs: - offsets_to_dis.add(ad) - l = loc_db.get_or_create_offset_location(ad) - c = AsmConstraintTo(l) - cur_bloc.addto(c) - -guess_funcs = [] - - -def guess_multi_cb( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): - for f in guess_funcs: - f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db) diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py deleted file mode 100644 index fee85984..00000000 --- a/miasm2/analysis/dse.py +++ /dev/null @@ -1,708 +0,0 @@ -"""Dynamic symbolic execution module. - -Offers a way to have a symbolic execution along a concrete one. 
-Basically, this is done through DSEEngine class, with scheme: - -dse = DSEEngine(Machine("x86_32")) -dse.attach(jitter) - -The DSE state can be updated through: - - - .update_state_from_concrete: update the values from the CPU, so the symbolic - execution will be completely concrete from this point (until changes) - - .update_state: inject information, for instance RAX = symbolic_RAX - - .symbolize_memory: symbolize (using .memory_to_expr) memory areas (ie, - reading from an address in one of these areas yield a symbol) - -The DSE run can be instrumented through: - - .add_handler: register an handler, modifying the state instead of the current - execution. Can be used for stubbing external API - - .add_lib_handler: register handlers for libraries - - .add_instrumentation: register an handler, modifying the state but continuing - the current execution. Can be used for logging facilities - - -On branch, if the decision is symbolic, one can also collect "path constraints" -and inverse them to produce new inputs potentially reaching new paths. - -Basically, this is done through DSEPathConstraint. In order to produce a new -solution, one can extend this class, and override 'handle_solution' to produce a -solution which fit its needs. It could avoid computing new solution by -overriding 'produce_solution'. - -If one is only interested in constraints associated to its path, the option -"produce_solution" should be set to False, to speed up emulation. -The constraints are accumulated in the .z3_cur z3.Solver object. - -Here are a few remainings TODO: - - handle endianness in check_state / atomic read: currently, but this is also - true for others Miasm2 symbolic engines, the endianness is not take in - account, and assumed to be Little Endian - - - too many memory dependencies in constraint tracking: in order to let z3 find - new solution, it does need information on memory values (for instance, a - lookup in a table with a symbolic index). 
The estimated possible involved - memory location could be too large to pass to the solver (threshold named - MAX_MEMORY_INJECT). One possible solution, not yet implemented, is to call - the solver for reducing the possible values thanks to its accumulated - constraints. -""" -from builtins import range -from collections import namedtuple - -try: - import z3 -except ImportError: - z3 = None - -from future.utils import viewitems - -from miasm2.core.utils import encode_hex, force_bytes -from miasm2.expression.expression import ExprMem, ExprInt, ExprCompose, \ - ExprAssign, ExprId, ExprLoc, LocKey -from miasm2.core.bin_stream import bin_stream_vm -from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec -from miasm2.expression.expression_helper import possible_values -from miasm2.ir.translators import Translator -from miasm2.analysis.expression_range import expr_range -from miasm2.analysis.modularintervals import ModularIntervals -from miasm2.core.locationdb import LocationDB - -DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) - -class DriftException(Exception): - """Raised when the emulation drift from the reference engine""" - - def __init__(self, info): - super(DriftException, self).__init__() - self.info = info - - def __str__(self): - if len(self.info) == 1: - return "Drift of %s: %s instead of %s" % ( - self.info[0].symbol, - self.info[0].computed, - self.info[0].expected, - ) - else: - return "Drift of:\n\t" + "\n\t".join("%s: %s instead of %s" % ( - dinfo.symbol, - dinfo.computed, - dinfo.expected) - for dinfo in self.info) - - -class ESETrackModif(EmulatedSymbExec): - """Extension of EmulatedSymbExec to be used by DSE engines - - Add the tracking of modified expressions, and the ability to symbolize - memory areas - """ - - def __init__(self, *args, **kwargs): - super(ESETrackModif, self).__init__(*args, **kwargs) - self.modified_expr = set() # Expr modified since the last reset - self.dse_memory_range = [] # List/Intervals of memory 
addresses to - # symbolize - self.dse_memory_to_expr = None # function(addr) -> Expr used to - # symbolize - - def mem_read(self, expr_mem): - if not expr_mem.ptr.is_int(): - return expr_mem - dst_addr = int(expr_mem.ptr) - - # Split access in atomic accesses - out = [] - for addr in range(dst_addr, dst_addr + expr_mem.size // 8): - if addr in self.dse_memory_range: - # Symbolize memory access - out.append(self.dse_memory_to_expr(addr)) - continue - atomic_access = ExprMem(ExprInt(addr, expr_mem.ptr.size), 8) - if atomic_access in self.symbols: - out.append( super(EmulatedSymbExec, self).mem_read(atomic_access)) - else: - # Get concrete value - atomic_access = ExprMem(ExprInt(addr, expr_mem.ptr.size), 8) - out.append(super(ESETrackModif, self).mem_read(atomic_access)) - - if len(out) == 1: - # Trivial case (optimization) - return out[0] - - # Simplify for constant merging (ex: {ExprInt(1, 8), ExprInt(2, 8)}) - return self.expr_simp(ExprCompose(*out)) - - def mem_write(self, expr, data): - # Call Symbolic mem_write (avoid side effects on vm) - return super(EmulatedSymbExec, self).mem_write(expr, data) - - def reset_modified(self): - """Reset modified expression tracker""" - self.modified_expr.clear() - - def apply_change(self, dst, src): - super(ESETrackModif, self).apply_change(dst, src) - self.modified_expr.add(dst) - - -class ESENoVMSideEffects(EmulatedSymbExec): - """ - Do EmulatedSymbExec without modifying memory - """ - def mem_write(self, expr, data): - return super(EmulatedSymbExec, self).mem_write(expr, data) - - -class DSEEngine(object): - """Dynamic Symbolic Execution Engine - - This class aims to be overridden for each specific purpose - """ - SYMB_ENGINE = ESETrackModif - - def __init__(self, machine): - self.machine = machine - self.loc_db = LocationDB() - self.handler = {} # addr -> callback(DSEEngine instance) - self.instrumentation = {} # addr -> callback(DSEEngine instance) - self.addr_to_cacheblocks = {} # addr -> {label -> IRBlock} - self.ir_arch 
= self.machine.ir(loc_db=self.loc_db) # corresponding IR - self.ircfg = self.ir_arch.new_ircfg() # corresponding IR - - # Defined after attachment - self.jitter = None # Jitload (concrete execution) - self.symb = None # SymbolicExecutionEngine - self.symb_concrete = None # Concrete SymbExec for path desambiguisation - self.mdis = None # DisasmEngine - - def prepare(self): - """Prepare the environment for attachment with a jitter""" - # Disassembler - self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), - lines_wd=1, - loc_db=self.loc_db) - - # Symbexec engine - ## Prepare symbexec engines - self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm, - self.ir_arch, {}) - self.symb.enable_emulated_simplifications() - self.symb_concrete = ESENoVMSideEffects( - self.jitter.cpu, self.jitter.vm, - self.ir_arch, {} - ) - - ## Update registers value - self.symb.symbols[self.ir_arch.IRDst] = ExprInt( - getattr(self.jitter.cpu, self.ir_arch.pc.name), - self.ir_arch.IRDst.size - ) - - # Activate callback on each instr - self.jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1) - self.jitter.exec_cb = self.callback - - # Clean jit cache to avoid multi-line basic blocks already jitted - self.jitter.jit.clear_jitted_blocks() - - def attach(self, emulator): - """Attach the DSE to @emulator - @emulator: jitload (or API equivalent) instance - - To attach *DURING A BREAKPOINT*, one may consider using the following snippet: - - def breakpoint(self, jitter): - ... - dse.attach(jitter) - dse.update... - ... - # Additional call to the exec callback is necessary, as breakpoints are - # honored AFTER exec callback - jitter.exec_cb(jitter) - - return True - - Without it, one may encounteer a DriftException error due to a - "desynchronization" between jitter and dse states. Indeed, on 'handle' - call, the jitter must be one instruction AFTER the dse. 
- """ - self.jitter = emulator - self.prepare() - - def handle(self, cur_addr): - r"""Handle destination - @cur_addr: Expr of the next address in concrete execution - /!\ cur_addr may be a loc_key - - In this method, self.symb is in the "just before branching" state - """ - pass - - def add_handler(self, addr, callback): - """Add a @callback for address @addr before any state update. - The state IS NOT updated after returning from the callback - @addr: int - @callback: func(dse instance)""" - self.handler[addr] = callback - - def add_lib_handler(self, libimp, namespace): - """Add search for handler based on a @libimp libimp instance - - Known functions will be looked by {name}_symb in the @namespace - """ - namespace = dict( - (force_bytes(name), func) for name, func in viewitems(namespace) - ) - - # lambda cannot contain statement - def default_func(dse): - fname = b"%s_symb" % libimp.fad2cname[dse.jitter.pc] - raise RuntimeError("Symbolic stub '%s' not found" % fname) - - for addr, fname in viewitems(libimp.fad2cname): - fname = force_bytes(fname) - fname = b"%s_symb" % fname - func = namespace.get(fname, None) - if func is not None: - self.add_handler(addr, func) - else: - self.add_handler(addr, default_func) - - def add_instrumentation(self, addr, callback): - """Add a @callback for address @addr before any state update. 
- The state IS updated after returning from the callback - @addr: int - @callback: func(dse instance)""" - self.instrumentation[addr] = callback - - def _check_state(self): - """Check the current state against the concrete one""" - errors = [] # List of DriftInfo - - for symbol in self.symb.modified_expr: - # Do not consider PC - if symbol in [self.ir_arch.pc, self.ir_arch.IRDst]: - continue - - # Consider only concrete values - symb_value = self.eval_expr(symbol) - if not symb_value.is_int(): - continue - symb_value = int(symb_value) - - # Check computed values against real ones - if symbol.is_id(): - if hasattr(self.jitter.cpu, symbol.name): - value = getattr(self.jitter.cpu, symbol.name) - if value != symb_value: - errors.append(DriftInfo(symbol, symb_value, value)) - elif symbol.is_mem() and symbol.ptr.is_int(): - value_chr = self.jitter.vm.get_mem( - int(symbol.ptr), - symbol.size // 8 - ) - exp_value = int(encode_hex(value_chr[::-1]), 16) - if exp_value != symb_value: - errors.append(DriftInfo(symbol, symb_value, exp_value)) - - # Check for drift, and act accordingly - if errors: - raise DriftException(errors) - - def callback(self, _): - """Called before each instruction""" - # Assert synchronization with concrete execution - self._check_state() - - # Call callbacks associated to the current address - cur_addr = self.jitter.pc - if isinstance(cur_addr, LocKey): - lbl = self.ir_arch.loc_db.loc_key_to_label(cur_addr) - cur_addr = lbl.offset - - if cur_addr in self.handler: - self.handler[cur_addr](self) - return True - - if cur_addr in self.instrumentation: - self.instrumentation[cur_addr](self) - - # Handle current address - self.handle(ExprInt(cur_addr, self.ir_arch.IRDst.size)) - - # Avoid memory issue in ExpressionSimplifier - if len(self.symb.expr_simp.simplified_exprs) > 100000: - self.symb.expr_simp.simplified_exprs.clear() - - # Get IR blocks - if cur_addr in self.addr_to_cacheblocks: - self.ircfg.blocks.clear() - 
self.ircfg.blocks.update(self.addr_to_cacheblocks[cur_addr]) - else: - - ## Reset cache structures - self.ircfg.blocks.clear()# = {} - - ## Update current state - asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_asmblock_to_ircfg(asm_block, self.ircfg) - self.addr_to_cacheblocks[cur_addr] = dict(self.ircfg.blocks) - - # Emulate the current instruction - self.symb.reset_modified() - - # Is the symbolic execution going (potentially) to jump on a lbl_gen? - if len(self.ircfg.blocks) == 1: - self.symb.run_at(self.ircfg, cur_addr) - else: - # Emulation could stuck in generated IR blocks - # But concrete execution callback is not enough precise to obtain - # the full IR blocks path - # -> Use a fully concrete execution to get back path - - # Update the concrete execution - self._update_state_from_concrete_symb( - self.symb_concrete, cpu=True, mem=True - ) - while True: - - next_addr_concrete = self.symb_concrete.run_block_at( - self.ircfg, cur_addr - ) - self.symb.run_block_at(self.ircfg, cur_addr) - - if not (isinstance(next_addr_concrete, ExprLoc) and - self.ir_arch.loc_db.get_location_offset( - next_addr_concrete.loc_key - ) is None): - # Not a lbl_gen, exit - break - - # Call handle with lbl_gen state - self.handle(next_addr_concrete) - cur_addr = next_addr_concrete - - - # At this stage, symbolic engine is one instruction after the concrete - # engine - - return True - - def _get_gpregs(self): - """Return a dict of regs: value from the jitter - This version use the regs associated to the attrib (!= cpu.get_gpreg()) - """ - out = {} - regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib] - for reg in regs: - if hasattr(self.jitter.cpu, reg.name): - out[reg.name] = getattr(self.jitter.cpu, reg.name) - return out - - def take_snapshot(self): - """Return a snapshot of the current state (including jitter state)""" - snapshot = { - "mem": self.jitter.vm.get_all_memory(), - "regs": self._get_gpregs(), - "symb": self.symb.symbols.copy(), - } - return 
snapshot - - def restore_snapshot(self, snapshot, memory=True): - """Restore a @snapshot taken with .take_snapshot - @snapshot: .take_snapshot output - @memory: (optional) if set, also restore the memory - """ - # Restore memory - if memory: - self.jitter.vm.reset_memory_page_pool() - self.jitter.vm.reset_code_bloc_pool() - for addr, metadata in viewitems(snapshot["mem"]): - self.jitter.vm.add_memory_page( - addr, - metadata["access"], - metadata["data"] - ) - - # Restore registers - self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name] - for reg, value in viewitems(snapshot["regs"]): - setattr(self.jitter.cpu, reg, value) - - # Reset intern elements - self.jitter.vm.set_exception(0) - self.jitter.cpu.set_exception(0) - self.jitter.bs._atomic_mode = False - - # Reset symb exec - for key, _ in list(viewitems(self.symb.symbols)): - del self.symb.symbols[key] - for expr, value in viewitems(snapshot["symb"]): - self.symb.symbols[expr] = value - - def update_state(self, assignblk): - """From this point, assume @assignblk in the symbolic execution - @assignblk: AssignBlock/{dst -> src} - """ - for dst, src in viewitems(assignblk): - self.symb.apply_change(dst, src) - - def _update_state_from_concrete_symb(self, symbexec, cpu=True, mem=False): - if mem: - # Values will be retrieved from the concrete execution if they are - # not present - symbexec.symbols.symbols_mem.base_to_memarray.clear() - if cpu: - regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib] - for reg in regs: - if hasattr(self.jitter.cpu, reg.name): - value = ExprInt(getattr(self.jitter.cpu, reg.name), - size=reg.size) - symbexec.symbols[reg] = value - - def update_state_from_concrete(self, cpu=True, mem=False): - r"""Update the symbolic state with concrete values from the concrete - engine - - @cpu: (optional) if set, update registers' value - @mem: (optional) if set, update memory value - - /!\ all current states will be loss. 
- This function is usually called when states are no more synchronized - (at the beginning, returning from an unstubbed syscall, ...) - """ - self._update_state_from_concrete_symb(self.symb, cpu, mem) - - def eval_expr(self, expr): - """Return the evaluation of @expr: - @expr: Expr instance""" - return self.symb.eval_expr(expr) - - @staticmethod - def memory_to_expr(addr): - """Translate an address to its corresponding symbolic ID (8bits) - @addr: int""" - return ExprId("MEM_0x%x" % int(addr), 8) - - def symbolize_memory(self, memory_range): - """Register a range of memory addresses to symbolize - @memory_range: object with support of __in__ operation (intervals, list, - ...) - """ - self.symb.dse_memory_range = memory_range - self.symb.dse_memory_to_expr = self.memory_to_expr - - -class DSEPathConstraint(DSEEngine): - """Dynamic Symbolic Execution Engine keeping the path constraint - - Possible new "solutions" are produced along the path, by inversing concrete - path constraint. Thus, a "solution" is a potential initial context leading - to a new path. - - In order to produce a new solution, one can extend this class, and override - 'handle_solution' to produce a solution which fit its needs. It could avoid - computing new solution by overriding 'produce_solution'. - - If one is only interested in constraints associated to its path, the option - "produce_solution" should be set to False, to speed up emulation. - The constraints are accumulated in the .z3_cur z3.Solver object. 
- - """ - - # Maximum memory size to inject in constraints solving - MAX_MEMORY_INJECT = 0x10000 - - # Produce solution strategies - PRODUCE_NO_SOLUTION = 0 - PRODUCE_SOLUTION_CODE_COV = 1 - PRODUCE_SOLUTION_BRANCH_COV = 2 - PRODUCE_SOLUTION_PATH_COV = 3 - - def __init__(self, machine, produce_solution=PRODUCE_SOLUTION_CODE_COV, - known_solutions=None, - **kwargs): - """Init a DSEPathConstraint - @machine: Machine of the targeted architecture instance - @produce_solution: (optional) if set, new solutions will be computed""" - super(DSEPathConstraint, self).__init__(machine, **kwargs) - - # Dependency check - assert z3 is not None - - # Init PathConstraint specifics structures - self.cur_solver = z3.Solver() - self.new_solutions = {} # solution identifier -> solution's model - self._known_solutions = set() # set of solution identifiers - self.z3_trans = Translator.to_language("z3") - self._produce_solution_strategy = produce_solution - self._previous_addr = None - self._history = None - if produce_solution == self.PRODUCE_SOLUTION_PATH_COV: - self._history = [] # List of addresses in the current path - - def take_snapshot(self, *args, **kwargs): - snap = super(DSEPathConstraint, self).take_snapshot(*args, **kwargs) - snap["new_solutions"] = { - dst: src.copy - for dst, src in viewitems(self.new_solutions) - } - snap["cur_constraints"] = self.cur_solver.assertions() - if self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: - snap["_history"] = list(self._history) - elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: - snap["_previous_addr"] = self._previous_addr - return snap - - def restore_snapshot(self, snapshot, keep_known_solutions=True, **kwargs): - """Restore a DSEPathConstraint snapshot - @keep_known_solutions: if set, do not forget solutions already found. 
- -> They will not appear in 'new_solutions' - """ - super(DSEPathConstraint, self).restore_snapshot(snapshot, **kwargs) - self.new_solutions.clear() - self.new_solutions.update(snapshot["new_solutions"]) - self.cur_solver = z3.Solver() - self.cur_solver.add(snapshot["cur_constraints"]) - if not keep_known_solutions: - self._known_solutions.clear() - if self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: - self._history = list(snapshot["_history"]) - elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: - self._previous_addr = snapshot["_previous_addr"] - - def _key_for_solution_strategy(self, destination): - """Return the associated identifier for the current solution strategy""" - if self._produce_solution_strategy == self.PRODUCE_NO_SOLUTION: - # Never produce a solution - return None - elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_CODE_COV: - # Decision based on code coverage - # -> produce a solution if the destination has never been seen - key = destination - - elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: - # Decision based on branch coverage - # -> produce a solution if the current branch has never been take - key = (self._previous_addr, destination) - - elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: - # Decision based on path coverage - # -> produce a solution if the current path has never been take - key = tuple(self._history + [destination]) - else: - raise ValueError("Unknown produce solution strategy") - - return key - - def produce_solution(self, destination): - """Called to determine if a solution for @destination should be test for - satisfiability and computed - @destination: Expr instance of the target @destination - """ - key = self._key_for_solution_strategy(destination) - if key is None: - return False - return key not in self._known_solutions - - def handle_solution(self, model, destination): - """Called when a new solution for destination 
@destination is founded - @model: z3 model instance - @destination: Expr instance for an addr which is not on the DSE path - """ - key = self._key_for_solution_strategy(destination) - assert key is not None - self.new_solutions[key] = model - self._known_solutions.add(key) - - def handle_correct_destination(self, destination, path_constraints): - """[DEV] Called by handle() to update internal structures giving the - correct destination (the concrete execution one). - """ - - # Update structure used by produce_solution() - if self._produce_solution_strategy == self.PRODUCE_SOLUTION_PATH_COV: - self._history.append(destination) - elif self._produce_solution_strategy == self.PRODUCE_SOLUTION_BRANCH_COV: - self._previous_addr = destination - - # Update current solver - for cons in path_constraints: - self.cur_solver.add(self.z3_trans.from_expr(cons)) - - def handle(self, cur_addr): - cur_addr = self.ir_arch.loc_db.canonize_to_exprloc(cur_addr) - symb_pc = self.eval_expr(self.ir_arch.IRDst) - possibilities = possible_values(symb_pc) - cur_path_constraint = set() # path_constraint for the concrete path - if len(possibilities) == 1: - dst = next(iter(possibilities)).value - dst = self.ir_arch.loc_db.canonize_to_exprloc(dst) - assert dst == cur_addr - else: - for possibility in possibilities: - target_addr = self.ir_arch.loc_db.canonize_to_exprloc( - possibility.value - ) - path_constraint = set() # Set of ExprAssign for the possible path - - # Get constraint associated to the possible path - memory_to_add = ModularIntervals(symb_pc.size) - for cons in possibility.constraints: - eaff = cons.to_constraint() - # eaff.get_r(mem_read=True) is not enough - # ExprAssign consider a Memory access in dst as a write - mem = eaff.dst.get_r(mem_read=True) - mem.update(eaff.src.get_r(mem_read=True)) - for expr in mem: - if expr.is_mem(): - addr_range = expr_range(expr.ptr) - # At upper bounds, add the size of the memory access - # if addr (- [a, b], then @size[addr] reachables - # 
values are in @8[a, b + size[ - for start, stop in addr_range: - stop += expr.size // 8 - 1 - full_range = ModularIntervals( - symb_pc.size, - [(start, stop)] - ) - memory_to_add.update(full_range) - path_constraint.add(eaff) - - if memory_to_add.length > self.MAX_MEMORY_INJECT: - # TODO re-croncretize the constraint or z3-try - raise RuntimeError("Not implemented: too long memory area") - - # Inject memory - for start, stop in memory_to_add: - for address in range(start, stop + 1): - expr_mem = ExprMem(ExprInt(address, - self.ir_arch.pc.size), - 8) - value = self.eval_expr(expr_mem) - if not value.is_int(): - raise TypeError("Rely on a symbolic memory case, " \ - "address 0x%x" % address) - path_constraint.add(ExprAssign(expr_mem, value)) - - if target_addr == cur_addr: - # Add path constraint - cur_path_constraint = path_constraint - - elif self.produce_solution(target_addr): - # Looking for a new solution - self.cur_solver.push() - for cons in path_constraint: - trans = self.z3_trans.from_expr(cons) - trans = z3.simplify(trans) - self.cur_solver.add(trans) - - result = self.cur_solver.check() - if result == z3.sat: - model = self.cur_solver.model() - self.handle_solution(model, target_addr) - self.cur_solver.pop() - - self.handle_correct_destination(cur_addr, cur_path_constraint) diff --git a/miasm2/analysis/expression_range.py b/miasm2/analysis/expression_range.py deleted file mode 100644 index 8f498549..00000000 --- a/miasm2/analysis/expression_range.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Naive range analysis for expression""" - -from future.builtins import zip -from functools import reduce - -from miasm2.analysis.modularintervals import ModularIntervals - -_op_range_handler = { - "+": lambda x, y: x + y, - "&": lambda x, y: x & y, - "|": lambda x, y: x | y, - "^": lambda x, y: x ^ y, - "*": lambda x, y: x * y, - "a>>": lambda x, y: x.arithmetic_shift_right(y), - "<<": lambda x, y: x << y, - ">>": lambda x, y: x >> y, - ">>>": lambda x, y: x.rotation_right(y), 
- "<<<": lambda x, y: x.rotation_left(y), -} - -def expr_range(expr): - """Return a ModularIntervals containing the range of possible values of - @expr""" - max_bound = (1 << expr.size) - 1 - if expr.is_int(): - return ModularIntervals(expr.size, [(int(expr), int(expr))]) - elif expr.is_id() or expr.is_mem(): - return ModularIntervals(expr.size, [(0, max_bound)]) - elif expr.is_slice(): - interval_mask = ((1 << expr.start) - 1) ^ ((1 << expr.stop) - 1) - arg = expr_range(expr.arg) - # Mask for possible range, and shift range - return ((arg & interval_mask) >> expr.start).size_update(expr.size) - elif expr.is_compose(): - sub_ranges = [expr_range(arg) for arg in expr.args] - args_idx = [info[0] for info in expr.iter_args()] - - # No shift for the first one - ret = sub_ranges[0].size_update(expr.size) - - # Doing it progressively (2 by 2) - for shift, sub_range in zip(args_idx[1:], sub_ranges[1:]): - ret |= sub_range.size_update(expr.size) << shift - return ret - elif expr.is_op(): - # A few operation are handled with care - # Otherwise, overapproximate (ie. 
full range interval) - if expr.op in _op_range_handler: - sub_ranges = [expr_range(arg) for arg in expr.args] - return reduce( - _op_range_handler[expr.op], - (sub_range for sub_range in sub_ranges[1:]), - sub_ranges[0] - ) - elif expr.op == "-": - assert len(expr.args) == 1 - return - expr_range(expr.args[0]) - elif expr.op == "%": - assert len(expr.args) == 2 - op, mod = [expr_range(arg) for arg in expr.args] - if mod.intervals.length == 1: - # Modulo intervals is not supported - return op % mod.intervals.hull()[0] - - # Operand not handled, return the full domain - return ModularIntervals(expr.size, [(0, max_bound)]) - elif expr.is_cond(): - return expr_range(expr.src1).union(expr_range(expr.src2)) - else: - raise TypeError("Unsupported type: %s" % expr.__class__) diff --git a/miasm2/analysis/gdbserver.py b/miasm2/analysis/gdbserver.py deleted file mode 100644 index 61ee8955..00000000 --- a/miasm2/analysis/gdbserver.py +++ /dev/null @@ -1,453 +0,0 @@ -#-*- coding:utf-8 -*- - -from __future__ import print_function -from future.builtins import map, range - -from miasm2.core.utils import decode_hex, encode_hex, int_to_byte - -import socket -import struct -import time -import logging -from io import BytesIO -import miasm2.analysis.debugging as debugging -from miasm2.jitter.jitload import ExceptionHandle - - -class GdbServer(object): - - "Debugguer binding for GDBServer protocol" - - general_registers_order = [] - general_registers_size = {} # RegName : Size in octet - status = b"S05" - - def __init__(self, dbg, port=4455): - server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server.bind(('localhost', port)) - server.listen(1) - self.server = server - self.dbg = dbg - - # Communication methods - - def compute_checksum(self, data): - return encode_hex(int_to_byte(sum(map(ord, data)) % 256)) - - def get_messages(self): - all_data = b"" - while True: - data = self.sock.recv(4096) - if not data: - 
break - all_data += data - - logging.debug("<- %r", all_data) - self.recv_queue += self.parse_messages(all_data) - - def parse_messages(self, data): - buf = BytesIO(data) - msgs = [] - - while (buf.tell() < buf.len): - token = buf.read(1) - if token == b"+": - continue - if token == b"-": - raise NotImplementedError("Resend packet") - if token == b"$": - packet_data = b"" - c = buf.read(1) - while c != b"#": - packet_data += c - c = buf.read(1) - checksum = buf.read(2) - if checksum != self.compute_checksum(packet_data): - raise ValueError("Incorrect checksum") - msgs.append(packet_data) - - return msgs - - def send_string(self, s): - self.send_queue.append(b"O" + encode_hex(s)) - - def process_messages(self): - - while self.recv_queue: - msg = self.recv_queue.pop(0) - buf = BytesIO(msg) - msg_type = buf.read(1) - - self.send_queue.append(b"+") - - if msg_type == b"q": - if msg.startswith(b"qSupported"): - self.send_queue.append(b"PacketSize=3fff") - elif msg.startswith(b"qC"): - # Current thread - self.send_queue.append(b"") - elif msg.startswith(b"qAttached"): - # Not supported - self.send_queue.append(b"") - elif msg.startswith(b"qTStatus"): - # Not supported - self.send_queue.append(b"") - elif msg.startswith(b"qfThreadInfo"): - # Not supported - self.send_queue.append(b"") - else: - raise NotImplementedError() - - elif msg_type == b"H": - # Set current thread - self.send_queue.append(b"OK") - - elif msg_type == b"?": - # Report why the target halted - self.send_queue.append(self.status) # TRAP signal - - elif msg_type == b"g": - # Report all general register values - self.send_queue.append(self.report_general_register_values()) - - elif msg_type == b"p": - # Read a specific register - reg_num = int(buf.read(), 16) - self.send_queue.append(self.read_register(reg_num)) - - elif msg_type == b"P": - # Set a specific register - reg_num, value = buf.read().split(b"=") - reg_num = int(reg_num, 16) - value = int(encode_hex(decode_hex(value)[::-1]), 16) - 
self.set_register(reg_num, value) - self.send_queue.append(b"OK") - - elif msg_type == b"m": - # Read memory - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - self.send_queue.append(self.read_memory(addr, size)) - - elif msg_type == b"k": - # Kill - self.sock.close() - self.send_queue = [] - self.sock = None - - elif msg_type == b"!": - # Extending debugging will be used - self.send_queue.append(b"OK") - - elif msg_type == b"v": - if msg == b"vCont?": - # Is vCont supported ? - self.send_queue.append(b"") - - elif msg_type == b"s": - # Step - self.dbg.step() - self.send_queue.append(b"S05") # TRAP signal - - elif msg_type == b"Z": - # Add breakpoint or watchpoint - bp_type = buf.read(1) - if bp_type == b"0": - # Exec breakpoint - assert(buf.read(1) == b",") - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - - if size != 1: - raise NotImplementedError("Bigger size") - self.dbg.add_breakpoint(addr) - self.send_queue.append(b"OK") - - elif bp_type == b"1": - # Hardware BP - assert(buf.read(1) == b",") - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - - self.dbg.add_memory_breakpoint( - addr, - size, - read=True, - write=True - ) - self.send_queue.append(b"OK") - - elif bp_type in [b"2", b"3", b"4"]: - # Memory breakpoint - assert(buf.read(1) == b",") - read = bp_type in [b"3", b"4"] - write = bp_type in [b"2", b"4"] - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - - self.dbg.add_memory_breakpoint( - addr, - size, - read=read, - write=write - ) - self.send_queue.append(b"OK") - - else: - raise ValueError("Impossible value") - - elif msg_type == b"z": - # Remove breakpoint or watchpoint - bp_type = buf.read(1) - if bp_type == b"0": - # Exec breakpoint - assert(buf.read(1) == b",") - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - - if size != 1: - raise NotImplementedError("Bigger size") - dbgsoft = self.dbg.get_breakpoint_by_addr(addr) - assert(len(dbgsoft) == 1) - 
self.dbg.remove_breakpoint(dbgsoft[0]) - self.send_queue.append(b"OK") - - elif bp_type == b"1": - # Hardware BP - assert(buf.read(1) == b",") - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - self.dbg.remove_memory_breakpoint_by_addr_access( - addr, - read=True, - write=True - ) - self.send_queue.append(b"OK") - - elif bp_type in [b"2", b"3", b"4"]: - # Memory breakpoint - assert(buf.read(1) == b",") - read = bp_type in [b"3", b"4"] - write = bp_type in [b"2", b"4"] - addr, size = (int(x, 16) for x in buf.read().split(b",", 1)) - - self.dbg.remove_memory_breakpoint_by_addr_access( - addr, - read=read, - write=write - ) - self.send_queue.append(b"OK") - - else: - raise ValueError("Impossible value") - - elif msg_type == b"c": - # Continue - self.status = b"" - self.send_messages() - ret = self.dbg.run() - if isinstance(ret, debugging.DebugBreakpointSoft): - self.status = b"S05" - self.send_queue.append(b"S05") # TRAP signal - elif isinstance(ret, ExceptionHandle): - if ret == ExceptionHandle.memoryBreakpoint(): - self.status = b"S05" - self.send_queue.append(b"S05") - else: - raise NotImplementedError("Unknown Except") - elif isinstance(ret, debugging.DebugBreakpointTerminate): - # Connexion should close, but keep it running as a TRAP - # The connexion will be close on instance destruction - print(ret) - self.status = b"S05" - self.send_queue.append(b"S05") - else: - raise NotImplementedError() - - else: - raise NotImplementedError( - "Not implemented: message type %r" % msg_type - ) - - def send_messages(self): - for msg in self.send_queue: - if msg == b"+": - data = b"+" - else: - data = b"$%s#%s" % (msg, self.compute_checksum(msg)) - logging.debug("-> %r", data) - self.sock.send(data) - self.send_queue = [] - - def main_loop(self): - self.recv_queue = [] - self.send_queue = [] - - self.send_string(b"Test\n") - - while (self.sock): - self.get_messages() - self.process_messages() - self.send_messages() - - def run(self): - self.sock, self.address = 
self.server.accept() - self.main_loop() - - # Debugguer processing methods - def report_general_register_values(self): - s = b"" - for i in range(len(self.general_registers_order)): - s += self.read_register(i) - return s - - def read_register(self, reg_num): - reg_name = self.general_registers_order[reg_num] - reg_value = self.read_register_by_name(reg_name) - size = self.general_registers_size[reg_name] - - pack_token = "" - if size == 1: - pack_token = "= 0 - if end is not None: - assert end <= self.mask - - # Helpers - - @staticmethod - def size2mask(size): - """Return the bit mask of size @size""" - return (1 << size) - 1 - - def _range2interval(func): - """Convert a function taking 2 ranges to a function taking a ModularIntervals - and applying to the current instance""" - def ret_func(self, target): - ret = interval() - for left_i, right_i in product(self.intervals, target.intervals): - ret += func(self, left_i[0], left_i[1], right_i[0], - right_i[1]) - return self.__class__(self.size, ret) - return ret_func - - def _range2integer(func): - """Convert a function taking 1 range and optional arguments to a function - applying to the current instance""" - def ret_func(self, *args): - ret = interval() - for x_min, x_max in self.intervals: - ret += func(self, x_min, x_max, *args) - return self.__class__(self.size, ret) - return ret_func - - def _promote(func): - """Check and promote the second argument from integer to - ModularIntervals with one value""" - def ret_func(self, target): - if isinstance(target, int_types): - target = ModularIntervals(self.size, interval([(target, target)])) - if not isinstance(target, ModularIntervals): - raise TypeError("Unsupported operation with %s" % target.__class__) - if target.size != self.size: - raise TypeError("Size are not the same: %s vs %s" % (self.size, - target.size)) - return func(self, target) - return ret_func - - def _unsigned2signed(self, value): - """Return the signed value of @value, based on self.size""" - if 
(value & (1 << (self.size - 1))): - return -(self.mask ^ value) - 1 - else: - return value - - def _signed2unsigned(self, value): - """Return the unsigned value of @value, based on self.size""" - return value & self.mask - - # Operation internals - # - # Naming convention: - # _range_{op}: takes 2 interval bounds and apply op - # _range_{op}_uniq: takes 1 interval bounds and apply op - # _interval_{op}: apply op on an ModularIntervals - # _integer_{op}: apply op on itself with possible arguments - - def _range_add(self, x_min, x_max, y_min, y_max): - """Bounds interval for x + y, with - - x, y of size 'self.size' - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - max_bound = self.mask - if (x_min + y_min <= max_bound and - x_max + y_max >= max_bound + 1): - # HD returns 0, max_bound; but this is because it cannot handle multiple - # interval. - # x_max + y_max can only overflow once, so returns - # [result_min, overflow] U [0, overflow_rest] - return interval([(x_min + y_min, max_bound), - (0, (x_max + y_max) & max_bound)]) - else: - return interval([((x_min + y_min) & max_bound, - (x_max + y_max) & max_bound)]) - - _interval_add = _range2interval(_range_add) - - def _range_minus_uniq(self, x_min, x_max): - """Bounds interval for -x, with - - x of size self.size - - @x_min <= x <= @x_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - max_bound = self.mask - if (x_min == 0 and x_max != 0): - # HD returns 0, max_bound; see _range_add - return interval([(0, 0), ((- x_max) & max_bound, max_bound)]) - else: - return interval([((- x_max) & max_bound, (- x_min) & max_bound)]) - - _interval_minus = _range2integer(_range_minus_uniq) - - def _range_or_min(self, x_min, x_max, y_min, y_max): - """Interval min for x | y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's 
Delight: Chapter 4 - """ - max_bit = 1 << (self.size - 1) - while max_bit: - if ~x_min & y_min & max_bit: - temp = (x_min | max_bit) & - max_bit - if temp <= x_max: - x_min = temp - break - elif x_min & ~y_min & max_bit: - temp = (y_min | max_bit) & - max_bit - if temp <= y_max: - y_min = temp - break - max_bit >>= 1 - return x_min | y_min - - def _range_or_max(self, x_min, x_max, y_min, y_max): - """Interval max for x | y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - max_bit = 1 << (self.size - 1) - while max_bit: - if x_max & y_max & max_bit: - temp = (x_max - max_bit) | (max_bit - 1) - if temp >= x_min: - x_max = temp - break - temp = (y_max - max_bit) | (max_bit - 1) - if temp >= y_min: - y_max = temp - break - max_bit >>= 1 - return x_max | y_max - - def _range_or(self, x_min, x_max, y_min, y_max): - """Interval bounds for x | y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - return interval([(self._range_or_min(x_min, x_max, y_min, y_max), - self._range_or_max(x_min, x_max, y_min, y_max))]) - - _interval_or = _range2interval(_range_or) - - def _range_and_min(self, x_min, x_max, y_min, y_max): - """Interval min for x & y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - max_bit = (1 << (self.size - 1)) - while max_bit: - if ~x_min & ~y_min & max_bit: - temp = (x_min | max_bit) & - max_bit - if temp <= x_max: - x_min = temp - break - temp = (y_min | max_bit) & - max_bit - if temp <= y_max: - y_min = temp - break - max_bit >>= 1 - return x_min & y_min - - def _range_and_max(self, x_min, x_max, y_min, y_max): - """Interval max for x & y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= 
@y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - max_bit = (1 << (self.size - 1)) - while max_bit: - if x_max & ~y_max & max_bit: - temp = (x_max & ~max_bit) | (max_bit - 1) - if temp >= x_min: - x_max = temp - break - elif ~x_max & y_max & max_bit: - temp = (y_max & ~max_bit) | (max_bit - 1) - if temp >= y_min: - y_max = temp - break - max_bit >>= 1 - return x_max & y_max - - def _range_and(self, x_min, x_max, y_min, y_max): - """Interval bounds for x & y, with - - x, y of size @size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - return interval([(self._range_and_min(x_min, x_max, y_min, y_max), - self._range_and_max(x_min, x_max, y_min, y_max))]) - - _interval_and = _range2interval(_range_and) - - def _range_xor(self, x_min, x_max, y_min, y_max): - """Interval bounds for x ^ y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - From Hacker's Delight: Chapter 4 - """ - not_size = lambda x: x ^ self.mask - min_xor = self._range_and_min(x_min, x_max, not_size(y_max), not_size(y_min)) | self._range_and_min(not_size(x_max), not_size(x_min), y_min, y_max) - max_xor = self._range_or_max(0, - self._range_and_max(x_min, x_max, not_size(y_max), not_size(y_min)), - 0, - self._range_and_max(not_size(x_max), not_size(x_min), y_min, y_max)) - return interval([(min_xor, max_xor)]) - - _interval_xor = _range2interval(_range_xor) - - def _range_mul(self, x_min, x_max, y_min, y_max): - """Interval bounds for x * y, with - - x, y of size self.size - - @x_min <= x <= @x_max - - @y_min <= y <= @y_max - - operations are considered unsigned - This is a naive version, going to TOP on overflow""" - max_bound = self.mask - if y_max * x_max > max_bound: - return interval([(0, max_bound)]) - else: - return interval([(x_min * y_min, x_max * y_max)]) - - _interval_mul = 
_range2interval(_range_mul) - - def _range_mod_uniq(self, x_min, x_max, mod): - """Interval bounds for x % @mod, with - - x, @mod of size self.size - - @x_min <= x <= @x_max - - operations are considered unsigned - """ - if (x_max - x_min) >= mod: - return interval([(0, mod - 1)]) - x_max = x_max % mod - x_min = x_min % mod - if x_max < x_min: - return interval([(0, x_max), (x_min, mod - 1)]) - else: - return interval([(x_min, x_max)]) - - _integer_modulo = _range2integer(_range_mod_uniq) - - def _range_shift_uniq(self, x_min, x_max, shift, op): - """Bounds interval for x @op @shift with - - x of size self.size - - @x_min <= x <= @x_max - - operations are considered unsigned - - shift <= self.size - """ - assert shift <= self.size - # Shift operations are monotonic, and overflow results in 0 - max_bound = self.mask - - if op == "<<": - obtain_max = x_max << shift - if obtain_max > max_bound: - # Overflow at least on max, best-effort - # result '0' often happen, include it - return interval([(0, 0), ((1 << shift) - 1, max_bound)]) - else: - return interval([(x_min << shift, obtain_max)]) - elif op == ">>": - return interval([((x_min >> shift) & max_bound, - (x_max >> shift) & max_bound)]) - elif op == "a>>": - # The Miasm2 version (Expr or ModInt) could have been used, but - # introduce unnecessary dependencies for this module - # Python >> is the arithmetic one - ashr = lambda x, y: self._signed2unsigned(self._unsigned2signed(x) >> y) - end_min, end_max = ashr(x_min, shift), ashr(x_max, shift) - end_min, end_max = min(end_min, end_max), max(end_min, end_max) - return interval([(end_min, end_max)]) - else: - raise ValueError("%s is not a shifter" % op) - - def _interval_shift(self, operation, shifter): - """Apply the shifting operation @operation with a shifting - ModularIntervals @shifter on the current instance""" - # Work on a copy of shifter intervals - shifter = interval(shifter.intervals) - if (shifter.hull()[1] >= self.size): - shifter += 
interval([(self.size, self.size)]) - shifter &= interval([(0, self.size)]) - ret = interval() - for shift_range in shifter: - for shift in range(shift_range[0], shift_range[1] + 1): - for x_min, x_max in self.intervals: - ret += self._range_shift_uniq(x_min, x_max, shift, operation) - return self.__class__(self.size, ret) - - def _range_rotate_uniq(self, x_min, x_max, shift, op): - """Bounds interval for x @op @shift with - - x of size self.size - - @x_min <= x <= @x_max - - operations are considered unsigned - - shift <= self.size - """ - assert shift <= self.size - # Divide in sub-operations: a op b: a left b | a right (size - b) - if op == ">>>": - left, right = ">>", "<<" - elif op == "<<<": - left, right = "<<", ">>" - else: - raise ValueError("Not a rotator: %s" % op) - - left_intervals = self._range_shift_uniq(x_min, x_max, shift, left) - right_intervals = self._range_shift_uniq(x_min, x_max, - self.size - shift, right) - - result = self.__class__(self.size, left_intervals) | self.__class__(self.size, right_intervals) - return result.intervals - - def _interval_rotate(self, operation, shifter): - """Apply the rotate operation @operation with a shifting - ModularIntervals @shifter on the current instance""" - # Consider only rotation without repetition, and enumerate - # -> apply a '% size' on shifter - shifter %= self.size - ret = interval() - for shift_range in shifter: - for shift in range(shift_range[0], shift_range[1] + 1): - for x_min, x_max in self.intervals: - ret += self._range_rotate_uniq(x_min, x_max, shift, - operation) - - return self.__class__(self.size, ret) - - # Operation wrappers - - @_promote - def __add__(self, to_add): - """Add @to_add to the current intervals - @to_add: ModularInstances or integer - """ - return self._interval_add(to_add) - - @_promote - def __or__(self, to_or): - """Bitwise OR @to_or to the current intervals - @to_or: ModularInstances or integer - """ - return self._interval_or(to_or) - - @_promote - def __and__(self, 
to_and): - """Bitwise AND @to_and to the current intervals - @to_and: ModularInstances or integer - """ - return self._interval_and(to_and) - - @_promote - def __xor__(self, to_xor): - """Bitwise XOR @to_xor to the current intervals - @to_xor: ModularInstances or integer - """ - return self._interval_xor(to_xor) - - @_promote - def __mul__(self, to_mul): - """Multiply @to_mul to the current intervals - @to_mul: ModularInstances or integer - """ - return self._interval_mul(to_mul) - - @_promote - def __rshift__(self, to_shift): - """Logical shift right the current intervals of @to_shift - @to_shift: ModularInstances or integer - """ - return self._interval_shift('>>', to_shift) - - @_promote - def __lshift__(self, to_shift): - """Logical shift left the current intervals of @to_shift - @to_shift: ModularInstances or integer - """ - return self._interval_shift('<<', to_shift) - - @_promote - def arithmetic_shift_right(self, to_shift): - """Arithmetic shift right the current intervals of @to_shift - @to_shift: ModularInstances or integer - """ - return self._interval_shift('a>>', to_shift) - - def __neg__(self): - """Negate the current intervals""" - return self._interval_minus() - - def __mod__(self, modulo): - """Apply % @modulo on the current intervals - @modulo: integer - """ - - if not isinstance(modulo, int_types): - raise TypeError("Modulo with %s is not supported" % modulo.__class__) - return self._integer_modulo(modulo) - - @_promote - def rotation_right(self, to_rotate): - """Right rotate the current intervals of @to_rotate - @to_rotate: ModularInstances or integer - """ - return self._interval_rotate('>>>', to_rotate) - - @_promote - def rotation_left(self, to_rotate): - """Left rotate the current intervals of @to_rotate - @to_rotate: ModularInstances or integer - """ - return self._interval_rotate('<<<', to_rotate) - - # Instance operations - - @property - def mask(self): - """Return the mask corresponding to the instance size""" - return 
ModularIntervals.size2mask(self.size) - - def __iter__(self): - return iter(self.intervals) - - @property - def length(self): - return self.intervals.length - - def __contains__(self, other): - if isinstance(other, ModularIntervals): - other = other.intervals - return other in self.intervals - - def __str__(self): - return "%s (Size: %s)" % (self.intervals, self.size) - - def size_update(self, new_size): - """Update the instance size to @new_size - The size of elements must be <= @new_size""" - - # Increasing size is always safe - if new_size < self.size: - # Check that current values are indeed included in the new range - assert self.intervals.hull()[1] <= ModularIntervals.size2mask(new_size) - - self.size = new_size - - # For easy chainning - return self - - # Mimic Python's set operations - - @_promote - def union(self, to_union): - """Union set operation with @to_union - @to_union: ModularIntervals instance""" - return ModularIntervals(self.size, self.intervals + to_union.intervals) - - @_promote - def update(self, to_union): - """Union set operation in-place with @to_union - @to_union: ModularIntervals instance""" - self.intervals += to_union.intervals - - @_promote - def intersection(self, to_intersect): - """Intersection set operation with @to_intersect - @to_intersect: ModularIntervals instance""" - return ModularIntervals(self.size, self.intervals & to_intersect.intervals) - - @_promote - def intersection_update(self, to_intersect): - """Intersection set operation in-place with @to_intersect - @to_intersect: ModularIntervals instance""" - self.intervals &= to_intersect.intervals diff --git a/miasm2/analysis/outofssa.py b/miasm2/analysis/outofssa.py deleted file mode 100644 index 41c665af..00000000 --- a/miasm2/analysis/outofssa.py +++ /dev/null @@ -1,413 +0,0 @@ -from future.utils import viewitems, viewvalues - -from miasm2.expression.expression import ExprId -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.analysis.ssa import 
get_phi_sources_parent_block, \ - irblock_has_phi - - -class Varinfo(object): - """Store liveness information for a variable""" - __slots__ = ["live_index", "loc_key", "index"] - - def __init__(self, live_index, loc_key, index): - self.live_index = live_index - self.loc_key = loc_key - self.index = index - - -class UnSSADiGraph(object): - """ - Implements unssa algorithm - Revisiting Out-of-SSA Translation for Correctness, Code Quality, and - Efficiency - """ - - def __init__(self, ssa, head, cfg_liveness): - self.cfg_liveness = cfg_liveness - self.ssa = ssa - self.head = head - - # Set of created variables - self.copy_vars = set() - # Virtual parallel copies - - # On loc_key's Phi node dst -> set((parent, src)) - self.phi_parent_sources = {} - # On loc_key's Phi node, loc_key -> set(Phi dsts) - self.phi_destinations = {} - # Phi's dst -> new var - self.phi_new_var = {} - # For a new_var representing dst: - # new_var -> set(parents of Phi's src in dst = Phi(src,...)) - self.new_var_to_srcs_parents = {} - # new_var -> set(variables to be coalesced with, named "merge_set") - self.merge_state = {} - - # Launch the algorithm in several steps - self.isolate_phi_nodes_block() - self.init_phis_merge_state() - self.order_ssa_var_dom() - self.aggressive_coalesce_block() - self.insert_parallel_copy() - self.replace_merge_sets() - self.remove_assign_eq() - - def insert_parallel_copy(self): - """ - Naive Out-of-SSA from CSSA (without coalescing for now) - - Replace Phi - - Create room for parallel copies in Phi's parents - """ - ircfg = self.ssa.graph - - for irblock in list(viewvalues(ircfg.blocks)): - if not irblock_has_phi(irblock): - continue - - # Replace Phi with Phi's dst = new_var - parallel_copies = {} - for dst in self.phi_destinations[irblock.loc_key]: - new_var = self.phi_new_var[dst] - parallel_copies[dst] = new_var - - assignblks = list(irblock) - assignblks[0] = AssignBlock(parallel_copies, irblock[0].instr) - new_irblock = IRBlock(irblock.loc_key, assignblks) - 
ircfg.blocks[irblock.loc_key] = new_irblock - - # Insert new_var = src in each Phi's parent, at the end of the block - parent_to_parallel_copies = {} - parallel_copies = {} - for dst in irblock[0]: - new_var = self.phi_new_var[dst] - for parent, src in self.phi_parent_sources[dst]: - parent_to_parallel_copies.setdefault(parent, {})[new_var] = src - - for parent, parallel_copies in viewitems(parent_to_parallel_copies): - parent = ircfg.blocks[parent] - assignblks = list(parent) - assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) - new_irblock = IRBlock(parent.loc_key, assignblks) - ircfg.blocks[parent.loc_key] = new_irblock - - def create_copy_var(self, var): - """ - Generate a new var standing for @var - @var: variable to replace - """ - new_var = ExprId('var%d' % len(self.copy_vars), var.size) - self.copy_vars.add(new_var) - return new_var - - def isolate_phi_nodes_block(self): - """ - Init structures and virtually insert parallel copy before/after each phi - node - """ - ircfg = self.ssa.graph - for irblock in viewvalues(ircfg.blocks): - if not irblock_has_phi(irblock): - continue - for dst, sources in viewitems(irblock[0]): - assert sources.is_op('Phi') - new_var = self.create_copy_var(dst) - self.phi_new_var[dst] = new_var - - var_to_parents = get_phi_sources_parent_block( - self.ssa.graph, - irblock.loc_key, - sources.args - ) - - for src in sources.args: - parents = var_to_parents[src] - self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) - for parent in parents: - self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) - - self.phi_destinations[irblock.loc_key] = set(irblock[0]) - - def init_phis_merge_state(self): - """ - Generate trivial coalescing of phi variable and itself - """ - for phi_new_var in viewvalues(self.phi_new_var): - self.merge_state.setdefault(phi_new_var, set([phi_new_var])) - - def order_ssa_var_dom(self): - """Compute dominance order of each ssa variable""" - ircfg = self.ssa.graph - - # 
compute dominator tree - dominator_tree = ircfg.compute_dominator_tree(self.head) - - # variable -> Varinfo - self.var_to_varinfo = {} - # live_index can later be used to compare dominance of AssignBlocks - live_index = 0 - - # walk in DFS over the dominator tree - for loc_key in dominator_tree.walk_depth_first_forward(self.head): - irblock = ircfg.blocks[loc_key] - - # Create live index for phi new vars - # They do not exist in the graph yet, so index is set to None - if irblock_has_phi(irblock): - for dst in irblock[0]: - if not dst.is_id(): - continue - new_var = self.phi_new_var[dst] - self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) - - live_index += 1 - - # Create live index for remaining assignments - for index, assignblk in enumerate(irblock): - used = False - for dst in assignblk: - if not dst.is_id(): - continue - if dst in self.ssa.immutable_ids: - # Will not be considered by the current algo, ignore it - # (for instance, IRDst) - continue - - assert dst not in self.var_to_varinfo - self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) - used = True - if used: - live_index += 1 - - - def ssa_def_dominates(self, node_a, node_b): - """ - Return living index order of @node_a and @node_b - @node_a: Varinfo instance - @node_b: Varinfo instance - """ - ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index - return ret - - def merge_set_sort(self, merge_set): - """ - Return a sorted list of (live_index, var) from @merge_set in dominance - order - @merge_set: set of coalescing variables - """ - return sorted( - (self.var_to_varinfo[var].live_index, var) - for var in merge_set - ) - - def ssa_def_is_live_at(self, node_a, node_b, parent): - """ - Return True if @node_a is live during @node_b definition - If @parent is None, this is a liveness test for a post phi variable; - Else, it is a liveness test for a variable source of the phi node - - @node_a: Varinfo instance - @node_b: Varinfo instance - @parent: 
Optional parent location of the phi source - """ - loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index - if parent and index_b is None: - index_b = 0 - if node_a not in self.new_var_to_srcs_parents: - # node_a is not a new var (it is a "classic" var) - # -> use a basic liveness test - liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] - return node_a in liveness_b.var_out - - for def_loc_key in self.new_var_to_srcs_parents[node_a]: - # Consider node_a as defined at the end of its parents blocks - # and compute liveness check accordingly - - if def_loc_key == parent: - # Same path as node_a definition, so SSA ensure b cannot be live - # on this path (otherwise, a Phi would already happen earlier) - continue - liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] - if node_b in liveness_end_block.var_out: - return True - return False - - def merge_nodes_interfere(self, node_a, node_b, parent): - """ - Return True if @node_a and @node_b interfere - @node_a: variable - @node_b: variable - @parent: Optional parent location of the phi source for liveness tests - - Interference check is: is x live at y definition (or reverse) - TODO: add Value-based interference improvement - """ - if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: - # Defined in the same AssignBlock -> interfere - return True - - if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: - return self.ssa_def_is_live_at(node_a, node_b, parent) - return self.ssa_def_is_live_at(node_b, node_a, parent) - - def merge_sets_interfere(self, merge_a, merge_b, parent): - """ - Return True if no variable in @merge_a and @merge_b interferes. 
- - Implementation of "Algorithm 2: Check intersection in a set of variables" - - @merge_a: a dom ordered list of equivalent variables - @merge_b: a dom ordered list of equivalent variables - @parent: Optional parent location of the phi source for liveness tests - """ - if merge_a == merge_b: - # No need to consider interference if equal - return False - - merge_a_list = self.merge_set_sort(merge_a) - merge_b_list = self.merge_set_sort(merge_b) - dom = [] - while merge_a_list or merge_b_list: - if not merge_a_list: - _, current = merge_b_list.pop(0) - elif not merge_b_list: - _, current = merge_a_list.pop(0) - else: - # compare live_indexes (standing for dominance) - if merge_a_list[-1] < merge_b_list[-1]: - _, current = merge_a_list.pop(0) - else: - _, current = merge_b_list.pop(0) - while dom and not self.ssa_def_dominates(dom[-1], current): - dom.pop() - - # Don't test node in same merge_set - if ( - # Is stack not empty? - dom and - # Trivial non-interference if dom.top() and current come - # from the same merge set - not (dom[-1] in merge_a and current in merge_a) and - not (dom[-1] in merge_b and current in merge_b) and - # Actually test for interference - self.merge_nodes_interfere(current, dom[-1], parent) - ): - return True - dom.append(current) - return False - - def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): - """ - Try to coalesce variables each dst/src couple together from - @parallel_copies - - @parallel_copies: a dictionary representing dst/src parallel - assignments. 
- @parent: Optional parent location of the phi source for liveness tests - """ - for dst, src in viewitems(parallel_copies): - dst_merge = self.merge_state.setdefault(dst, set([dst])) - src_merge = self.merge_state.setdefault(src, set([src])) - if not self.merge_sets_interfere(dst_merge, src_merge, parent): - dst_merge.update(src_merge) - for node in dst_merge: - self.merge_state[node] = dst_merge - - def aggressive_coalesce_block(self): - """Try to coalesce phi var with their pre/post variables""" - - ircfg = self.ssa.graph - - # Run coalesce on the post phi parallel copy - for irblock in viewvalues(ircfg.blocks): - if not irblock_has_phi(irblock): - continue - parallel_copies = {} - for dst in self.phi_destinations[irblock.loc_key]: - parallel_copies[dst] = self.phi_new_var[dst] - self.aggressive_coalesce_parallel_copy(parallel_copies, None) - - # Run coalesce on the pre phi parallel copy - - # Stand for the virtual parallel copies at the end of Phi's block - # parents - parent_to_parallel_copies = {} - for dst in irblock[0]: - new_var = self.phi_new_var[dst] - for parent, src in self.phi_parent_sources[dst]: - parent_to_parallel_copies.setdefault(parent, {})[new_var] = src - - for parent, parallel_copies in viewitems(parent_to_parallel_copies): - self.aggressive_coalesce_parallel_copy(parallel_copies, parent) - - def get_best_merge_set_name(self, merge_set): - """ - For a given @merge_set, prefer an original SSA variable instead of a - created copy. In other case, take a random name. 
- @merge_set: set of equivalent expressions - """ - if not merge_set: - raise RuntimeError("Merge set should not be empty") - for var in merge_set: - if var not in self.copy_vars: - return var - # Get random name - return var - - - def replace_merge_sets(self): - """ - In the graph, replace all variables from merge state by their - representative variable - """ - replace = {} - merge_sets = set() - - # Elect representative for merge sets - merge_set_to_name = {} - for merge_set in viewvalues(self.merge_state): - frozen_merge_set = frozenset(merge_set) - merge_sets.add(frozen_merge_set) - var_name = self.get_best_merge_set_name(merge_set) - merge_set_to_name[frozen_merge_set] = var_name - - # Generate replacement of variable by their representative - for merge_set in merge_sets: - var_name = merge_set_to_name[merge_set] - merge_set = list(merge_set) - for var in merge_set: - replace[var] = var_name - - self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) - - def remove_phi(self): - """ - Remove phi operators in @ifcfg - @ircfg: IRDiGraph instance - """ - - for irblock in list(viewvalues(self.ssa.graph.blocks)): - assignblks = list(irblock) - out = {} - for dst, src in viewitems(assignblks[0]): - if src.is_op('Phi'): - assert set([dst]) == set(src.args) - continue - out[dst] = src - assignblks[0] = AssignBlock(out, assignblks[0].instr) - self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) - - def remove_assign_eq(self): - """ - Remove trivial expressions (a=a) in the current graph - """ - for irblock in list(viewvalues(self.ssa.graph.blocks)): - assignblks = list(irblock) - for i, assignblk in enumerate(assignblks): - out = {} - for dst, src in viewitems(assignblk): - if dst == src: - continue - out[dst] = src - assignblks[i] = AssignBlock(out, assignblk.instr) - self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py deleted file mode 100644 
index d3e8fce1..00000000 --- a/miasm2/analysis/sandbox.py +++ /dev/null @@ -1,1026 +0,0 @@ -from __future__ import print_function -from builtins import range - -import os -import logging -from argparse import ArgumentParser - -from future.utils import viewitems, viewvalues - -from miasm2.core.utils import force_bytes -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis import debugging -from miasm2.jitter.jitload import log_func - - - -class Sandbox(object): - - """ - Parent class for Sandbox abstraction - """ - - CALL_FINISH_ADDR = 0x13371acc - - @staticmethod - def code_sentinelle(jitter): - jitter.run = False - return False - - @classmethod - def _classes_(cls): - """ - Iterator on parent classes except Sanbox - """ - for base_cls in cls.__bases__: - # Avoid infinite loop - if base_cls == Sandbox: - continue - - yield base_cls - - classes = property(lambda x: x.__class__._classes_()) - - def __init__(self, fname, options, custom_methods=None, **kwargs): - """ - Initialize a sandbox - @fname: str file name - @options: namespace instance of specific options - @custom_methods: { str => func } for custom API implementations - """ - - # Initialize - self.fname = fname - self.options = options - if custom_methods is None: - custom_methods = {} - for cls in self.classes: - if cls == Sandbox: - continue - if issubclass(cls, OS): - cls.__init__(self, custom_methods, **kwargs) - else: - cls.__init__(self, **kwargs) - - # Logging options - self.jitter.set_trace_log( - trace_instr=self.options.singlestep, - trace_regs=self.options.singlestep, - trace_new_blocks=self.options.dumpblocs - ) - - if not self.options.quiet_function_calls: - log_func.setLevel(logging.INFO) - - @classmethod - def parser(cls, *args, **kwargs): - """ - Return instance of instance parser with expecting options. - Extra parameters are passed to parser initialisation. 
- """ - - parser = ArgumentParser(*args, **kwargs) - parser.add_argument('-a', "--address", - help="Force entry point address", default=None) - parser.add_argument('-b', "--dumpblocs", action="store_true", - help="Log disasm blocks") - parser.add_argument('-z', "--singlestep", action="store_true", - help="Log single step") - parser.add_argument('-d', "--debugging", action="store_true", - help="Debug shell") - parser.add_argument('-g', "--gdbserver", type=int, - help="Listen on port @port") - parser.add_argument("-j", "--jitter", - help="Jitter engine. Possible values are: gcc (default), llvm, python", - default="gcc") - parser.add_argument( - '-q', "--quiet-function-calls", action="store_true", - help="Don't log function calls") - parser.add_argument('-i', "--dependencies", action="store_true", - help="Load PE and its dependencies") - - for base_cls in cls._classes_(): - base_cls.update_parser(parser) - return parser - - def run(self, addr=None): - """ - Launch emulation (gdbserver, debugging, basic JIT). 
- @addr: (int) start address - """ - if addr is None and self.options.address is not None: - addr = int(self.options.address, 0) - - if any([self.options.debugging, self.options.gdbserver]): - dbg = debugging.Debugguer(self.jitter) - self.dbg = dbg - dbg.init_run(addr) - - if self.options.gdbserver: - port = self.options.gdbserver - print("Listen on port %d" % port) - gdb = self.machine.gdbserver(dbg, port) - self.gdb = gdb - gdb.run() - else: - cmd = debugging.DebugCmd(dbg) - self.cmd = cmd - cmd.cmdloop() - - else: - self.jitter.init_run(addr) - self.jitter.continue_run() - - def call(self, prepare_cb, addr, *args): - """ - Direct call of the function at @addr, with arguments @args prepare in - calling convention implemented by @prepare_cb - @prepare_cb: func(ret_addr, *args) - @addr: address of the target function - @args: arguments - """ - self.jitter.init_run(addr) - self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.code_sentinelle) - prepare_cb(self.CALL_FINISH_ADDR, *args) - self.jitter.continue_run() - - - -class OS(object): - - """ - Parent class for OS abstraction - """ - - def __init__(self, custom_methods, **kwargs): - pass - - @classmethod - def update_parser(cls, parser): - pass - - -class Arch(object): - - """ - Parent class for Arch abstraction - """ - - # Architecture name - _ARCH_ = None - - def __init__(self, **kwargs): - self.machine = Machine(self._ARCH_) - self.jitter = self.machine.jitter(self.options.jitter) - - @classmethod - def update_parser(cls, parser): - pass - - -class OS_Win(OS): - # DLL to import - ALL_IMP_DLL = ["ntdll.dll", "kernel32.dll", "user32.dll", - "ole32.dll", "urlmon.dll", - "ws2_32.dll", 'advapi32.dll', "psapi.dll", - ] - modules_path = "win_dll" - - def __init__(self, custom_methods, *args, **kwargs): - from miasm2.jitter.loader.pe import vm_load_pe, vm_load_pe_libs,\ - preload_pe, libimp_pe, vm_load_pe_and_dependencies - from miasm2.os_dep import win_api_x86_32, win_api_x86_32_seh - methods = 
dict((name.encode(),func) for name, func in viewitems(win_api_x86_32.__dict__)) - methods.update(custom_methods) - - super(OS_Win, self).__init__(methods, *args, **kwargs) - - # Import manager - libs = libimp_pe() - self.libs = libs - win_api_x86_32.winobjs.runtime_dll = libs - - self.name2module = {} - fname_basename = os.path.basename(self.fname).lower() - - # Load main pe - with open(self.fname, "rb") as fstream: - self.pe = vm_load_pe( - self.jitter.vm, - fstream.read(), - load_hdr=self.options.load_hdr, - name=self.fname, - **kwargs - ) - self.name2module[fname_basename] = self.pe - - # Load library - if self.options.loadbasedll: - - # Load libs in memory - self.name2module.update( - vm_load_pe_libs( - self.jitter.vm, - self.ALL_IMP_DLL, - libs, - self.modules_path, - **kwargs - ) - ) - - # Patch libs imports - for pe in viewvalues(self.name2module): - preload_pe(self.jitter.vm, pe, libs) - - if self.options.dependencies: - vm_load_pe_and_dependencies( - self.jitter.vm, - fname_basename, - self.name2module, - libs, - self.modules_path, - **kwargs - ) - - win_api_x86_32.winobjs.current_pe = self.pe - - # Fix pe imports - preload_pe(self.jitter.vm, self.pe, libs) - - # Library calls handler - self.jitter.add_lib_handler(libs, methods) - - # Manage SEH - if self.options.use_windows_structs: - win_api_x86_32_seh.main_pe_name = fname_basename - win_api_x86_32_seh.main_pe = self.pe - win_api_x86_32.winobjs.hcurmodule = self.pe.NThdr.ImageBase - win_api_x86_32_seh.name2module = self.name2module - win_api_x86_32_seh.set_win_fs_0(self.jitter) - win_api_x86_32_seh.init_seh(self.jitter) - - self.entry_point = self.pe.rva2virt( - self.pe.Opthdr.AddressOfEntryPoint) - - @classmethod - def update_parser(cls, parser): - parser.add_argument('-o', "--load-hdr", action="store_true", - help="Load pe hdr") - parser.add_argument('-y', "--use-windows-structs", action="store_true", - help="Create and use windows structures (peb, ldr, seh, ...)") - parser.add_argument('-l', 
"--loadbasedll", action="store_true", - help="Load base dll (path './win_dll')") - parser.add_argument('-r', "--parse-resources", - action="store_true", help="Load resources") - - -class OS_Linux(OS): - - PROGRAM_PATH = "./program" - - def __init__(self, custom_methods, *args, **kwargs): - from miasm2.jitter.loader.elf import vm_load_elf, preload_elf, libimp_elf - from miasm2.os_dep import linux_stdlib - methods = linux_stdlib.__dict__ - methods.update(custom_methods) - - super(OS_Linux, self).__init__(methods, *args, **kwargs) - - # Import manager - self.libs = libimp_elf() - - with open(self.fname, "rb") as fstream: - self.elf = vm_load_elf( - self.jitter.vm, - fstream.read(), - name=self.fname, - **kwargs - ) - preload_elf(self.jitter.vm, self.elf, self.libs) - - self.entry_point = self.elf.Ehdr.entry - - # Library calls handler - self.jitter.add_lib_handler(self.libs, methods) - linux_stdlib.ABORT_ADDR = self.CALL_FINISH_ADDR - - # Arguments - self.argv = [self.PROGRAM_PATH] - if self.options.command_line: - self.argv += self.options.command_line - self.envp = self.options.environment_vars - - @classmethod - def update_parser(cls, parser): - parser.add_argument('-c', '--command-line', - action="append", - default=[], - help="Command line arguments") - parser.add_argument('--environment-vars', - action="append", - default=[], - help="Environment variables arguments") - parser.add_argument('--mimic-env', - action="store_true", - help="Mimic the environment of a starting executable") - -class OS_Linux_str(OS): - - PROGRAM_PATH = "./program" - - def __init__(self, custom_methods, *args, **kwargs): - from miasm2.jitter.loader.elf import libimp_elf - from miasm2.os_dep import linux_stdlib - methods = linux_stdlib.__dict__ - methods.update(custom_methods) - - super(OS_Linux_str, self).__init__(methods, *args, **kwargs) - - # Import manager - libs = libimp_elf() - self.libs = libs - - data = open(self.fname, "rb").read() - self.options.load_base_addr = 
int(self.options.load_base_addr, 0) - self.jitter.vm.add_memory_page( - self.options.load_base_addr, PAGE_READ | PAGE_WRITE, data, - "Initial Str" - ) - - # Library calls handler - self.jitter.add_lib_handler(libs, methods) - linux_stdlib.ABORT_ADDR = self.CALL_FINISH_ADDR - - # Arguments - self.argv = [self.PROGRAM_PATH] - if self.options.command_line: - self.argv += self.options.command_line - self.envp = self.options.environment_vars - - @classmethod - def update_parser(cls, parser): - parser.add_argument('-c', '--command-line', - action="append", - default=[], - help="Command line arguments") - parser.add_argument('--environment-vars', - action="append", - default=[], - help="Environment variables arguments") - parser.add_argument('--mimic-env', - action="store_true", - help="Mimic the environment of a starting executable") - parser.add_argument("load_base_addr", help="load base address") - - -class Arch_x86(Arch): - _ARCH_ = None # Arch name - STACK_SIZE = 0x10000 - STACK_BASE = 0x130000 - - def __init__(self, **kwargs): - super(Arch_x86, self).__init__(**kwargs) - - if self.options.usesegm: - self.jitter.ir_arch.do_stk_segm = True - self.jitter.ir_arch.do_ds_segm = True - self.jitter.ir_arch.do_str_segm = True - self.jitter.ir_arch.do_all_segm = True - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - - @classmethod - def update_parser(cls, parser): - parser.add_argument('-s', "--usesegm", action="store_true", - help="Use segments") - - -class Arch_x86_32(Arch_x86): - _ARCH_ = "x86_32" - - -class Arch_x86_64(Arch_x86): - _ARCH_ = "x86_64" - - -class Arch_arml(Arch): - _ARCH_ = "arml" - STACK_SIZE = 0x100000 - STACK_BASE = 0x100000 - - def __init__(self, **kwargs): - super(Arch_arml, self).__init__(**kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - - -class Arch_armb(Arch): - _ARCH_ = "armb" 
- STACK_SIZE = 0x100000 - STACK_BASE = 0x100000 - - def __init__(self, **kwargs): - super(Arch_armb, self).__init__(**kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - - -class Arch_armtl(Arch): - _ARCH_ = "armtl" - STACK_SIZE = 0x100000 - STACK_BASE = 0x100000 - - def __init__(self, **kwargs): - super(Arch_armtl, self).__init__(**kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - - -class Arch_mips32b(Arch): - _ARCH_ = "mips32b" - STACK_SIZE = 0x100000 - STACK_BASE = 0x100000 - - def __init__(self, **kwargs): - super(Arch_mips32b, self).__init__(**kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - - -class Arch_aarch64l(Arch): - _ARCH_ = "aarch64l" - STACK_SIZE = 0x100000 - STACK_BASE = 0x100000 - - def __init__(self, **kwargs): - super(Arch_aarch64l, self).__init__(**kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - - -class Arch_aarch64b(Arch): - _ARCH_ = "aarch64b" - STACK_SIZE = 0x100000 - STACK_BASE = 0x100000 - - def __init__(self, **kwargs): - super(Arch_aarch64b, self).__init__(**kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - -class Arch_ppc(Arch): - _ARCH_ = None - -class Arch_ppc32(Arch): - _ARCH_ = None - -class Arch_ppc32b(Arch_ppc32): - _ARCH_ = "ppc32b" - -class Sandbox_Win_x86_32(Sandbox, Arch_x86_32, OS_Win): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # Pre-stack some arguments - self.jitter.push_uint32_t(2) - self.jitter.push_uint32_t(1) - self.jitter.push_uint32_t(0) - self.jitter.push_uint32_t(self.CALL_FINISH_ADDR) - - # Set the runtime guard - 
self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) - - def run(self, addr=None): - """ - If addr is not set, use entrypoint - """ - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Win_x86_32, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_stdcall) - super(self.__class__, self).call(prepare_cb, addr, *args) - - -class Sandbox_Win_x86_64(Sandbox, Arch_x86_64, OS_Win): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # reserve stack for local reg - for _ in range(0x4): - self.jitter.push_uint64_t(0) - - # Pre-stack return address - self.jitter.push_uint64_t(self.CALL_FINISH_ADDR) - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - """ - If addr is not set, use entrypoint - """ - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Win_x86_64, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_stdcall) - super(self.__class__, self).call(prepare_cb, addr, *args) - - -class Sandbox_Linux_x86_32(Sandbox, Arch_x86_32, OS_Linux): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.ESP -= len(env) - ptr = self.jitter.cpu.ESP - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = 
force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.ESP -= len(arg) - ptr = self.jitter.cpu.ESP - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - self.jitter.push_uint32_t(self.CALL_FINISH_ADDR) - self.jitter.push_uint32_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(0) - for ptr in reversed(argv_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(len(self.argv)) - else: - self.jitter.push_uint32_t(self.CALL_FINISH_ADDR) - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - """ - If addr is not set, use entrypoint - """ - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_x86_32, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) - - - -class Sandbox_Linux_x86_64(Sandbox, Arch_x86_64, OS_Linux): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.RSP -= len(env) - ptr = self.jitter.cpu.RSP - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.RSP -= len(arg) - ptr = self.jitter.cpu.RSP - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - self.jitter.push_uint64_t(self.CALL_FINISH_ADDR) - self.jitter.push_uint64_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint64_t(ptr) - self.jitter.push_uint64_t(0) - for ptr in reversed(argv_ptrs): - 
self.jitter.push_uint64_t(ptr) - self.jitter.push_uint64_t(len(self.argv)) - else: - self.jitter.push_uint64_t(self.CALL_FINISH_ADDR) - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - """ - If addr is not set, use entrypoint - """ - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_x86_64, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) - - -class Sandbox_Linux_arml(Sandbox, Arch_arml, OS_Linux): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.SP -= len(env) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.SP -= len(arg) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - # Round SP to 4 - self.jitter.cpu.SP = self.jitter.cpu.SP & ~ 3 - - self.jitter.push_uint32_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(0) - for ptr in reversed(argv_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(len(self.argv)) - - self.jitter.cpu.LR = self.CALL_FINISH_ADDR - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_arml, self).run(addr) 
- - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) - - -class Sandbox_Linux_armtl(Sandbox, Arch_armtl, OS_Linux): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.SP -= len(env) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.SP -= len(arg) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - # Round SP to 4 - self.jitter.cpu.SP = self.jitter.cpu.SP & ~ 3 - - self.jitter.push_uint32_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(0) - for ptr in reversed(argv_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(len(self.argv)) - - self.jitter.cpu.LR = self.CALL_FINISH_ADDR - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_armtl, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) - - - -class Sandbox_Linux_mips32b(Sandbox, Arch_mips32b, OS_Linux): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, 
**kwargs) - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.SP -= len(env) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.SP -= len(arg) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - self.jitter.push_uint32_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(0) - for ptr in reversed(argv_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.push_uint32_t(len(self.argv)) - - self.jitter.cpu.RA = 0x1337beef - - # Set the runtime guard - self.jitter.add_breakpoint( - 0x1337beef, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_mips32b, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) - - -class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - self.jitter.cpu.LR = self.CALL_FINISH_ADDR - - # Set the runtime guard - self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) - - def run(self, addr=None): - if addr is None and self.options.address is not None: - addr = int(self.options.address, 0) - super(Sandbox_Linux_armb_str, self).run(addr) - - -class Sandbox_Linux_arml_str(Sandbox, Arch_arml, OS_Linux_str): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - self.jitter.cpu.LR = 
self.CALL_FINISH_ADDR - - # Set the runtime guard - self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) - - def run(self, addr=None): - if addr is None and self.options.address is not None: - addr = int(self.options.address, 0) - super(Sandbox_Linux_arml_str, self).run(addr) - - -class Sandbox_Linux_aarch64l(Sandbox, Arch_aarch64l, OS_Linux): - - def __init__(self, *args, **kwargs): - Sandbox.__init__(self, *args, **kwargs) - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.SP -= len(env) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.SP -= len(arg) - ptr = self.jitter.cpu.SP - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - self.jitter.push_uint64_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint64_t(ptr) - self.jitter.push_uint64_t(0) - for ptr in reversed(argv_ptrs): - self.jitter.push_uint64_t(ptr) - self.jitter.push_uint64_t(len(self.argv)) - - self.jitter.cpu.LR = self.CALL_FINISH_ADDR - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_aarch64l, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) - -class Sandbox_Linux_ppc32b(Sandbox, Arch_ppc32b, OS_Linux): - - STACK_SIZE = 0x10000 - STACK_BASE = 0xbfce0000 - - # The glue between the kernel and the ELF ABI on Linux/PowerPC is - # implemented in 
glibc/sysdeps/powerpc/powerpc32/dl-start.S, so we - # have to play the role of ld.so here. - def __init__(self, *args, **kwargs): - super(Sandbox_Linux_ppc32b, self).__init__(*args, **kwargs) - - # Init stack - self.jitter.stack_size = self.STACK_SIZE - self.jitter.stack_base = self.STACK_BASE - self.jitter.init_stack() - self.jitter.cpu.R1 -= 8 - - # Pre-stack some arguments - if self.options.mimic_env: - env_ptrs = [] - for env in self.envp: - env = force_bytes(env) - env += b"\x00" - self.jitter.cpu.R1 -= len(env) - ptr = self.jitter.cpu.R1 - self.jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - argv_ptrs = [] - for arg in self.argv: - arg = force_bytes(arg) - arg += b"\x00" - self.jitter.cpu.R1 -= len(arg) - ptr = self.jitter.cpu.R1 - self.jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - self.jitter.push_uint32_t(0) - for ptr in reversed(env_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.cpu.R5 = self.jitter.cpu.R1 # envp - self.jitter.push_uint32_t(0) - for ptr in reversed(argv_ptrs): - self.jitter.push_uint32_t(ptr) - self.jitter.cpu.R4 = self.jitter.cpu.R1 # argv - self.jitter.cpu.R3 = len(self.argv) # argc - self.jitter.push_uint32_t(self.jitter.cpu.R3) - - self.jitter.cpu.R6 = 0 # auxp - self.jitter.cpu.R7 = 0 # termination function - - # From the glibc, we should push a 0 here to distinguish a - # dynamically linked executable from a statically linked one. - # We actually do not do it and attempt to be somehow compatible - # with both types of executables. 
- #self.jitter.push_uint32_t(0) - - self.jitter.cpu.LR = self.CALL_FINISH_ADDR - - # Set the runtime guard - self.jitter.add_breakpoint( - self.CALL_FINISH_ADDR, - self.__class__.code_sentinelle - ) - - def run(self, addr=None): - """ - If addr is not set, use entrypoint - """ - if addr is None and self.options.address is None: - addr = self.entry_point - super(Sandbox_Linux_ppc32b, self).run(addr) - - def call(self, addr, *args, **kwargs): - """ - Direct call of the function at @addr, with arguments @args - @addr: address of the target function - @args: arguments - """ - prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) - super(self.__class__, self).call(prepare_cb, addr, *args) diff --git a/miasm2/analysis/simplifier.py b/miasm2/analysis/simplifier.py deleted file mode 100644 index 10d5e092..00000000 --- a/miasm2/analysis/simplifier.py +++ /dev/null @@ -1,303 +0,0 @@ -""" -Apply simplification passes to an IR cfg -""" - -import logging -from functools import wraps -from miasm2.analysis.ssa import SSADiGraph -from miasm2.analysis.outofssa import UnSSADiGraph -from miasm2.analysis.data_flow import DiGraphLivenessSSA -from miasm2.expression.simplifications import expr_simp -from miasm2.analysis.data_flow import dead_simp, \ - merge_blocks, remove_empty_assignblks, \ - PropagateExprIntThroughExprId, PropagateThroughExprId, \ - PropagateThroughExprMem, del_unused_edges - - -log = logging.getLogger("simplifier") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARNING) - - -def fix_point(func): - @wraps(func) - def ret_func(self, ircfg, head): - log.debug('[%s]: start', func.__name__) - has_been_modified = False - modified = True - while modified: - modified = func(self, ircfg, head) - has_been_modified |= modified - log.debug( - '[%s]: stop %r', - func.__name__, - has_been_modified - ) - return has_been_modified - 
return ret_func - - -class IRCFGSimplifier(object): - """ - Simplify an IRCFG - This class applies passes until reaching a fix point - """ - - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.init_passes() - - def init_passes(self): - """ - Init the array of simplification passes - """ - self.passes = [] - - @fix_point - def simplify(self, ircfg, head): - """ - Apply passes until reaching a fix point - Return True if the graph has been modified - - @ircfg: IRCFG instance to simplify - @head: Location instance of the ircfg head - """ - modified = False - for simplify_pass in self.passes: - modified |= simplify_pass(ircfg, head) - return modified - - def __call__(self, ircfg, head): - return self.simplify(ircfg, head) - - -class IRCFGSimplifierCommon(IRCFGSimplifier): - """ - Simplify an IRCFG - This class applies following passes until reaching a fix point: - - simplify_ircfg - - do_dead_simp_ircfg - """ - def __init__(self, ir_arch, expr_simp=expr_simp): - self.expr_simp = expr_simp - super(IRCFGSimplifierCommon, self).__init__(ir_arch) - - def init_passes(self): - self.passes = [ - self.simplify_ircfg, - self.do_dead_simp_ircfg, - ] - - @fix_point - def simplify_ircfg(self, ircfg, _head): - """ - Apply self.expr_simp on the @ircfg until reaching fix point - Return True if the graph has been modified - - @ircfg: IRCFG instance to simplify - """ - modified = ircfg.simplify(self.expr_simp) - return modified - - @fix_point - def do_dead_simp_ircfg(self, ircfg, head): - """ - Apply: - - dead_simp - - remove_empty_assignblks - - merge_blocks - on the @ircfg until reaching fix point - Return True if the graph has been modified - - @ircfg: IRCFG instance to simplify - @head: Location instance of the ircfg head - """ - modified = dead_simp(self.ir_arch, ircfg) - modified |= remove_empty_assignblks(ircfg) - modified |= merge_blocks(ircfg, set([head])) - return modified - - -class IRCFGSimplifierSSA(IRCFGSimplifierCommon): - """ - Simplify an IRCFG. 
- The IRCF is first transformed in SSA, then apply transformations passes - and apply out-of-ssa. Final passes of IRcfgSimplifier are applied - - This class apply following pass until reaching a fix point: - - do_propagate_int - - do_propagate_mem - - do_propagate_expr - - do_dead_simp_ssa - """ - - def __init__(self, ir_arch, expr_simp=expr_simp): - super(IRCFGSimplifierSSA, self).__init__(ir_arch, expr_simp) - - self.ir_arch.ssa_var = {} - self.all_ssa_vars = {} - - self.ssa_forbidden_regs = self.get_forbidden_regs() - - self.propag_int = PropagateExprIntThroughExprId() - self.propag_expr = PropagateThroughExprId() - self.propag_mem = PropagateThroughExprMem() - - def get_forbidden_regs(self): - """ - Return a set of immutable register during SSA transformation - """ - regs = set( - [ - self.ir_arch.pc, - self.ir_arch.IRDst, - self.ir_arch.arch.regs.exception_flags - ] - ) - return regs - - def init_passes(self): - """ - Init the array of simplification passes - """ - self.passes = [ - self.simplify_ssa, - self.do_propagate_int, - self.do_propagate_mem, - self.do_propagate_expr, - self.do_dead_simp_ssa, - ] - - def ircfg_to_ssa(self, ircfg, head): - """ - Apply the SSA transformation to @ircfg using it's @head - - @ircfg: IRCFG instance to simplify - @head: Location instance of the ircfg head - """ - ssa = SSADiGraph(ircfg) - ssa.immutable_ids.update(self.ssa_forbidden_regs) - ssa.ssa_variable_to_expr.update(self.all_ssa_vars) - ssa.transform(head) - self.all_ssa_vars.update(ssa.ssa_variable_to_expr) - self.ir_arch.ssa_var.update(ssa.ssa_variable_to_expr) - return ssa - - def ssa_to_unssa(self, ssa, head): - """ - Apply the out-of-ssa transformation to @ssa using it's @head - - @ssa: SSADiGraph instance - @head: Location instance of the graph head - """ - cfg_liveness = DiGraphLivenessSSA(ssa.graph) - cfg_liveness.init_var_info(self.ir_arch) - cfg_liveness.compute_liveness() - - UnSSADiGraph(ssa, head, cfg_liveness) - return ssa.graph - - @fix_point - def 
simplify_ssa(self, ssa, _head): - """ - Apply self.expr_simp on the @ssa.graph until reaching fix point - Return True if the graph has been modified - - @ssa: SSADiGraph instance - """ - modified = ssa.graph.simplify(self.expr_simp) - return modified - - @fix_point - def do_propagate_int(self, ssa, head): - """ - Constant propagation in the @ssa graph - @head: Location instance of the graph head - """ - modified = self.propag_int.propagate(ssa, head) - modified |= ssa.graph.simplify(self.expr_simp) - modified |= del_unused_edges(ssa.graph, set([head])) - return modified - - @fix_point - def do_propagate_mem(self, ssa, head): - """ - Propagation of expression based on ExprInt/ExprId in the @ssa graph - @head: Location instance of the graph head - """ - modified = self.propag_mem.propagate(ssa, head) - modified |= ssa.graph.simplify(self.expr_simp) - modified |= del_unused_edges(ssa.graph, set([head])) - return modified - - @fix_point - def do_propagate_expr(self, ssa, head): - """ - Expressions propagation through ExprId in the @ssa graph - @head: Location instance of the graph head - """ - modified = self.propag_expr.propagate(ssa, head) - modified |= ssa.graph.simplify(self.expr_simp) - modified |= del_unused_edges(ssa.graph, set([head])) - return modified - - @fix_point - def do_dead_simp_ssa(self, ssa, head): - """ - Apply: - - dead_simp - - remove_empty_assignblks - - del_unused_edges - - merge_blocks - on the @ircfg until reaching fix point - Return True if the graph has been modified - - @ircfg: IRCFG instance to simplify - @head: Location instance of the ircfg head - """ - modified = dead_simp(self.ir_arch, ssa.graph) - modified |= remove_empty_assignblks(ssa.graph) - modified |= del_unused_edges(ssa.graph, set([head])) - modified |= merge_blocks(ssa.graph, set([head])) - return modified - - def do_simplify(self, ssa, head): - """ - Apply passes until reaching a fix point - Return True if the graph has been modified - """ - return super(IRCFGSimplifierSSA, 
self).simplify(ssa, head) - - def do_simplify_loop(self, ssa, head): - """ - Apply do_simplify until reaching a fix point - SSA is updated between each do_simplify - Return True if the graph has been modified - """ - modified = True - while modified: - modified = self.do_simplify(ssa, head) - # Update ssa structs - ssa = self.ircfg_to_ssa(ssa.graph, head) - return ssa - - def simplify(self, ircfg, head): - """ - Apply SSA transformation to @ircfg - Apply passes until reaching a fix point - Apply out-of-ssa transformation - Apply post simplification passes - - Updated simplified IRCFG instance and return it - - @ircfg: IRCFG instance to simplify - @head: Location instance of the ircfg head - """ - ssa = self.ircfg_to_ssa(ircfg, head) - ssa = self.do_simplify_loop(ssa, head) - ircfg = self.ssa_to_unssa(ssa, head) - ircfg_simplifier = IRCFGSimplifierCommon(self.ir_arch) - ircfg_simplifier.simplify(ircfg, head) - return ircfg diff --git a/miasm2/analysis/ssa.py b/miasm2/analysis/ssa.py deleted file mode 100644 index 54d17dc1..00000000 --- a/miasm2/analysis/ssa.py +++ /dev/null @@ -1,1118 +0,0 @@ -from collections import deque -from future.utils import viewitems, viewvalues - -from miasm2.expression.expression import ExprId, ExprAssign, ExprOp, \ - ExprLoc, get_expr_ids -from miasm2.ir.ir import AssignBlock, IRBlock - - -def sanitize_graph_head(ircfg, head): - """ - In multiple algorithm, the @head of the ircfg may not have predecessors. 
- The function transform the @ircfg in order to ensure this property - @ircfg: IRCFG instance - @head: the location of the graph's head - """ - - if not ircfg.predecessors(head): - return - original_edges = ircfg.predecessors(head) - sub_head = ircfg.loc_db.add_location() - - # Duplicate graph, replacing references to head by sub_head - replaced_expr = { - ExprLoc(head, ircfg.IRDst.size): - ExprLoc(sub_head, ircfg.IRDst.size) - } - ircfg.simplify( - lambda expr:expr.replace_expr(replaced_expr) - ) - # Duplicate head block - ircfg.add_irblock(IRBlock(sub_head, list(ircfg.blocks[head]))) - - # Remove original head block - ircfg.del_node(head) - - for src in original_edges: - ircfg.add_edge(src, sub_head) - - # Create new head, jumping to sub_head - assignblk = AssignBlock({ircfg.IRDst:ExprLoc(sub_head, ircfg.IRDst.size)}) - new_irblock = IRBlock(head, [assignblk]) - ircfg.add_irblock(new_irblock) - - -class SSA(object): - """ - Generic class for static single assignment (SSA) transformation - - Handling of - - variable generation - - variable renaming - - conversion of an IRCFG block into SSA - - Variables will be renamed to ., whereby the - index will be increased in every definition of . - - Memory expressions are stateless. The addresses are in SSA form, - but memory aliasing will occur. For instance, if it holds - that RAX == RBX.0 + (-0x8) and - - @64[RBX.0 + (-0x8)] = RDX - RCX.0 = @64[RAX], - - then it cannot be tracked that RCX.0 == RDX. 
- """ - - - def __init__(self, ircfg): - """ - Initialises generic class for SSA - :param ircfg: instance of IRCFG - """ - # IRCFG instance - self.ircfg = ircfg - - # SSA blocks - self.blocks = {} - - # stack for RHS - self._stack_rhs = {} - # stack for LHS - self._stack_lhs = {} - - self.ssa_variable_to_expr = {} - - # dict of SSA expressions - self.expressions = {} - - # dict of SSA to original location - self.ssa_to_location = {} - - # Don't SSA IRDst - self.immutable_ids = set([self.ircfg.IRDst]) - - def get_regs(self, expr): - return get_expr_ids(expr) - - def transform(self, *args, **kwargs): - """Transforms into SSA""" - raise NotImplementedError("Abstract method") - - def get_block(self, loc_key): - """ - Returns an IRBlock - :param loc_key: LocKey instance - :return: IRBlock - """ - irblock = self.ircfg.blocks.get(loc_key, None) - - return irblock - - def reverse_variable(self, ssa_var): - """ - Transforms a variable in SSA form into non-SSA form - :param ssa_var: ExprId, variable in SSA form - :return: ExprId, variable in non-SSA form - """ - expr = self.ssa_variable_to_expr.get(ssa_var, ssa_var) - return expr - - def reset(self): - """Resets SSA transformation""" - self.blocks = {} - self.expressions = {} - self._stack_rhs = {} - self._stack_lhs = {} - self.ssa_to_location = {} - - def _gen_var_expr(self, expr, stack): - """ - Generates a variable expression in SSA form - :param expr: variable expression which will be translated - :param stack: self._stack_rhs or self._stack_lhs - :return: variable expression in SSA form - """ - index = stack[expr] - name = "%s.%d" % (expr.name, index) - ssa_var = ExprId(name, expr.size) - self.ssa_variable_to_expr[ssa_var] = expr - - return ssa_var - - def _transform_var_rhs(self, ssa_var): - """ - Transforms a variable on the right hand side into SSA - :param ssa_var: variable - :return: transformed variable - """ - # variable has never been on the LHS - if ssa_var not in self._stack_rhs: - return ssa_var - # variable 
has been on the LHS - stack = self._stack_rhs - return self._gen_var_expr(ssa_var, stack) - - def _transform_var_lhs(self, expr): - """ - Transforms a variable on the left hand side into SSA - :param expr: variable - :return: transformed variable - """ - # check if variable has already been on the LHS - if expr not in self._stack_lhs: - self._stack_lhs[expr] = 0 - # save last value for RHS transformation - self._stack_rhs[expr] = self._stack_lhs[expr] - - # generate SSA expression - stack = self._stack_lhs - ssa_var = self._gen_var_expr(expr, stack) - - return ssa_var - - def _transform_expression_lhs(self, dst): - """ - Transforms an expression on the left hand side into SSA - :param dst: expression - :return: expression in SSA form - """ - if dst.is_mem(): - # transform with last RHS instance - ssa_var = self._transform_expression_rhs(dst) - else: - # transform LHS - ssa_var = self._transform_var_lhs(dst) - - # increase SSA variable counter - self._stack_lhs[dst] += 1 - - return ssa_var - - def _transform_expression_rhs(self, src): - """ - Transforms an expression on the right hand side into SSA - :param src: expression - :return: expression in SSA form - """ - # dissect expression in variables - variables = self.get_regs(src) - src_ssa = src - # transform variables - for expr in variables: - ssa_var = self._transform_var_rhs(expr) - src_ssa = src_ssa.replace_expr({expr: ssa_var}) - - return src_ssa - - @staticmethod - def _parallel_instructions(assignblk): - """ - Extracts the instruction from a AssignBlock. - - Since instructions in a AssignBlock are evaluated - in parallel, memory instructions on the left hand - side will be inserted into the start of the list. - Then, memory instruction on the LHS will be - transformed firstly. 
- - :param assignblk: assignblock - :return: sorted list of expressions - """ - instructions = [] - for dst in assignblk: - # dst = src - aff = assignblk.dst2ExprAssign(dst) - # insert memory expression into start of list - if dst.is_mem(): - instructions.insert(0, aff) - else: - instructions.append(aff) - - return instructions - - @staticmethod - def _convert_block(irblock, ssa_list): - """ - Transforms an IRBlock inplace into SSA - :param irblock: IRBlock to be transformed - :param ssa_list: list of SSA expressions - """ - # iterator over SSA expressions - ssa_iter = iter(ssa_list) - new_irs = [] - # walk over IR blocks' assignblocks - for assignblk in irblock.assignblks: - # list of instructions - instructions = [] - # insert SSA instructions - for _ in assignblk: - instructions.append(next(ssa_iter)) - # replace instructions of assignblock in IRBlock - new_irs.append(AssignBlock(instructions, assignblk.instr)) - return IRBlock(irblock.loc_key, new_irs) - - def _rename_expressions(self, loc_key): - """ - Transforms variables and expressions - of an IRBlock into SSA. - - IR representations of an assembly instruction are evaluated - in parallel. Thus, RHS and LHS instructions will be performed - separately. 
- :param loc_key: IRBlock loc_key - """ - # list of IRBlock's SSA expressions - ssa_expressions_block = [] - - # retrieve IRBlock - irblock = self.get_block(loc_key) - if irblock is None: - # Incomplete graph - return - - # iterate block's IR expressions - for index, assignblk in enumerate(irblock.assignblks): - # list of parallel instructions - instructions = self._parallel_instructions(assignblk) - # list for transformed RHS expressions - rhs = deque() - - # transform RHS - for expr in instructions: - src = expr.src - src_ssa = self._transform_expression_rhs(src) - # save transformed RHS - rhs.append(src_ssa) - - # transform LHS - for expr in instructions: - if expr.dst in self.immutable_ids or expr.dst in self.ssa_variable_to_expr: - dst_ssa = expr.dst - else: - dst_ssa = self._transform_expression_lhs(expr.dst) - - # retrieve corresponding RHS expression - src_ssa = rhs.popleft() - - # rebuild SSA expression - expr = ExprAssign(dst_ssa, src_ssa) - self.expressions[dst_ssa] = src_ssa - self.ssa_to_location[dst_ssa] = (loc_key, index) - - - # append ssa expression to list - ssa_expressions_block.append(expr) - - # replace blocks IR expressions with corresponding SSA transformations - new_irblock = self._convert_block(irblock, ssa_expressions_block) - self.ircfg.blocks[loc_key] = new_irblock - - -class SSABlock(SSA): - """ - SSA transformation on block level - - It handles - - transformation of a single IRBlock into SSA - - reassembling an SSA expression into a non-SSA - expression through iterative resolving of the RHS - """ - - def transform(self, loc_key): - """ - Transforms a block into SSA form - :param loc_key: IRBlock loc_key - """ - self._rename_expressions(loc_key) - - def reassemble_expr(self, expr): - """ - Reassembles an expression in SSA form into a solely non-SSA expression - :param expr: expression - :return: non-SSA expression - """ - # worklist - todo = {expr.copy()} - - while todo: - # current expression - cur = todo.pop() - # RHS of current 
expression - cur_rhs = self.expressions[cur] - - # replace cur with RHS in expr - expr = expr.replace_expr({cur: cur_rhs}) - - # parse ExprIDs on RHS - ids_rhs = self.get_regs(cur_rhs) - - # add RHS ids to worklist - for id_rhs in ids_rhs: - if id_rhs in self.expressions: - todo.add(id_rhs) - return expr - - -class SSAPath(SSABlock): - """ - SSA transformation on path level - - It handles - - transformation of a path of IRBlocks into SSA - """ - - def transform(self, path): - """ - Transforms a path into SSA - :param path: list of IRBlock loc_key - """ - for block in path: - self._rename_expressions(block) - - -class SSADiGraph(SSA): - """ - SSA transformation on DiGraph level - - It handles - - transformation of a DiGraph into SSA - - generation, insertion and filling of phi nodes - - The implemented SSA form is known as minimal SSA. - """ - - PHI_STR = 'Phi' - - - def __init__(self, ircfg): - """ - Initialises SSA class for directed graphs - :param ircfg: instance of IRCFG - """ - super(SSADiGraph, self).__init__(ircfg) - - # variable definitions - self.defs = {} - - # dict of blocks' phi nodes - self._phinodes = {} - - # IRCFG control flow graph - self.graph = ircfg - - - def transform(self, head): - """Transforms into SSA""" - sanitize_graph_head(self.graph, head) - self._init_variable_defs(head) - self._place_phi(head) - self._rename(head) - self._insert_phi() - self._convert_phi() - self._fix_no_def_var(head) - - def reset(self): - """Resets SSA transformation""" - super(SSADiGraph, self).reset() - self.defs = {} - self._phinodes = {} - - def _init_variable_defs(self, head): - """ - Initialises all variable definitions and - assigns the corresponding IRBlocks. 
- - All variable definitions in self.defs contain - a set of IRBlocks in which the variable gets assigned - """ - - for loc_key in self.graph.walk_depth_first_forward(head): - irblock = self.get_block(loc_key) - if irblock is None: - # Incomplete graph - continue - - # search for block's IR definitions/destinations - for assignblk in irblock.assignblks: - for dst in assignblk: - # enforce ExprId - if dst.is_id(): - # exclude immutable ids - if dst in self.immutable_ids or dst in self.ssa_variable_to_expr: - continue - # map variable definition to blocks - self.defs.setdefault(dst, set()).add(irblock.loc_key) - - def _place_phi(self, head): - """ - For all blocks, empty phi functions will be placed for every - variable in the block's dominance frontier. - - self.phinodes contains a dict for every block in the - dominance frontier. In this dict, each variable - definition maps to its corresponding phi function. - - Source: Cytron, Ron, et al. - "An efficient method of computing static single assignment form" - Proceedings of the 16th ACM SIGPLAN-SIGACT symposium on - Principles of programming languages (1989), p. 
30 - """ - # dominance frontier - frontier = self.graph.compute_dominance_frontier(head) - - for variable in self.defs: - done = set() - todo = set() - intodo = set() - - for loc_key in self.defs[variable]: - todo.add(loc_key) - intodo.add(loc_key) - - while todo: - loc_key = todo.pop() - - # walk through block's dominance frontier - for node in frontier.get(loc_key, []): - if node in done: - continue - # place empty phi functions for a variable - empty_phi = self._gen_empty_phi(variable) - - # add empty phi node for variable in node - self._phinodes.setdefault(node, {})[variable] = empty_phi.src - done.add(node) - - if node not in intodo: - intodo.add(node) - todo.add(node) - - def _gen_empty_phi(self, expr): - """ - Generates an empty phi function for a variable - :param expr: variable - :return: ExprAssign, empty phi function for expr - """ - phi = ExprId(self.PHI_STR, expr.size) - return ExprAssign(expr, phi) - - def _fill_phi(self, *args): - """ - Fills a phi function with variables. - - phi(x.1, x.5, x.6) - - :param args: list of ExprId - :return: ExprOp - """ - return ExprOp(self.PHI_STR, *set(args)) - - def _rename(self, head): - """ - Transforms each variable expression in the CFG into SSA - by traversing the dominator tree in depth-first search. - - 1. Transform variables of phi functions on LHS into SSA - 2. Transform all non-phi expressions into SSA - 3. Update the successor's phi functions' RHS with current SSA variables - 4. Save current SSA variable stack for successors in the dominator tree - - Source: Cytron, Ron, et al. - "An efficient method of computing static single assignment form" - Proceedings of the 16th ACM SIGPLAN-SIGACT symposium on - Principles of programming languages (1989), p. 
31 - """ - # compute dominator tree - dominator_tree = self.graph.compute_dominator_tree(head) - - # init SSA variable stack - stack = [self._stack_rhs] - - # walk in DFS over the dominator tree - for loc_key in dominator_tree.walk_depth_first_forward(head): - # restore SSA variable stack of the predecessor in the dominator tree - self._stack_rhs = stack.pop().copy() - - # Transform variables of phi functions on LHS into SSA - self._rename_phi_lhs(loc_key) - - # Transform all non-phi expressions into SSA - self._rename_expressions(loc_key) - - # Update the successor's phi functions' RHS with current SSA variables - # walk over block's successors in the CFG - for successor in self.graph.successors_iter(loc_key): - self._rename_phi_rhs(successor) - - # Save current SSA variable stack for successors in the dominator tree - for _ in dominator_tree.successors_iter(loc_key): - stack.append(self._stack_rhs) - - def _rename_phi_lhs(self, loc_key): - """ - Transforms phi function's expressions of an IRBlock - on the left hand side into SSA - :param loc_key: IRBlock loc_key - """ - if loc_key in self._phinodes: - # create temporary list of phi function assignments for inplace renaming - tmp = list(self._phinodes[loc_key]) - - # iterate over all block's phi nodes - for dst in tmp: - # transform variables on LHS inplace - self._phinodes[loc_key][self._transform_expression_lhs(dst)] = self._phinodes[loc_key].pop(dst) - - def _rename_phi_rhs(self, successor): - """ - Transforms the right hand side of each successor's phi function - into SSA. Each transformed expression of a phi function's - right hand side is of the form - - phi(., ., ..., .) 
- - :param successor: loc_key of block's direct successor in the CFG - """ - # if successor is in block's dominance frontier - if successor in self._phinodes: - # walk over all variables on LHS - for dst, src in list(viewitems(self._phinodes[successor])): - # transform variable on RHS in non-SSA form - expr = self.reverse_variable(dst) - # transform expr into it's SSA form using current stack - src_ssa = self._transform_expression_rhs(expr) - - # Add src_ssa to phi args - if src.is_id(self.PHI_STR): - # phi function is empty - expr = self._fill_phi(src_ssa) - else: - # phi function contains at least one value - expr = self._fill_phi(src_ssa, *src.args) - - # update phi function - self._phinodes[successor][dst] = expr - - def _insert_phi(self): - """Inserts phi functions into the list of SSA expressions""" - for loc_key in self._phinodes: - for dst in self._phinodes[loc_key]: - self.expressions[dst] = self._phinodes[loc_key][dst] - - def _convert_phi(self): - """Inserts corresponding phi functions inplace - into IRBlock at the beginning""" - for loc_key in self._phinodes: - irblock = self.get_block(loc_key) - if irblock is None: - continue - assignblk = AssignBlock(self._phinodes[loc_key]) - # insert at the beginning - new_irs = IRBlock(loc_key, [assignblk] + list(irblock.assignblks)) - self.ircfg.blocks[loc_key] = new_irs - - def _fix_no_def_var(self, head): - """ - Replace phi source variables which are not ssa vars by ssa vars. 
- @head: loc_key of the graph head - """ - var_to_insert = set() - for loc_key in self._phinodes: - for dst, sources in viewitems(self._phinodes[loc_key]): - for src in sources.args: - if src in self.ssa_variable_to_expr: - continue - var_to_insert.add(src) - var_to_newname = {} - newname_to_var = {} - for var in var_to_insert: - new_var = self._transform_var_lhs(var) - var_to_newname[var] = new_var - newname_to_var[new_var] = var - - # Replace non modified node used in phi with new variable - self.ircfg.simplify(lambda expr:expr.replace_expr(var_to_newname)) - - if newname_to_var: - irblock = self.ircfg.blocks[head] - assignblks = list(irblock) - assignblks[0:0] = [AssignBlock(newname_to_var, assignblks[0].instr)] - self.ircfg.blocks[head] = IRBlock(head, assignblks) - - # Updt structure - for loc_key in self._phinodes: - for dst, sources in viewitems(self._phinodes[loc_key]): - self._phinodes[loc_key][dst] = sources.replace_expr(var_to_newname) - - for var, (loc_key, index) in list(viewitems(self.ssa_to_location)): - if loc_key == head: - self.ssa_to_location[var] = loc_key, index + 1 - - for newname, var in viewitems(newname_to_var): - self.ssa_to_location[newname] = head, 0 - self.ssa_variable_to_expr[newname] = var - self.expressions[newname] = var - - -def irblock_has_phi(irblock): - """ - Return True if @irblock has Phi assignments - @irblock: IRBlock instance - """ - if not irblock.assignblks: - return False - for src in viewvalues(irblock[0]): - return src.is_op('Phi') - return False - - -class Varinfo(object): - """Store liveness information for a variable""" - __slots__ = ["live_index", "loc_key", "index"] - - def __init__(self, live_index, loc_key, index): - self.live_index = live_index - self.loc_key = loc_key - self.index = index - - -def get_var_assignment_src(ircfg, node, variables): - """ - Return the variable of @variables which is written by the irblock at @node - @node: Location - @variables: a set of variable to test - """ - irblock = 
ircfg.blocks[node] - for assignblk in irblock: - result = set(assignblk).intersection(variables) - if not result: - continue - assert len(result) == 1 - return list(result)[0] - return None - - -def get_phi_sources_parent_block(ircfg, loc_key, sources): - """ - Return a dictionary linking a variable to it's direct parent label - which belong to a path which affects the node. - @loc_key: the starting node - @sources: set of variables to resolve - """ - source_to_parent = {} - for parent in ircfg.predecessors(loc_key): - done = set() - todo = set([parent]) - found = False - while todo: - node = todo.pop() - if node in done: - continue - done.add(node) - ret = get_var_assignment_src(ircfg, node, sources) - if ret: - source_to_parent.setdefault(ret, set()).add(parent) - found = True - break - for pred in ircfg.predecessors(node): - todo.add(pred) - assert found - return source_to_parent - - -class UnSSADiGraph(object): - """ - Implements unssa algorithm - Revisiting Out-of-SSA Translation for Correctness, Code Quality, and - Efficiency - """ - - def __init__(self, ssa, head, cfg_liveness): - self.cfg_liveness = cfg_liveness - self.ssa = ssa - self.head = head - - # Set of created variables - self.copy_vars = set() - # Virtual parallel copies - - # On loc_key's Phi node dst -> set((parent, src)) - self.phi_parent_sources = {} - # On loc_key's Phi node, loc_key -> set(Phi dsts) - self.phi_destinations = {} - # Phi's dst -> new var - self.phi_new_var = {} - # For a new_var representing dst: - # new_var -> set(parents of Phi's src in dst = Phi(src,...)) - self.new_var_to_srcs_parents = {} - # new_var -> set(variables to be coalesced with, named "merge_set") - self.merge_state = {} - - # Launch the algorithm in several steps - self.isolate_phi_nodes_block() - self.init_phis_merge_state() - self.order_ssa_var_dom() - self.aggressive_coalesce_block() - self.insert_parallel_copy() - self.replace_merge_sets() - self.remove_assign_eq() - - def insert_parallel_copy(self): - """ - 
Naive Out-of-SSA from CSSA (without coalescing for now) - - Replace Phi - - Create room for parallel copies in Phi's parents - """ - ircfg = self.ssa.graph - - for irblock in list(viewvalues(ircfg.blocks)): - if not irblock_has_phi(irblock): - continue - - # Replace Phi with Phi's dst = new_var - parallel_copies = {} - for dst in self.phi_destinations[irblock.loc_key]: - new_var = self.phi_new_var[dst] - parallel_copies[dst] = new_var - - assignblks = list(irblock) - assignblks[0] = AssignBlock(parallel_copies, irblock[0].instr) - new_irblock = IRBlock(irblock.loc_key, assignblks) - ircfg.blocks[irblock.loc_key] = new_irblock - - # Insert new_var = src in each Phi's parent, at the end of the block - parent_to_parallel_copies = {} - parallel_copies = {} - for dst in irblock[0]: - new_var = self.phi_new_var[dst] - for parent, src in self.phi_parent_sources[dst]: - parent_to_parallel_copies.setdefault(parent, {})[new_var] = src - - for parent, parallel_copies in viewitems(parent_to_parallel_copies): - parent = ircfg.blocks[parent] - assignblks = list(parent) - assignblks.append(AssignBlock(parallel_copies, parent[-1].instr)) - new_irblock = IRBlock(parent.loc_key, assignblks) - ircfg.blocks[parent.loc_key] = new_irblock - - def create_copy_var(self, var): - """ - Generate a new var standing for @var - @var: variable to replace - """ - new_var = ExprId('var%d' % len(self.copy_vars), var.size) - self.copy_vars.add(new_var) - return new_var - - def isolate_phi_nodes_block(self): - """ - Init structures and virtually insert parallel copy before/after each phi - node - """ - ircfg = self.ssa.graph - for irblock in viewvalues(ircfg.blocks): - if not irblock_has_phi(irblock): - continue - for dst, sources in viewitems(irblock[0]): - assert sources.is_op('Phi') - new_var = self.create_copy_var(dst) - self.phi_new_var[dst] = new_var - - var_to_parents = get_phi_sources_parent_block( - self.ssa.graph, - irblock.loc_key, - sources.args - ) - - for src in sources.args: - parents 
= var_to_parents[src] - self.new_var_to_srcs_parents.setdefault(new_var, set()).update(parents) - for parent in parents: - self.phi_parent_sources.setdefault(dst, set()).add((parent, src)) - - self.phi_destinations[irblock.loc_key] = set(irblock[0]) - - def init_phis_merge_state(self): - """ - Generate trivial coalescing of phi variable and itself - """ - for phi_new_var in viewvalues(self.phi_new_var): - self.merge_state.setdefault(phi_new_var, set([phi_new_var])) - - def order_ssa_var_dom(self): - """Compute dominance order of each ssa variable""" - ircfg = self.ssa.graph - - # compute dominator tree - dominator_tree = ircfg.compute_dominator_tree(self.head) - - # variable -> Varinfo - self.var_to_varinfo = {} - # live_index can later be used to compare dominance of AssignBlocks - live_index = 0 - - # walk in DFS over the dominator tree - for loc_key in dominator_tree.walk_depth_first_forward(self.head): - irblock = ircfg.blocks[loc_key] - - # Create live index for phi new vars - # They do not exist in the graph yet, so index is set to None - if irblock_has_phi(irblock): - for dst in irblock[0]: - if not dst.is_id(): - continue - new_var = self.phi_new_var[dst] - self.var_to_varinfo[new_var] = Varinfo(live_index, loc_key, None) - - live_index += 1 - - # Create live index for remaining assignments - for index, assignblk in enumerate(irblock): - used = False - for dst in assignblk: - if not dst.is_id(): - continue - if dst in self.ssa.immutable_ids: - # Will not be considered by the current algo, ignore it - # (for instance, IRDst) - continue - - assert dst not in self.var_to_varinfo - self.var_to_varinfo[dst] = Varinfo(live_index, loc_key, index) - used = True - if used: - live_index += 1 - - - def ssa_def_dominates(self, node_a, node_b): - """ - Return living index order of @node_a and @node_b - @node_a: Varinfo instance - @node_b: Varinfo instance - """ - ret = self.var_to_varinfo[node_a].live_index <= self.var_to_varinfo[node_b].live_index - return ret - - def 
merge_set_sort(self, merge_set): - """ - Return a sorted list of (live_index, var) from @merge_set in dominance - order - @merge_set: set of coalescing variables - """ - return sorted( - (self.var_to_varinfo[var].live_index, var) - for var in merge_set - ) - - def ssa_def_is_live_at(self, node_a, node_b, parent): - """ - Return True if @node_a is live during @node_b definition - If @parent is None, this is a liveness test for a post phi variable; - Else, it is a liveness test for a variable source of the phi node - - @node_a: Varinfo instance - @node_b: Varinfo instance - @parent: Optional parent location of the phi source - """ - loc_key_b, index_b = self.var_to_varinfo[node_b].loc_key, self.var_to_varinfo[node_b].index - if parent and index_b is None: - index_b = 0 - if node_a not in self.new_var_to_srcs_parents: - # node_a is not a new var (it is a "classic" var) - # -> use a basic liveness test - liveness_b = self.cfg_liveness.blocks[loc_key_b].infos[index_b] - return node_a in liveness_b.var_out - - for def_loc_key in self.new_var_to_srcs_parents[node_a]: - # Consider node_a as defined at the end of its parents blocks - # and compute liveness check accordingly - - if def_loc_key == parent: - # Same path as node_a definition, so SSA ensure b cannot be live - # on this path (otherwise, a Phi would already happen earlier) - continue - liveness_end_block = self.cfg_liveness.blocks[def_loc_key].infos[-1] - if node_b in liveness_end_block.var_out: - return True - return False - - def merge_nodes_interfere(self, node_a, node_b, parent): - """ - Return True if @node_a and @node_b interfere - @node_a: variable - @node_b: variable - @parent: Optional parent location of the phi source for liveness tests - - Interference check is: is x live at y definition (or reverse) - TODO: add Value-based interference improvement - """ - if self.var_to_varinfo[node_a].live_index == self.var_to_varinfo[node_b].live_index: - # Defined in the same AssignBlock -> interfere - return True - 
- if self.var_to_varinfo[node_a].live_index < self.var_to_varinfo[node_b].live_index: - return self.ssa_def_is_live_at(node_a, node_b, parent) - return self.ssa_def_is_live_at(node_b, node_a, parent) - - def merge_sets_interfere(self, merge_a, merge_b, parent): - """ - Return True if no variable in @merge_a and @merge_b interferes. - - Implementation of "Algorithm 2: Check intersection in a set of variables" - - @merge_a: a dom ordered list of equivalent variables - @merge_b: a dom ordered list of equivalent variables - @parent: Optional parent location of the phi source for liveness tests - """ - if merge_a == merge_b: - # No need to consider interference if equal - return False - - merge_a_list = self.merge_set_sort(merge_a) - merge_b_list = self.merge_set_sort(merge_b) - dom = [] - while merge_a_list or merge_b_list: - if not merge_a_list: - _, current = merge_b_list.pop(0) - elif not merge_b_list: - _, current = merge_a_list.pop(0) - else: - # compare live_indexes (standing for dominance) - if merge_a_list[-1] < merge_b_list[-1]: - _, current = merge_a_list.pop(0) - else: - _, current = merge_b_list.pop(0) - while dom and not self.ssa_def_dominates(dom[-1], current): - dom.pop() - - # Don't test node in same merge_set - if ( - # Is stack not empty? - dom and - # Trivial non-interference if dom.top() and current come - # from the same merge set - not (dom[-1] in merge_a and current in merge_a) and - not (dom[-1] in merge_b and current in merge_b) and - # Actually test for interference - self.merge_nodes_interfere(current, dom[-1], parent) - ): - return True - dom.append(current) - return False - - def aggressive_coalesce_parallel_copy(self, parallel_copies, parent): - """ - Try to coalesce variables each dst/src couple together from - @parallel_copies - - @parallel_copies: a dictionary representing dst/src parallel - assignments. 
- @parent: Optional parent location of the phi source for liveness tests - """ - for dst, src in viewitems(parallel_copies): - dst_merge = self.merge_state.setdefault(dst, set([dst])) - src_merge = self.merge_state.setdefault(src, set([src])) - if not self.merge_sets_interfere(dst_merge, src_merge, parent): - dst_merge.update(src_merge) - for node in dst_merge: - self.merge_state[node] = dst_merge - - def aggressive_coalesce_block(self): - """Try to coalesce phi var with their pre/post variables""" - - ircfg = self.ssa.graph - - # Run coalesce on the post phi parallel copy - for irblock in viewvalues(ircfg.blocks): - if not irblock_has_phi(irblock): - continue - parallel_copies = {} - for dst in self.phi_destinations[irblock.loc_key]: - parallel_copies[dst] = self.phi_new_var[dst] - self.aggressive_coalesce_parallel_copy(parallel_copies, None) - - # Run coalesce on the pre phi parallel copy - - # Stand for the virtual parallel copies at the end of Phi's block - # parents - parent_to_parallel_copies = {} - for dst in irblock[0]: - new_var = self.phi_new_var[dst] - for parent, src in self.phi_parent_sources[dst]: - parent_to_parallel_copies.setdefault(parent, {})[new_var] = src - - for parent, parallel_copies in viewitems(parent_to_parallel_copies): - self.aggressive_coalesce_parallel_copy(parallel_copies, parent) - - def get_best_merge_set_name(self, merge_set): - """ - For a given @merge_set, prefer an original SSA variable instead of a - created copy. In other case, take a random name. 
- @merge_set: set of equivalent expressions - """ - if not merge_set: - raise RuntimeError("Merge set should not be empty") - for var in merge_set: - if var not in self.copy_vars: - return var - # Get random name - return var - - - def replace_merge_sets(self): - """ - In the graph, replace all variables from merge state by their - representative variable - """ - replace = {} - merge_sets = set() - - # Elect representative for merge sets - merge_set_to_name = {} - for merge_set in viewvalues(self.merge_state): - frozen_merge_set = frozenset(merge_set) - merge_sets.add(frozen_merge_set) - var_name = self.get_best_merge_set_name(merge_set) - merge_set_to_name[frozen_merge_set] = var_name - - # Generate replacement of variable by their representative - for merge_set in merge_sets: - var_name = merge_set_to_name[merge_set] - merge_set = list(merge_set) - for var in merge_set: - replace[var] = var_name - - self.ssa.graph.simplify(lambda x: x.replace_expr(replace)) - - def remove_phi(self): - """ - Remove phi operators in @ifcfg - @ircfg: IRDiGraph instance - """ - - for irblock in list(viewvalues(self.ssa.graph.blocks)): - assignblks = list(irblock) - out = {} - for dst, src in viewitems(assignblks[0]): - if src.is_op('Phi'): - assert set([dst]) == set(src.args) - continue - out[dst] = src - assignblks[0] = AssignBlock(out, assignblks[0].instr) - self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) - - def remove_assign_eq(self): - """ - Remove trivial expressions (a=a) in the current graph - """ - for irblock in list(viewvalues(self.ssa.graph.blocks)): - assignblks = list(irblock) - for i, assignblk in enumerate(assignblks): - out = {} - for dst, src in viewitems(assignblk): - if dst == src: - continue - out[dst] = src - assignblks[i] = AssignBlock(out, assignblk.instr) - self.ssa.graph.blocks[irblock.loc_key] = IRBlock(irblock.loc_key, assignblks) diff --git a/miasm2/arch/__init__.py b/miasm2/arch/__init__.py deleted file mode 100644 index 
78e2dd3c..00000000 --- a/miasm2/arch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Architecture implementations" diff --git a/miasm2/arch/aarch64/__init__.py b/miasm2/arch/aarch64/__init__.py deleted file mode 100644 index bbad893b..00000000 --- a/miasm2/arch/aarch64/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py deleted file mode 100644 index 3a4f6446..00000000 --- a/miasm2/arch/aarch64/arch.py +++ /dev/null @@ -1,2175 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range -from future.utils import viewitems, viewvalues - -import logging -from pyparsing import * -from miasm2.expression import expression as m2_expr -from miasm2.core.cpu import * -from collections import defaultdict -from miasm2.core.bin_stream import bin_stream -from miasm2.arch.aarch64 import regs as regs_module -from miasm2.arch.aarch64.regs import * -from miasm2.core.cpu import log as log_cpu -from miasm2.expression.modint import uint32, uint64, mod_size2int -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp - -log = logging.getLogger("aarch64dis") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.DEBUG) - -# refs from A_e_armv8_arm.pdf - -# log_cpu.setLevel(logging.DEBUG) - - -replace_regs = { - W0: X0[:32], - W1: X1[:32], - W2: X2[:32], - W3: X3[:32], - W4: X4[:32], - W5: X5[:32], - W6: X6[:32], - W7: X7[:32], - W8: X8[:32], - W9: X9[:32], - - W10: X10[:32], - W11: X11[:32], - W12: X12[:32], - W13: X13[:32], - W14: X14[:32], - W15: X15[:32], - W16: X16[:32], - W17: X17[:32], - W18: X18[:32], - W19: X19[:32], - - W20: X20[:32], - W21: X21[:32], - W22: X22[:32], - W23: X23[:32], - W24: X24[:32], - W25: X25[:32], - W26: X26[:32], - W27: X27[:32], - W28: X28[:32], - W29: X29[:32], - - W30: LR[:32], - - WSP: SP[:32], - - WZR: m2_expr.ExprInt(0, 32), - 
XZR: m2_expr.ExprInt(0, 64), - -} - - - - -shift2expr_dct = {'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', 'ROR': '>>>'} -shift_str = ["LSL", "LSR", "ASR", "ROR"] -shift_expr = ["<<", ">>", "a>>", '>>>'] - - -def cb_shift(tokens): - return shift2expr_dct[tokens[0]] - - -def cb_extreg(tokens): - return tokens[0] - - -def cb_shiftreg(tokens): - if len(tokens) == 1: - return tokens[0] - elif len(tokens) == 3: - result = AstOp(tokens[1], tokens[0], tokens[2]) - return result - else: - raise ValueError('bad string') - - -def cb_shift_sc(tokens): - if len(tokens) == 1: - return tokens[0] - elif len(tokens) == 3: - if tokens[1] != '<<': - raise ValueError('bad op') - result = AstOp("slice_at", tokens[0], tokens[2]) - return result - else: - raise ValueError('bad string') - - -def cb_extend(tokens): - if len(tokens) == 1: - return tokens[0] - result = AstOp(tokens[1], tokens[0], tokens[2]) - return result - - -def cb_deref_pc_off(tokens): - if len(tokens) == 2 and tokens[0] == "PC": - result = AstOp('preinc', AstId(ExprId('PC', 64)), tokens[1]) - return result - raise ValueError('bad string') - -def cb_deref_pc_nooff(tokens): - if len(tokens) == 1 and tokens[0] == "PC": - result = AstOp('preinc', AstId(PC)) - return result - raise ValueError('bad string') - -all_binaryop_lsl_t = literal_list(shift_str).setParseAction(cb_shift) - -all_binaryop_shiftleft_t = literal_list(["LSL"]).setParseAction(cb_shift) - -extend_lst = ['UXTB', 'UXTH', 'UXTW', 'UXTX', 'SXTB', 'SXTH', 'SXTW', 'SXTX'] -extend2_lst = ['UXTW', 'LSL', 'SXTW', 'SXTX'] - -all_extend_t = literal_list(extend_lst).setParseAction(cb_extreg) -all_extend2_t = literal_list(extend2_lst).setParseAction(cb_extreg) - - -gpregz32_extend = (gpregsz32_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) -gpregz64_extend = (gpregsz64_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) - - -shift32_off = (gpregsz32_info.parser + Optional(all_binaryop_lsl_t + 
base_expr)).setParseAction(cb_shiftreg) -shift64_off = (gpregsz64_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) - - -shiftimm_imm_sc = (base_expr + all_binaryop_shiftleft_t + base_expr).setParseAction(cb_shift_sc) - -shiftimm_off_sc = shiftimm_imm_sc | base_expr - - -shift_off = (shift32_off | shift64_off) -reg_ext_off = (gpregz32_extend | gpregz64_extend) - -gpregs_32_64 = (gpregs32_info.parser | gpregs64_info.parser) -gpregsz_32_64 = (gpregsz32_info.parser | gpregsz64_info.parser | base_expr) - -simdregs = (simd08_info.parser | simd16_info.parser | simd32_info.parser | simd64_info.parser) -simdregs_h = (simd32_info.parser | simd64_info.parser | simd128_info.parser) - -simdregs_h_zero = (simd32_info.parser | simd64_info.parser | simd128_info.parser | base_expr) - - -gpregs_info = {32: gpregs32_info, - 64: gpregs64_info} -gpregsz_info = {32: gpregsz32_info, - 64: gpregsz64_info} - - -simds_info = {8: simd08_info, - 16: simd16_info, - 32: simd32_info, - 64: simd64_info, - 128: simd128_info} - - - -def cb_deref_nooff(t): - # XXX default - result = AstOp("preinc", t[0], AstInt(0)) - return result - - -def cb_deref_post(t): - assert len(t) == 2 - if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): - return - result = AstOp("postinc", *t) - return result - - -def cb_deref_pre(t): - assert len(t) == 2 - if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): - return - result = AstOp("preinc", *t) - return result - - -def cb_deref_pre_wb(t): - assert len(t) == 2 - if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): - return - result = AstOp("preinc_wb", *t) - return result - - -LBRACK = Suppress("[") -RBRACK = Suppress("]") -COMMA = Suppress(",") -POSTINC = Suppress("!") - -deref_nooff = (LBRACK + gpregs64_info.parser + RBRACK).setParseAction(cb_deref_nooff) -deref_off_post = (LBRACK + gpregs64_info.parser + RBRACK + COMMA + base_expr).setParseAction(cb_deref_post) -deref_off_pre = (LBRACK + 
gpregs64_info.parser + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pre) -deref_off_pre_wb = (LBRACK + gpregs64_info.parser + COMMA + base_expr + RBRACK + POSTINC).setParseAction(cb_deref_pre_wb) - -deref = (deref_off_post | deref_off_pre_wb | deref_off_pre | deref_nooff) - - -deref_pc_off = (LBRACK + Literal("PC") + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pc_off) -deref_pc_nooff = (LBRACK + Literal("PC") + RBRACK).setParseAction(cb_deref_pc_nooff) - -deref_pc = (deref_pc_off | deref_pc_nooff) - -def cb_deref_ext2op(t): - if len(t) == 4: - result = AstOp('segm', t[0], AstOp(t[2], t[1], t[3])) - return result - elif len(t) == 2: - result = AstOp('segm', *t) - return result - - raise ValueError("cad deref") - -deref_ext2 = (LBRACK + gpregs_32_64 + COMMA + gpregs_32_64 + Optional(all_extend2_t + base_expr) + RBRACK).setParseAction(cb_deref_ext2op) - - -class additional_info(object): - - def __init__(self): - self.except_on_instr = False - self.lnk = None - self.cond = None - -CONDS = [ - 'EQ', 'NE', 'CS', 'CC', - 'MI', 'PL', 'VS', 'VC', - 'HI', 'LS', 'GE', 'LT', - 'GT', 'LE', 'AL', 'NV'] - -CONDS_INV = [ - 'NE', 'EQ', 'CC', 'CS', - 'PL', 'MI', 'VC', 'VS', - 'LS', 'HI', 'LT', 'GE', - 'LE', 'GT', 'NV', 'AL'] - -BRCOND = ['B.' 
+ cond for cond in CONDS] + ['CBZ', 'CBNZ', 'TBZ', 'TBNZ'] - -# for conditional selec -conds_expr, _, conds_info = gen_regs(CONDS, {}) -conds_inv_expr, _, conds_inv_info = gen_regs(CONDS_INV, {}) - - - -class aarch64_arg(m_arg): - def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): - if size_hint is None: - size_hint = 64 - if fixed_size is None: - fixed_size = set() - if isinstance(value, AstId): - if value.name in all_regs_ids_byname: - reg = all_regs_ids_byname[value.name] - fixed_size.add(reg.size) - return reg - if isinstance(value.name, ExprId): - fixed_size.add(value.name.size) - return value.name - loc_key = loc_db.get_or_create_name_location(value.name.encode()) - return m2_expr.ExprLoc(loc_key, size_hint) - if isinstance(value, AstInt): - assert size_hint is not None - return m2_expr.ExprInt(value.value, size_hint) - if isinstance(value, AstOp): - if value.op == "segm": - segm = self.asm_ast_to_expr(value.args[0], loc_db) - ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) - return m2_expr.ExprOp('segm', segm, ptr) - - args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] - if len(fixed_size) == 0: - # No fixed size - pass - elif len(fixed_size) == 1: - # One fixed size, regen all - size = list(fixed_size)[0] - args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] - else: - raise ValueError("Size conflict") - - return m2_expr.ExprOp(value.op, *args) - return None - - -class instruction_aarch64(instruction): - __slots__ = [] - delayslot = 0 - - def __init__(self, *args, **kargs): - super(instruction_aarch64, self).__init__(*args, **kargs) - - @staticmethod - def arg2str(expr, index=None, loc_db=None): - wb = False - if expr.is_id() or expr.is_int(): - return str(expr) - elif expr.is_loc(): - if loc_db is not None: - return loc_db.pretty_str(expr.loc_key) - else: - return str(expr) - elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: - op_str = 
shift_str[shift_expr.index(expr.op)] - return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) - elif isinstance(expr, m2_expr.ExprOp) and expr.op == "slice_at": - return "%s LSL %s" % (expr.args[0], expr.args[1]) - elif isinstance(expr, m2_expr.ExprOp) and expr.op in extend_lst: - op_str = expr.op - return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) - elif isinstance(expr, m2_expr.ExprOp) and expr.op == "postinc": - if expr.args[1].arg != 0: - return "[%s], %s" % (expr.args[0], expr.args[1]) - else: - return "[%s]" % (expr.args[0]) - elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc_wb": - if expr.args[1].arg != 0: - return "[%s, %s]!" % (expr.args[0], expr.args[1]) - else: - return "[%s]" % (expr.args[0]) - elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc": - if len(expr.args) == 1: - return "[%s]" % (expr.args[0]) - elif not isinstance(expr.args[1], m2_expr.ExprInt) or expr.args[1].arg != 0: - return "[%s, %s]" % (expr.args[0], expr.args[1]) - else: - return "[%s]" % (expr.args[0]) - elif isinstance(expr, m2_expr.ExprOp) and expr.op == 'segm': - arg = expr.args[1] - if isinstance(arg, m2_expr.ExprId): - arg = str(arg) - elif arg.op == 'LSL' and arg.args[1].arg == 0: - arg = str(arg.args[0]) - else: - arg = "%s %s %s" % (arg.args[0], arg.op, arg.args[1]) - return '[%s, %s]' % (expr.args[0], arg) - - else: - raise NotImplementedError("bad op") - - def dstflow(self): - return self.name in BRCOND + ["B", "BL", "BR", "BLR"] - - def mnemo_flow_to_dst_index(self, name): - if self.name in ['CBZ', 'CBNZ']: - return 1 - elif self.name in ['TBZ', 'TBNZ']: - return 2 - else: - return 0 - - def dstflow2label(self, loc_db): - index = self.mnemo_flow_to_dst_index(self.name) - expr = self.args[index] - if not expr.is_int(): - return - addr = expr.arg + self.offset - loc_key = loc_db.get_or_create_offset_location(addr) - self.args[index] = m2_expr.ExprLoc(loc_key, expr.size) - - def breakflow(self): - return self.name in BRCOND + ["BR", "BLR", 
"RET", "ERET", "DRPS", "B", "BL"] - - def is_subcall(self): - return self.name in ["BLR", "BL"] - - def getdstflow(self, loc_db): - index = self.mnemo_flow_to_dst_index(self.name) - return [self.args[index]] - - def splitflow(self): - return self.name in BRCOND + ["BLR", "BL"] - - def get_symbol_size(self, symbol, loc_db): - return 64 - - def fixDstOffset(self): - index = self.mnemo_flow_to_dst_index(self.name) - e = self.args[index] - if self.offset is None: - raise ValueError('symbol not resolved %s' % l) - if not isinstance(e, m2_expr.ExprInt): - log.debug('dyn dst %r', e) - return - off = e.arg - self.offset - if int(off % 4): - raise ValueError('strange offset! %r' % off) - self.args[index] = m2_expr.ExprInt(int(off), 64) - - - -class mn_aarch64(cls_mn): - delayslot = 0 - name = "aarch64" - regs = regs_module - bintree = {} - num = 0 - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - pc = {'l': PC, 'b': PC} - sp = {'l': SP, 'b': SP} - instruction = instruction_aarch64 - max_instruction_len = 4 - alignment = 4 - - @classmethod - def getpc(cls, attrib=None): - return PC - - @classmethod - def getsp(cls, attrib=None): - return SP - - def additional_info(self): - info = additional_info() - info.lnk = False - if hasattr(self, "lnk"): - info.lnk = self.lnk.value != 0 - return info - - @classmethod - def getbits(cls, bs, attrib, start, n): - if not n: - return 0 - o = 0 - if n > bs.getlen() * 8: - raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) - while n: - offset = start // 8 - n_offset = cls.endian_offset(attrib, offset) - c = cls.getbytes(bs, n_offset, 1) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def endian_offset(cls, attrib, offset): - if attrib == "l": - return (offset & ~3) + 3 - offset % 4 - elif attrib == "b": - return offset - else: - 
raise NotImplementedError('bad attrib') - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l == 32, "len %r" % l - - @classmethod - def getmn(cls, name): - return name.upper() - - @classmethod - def mod_fields(cls, fields): - l = sum([x.l for x in fields]) - if l == 32: - return fields - return fields - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def value(self, mode): - v = super(mn_aarch64, self).value(mode) - if mode == 'l': - return [x[::-1] for x in v] - elif mode == 'b': - return [x for x in v] - else: - raise NotImplementedError('bad attrib') - - def get_symbol_size(self, symbol, loc_db, mode): - return 32 - - def reset_class(self): - super(mn_aarch64, self).reset_class() - if hasattr(self, "sf"): - self.sf.value = None - - -def aarch64op(name, fields, args=None, alias=False): - dct = {"fields": fields, "alias":alias} - if args is not None: - dct['args'] = args - type(name, (mn_aarch64,), dct) - - -class aarch64_gpreg_noarg(reg_noarg): - parser = gpregs_32_64 - gpregs_info = gpregs_info - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - self.expr = self.gpregs_info[size].expr[v] - return True - - def encode(self): - if not test_set_sf(self.parent, self.expr.size): - return False - if not self.expr.size in self.gpregs_info: - return False - if not self.expr in self.gpregs_info[self.expr.size].expr: - return False - self.value = self.gpregs_info[self.expr.size].expr.index(self.expr) - return True - - -class aarch64_simdreg(reg_noarg, aarch64_arg): - parser = simdregs - simd_size = [8, 16, 32, 64] - - def decode(self, v): - if self.parent.size.value > len(self.simd_size): - return False - size = self.simd_size[self.parent.size.value] - self.expr = simds_info[size].expr[v] - return True - - def encode(self): - if not self.expr.size in self.simd_size: - return False - if not self.expr in 
simds_info[self.expr.size].expr: - return False - self.value = simds_info[self.expr.size].expr.index(self.expr) - self.parent.size.value = self.simd_size.index(self.expr.size) - return True - - -class aarch64_simdreg_h(aarch64_simdreg): - parser = simdregs_h - simd_size = [32, 64, 128] - - -class aarch64_simdreg_32_64(aarch64_simdreg): - parser = simdregs_h - simd_size = [32, 64] - - -class aarch64_simdreg_32_64_zero(aarch64_simdreg_32_64): - parser = simdregs_h_zero - - def decode(self, v): - if v == 0 and self.parent.opc.value == 1: - size = 64 if self.parent.size.value else 32 - self.expr = m2_expr.ExprInt(0, size) - return True - else: - return super(aarch64_simdreg_32_64_zero, self).decode(v) - - def encode(self): - if isinstance(self.expr, m2_expr.ExprInt): - self.parent.opc.value = 1 - self.value = 0 - return True - else: - self.parent.opc.value = 0 - return super(aarch64_simdreg_32_64_zero, self).encode() - - -class aarch64_gpreg_isf(reg_noarg, aarch64_arg): - parser = gpregs_32_64 - - def decode(self, v): - size = 32 if self.parent.sf.value else 64 - self.expr = gpregs_info[size].expr[v] - return True - - def encode(self): - if not self.expr in gpregs_info[self.expr.size].expr: - return False - self.value = gpregs_info[self.expr.size].expr.index(self.expr) - self.parent.sf.value = 1 if self.expr.size == 32 else 0 - return True - - -class aarch64_gpreg(aarch64_gpreg_noarg, aarch64_arg): - pass - - -class aarch64_gpreg_n1(aarch64_gpreg): - - def decode(self, v): - if v == 0b11111: - return False - return super(aarch64_gpreg_n1, self).decode(v) - - def encode(self): - super(aarch64_gpreg_n1, self).encode() - return self.value != 0b11111 - - -class aarch64_gpregz(aarch64_gpreg_noarg, aarch64_arg): - parser = gpregsz_32_64 - gpregs_info = gpregsz_info - - -class aarch64_gpreg0(bsi, aarch64_arg): - parser = gpregsz_32_64 - gpregs_info = gpregsz_info - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - if v == 0x1F: - self.expr = 
m2_expr.ExprInt(0, size) - else: - self.expr = self.gpregs_info[size].expr[v] - return True - - def encode(self): - if isinstance(self.expr, m2_expr.ExprInt): - if self.expr.arg == 0: - self.value = 0x1F - return True - return False - if not self.expr.size in self.gpregs_info: - return False - if not test_set_sf(self.parent, self.expr.size): - return False - if not self.expr in self.gpregs_info[self.expr.size].expr: - return False - self.value = self.gpregs_info[self.expr.size].expr.index(self.expr) - return True - - -class aarch64_crreg(reg_noarg, aarch64_arg): - reg_info = cr_info - parser = reg_info.parser - - -class aarch64_gpreg32_nodec(bsi): - reg_info = gpregs32_info - - -class aarch64_gpreg64_nodec(bsi): - reg_info = gpregs64_info - - -class aarch64_gpreg32_noarg(reg_noarg): - reg_info = gpregs32_info - parser = reg_info.parser - - -class aarch64_gpreg32(aarch64_gpreg32_noarg, aarch64_arg): - reg_info = gpregs32_info - parser = reg_info.parser - - -class aarch64_gpreg64_noarg(reg_noarg): - reg_info = gpregs64_info - parser = reg_info.parser - - -class aarch64_gpreg64(reg_noarg, aarch64_arg): - reg_info = gpregs64_info - parser = reg_info.parser - - -class aarch64_gpregz32_noarg(reg_noarg): - reg_info = gpregsz32_info - parser = reg_info.parser - - -class aarch64_gpregz32(aarch64_gpreg32_noarg, aarch64_arg): - reg_info = gpregsz32_info - parser = reg_info.parser - - -class aarch64_gpregz64_noarg(reg_noarg): - reg_info = gpregsz64_info - parser = reg_info.parser - - -class aarch64_gpregz64(reg_noarg, aarch64_arg): - reg_info = gpregsz64_info - parser = reg_info.parser - - -class aarch64_simd08_noarg(reg_noarg): - reg_info = simd08_info - parser = reg_info.parser - - -class aarch64_simd08(aarch64_simd08_noarg, aarch64_arg): - reg_info = simd08_info - parser = reg_info.parser - - -class aarch64_simd16_noarg(reg_noarg): - reg_info = simd16_info - parser = reg_info.parser - - -class aarch64_simd16(aarch64_simd16_noarg, aarch64_arg): - reg_info = simd16_info - 
parser = reg_info.parser - - -class aarch64_simd32_noarg(reg_noarg): - reg_info = simd32_info - parser = reg_info.parser - - -class aarch64_simd32(aarch64_simd32_noarg, aarch64_arg): - reg_info = simd32_info - parser = reg_info.parser - - -class aarch64_simd64_noarg(reg_noarg): - reg_info = simd64_info - parser = reg_info.parser - - -class aarch64_simd64(aarch64_simd64_noarg, aarch64_arg): - reg_info = simd64_info - parser = reg_info.parser - - -class aarch64_simd128_noarg(reg_noarg): - reg_info = simd128_info - parser = reg_info.parser - - -class aarch64_simd128(aarch64_simd128_noarg, aarch64_arg): - reg_info = simd128_info - parser = reg_info.parser - - -class aarch64_imm_32(imm_noarg, aarch64_arg): - parser = base_expr - - -class aarch64_imm_64(aarch64_imm_32): - parser = base_expr - - -class aarch64_int64_noarg(int32_noarg): - parser = base_expr - intsize = 64 - intmask = (1 << intsize) - 1 - int2expr = lambda self, x: m2_expr.ExprInt( - sign_ext(x, self.l, self.intsize), 64) - - -class aarch64_uint64_noarg(imm_noarg): - parser = base_expr - intsize = 64 - intmask = (1 << intsize) - 1 - int2expr = lambda self, x: m2_expr.ExprInt(x, 64) - - -class aarch64_uint64(aarch64_uint64_noarg, aarch64_arg): - parser = base_expr - - -def set_imm_to_size(size, expr): - if size == expr.size: - return expr - if size > expr.size: - expr = m2_expr.ExprInt(int(expr), size) - else: - if expr.arg > (1 << size) - 1: - return None - expr = m2_expr.ExprInt(int(expr), size) - return expr - - -class aarch64_imm_sf(imm_noarg): - parser = base_expr - - def fromstring(self, text, loc_db, parser_result=None): - start, stop = super(aarch64_imm_sf, self).fromstring(text, loc_db, parser_result) - if start is None: - return start, stop - size = self.parent.args[0].expr.size - if self.expr in gpregs64_info.expr + gpregs32_info.expr: - return None, None - if isinstance(self.expr, m2_expr.ExprOp): - return False - expr = set_imm_to_size(size, self.expr) - if expr is None: - return None, None - 
self.expr = expr - return start, stop - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - if not test_set_sf(self.parent, self.expr.size): - return False - value = int(self.expr) - if value >= 1 << self.l: - return False - self.value = value - return True - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - self.expr = m2_expr.ExprInt(v, size) - return True - - -class aarch64_imm_sft(aarch64_imm_sf, aarch64_arg): - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - if not test_set_sf(self.parent, self.expr.size): - return False - value = int(self.expr) - if value < 1 << self.l: - self.parent.shift.value = 0 - else: - if value & 0xFFF: - return False - value >>= 12 - if value >= 1 << self.l: - return False - self.parent.shift.value = 1 - self.value = value - return True - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - if self.parent.shift.value == 0: - self.expr = m2_expr.ExprInt(v, size) - elif self.parent.shift.value == 1: - self.expr = m2_expr.ExprInt(v << 12, size) - else: - return False - return True - -OPTION2SIZE = [32, 32, 32, 64, - 32, 32, 32, 64] - - -class aarch64_gpreg_ext(reg_noarg, aarch64_arg): - parser = reg_ext_off - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprOp): - return False - if self.expr.op not in extend_lst: - return False - reg, amount = self.expr.args - - if not reg in gpregsz_info[self.expr.size].expr: - return False - self.value = gpregsz_info[self.expr.size].expr.index(reg) - option = extend_lst.index(self.expr.op) - if self.expr.size != OPTION2SIZE[option]: - if not test_set_sf(self.parent, self.expr.size): - return False - self.parent.option.value = option - self.parent.imm.value = int(amount) - return True - - def decode(self, v): - if self.parent.sf.value == 0: - size = 64 if self.parent.sf.value else 32 - else: - size = OPTION2SIZE[self.parent.option.value] - reg = gpregsz_info[size].expr[v] - - 
self.expr = m2_expr.ExprOp(extend_lst[self.parent.option.value], - reg, m2_expr.ExprInt(self.parent.imm.value, reg.size)) - return True - -EXT2_OP = { - 0b010: 'UXTW', - 0b011: 'LSL', - 0b110: 'SXTW', - 0b111: 'SXTX' -} - -EXT2_OP_INV = dict((value, key) for key, value in viewitems(EXT2_OP)) - - -class aarch64_gpreg_ext2(reg_noarg, aarch64_arg): - parser = deref_ext2 - - def get_size(self): - return self.parent.size.value - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprOp): - return False - if len(self.expr.args) != 2: - return False - arg0, arg1 = self.expr.args - if (self.expr.is_op("preinc") and arg0.is_id() and arg1.is_id()): - self.parent.shift.value = 0 - self.parent.rn.value = self.parent.rn.reg_info.expr.index(arg0) - self.value = gpregs_info[arg1.size].expr.index(arg1) - self.parent.option.value = 0b011 - return True - if not (isinstance(self.expr, m2_expr.ExprOp) and self.expr.op == 'segm'): - return False - if not arg0 in self.parent.rn.reg_info.expr: - return False - self.parent.rn.value = self.parent.rn.reg_info.expr.index(arg0) - is_reg = False - self.parent.shift.value = 0 - if isinstance(arg1, m2_expr.ExprId): - reg = arg1 - self.parent.option.value = 0b011 - is_reg = True - elif isinstance(arg1, m2_expr.ExprOp) and arg1.op in viewvalues(EXT2_OP): - reg = arg1.args[0] - else: - return False - if not (reg.size in gpregs_info and - reg in gpregs_info[reg.size].expr): - return False - self.value = gpregs_info[reg.size].expr.index(reg) - if is_reg: - return True - if not (isinstance(arg1.args[1], m2_expr.ExprInt)): - return False - if arg1.op not in EXT2_OP_INV: - return False - self.parent.option.value = EXT2_OP_INV[arg1.op] - if arg1.args[1].arg == 0: - self.parent.shift.value = 0 - return True - - if arg1.args[1].arg != self.get_size(): - return False - - self.parent.shift.value = 1 - - return True - - def decode(self, v): - opt = self.parent.option.value - if opt in [0, 1, 4, 5]: - return False - elif opt in [2, 6]: - reg_expr = 
gpregsz32_info.expr - elif opt in [3, 7]: - reg_expr = gpregsz64_info.expr - arg = reg_expr[v] - - if opt in EXT2_OP: - if self.parent.shift.value == 1: - arg = m2_expr.ExprOp(EXT2_OP[opt], arg, - m2_expr.ExprInt(self.get_size(), arg.size)) - else: - arg = m2_expr.ExprOp(EXT2_OP[opt], arg, - m2_expr.ExprInt(0, arg.size)) - - reg = self.parent.rn.reg_info.expr[self.parent.rn.value] - self.expr = m2_expr.ExprOp('segm', reg, arg) - return True - - -class aarch64_gpreg_ext2_128(aarch64_gpreg_ext2): - - def get_size(self): - return 4 - - -def test_set_sf(parent, size): - if not hasattr(parent, 'sf'): - return False - if parent.sf.value == None: - parent.sf.value = 1 if size == 64 else 0 - return True - psize = 64 if parent.sf.value else 32 - return psize == size - - -class aarch64_gpreg_sftimm(reg_noarg, aarch64_arg): - reg_info = gpregsz_info - parser = shift_off - - def encode(self): - size = self.expr.size - if not test_set_sf(self.parent, size): - return False - if isinstance(self.expr, m2_expr.ExprId): - if not size in gpregs_info: - return False - if not self.expr in self.reg_info[size].expr: - return False - self.parent.shift.value = 0 - self.parent.imm.value = 0 - self.value = self.reg_info[size].expr.index(self.expr) - return True - - if not isinstance(self.expr, m2_expr.ExprOp): - return False - if not self.expr.op in shift_expr: - return False - args = self.expr.args - if not args[0] in self.reg_info[size].expr: - return False - if not isinstance(args[1], m2_expr.ExprInt): - return False - self.parent.shift.value = shift_expr.index(self.expr.op) - self.parent.imm.value = int(args[1]) - self.value = self.reg_info[size].expr.index(args[0]) - return True - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - e = self.reg_info[size].expr[v] - amount = self.parent.imm.value - if amount != 0: - e = m2_expr.ExprOp( - shift_expr[self.parent.shift.value], e, m2_expr.ExprInt(amount, e.size)) - self.expr = e - return True - - -def ror(value, amount, 
size): - mask = (1 << size) - 1 - return ((value >> amount) | (value << (size - amount))) & mask - - -def rol(value, amount, size): - mask = (1 << size) - 1 - return ((value << amount) | (value >> (size - amount)) & mask) - -# This implementation is inspired from ARM ISA v8.2 -# Exact Reference name: -# "ARM Architecture Reference Manual ARMv8, for ARMv8-A architecture profile" - -class ReservedValue(Exception): - """Reserved Value, should not happen""" - pass - -class NotEncodable(Exception): - """Instruction is not encodable""" - pass - -class bits(object): - """Stand for ARM ASL 'bits' type, ie. a bit vector""" - - __slots__ = ["size", "value"] - - def __init__(self, size, value): - """Instantiate a bitvector of size @size with value @value""" - value = int(value) - self.size = int(size) - if value & self.mask != value: - raise ValueError( - "Value %r is too large for %r bits (mask %r)", - value, - size, - self.mask - ) - self.value = value - - def concat_left(self, other_bits): - """Return a new bits instance for @other_bits . 
self""" - return bits(self.size + other_bits.size, - self.value | (other_bits.value << self.size)) - - @property - def mask(self): - return (1 << self.size) - 1 - - def __invert__(self): - return bits(self.size, self.value ^ self.mask) - - def __int__(self): - return self.value - - def __and__(self, other_bits): - assert other_bits.size == self.size - return bits(self.size, self.value & other_bits.value) - - def __eq__(self, other_bits): - return all((self.size == other_bits.size, - self.value == other_bits.value)) - - def __getitem__(self, info): - if isinstance(info, slice): - start = info.start if info.start else 0 - stop = info.stop if info.stop else self.value - if info.step is not None: - raise RuntimeError("Not implemented") - mask = (1 << stop) - 1 - return bits(stop - start, - (self.value >> start) & mask) - else: - raise RuntimeError("Not implemented") - - @property - def pop_count(self): - "Population count: number of bit set" - count = 0 - value = self.value - while (value > 0): - if value & 1 == 1: - count += 1 - value >>= 1 - return count - - def __str__(self): - return "'%s'" % "".join('1' if self.value & (1 << i) else '0' - for i in reversed(range(self.size))) - -# From J1-6035 -def HighestSetBit(x): - for i in reversed(range(x.size)): - if x.value & (1 << i): - return i - return - 1 - -# From J1-6037 -def Ones(N): - return bits(N, (1 << N) - 1) - -# From J1-6038 -def ROR(x, shift): - if shift == 0: - return x - return bits(x.size, ror(UInt(x), shift, x.size)) - -# From J1-6038 -def Replicate(x, N): - assert N % x.size == 0 - new = x - while new.size < N: - new = new.concat_left(x) - return new - -# From J1-6039 -def UInt(x): - return int(x) - -# From J1-6039 -def ZeroExtend(x, N): - assert N >= x.size - return bits(N, x.value) - -# From J1-5906 -def DecodeBitMasks(M, immN, imms, immr, immediate): - """ - @M: 32 or 64 - @immN: 1-bit - @imms: 6-bit - @immr: 6-bit - @immediate: boolean - """ - len_ = HighestSetBit((~imms).concat_left(immN)) - if len_ 
< 1: - raise ReservedValue() - assert M >= (1 << len_) - - levels = ZeroExtend(Ones(len_), 6) - - if immediate and (imms & levels) == levels: - raise ReservedValue() - S = UInt(imms & levels); - R = UInt(immr & levels); - - esize = 1 << len_ - welem = ZeroExtend(Ones(S + 1), esize) - wmask = Replicate(ROR(welem, R), M) - - # For now, 'tmask' is unused: - # - # diff = S - R; - # d = UInt(bits(len_, diff)) - # telem = ZeroExtend(Ones(d + 1), esize) - # tmask = Replicate(telem, M) - - return wmask, None - -# EncodeBitMasks doesn't have any equivalent in ARM ASL shared functions -# This implementation "reverses" DecodeBitMasks flow -def EncodeBitMasks(wmask): - # Find replicate - M = wmask.size - for i in range(1, M + 1): - if M % i != 0: - continue - if wmask == Replicate(wmask[:i], M): - break - else: - raise NotEncodable - - # Find ROR value: welem is only '1's - welem_after_ror = wmask[:i] - esize = welem_after_ror.size - S = welem_after_ror.pop_count - 1 - welem = ZeroExtend(Ones(S + 1), esize) - for i in range(welem_after_ror.size): - if ROR(welem, i) == welem_after_ror: - break - else: - raise NotEncodable - R = i - - # Find len value - for i in range(M): - if (1 << i) == esize: - break - else: - raise NotEncodable - len_ = i - levels = ZeroExtend(Ones(len_), 6) - levels = UInt(levels) - - if len_ == 6: - # N = 1 - immn = 1 - imms = S - else: - # N = 0, NOT(imms) have to be considered - immn = 0 - mask = (1 << ((6 - len_ - 1))) - 1 - mask <<= (len_ + 1) - imms = S | mask - immr = R - return immr, imms, immn - - -class aarch64_imm_nsr(aarch64_imm_sf, aarch64_arg): - parser = base_expr - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - bitmask, _ = DecodeBitMasks(size, - bits(1, self.parent.immn.value), - bits(6, v), - bits(6, self.parent.immr.value), - True - ) - self.expr = m2_expr.ExprInt(UInt(bitmask), - size) - return True - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - if not 
test_set_sf(self.parent, self.expr.size): - return False - value = self.expr.arg - if value == 0: - return False - - try: - immr, imms, immn = EncodeBitMasks(bits(self.expr.size, value)) - except NotEncodable: - return False - self.parent.immr.value = immr - self.parent.immn.value = immn - self.value = imms - return True - - -class aarch64_pcoff(aarch64_imm_32): - parser = base_expr - - -class aarch64_immhip_page(aarch64_imm_32): - parser = base_expr - - def decode(self, v): - v = ((v << 2) | self.parent.immlo.value) << 12 - v = sign_ext(v, 33, 64) - self.expr = m2_expr.ExprInt(v, 64) - return True - - def encode(self): - v = int(self.expr) - if v & (1 << 63): - v &= (1 << 33) - 1 - if v & 0xfff: - return False - v >>= 12 - self.parent.immlo.value = v & 3 - v >>= 2 - self.value = v - return True - - -class aarch64_immhi_page(aarch64_imm_32): - parser = base_expr - - def decode(self, v): - v = ((v << 2) | self.parent.immlo.value) - v = sign_ext(v, 21, 64) - self.expr = m2_expr.ExprInt(v, 64) - return True - - def encode(self): - v = int(self.expr) - if v & (1 << 63): - v &= (1 << 33) - 1 - self.parent.immlo.value = v & 3 - v >>= 2 - if v > (1 << 19) - 1: - return False - self.value = v & ((1 << 19) - 1) - return True - - -class aarch64_imm_hw(aarch64_arg): - parser = base_expr - shift_op = '<<' - - def decode(self, v): - size = 64 if self.parent.sf.value else 32 - self.expr = m2_expr.ExprInt(v << (16 * self.parent.hw.value), size) - return True - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - size = self.parent.args[0].expr.size - if set_imm_to_size(size, self.expr) is None: - return False - value = int(self.expr) - mask = (1 << size) - 1 - for i in range(size // 16): - if ((0xffff << (i * 16)) ^ mask) & value: - continue - self.parent.hw.value = i - self.value = value >> (i * 16) - return True - return False - - -class aarch64_imm_hw_sc(aarch64_arg): - parser = shiftimm_off_sc - shift_op = 'slice_at' - - def decode(self, v): - 
size = 64 if self.parent.sf.value else 32 - expr = m2_expr.ExprInt(v, size) - amount = m2_expr.ExprInt(16 * self.parent.hw.value, size) - if self.parent.hw.value: - self.expr = m2_expr.ExprOp(self.shift_op, expr, amount) - else: - self.expr = expr - return True - - def encode(self): - if isinstance(self.expr, m2_expr.ExprInt): - if self.expr.arg > 0xFFFF: - return False - self.value = int(self.expr) - self.parent.hw.value = 0 - return True - - if not (isinstance(self.expr, m2_expr.ExprOp) and - self.expr.op == self.shift_op and - len(self.expr.args) == 2 and - isinstance(self.expr.args[0], m2_expr.ExprInt) and - isinstance(self.expr.args[1], m2_expr.ExprInt)): - return False - if set_imm_to_size(self.parent.args[0].expr.size, self.expr.args[0]) is None: - return False - if set_imm_to_size(self.parent.args[0].expr.size, self.expr.args[1]) is None: - return False - arg, amount = [int(arg) for arg in self.expr.args] - if arg > 0xFFFF: - return False - if amount % 16 or amount // 16 > 4: - return False - self.value = arg - self.parent.hw.value = amount // 16 - return True - - -class aarch64_offs(imm_noarg, aarch64_arg): - parser = base_expr - - def decode(self, v): - v = v & self.lmask - v = (v << 2) - v = sign_ext(v, (self.l + 2), 64) - self.expr = m2_expr.ExprInt(v, 64) - return True - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - v = int(self.expr) - if v & (1 << 63): - v &= (1 << (self.l + 2)) - 1 - self.value = v >> 2 - return True - - - -class aarch64_offs_pc(imm_noarg, aarch64_arg): - parser = deref_pc - - def decode(self, v): - v = v & self.lmask - v = (v << 2) - v = sign_ext(v, (self.l + 2), 64) - self.expr = m2_expr.ExprOp("preinc", PC, m2_expr.ExprInt(v, 64)) - return True - - def encode(self): - if not self.expr.is_op('preinc'): - return False - if self.expr.args == (PC,): - v = 0 - elif (len(self.expr.args) == 2 and - self.expr.args[0] == PC and - self.expr.args[1].is_int()): - v = int(self.expr.args[1]) - else: - 
return None - if v & (1 << 63): - v &= (1 << (self.l + 2)) - 1 - self.value = v >> 2 - return True - - - -def set_mem_off(parent, imm): - if hasattr(parent, 'simm'): - mask = (1 << parent.simm.l) - 1 - if imm != sign_ext(imm & mask, parent.simm.l, 64): - return False - parent.simm.value = imm & mask - elif hasattr(parent, 'uimm'): - mask = (1 << parent.uimm.l) - 1 - if imm > mask: - return False - parent.uimm.value = imm - else: - raise ValueError('unknown imm') - return True - - -def get_size(parent): - if not hasattr(parent, "size"): - return 0 - if hasattr(parent.size, "amount"): - size = parent.size.amount - else: - size = parent.size.value - return size - - -class aarch64_deref(aarch64_arg): - parser = deref - - def decode_w_size(self, off): - return off - - def encode_w_size(self, off): - return off - - def get_postpre(self, parent): - if hasattr(self.parent, "postpre"): - if self.parent.postpre.value == 0: - op = 'postinc' - else: - op = 'preinc_wb' - else: - op = 'preinc' - return op - - def decode(self, v): - reg = gpregs64_info.expr[v] - off = self.parent.imm.expr.arg - op = self.get_postpre(self.parent) - off = self.decode_w_size(off) - self.expr = m2_expr.ExprOp(op, reg, m2_expr.ExprInt(off, 64)) - return True - - def encode(self): - expr = self.expr - if not isinstance(expr, m2_expr.ExprOp): - return False - if not expr.op in ['postinc', 'preinc_wb', 'preinc']: - return False - if hasattr(self.parent, "postpre"): - if expr.op == 'postinc': - self.parent.postpre.value = 0 - else: - self.parent.postpre.value = 1 - if len(expr.args) != 2: - return False - reg, off = expr.args - if not reg in gpregs64_info.expr: - return False - if not isinstance(off, m2_expr.ExprInt): - return False - imm = int(off) - imm = self.encode_w_size(imm) - if imm is False: - return False - self.parent.imm.expr = m2_expr.ExprInt(imm, 64) - if not self.parent.imm.encode(): - return False - self.value = gpregs64_info.expr.index(reg) - return True - - -class 
aarch64_deref_size(aarch64_deref): - - def decode_w_size(self, off): - size = get_size(self.parent) - return off << size - - def encode_w_size(self, off): - size = get_size(self.parent) - if size: - if off & ((1 << size) - 1): - return False - off >>= size - return off - - -class aarch64_deref_nooff(aarch64_deref): - parser = deref_nooff - - def decode(self, v): - reg = gpregs64_info.expr[v] - self.expr = m2_expr.ExprOp('preinc', reg) - return True - - def encode(self): - expr = self.expr - if not isinstance(expr, m2_expr.ExprOp): - return False - if expr.op != 'preinc': - return False - if len(expr.args) == 1: - reg = expr.args[0] - elif len(expr.args) == 2: - reg, off = expr.args - if not isinstance(off, m2_expr.ExprInt): - return False - if off.arg != 0: - return False - else: - return False - - if not reg in gpregs64_info.expr: - return False - self.value = gpregs64_info.expr.index(reg) - return True - - -class aarch64_sf_scale(aarch64_deref): - size2scale = {32: 2, 64: 3} - - def decode_w_size(self, off): - size = 2 + self.parent.sf.value - return off << size - - def encode_w_size(self, off): - size = self.parent.args[0].expr.size - if not size in self.size2scale: - return False - scale = self.size2scale[size] - off = int(mod_size2int[size](off) >> scale) - return off - - -class aarch64_sd_scale(aarch64_sf_scale): - size2scale = {32: 2, 64: 3, 128: 4} - - def decode_w_size(self, off): - size = 2 + self.parent.size.value - return off << size - - -class aarch64_eq(bsi): - - def decode(self, v): - return getattr(self.parent, self.ref).value == v - - def encode(self): - self.value = getattr(self.parent, self.ref).value - return True -modf = bs_mod_name(l=1, fname='modf', mn_mod=['', 'S']) -sf = bs(l=1, fname='sf', order=-1) - - -class aarch64_cond_arg(reg_noarg, aarch64_arg): - reg_info = conds_info - parser = reg_info.parser - - -class aarch64_cond_inv_arg(reg_noarg, aarch64_arg): - reg_info = conds_inv_info - parser = reg_info.parser - - -class 
aarch64_b40(aarch64_arg): - parser = base_expr - - def decode(self, v): - self.expr = m2_expr.ExprInt( - (self.parent.sf.value << self.l) | v, self.parent.rt.expr.size) - return True - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - size = self.parent.args[0].expr.size - value = int(self.expr) - self.value = value & self.lmask - if self.parent.sf.value is None: - self.parent.sf.value = value >> self.l - return True - else: - return value >> self.l == self.parent.sf.value - - -shift = bs(l=2, fname='shift') - -shiftb = bs(l=1, fname='shift', order=-1) - - -rn64_v = bs(l=5, cls=(aarch64_gpreg64_nodec,), fname='rn', order=-1) - -rn = bs(l=5, cls=(aarch64_gpreg,), fname="rn") -rs = bs(l=5, cls=(aarch64_gpreg,), fname="rs") -rm = bs(l=5, cls=(aarch64_gpreg,), fname="rm") -rd = bs(l=5, cls=(aarch64_gpreg,), fname="rd") -ra = bs(l=5, cls=(aarch64_gpregz,), fname="ra") -rt = bs(l=5, cls=(aarch64_gpregz,), fname="rt") -rt2 = bs(l=5, cls=(aarch64_gpregz,), fname="rt2") -rn0 = bs(l=5, cls=(aarch64_gpreg0,), fname="rn") - -rmz = bs(l=5, cls=(aarch64_gpregz,), fname="rm") -rnz = bs(l=5, cls=(aarch64_gpregz,), fname="rn") -rdz = bs(l=5, cls=(aarch64_gpregz,), fname="rd") - - -rn_n1 = bs(l=5, cls=(aarch64_gpreg_n1,), fname="rn") -rm_n1 = bs(l=5, cls=(aarch64_gpreg_n1,), fname="rm") - - -rn_na = bs(l=5, cls=(aarch64_gpreg_noarg,), fname="rn", order=-1) -rn32_na = bs(l=5, cls=(aarch64_gpreg32_noarg,), fname="rn", order=-1) -rn64_na = bs(l=5, cls=(aarch64_gpreg64_noarg,), fname="rn", order=-1) - -sd1 = bs(l=5, cls=(aarch64_simdreg_h,), fname="rt") -sd2 = bs(l=5, cls=(aarch64_simdreg_h,), fname="rt2") - -sdn_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="rn") -sdd_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="rd") -sdm_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="rm") -sda_32_64 = bs(l=5, cls=(aarch64_simdreg_32_64,), fname="ra") - - -sdm_32_64_zero = bs(l=5, cls=(aarch64_simdreg_32_64_zero,), fname="rm") - -crn = bs(l=4, 
cls=(aarch64_crreg,), fname="crn") -crm = bs(l=4, cls=(aarch64_crreg,), fname="crm") - - -rn64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rn") -rs64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rs") -rm64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rm") -rd64 = bs(l=5, cls=(aarch64_gpreg64,), fname="rd") -rt64 = bs(l=5, cls=(aarch64_gpregz64,), fname="rt") -ra64 = bs(l=5, cls=(aarch64_gpregz64,), fname="ra") - -rn32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rn") -rm32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rm") -rd32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rd") -rs32 = bs(l=5, cls=(aarch64_gpreg32,), fname="rs") - -sd08 = bs(l=5, cls=(aarch64_simd08,), fname="rd") -sd16 = bs(l=5, cls=(aarch64_simd16,), fname="rd") -sd32 = bs(l=5, cls=(aarch64_simd32,), fname="rd") -sd64 = bs(l=5, cls=(aarch64_simd64,), fname="rd") -sd128 = bs(l=5, cls=(aarch64_simd128,), fname="rd") - -sn08 = bs(l=5, cls=(aarch64_simd08,), fname="rn") -sn16 = bs(l=5, cls=(aarch64_simd16,), fname="rn") -sn32 = bs(l=5, cls=(aarch64_simd32,), fname="rn") -sn64 = bs(l=5, cls=(aarch64_simd64,), fname="rn") -sn128 = bs(l=5, cls=(aarch64_simd128,), fname="rn") - - -rt32 = bs(l=5, cls=(aarch64_gpregz32,), fname="rt") - -rt_isf = bs(l=5, cls=(aarch64_gpreg_isf,), fname="rt") - -rn64_deref = bs(l=5, cls=(aarch64_deref,), fname="rn") -rn64_deref_sz = bs(l=5, cls=(aarch64_deref_size,), fname="rn") -rn64_deref_sf = bs(l=5, cls=(aarch64_sf_scale,), fname="rn") -rn64_deref_sd = bs(l=5, cls=(aarch64_sd_scale,), fname="rn") - -rn64_deref_nooff = bs(l=5, cls=(aarch64_deref_nooff,), fname="rn") - -imm_sft_12 = bs(l=12, cls=(aarch64_imm_sft,)) - -# imm32_3 = bs(l=3, cls=(aarch64_imm_32,)) -imm32_3 = bs(l=3, fname="imm") -imm6 = bs(l=6, fname="imm", order=-1) -imm3 = bs(l=3, fname="imm", order=-1) -simm6 = bs(l=6, cls=(aarch64_int64_noarg, aarch64_arg), fname="imm", order=-1) -simm9 = bs(l=9, cls=(aarch64_int64_noarg,), fname="imm", order=-1) -simm7 = bs(l=7, cls=(aarch64_int64_noarg,), fname="imm", order=-1) -nzcv = bs(l=4, 
cls=(aarch64_uint64_noarg, aarch64_arg), fname="nzcv", order=-1) -uimm5 = bs(l=5, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) -uimm12 = bs(l=12, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) -uimm16 = bs(l=16, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) -uimm7 = bs(l=7, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) - -uimm8 = bs(l=8, cls=(aarch64_uint64,), fname="imm", order=-1) - -op1 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op1") -op2 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op2") - - -imm16 = bs(l=16, fname="imm", order=-1) - - -immlo = bs(l=2, fname='immlo') -immhip = bs(l=19, cls=(aarch64_immhip_page,)) -immhi = bs(l=19, cls=(aarch64_immhi_page,)) - -option = bs(l=3, fname='option', order=-1) - - -rm_ext = bs(l=5, cls=(aarch64_gpreg_ext,), fname="rm") -rm_sft = bs(l=5, cls=(aarch64_gpreg_sftimm,), fname="rm") - -rm_ext2 = bs(l=5, cls=(aarch64_gpreg_ext2,), fname="rm") -rm_ext2_128 = bs(l=5, cls=(aarch64_gpreg_ext2_128,), fname="rm") - - -imms = bs(l=6, cls=(aarch64_imm_nsr,), fname='imms') -immr = bs(l=6, fname='immr') -immn = bs(l=1, fname='immn') - - -imm16_hw = bs(l=16, cls=(aarch64_imm_hw,), fname='imm') -imm16_hw_sc = bs(l=16, cls=(aarch64_imm_hw_sc,), fname='imm') -hw = bs(l=2, fname='hw') - - -a_imms = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) -a_immr = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) - - - -adsu_name = {'ADD': 0, 'SUB': 1} -bs_adsu_name = bs_name(l=1, name=adsu_name) - - -offs19 = bs(l=19, cls=(aarch64_offs,), fname='off') -offs19pc = bs(l=19, cls=(aarch64_offs_pc,), fname='off') - -offs26 = bs(l=26, cls=(aarch64_offs,), fname='off') -offs14 = bs(l=14, cls=(aarch64_offs,), fname='off') - -b40 = bs(l=5, cls=(aarch64_b40,), fname='b40', order=1) - -sdsize1 = bs(l=1, fname="size") - -sdsize = bs(l=2, fname="size") -opsize = bs(l=2, fname="size") -sd = bs(l=5, cls=(aarch64_simdreg,), fname='sd') - -opc = bs(l=1, fname='opc', 
order=-1) - -# add/sub (imm) -aarch64op("addsub", [sf, bs_adsu_name, modf, bs('10001'), shift, imm_sft_12, rn, rd], [rd, rn, imm_sft_12]) -aarch64op("cmp", [sf, bs('1'), bs('1'), bs('10001'), shift, imm_sft_12, rn, bs('11111')], [rn, imm_sft_12], alias=True) -aarch64op("cmn", [sf, bs('0'), bs('1'), bs('10001'), shift, imm_sft_12, rn, bs('11111')], [rn, imm_sft_12], alias=True) - -aarch64op("adrp", [bs('1'), immlo, bs('10000'), immhip, rd64], [rd64, immhip]) -aarch64op("adr", [bs('0'), immlo, bs('10000'), immhi, rd64], [rd64, immhi]) - -# add/sub (reg shift) -aarch64op("addsub", [sf, bs_adsu_name, modf, bs('01011'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("cmp", [sf, bs('1'), bs('1'), bs('01011'), shift, bs('0'), rm_sft, imm6, rn, bs('11111')], [rn, rm_sft], alias=True) -# add/sub (reg ext) -aarch64op("addsub", [sf, bs_adsu_name, modf, bs('01011'), bs('00'), bs('1'), rm_ext, option, imm3, rn, rd], [rd, rn, rm_ext]) -#aarch64op("cmp", [sf, bs('1'), bs('1'), bs('01011'), bs('00'), bs('1'), rm_ext, option, imm3, rn, bs('11111')], [rn, rm_ext], alias=True) - - -aarch64op("neg", [sf, bs('1'), modf, bs('01011'), shift, bs('0'), rm_sft, imm6, bs('11111'), rd], [rd, rm_sft], alias=True) - - -logic_name = {'AND': 0, 'ORR': 1, 'EOR': 2} -bs_logic_name = bs_name(l=2, name=logic_name) -# logical (imm) -aarch64op("logic", [sf, bs_logic_name, bs('100100'), immn, immr, imms, rn0, rd], [rd, rn0, imms]) -# ANDS -aarch64op("ands", [sf, bs('11'), bs('100100'), immn, immr, imms, rn0, rdz], [rdz, rn0, imms]) -aarch64op("tst", [sf, bs('11'), bs('100100'), immn, immr, imms, rn0, bs('11111')], [rn0, imms], alias=True) - - -# bitfield move p.149 -logicbf_name = {'SBFM': 0b00, 'BFM': 0b01, 'UBFM': 0b10} -bs_logicbf_name = bs_name(l=2, name=logicbf_name) -aarch64op("logic", [sf, bs_logicbf_name, bs('100110'), bs(l=1, cls=(aarch64_eq,), ref="sf"), a_immr, a_imms, rn, rd], [rd, rn, a_immr, a_imms]) - - -# logical (reg shift) -aarch64op("and", [sf, bs('00'), 
bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("bic", [sf, bs('00'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("orr", [sf, bs('01'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("orn", [sf, bs('01'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("mvn", [sf, bs('01'), bs('01010'), shift, bs('1'), rm_sft, imm6, bs('11111'), rd], [rd, rm_sft], alias=True) -aarch64op("eor", [sf, bs('10'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("eon", [sf, bs('10'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("ands", [sf, bs('11'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) -aarch64op("tst", [sf, bs('11'), bs('01010'), shift, bs('0'), rm_sft, imm6, rn, bs('11111')], [rn, rm_sft], alias=True) -aarch64op("bics", [sf, bs('11'), bs('01010'), shift, bs('1'), rm_sft, imm6, rn, rd], [rd, rn, rm_sft]) - -# move reg -aarch64op("mov", [sf, bs('01'), bs('01010'), bs('00'), bs('0'), rmz, bs('000000'), bs('11111'), rd], [rd, rmz], alias=True) - - -aarch64op("adc", [sf, bs('00'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) -aarch64op("adcs", [sf, bs('01'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) - - -aarch64op("sbc", [sf, bs('10'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) -aarch64op("sbcs", [sf, bs('11'), bs('11010000'), rm, bs('000000'), rn, rd], [rd, rn, rm]) - - - -bcond = bs_mod_name(l=4, fname='cond', mn_mod=['EQ', 'NE', 'CS', 'CC', - 'MI', 'PL', 'VS', 'VC', - 'HI', 'LS', 'GE', 'LT', - 'GT', 'LE', 'AL', 'NV']) - -cond_arg = bs(l=4, cls=(aarch64_cond_arg,), fname="cond") -cond_inv_arg = bs(l=4, cls=(aarch64_cond_inv_arg,), fname="cond") -# unconditional branch (ret) -aarch64op("br", [bs('1101011'), bs('0000'), bs('11111'), bs('000000'), rn64, bs('00000')], [rn64]) -aarch64op("blr", [bs('1101011'), 
bs('0001'), bs('11111'), bs('000000'), rn64, bs('00000')], [rn64]) -aarch64op("ret", [bs('1101011'), bs('0010'), bs('11111'), bs('000000'), rn64, bs('00000')], [rn64]) -aarch64op("eret", [bs('1101011'), bs('0100'), bs('11111'), bs('000000'), bs('11111'), bs('00000')]) -aarch64op("drps", [bs('1101011'), bs('0101'), bs('11111'), bs('000000'), bs('11111'), bs('00000')]) - -# unconditional branch (imm) -aarch64op("b", [bs('0'), bs('00101'), offs26], [offs26]) -aarch64op("bl", [bs('1'), bs('00101'), offs26], [offs26]) - - -post_pre = bs(l=1, order=-1, fname='postpre') - -# conditional compare (imm) p.158 -ccmp_name = {'CCMN': 0, 'CCMP': 1} -bs_ccmp_name = bs_name(l=1, name=ccmp_name) -aarch64op("condcmp", [sf, bs_ccmp_name, bs('1'), bs('11010010'), uimm5, cond_arg, bs('1'), bs('0'), rn, bs('0'), nzcv], [rn, uimm5, nzcv, cond_arg]) -aarch64op("condcmp", [sf, bs_ccmp_name, bs('1'), bs('11010010'), rm, cond_arg, bs('0'), bs('0'), rn, bs('0'), nzcv], [rn, rm, nzcv, cond_arg]) - -ldst_b_name = {'STRB': 0, 'LDRB': 1} -bs_ldst_b_name = bs_name(l=1, name=ldst_b_name) -ldst_name = {'STR': 0, 'LDR': 1} -bs_ldst_name = bs_name(l=1, name=ldst_name) -ldst_h_name = {'STRH': 0, 'LDRH': 1} -bs_ldst_h_name = bs_name(l=1, name=ldst_h_name) - -ldst_tb_name = {'STTRB': 0, 'LDTRB': 1} -bs_ldst_tb_name = bs_name(l=1, name=ldst_tb_name) - -ldst_th_name = {'STTRH': 0, 'LDTRH': 1} -bs_ldst_th_name = bs_name(l=1, name=ldst_th_name) - -ldst_ub_name = {'STURB': 0, 'LDURB': 1} -bs_ldst_ub_name = bs_name(l=1, name=ldst_ub_name) -ldst_u_name = {'STUR': 0, 'LDUR': 1} -bs_ldst_u_name = bs_name(l=1, name=ldst_u_name) - -ldst_t_name = {'STTR': 0, 'LDTR': 1} -bs_ldst_st_name = bs_name(l=1, name=ldst_t_name) - -ldst_1u_name = {'STUR': 0b0, 'LDUR': 0b1} -bs_ldst_1u_name = bs_name(l=1, name=ldst_1u_name) - -ldst_uh_name = {'STURH': 0, 'LDURH': 1} -bs_ldst_uh_name = bs_name(l=1, name=ldst_uh_name) - - -ldst_sw_name = {'STRSW': 0, 'LDRSW': 1} -bs_ldst_sw_name = bs_name(l=1, name=ldst_sw_name) - -# load/store 
register (imm post index) -aarch64op("ldst", [bs('00'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_b_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldrsb", [bs('00'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) -aarch64op("ldrsh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) -aarch64op("ldst", [bs('01'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_h_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldst", [bs('10'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldrsw", [bs('10'), bs('111'), bs('0'), bs('00'), bs('10'), bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt64], [rt64, rn64_deref ]) -aarch64op("ldst", [bs('11'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, rt64], [rt64, rn64_deref ]) - -aarch64op("ldst", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, sd], [sd, rn64_deref ]) -aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_name, bs('0'), simm9, post_pre, bs('1'), rn64_deref, sd128], [sd128, rn64_deref ]) - -# load/store register (unsigned imm) -aarch64op("ldst", [bs('00', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), bs_ldst_b_name, uimm12, rn64_deref_sz, rt32], [rt32, rn64_deref_sz ]) -aarch64op("ldrsb", [bs('00', fname="size"), bs('111'), bs('0'), bs('01'), bs('1'), sf, uimm12, rn64_deref_sz, rt_isf], [rt_isf, rn64_deref_sz ]) -aarch64op("ldrsh", [bs('01', fname="size"), bs('111'), bs('0'), bs('01'), bs('1'), sf, uimm12, rn64_deref_sz, rt_isf], [rt_isf, rn64_deref_sz ]) -aarch64op("ldst", [bs('01', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), 
bs_ldst_h_name, uimm12, rn64_deref_sz, rt32], [rt32, rn64_deref_sz ]) -aarch64op("ldst", [bs('10', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), bs_ldst_name, uimm12, rn64_deref_sz, rt32], [rt32, rn64_deref_sz ]) -aarch64op("ldrsw", [bs('10', fname="size"), bs('111'), bs('0'), bs('01'), bs('10'), uimm12, rn64_deref_sz, rt64], [rt64, rn64_deref_sz ]) -aarch64op("ldst", [bs('11', fname="size"), bs('111'), bs('0'), bs('01'), bs('0'), bs_ldst_name, uimm12, rn64_deref_sz, rt64], [rt64, rn64_deref_sz ]) - -aarch64op("ldst", [sdsize, bs('111'), bs('1'), bs('01'), bs('0'), bs_ldst_name, uimm12, rn64_deref_sz, sd], [sd, rn64_deref_sz ]) -aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('01'), bs('1', fname='size', amount=4), bs_ldst_name, uimm12, rn64_deref_sz, sd128], [sd128, rn64_deref_sz ]) - -# load/store register (unp) -aarch64op("ldst", [bs('00'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_tb_name, bs('0'), simm9, bs('10'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldtrsb", [bs('00'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('10'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) -aarch64op("ldtrsh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('10'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) -aarch64op("ldsttrh",[bs('01'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_th_name, bs('0'), simm9, bs('10'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldtrsw", [bs('10'), bs('111'), bs('0'), bs('00'), bs('10'), bs('0'), simm9, bs('10'), rn64_deref, rt64], [rt64, rn64_deref ]) -aarch64op("ldstt", [bs('1'), sf, bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_st_name, bs('0'), simm9, bs('10'), rn64_deref, rt], [rt, rn64_deref ]) - -aarch64op("ldstt", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_st_name, bs('0'), simm9, bs('10'), rn64_deref, sd], [sd, rn64_deref ]) -aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_st_name, bs('0'), simm9, bs('10'), rn64_deref, 
sd128], [sd128, rn64_deref ]) - -# load/store register (unscaled imm) -aarch64op("ldst", [bs('00'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_ub_name, bs('0'), simm9, bs('00'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldursb", [bs('00'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('00'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) -aarch64op("ldstuh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_uh_name, bs('0'), simm9, bs('00'), rn64_deref, rt32], [rt32, rn64_deref ]) -aarch64op("ldursh", [bs('01'), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('0'), simm9, bs('00'), rn64_deref, rt_isf], [rt_isf, rn64_deref ]) -aarch64op("ldursw", [bs('10'), bs('111'), bs('0'), bs('00'), bs('10'), bs('0'), simm9, bs('00'), rn64_deref, rt64], [rt64, rn64_deref ]) -aarch64op("ldst", [bs('1'), sf, bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_u_name, bs('0'), simm9, bs('00'), rn64_deref, rt], [rt, rn64_deref ]) - -aarch64op("ldstu", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_u_name, bs('0'), simm9, bs('00'), rn64_deref, sd], [sd, rn64_deref ]) -aarch64op("ldst", [bs('00'), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_1u_name, bs('0'), simm9, bs('00'), rn64_deref, sd128], [sd128, rn64_deref ]) - -# load/store (register) p.728 - -aarch64op("ldstrb",[bs('00', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_b_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt32], [rt32, rm_ext2]) - -aarch64op("ldstrh",[bs('01', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_h_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt32], [rt32, rm_ext2]) - -aarch64op("ldrsb", [bs('00', fname="size"), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt_isf], [rt_isf, rm_ext2]) - -aarch64op("ldrsh", [bs('01', fname="size"), bs('111'), bs('0'), bs('00'), bs('1'), sf, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt_isf], [rt_isf, rm_ext2]) - 
-aarch64op("ldst", [sdsize, bs('111'), bs('1'), bs('00'), bs('0'), bs_ldst_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, sd], [sd, rm_ext2]) -aarch64op("ldst", [bs('00', fname="size"), bs('111'), bs('1'), bs('00'), bs('1'), bs_ldst_name, bs('1'), rm_ext2_128, option, shiftb, bs('10'), rn64_v, sd128], [sd128, rm_ext2_128]) - -aarch64op("str", [bs('10', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt32], [rt32, rm_ext2]) - -aarch64op("ldrsw", [bs('10', fname="size"), bs('111'), bs('0'), bs('00'), bs('10'), bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt64], [rt64, rm_ext2]) - -aarch64op("ldst", [bs('11', fname="size"), bs('111'), bs('0'), bs('00'), bs('0'), bs_ldst_name, bs('1'), rm_ext2, option, shiftb, bs('10'), rn64_v, rt64], [rt64, rm_ext2]) - -# load/store literal p.137 -aarch64op("ldr", [bs('0'), sf, bs('011'), bs('0'), bs('00'), offs19pc, rt], [rt, offs19pc]) -aarch64op("ldrsw", [bs('10'), bs('011'), bs('0'), bs('00'), offs19pc, rt64], [rt64, offs19pc]) - -# load/store simd literal p.142 -aarch64op("ldr", [sdsize, bs('011'), bs('1'), bs('00'), offs19pc, sd1], [sd1, offs19pc]) - - -# move wide p.203 -movwide_name = {'MOVN': 0b00, 'MOVZ': 0b10} -bs_movwide_name = bs_name(l=2, name=movwide_name) -# mov wide (imm) -aarch64op("mov", [sf, bs_movwide_name, bs('100101'), hw, imm16_hw, rd], [rd, imm16_hw]) -aarch64op("movk", [sf, bs('11'), bs('100101'), hw, imm16_hw_sc, rd], [rd, imm16_hw_sc]) - -# stp/ldp p.139 -ldstp_name = {'STP': 0b0, 'LDP': 0b1} -bs_ldstp_name = bs_name(l=1, name=ldstp_name) -aarch64op("ldstp", [sf, bs('0'), bs('101'), bs('0'), bs('0'), post_pre, bs('1'), bs_ldstp_name, simm7, rt2, rn64_deref_sf, rt], [rt, rt2, rn64_deref_sf]) -aarch64op("ldstp", [sf, bs('0'), bs('101'), bs('0'), bs('0'), bs('1'), bs('0'), bs_ldstp_name, simm7, rt2, rn64_deref_sf, rt], [rt, rt2, rn64_deref_sf]) - -aarch64op("ldstp", [sdsize, bs('101'), bs('1'), bs('0'), post_pre, bs('1'), 
bs_ldstp_name, uimm7, sd2, rn64_deref_sd, sd1], [sd1, sd2, rn64_deref_sd]) -aarch64op("ldstp", [sdsize, bs('101'), bs('1'), bs('0'), bs('1'), bs('0'), bs_ldstp_name, uimm7, sd2, rn64_deref_sd, sd1], [sd1, sd2, rn64_deref_sd]) - - -# data process p.207 -datap0_name = {'RBIT': 0b000000, 'REV16': 0b000001, - 'REV': 0b000010, - 'CLZ': 0b000100, 'CLS': 0b000101} -bs_datap0_name = bs_name(l=6, name=datap0_name) -aarch64op("ldstp", [bs('0', fname='sf'), bs('1'), modf, bs('11010110'), bs('00000'), bs_datap0_name, rn, rd]) -datap1_name = {'RBIT': 0b000000, 'REV16': 0b000001, - 'REV32': 0b000010, 'REV': 0b000011, - 'CLZ': 0b000100, 'CLS': 0b000101} -bs_datap1_name = bs_name(l=6, name=datap1_name) -aarch64op("ldstp", [bs('1', fname='sf'), bs('1'), modf, bs('11010110'), bs('00000'), bs_datap1_name, rn, rd]) - - -# conditional branch p.132 -aarch64op("b.", [bs('0101010'), bs('0'), offs19, bs('0'), bcond], [offs19]) -aarch64op("cbnz", [sf, bs('011010'), bs('1'), offs19, rt], [rt, offs19]) -aarch64op("cbz", [sf, bs('011010'), bs('0'), offs19, rt], [rt, offs19]) -aarch64op("tbnz", [sf, bs('011011'), bs('1'), b40, offs14, rt], [rt, b40, offs14]) -aarch64op("tbz", [sf, bs('011011'), bs('0'), b40, offs14, rt], [rt, b40, offs14]) - - -# fmov register p.160 -aarch64op("fmov", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('00'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) -# fmov scalar imm p.160 -aarch64op("fmov", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), uimm8, bs('100'), bs('00000'), sdd_32_64], [sdd_32_64, uimm8]) -# floating point comparison p.164 -aarch64op("fcmp", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64_zero, bs('00'), bs('1000'), sdn_32_64, bs('0'), opc, bs('000')], [sdn_32_64, sdm_32_64_zero]) -aarch64op("fcmpe", [bs('000'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64_zero, bs('00'), bs('1000'), sdn_32_64, bs('1'), opc, bs('000')], [sdn_32_64, sdm_32_64_zero]) -# floating point convert p.161 
-aarch64op("fcvtas",[sf, bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('00'), bs('100'), bs('000000'), sdn_32_64, rd], [rd, sdn_32_64]) -aarch64op("fcvtzu",[sf, bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('11'), bs('001'), bs('000000'), sdn_32_64, rd], [rd, sdn_32_64]) -aarch64op("fcvtzs",[sf, bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('11'), bs('000'), bs('000000'), sdn_32_64, rd], [rd, sdn_32_64]) - -aarch64op("fcvt", [bs('000'), bs('11110'), bs('11'), bs('1'), bs('0001'), bs('00'), bs('10000'), sn16, sd32], [sd32, sn16]) -aarch64op("fcvt", [bs('000'), bs('11110'), bs('11'), bs('1'), bs('0001'), bs('01'), bs('10000'), sn16, sd64], [sd64, sn16]) -aarch64op("fcvt", [bs('000'), bs('11110'), bs('00'), bs('1'), bs('0001'), bs('11'), bs('10000'), sn32, sd16], [sd16, sn32]) -aarch64op("fcvt", [bs('000'), bs('11110'), bs('00'), bs('1'), bs('0001'), bs('01'), bs('10000'), sn32, sd64], [sd64, sn32]) -aarch64op("fcvt", [bs('000'), bs('11110'), bs('01'), bs('1'), bs('0001'), bs('11'), bs('10000'), sn64, sd16], [sd16, sn64]) -aarch64op("fcvt", [bs('000'), bs('11110'), bs('01'), bs('1'), bs('0001'), bs('00'), bs('10000'), sn64, sd32], [sd32, sn64]) - - - -swapargs = bs_swapargs(l=1, fname="swap", mn_mod=list(range(1 << 1))) - -aarch64op("fmov", [bs('0'), bs('00'), bs('11110'), bs('00'), bs('1'), bs('00'), bs('110'), bs('000000'), sn32, rd32], [rd32, sn32]) -aarch64op("fmov", [bs('0'), bs('00'), bs('11110'), bs('00'), bs('1'), bs('00'), bs('111'), bs('000000'), rn32, sd32], [sd32, rn32]) -aarch64op("fmov", [bs('1'), bs('00'), bs('11110'), bs('00'), bs('1'), bs('00'), bs('110'), bs('000000'), sd32, rd32], [rd32, sd32]) -aarch64op("fmov", [bs('1'), bs('00'), bs('11110'), bs('01'), bs('1'), bs('00'), bs('111'), bs('000000'), rd64, sd64], [sd64, rd64]) -aarch64op("fmov", [bs('1'), bs('00'), bs('11110'), bs('01'), bs('1'), bs('00'), bs('110'), bs('000000'), sd64, rd64], [rd64, sd64]) - - - -# floating point arith p.163 -aarch64op("fsub", [bs('0'), bs('00'), 
bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('001'), bs('1'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) -aarch64op("fadd", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('001'), bs('0'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) -aarch64op("fdiv", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('000'), bs('1'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) -aarch64op("fmul", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('000'), bs('0'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) -aarch64op("fnmul", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('100'), bs('0'), bs('10'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64]) - -aarch64op("fabs", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('01'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) -aarch64op("fneg", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('10'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) -aarch64op("fsqrt", [bs('0'), bs('00'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('0000'), bs('11'), bs('10000'), sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64]) - - -# floating point multiply add p.163 -aarch64op("fmadd", [bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('0'), sdm_32_64, bs('0'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) -aarch64op("fmsub", [bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('0'), sdm_32_64, bs('1'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) -aarch64op("fnmadd",[bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('1'), sdm_32_64, bs('0'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) -aarch64op("fnmsub",[bs('0'), bs('00'), bs('11111'), bs('0'), sdsize1, bs('1'), 
sdm_32_64, bs('1'), sda_32_64, sdn_32_64, sdd_32_64], [sdd_32_64, sdn_32_64, sdm_32_64, sda_32_64]) - -# conversion float integer p.235 -aarch64op("scvtf", [sf, bs('0'), bs('0'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('00'), bs('010'), bs('000000'), rn, sdd_32_64], [sdd_32_64, rn]) -aarch64op("ucvtf", [sf, bs('0'), bs('0'), bs('11110'), bs('0'), sdsize1, bs('1'), bs('00'), bs('011'), bs('000000'), rn, sdd_32_64], [sdd_32_64, rn]) - - - -# conditional select p.158 -aarch64op("csel", [sf, bs('0'), bs('0'), bs('11010100'), rmz, cond_arg, bs('00'), rnz, rd], [rd, rnz, rmz, cond_arg]) -aarch64op("csinc", [sf, bs('0'), bs('0'), bs('11010100'), rmz, cond_arg, bs('01'), rnz, rd], [rd, rnz, rmz, cond_arg]) -aarch64op("csinv", [sf, bs('1'), bs('0'), bs('11010100'), rmz, cond_arg, bs('00'), rnz, rd], [rd, rnz, rmz, cond_arg]) -aarch64op("csneg", [sf, bs('1'), bs('0'), bs('11010100'), rmz, cond_arg, bs('01'), rnz, rd], [rd, rnz, rmz, cond_arg]) -aarch64op("cset", [sf, bs('0'), bs('0'), bs('11010100'), bs('11111'), cond_inv_arg, bs('01'), bs('11111'), rd], [rd, cond_inv_arg], alias=True) -aarch64op("csetm", [sf, bs('1'), bs('0'), bs('11010100'), bs('11111'), cond_inv_arg, bs('00'), bs('11111'), rd], [rd, cond_inv_arg], alias=True) - - -# multiply p.156 -aarch64op("madd", [sf, bs('00'), bs('11011'), bs('000'), rm, bs('0'), ra, rn, rd], [rd, rn, rm, ra]) -aarch64op("msub", [sf, bs('00'), bs('11011'), bs('000'), rm, bs('1'), ra, rn, rd], [rd, rn, rm, ra]) - -aarch64op("umulh", [bs('1'), bs('00'), bs('11011'), bs('110'), rm64, bs('0'), bs('11111'), rn64, rd64], [rd64, rn64, rm64]) -aarch64op("smulh", [bs('1'), bs('00'), bs('11011'), bs('010'), rm64, bs('0'), bs('11111'), rn64, rd64], [rd64, rn64, rm64]) -aarch64op("umsubh",[bs('1'), bs('00'), bs('11011'), bs('101'), rm32, bs('1'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) - - -aarch64op("smaddl",[bs('1'), bs('00'), bs('11011'), bs('001'), rm32, bs('0'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) 
-aarch64op("umaddl",[bs('1'), bs('00'), bs('11011'), bs('101'), rm32, bs('0'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) - -aarch64op("smsubl",[bs('1'), bs('00'), bs('11011'), bs('001'), rm32, bs('1'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) -aarch64op("umsubl",[bs('1'), bs('00'), bs('11011'), bs('101'), rm32, bs('1'), ra64, rn32, rd64], [rd64, rn32, rm32, ra64]) - -# division p.156 -aarch64op("sdiv", [sf, bs('0'), bs('0'), bs('11010110'), rm, bs('00001'), bs('1'), rn, rd], [rd, rn, rm]) -aarch64op("udiv", [sf, bs('0'), bs('0'), bs('11010110'), rm, bs('00001'), bs('0'), rn, rd], [rd, rn, rm]) - - -# extract register p.150 -aarch64op("extr", [sf, bs('00100111'), bs(l=1, cls=(aarch64_eq,), ref="sf"), bs('0'), rm, simm6, rn, rd], [rd, rn, rm, simm6]) - -# shift reg p.155 -shiftr_name = {'LSL': 0b00, 'LSR': 0b01, 'ASR': 0b10, 'ROR': 0b11} -bs_shiftr_name = bs_name(l=2, name=shiftr_name) - -aarch64op("shiftr", [sf, bs('0'), bs('0'), bs('11010110'), rm, bs('0010'), bs_shiftr_name, rn, rd], [rd, rn, rm]) - -# -aarch64op("NOP", [bs('11010101000000110010000000011111')]) - -# exception p.133 -aarch64op("brk", [bs('11010100'), bs('001'), uimm16, bs('000'), bs('00')], [uimm16]) -aarch64op("hlt", [bs('11010100'), bs('010'), uimm16, bs('000'), bs('00')], [uimm16]) -aarch64op("svc", [bs('11010100'), bs('000'), uimm16, bs('000'), bs('01')], [uimm16]) -aarch64op("hvc", [bs('11010100'), bs('000'), uimm16, bs('000'), bs('10')], [uimm16]) -aarch64op("smc", [bs('11010100'), bs('000'), uimm16, bs('000'), bs('11')], [uimm16]) - -# msr p.631 -msr_name = {'MSR': 0b0, 'MRS': 0b1} -bs_msr_name = bs_name(l=1, name=msr_name) -aarch64op("mrs", [bs('1101010100'), bs('1'), bs('1'), bs('1'), op1, crn, crm, op2, rt64], [rt64, op1, crn, crm, op2]) -aarch64op("msr", [bs('1101010100'), bs('0'), bs('1'), bs('1'), op1, crn, crm, op2, rt64], [op1, crn, crm, op2, rt64]) - -# load/store exclusive p.140 -aarch64op("stxr", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('0'), 
bs('11111'), rn64_deref_nooff, rt], [rs32, rt, rn64_deref_nooff]) -aarch64op("ldxr", [bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('0'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) - - -aarch64op("stxrb", [bs('0'), bs('0'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) -aarch64op("ldxrb", [bs('0'), bs('0'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rt32, rn64_deref_nooff]) - -aarch64op("stxrb", [bs('0'), bs('1'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) -aarch64op("ldxrh", [bs('0'), bs('1'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('0'), bs('11111'), rn64_deref_nooff, rt32], [rt32, rn64_deref_nooff]) - -aarch64op("stxp", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('1'), rs32, bs('0'), rt2, rn64_deref_nooff, rt], [rs32, rt, rt2, rn64_deref_nooff]) -aarch64op("ldxp", [bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('1'), bs('11111'), bs('0'), rt2, rn64_deref_nooff, rt], [rt, rt2, rn64_deref_nooff]) - -# load acquire/store release p.141 -aarch64op("ldar", [bs('1'), sf, bs('001000'), bs('1'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) -aarch64op("ldarb",[bs('0'), bs('0'), bs('001000'), bs('1'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) -aarch64op("ldarh",[bs('0'), bs('1'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) -aarch64op("ldaxp",[bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('1'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt], [rt, rn64_deref_nooff]) -aarch64op("ldaxr",[bs('1'), sf, bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, 
rt], [rt, rn64_deref_nooff]) - -aarch64op("stlxr", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('1'), bs('11111'), rn64_deref_nooff, rt], [rs32, rt, rn64_deref_nooff]) -aarch64op("stlxrb",[bs('0'), bs('0'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('1'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) -aarch64op("stlxrh",[bs('0'), bs('1'), bs('001000'), bs('0'), bs('0'), bs('0'), rs32, bs('1'), bs('11111'), rn64_deref_nooff, rt32], [rs32, rt32, rn64_deref_nooff]) -aarch64op("stlxp", [bs('1'), sf, bs('001000'), bs('0'), bs('0'), bs('1'), rs32, bs('1'), rt2, rn64_deref_nooff, rt], [rs32, rt, rt2, rn64_deref_nooff]) - -# barriers p.135 -aarch64op("dsb", [bs('1101010100'), bs('0000110011'), crm, bs('1'), bs('00'), bs('11111')], [crm]) -aarch64op("dmb", [bs('1101010100'), bs('0000110011'), crm, bs('1'), bs('01'), bs('11111')], [crm]) -aarch64op("isb", [bs('1101010100'), bs('0000110011'), crm, bs('1'), bs('10'), bs('11111')], [crm]) - -stacctype = bs_mod_name(l=1, fname='order', mn_mod=['', 'L']) -ltacctype = bs_mod_name(l=1, fname='order', mn_mod=['', 'A']) - - -aarch64op("casp", [bs('0'), sf, bs('001000'), bs('0'), ltacctype, bs('1'), rs, stacctype, bs('11111'), rn64_deref_nooff, rt], [rs, rt, rn64_deref_nooff]) -aarch64op("ldaxrb", [bs('00'), bs('001000'), bs('0'), bs('1'), bs('0'), bs('11111'), bs('1'), bs('11111'), rn64_deref_nooff, rt32], [rt32, rn64_deref_nooff]) diff --git a/miasm2/arch/aarch64/disasm.py b/miasm2/arch/aarch64/disasm.py deleted file mode 100644 index 17eec414..00000000 --- a/miasm2/arch/aarch64/disasm.py +++ /dev/null @@ -1,27 +0,0 @@ -from miasm2.core.asmblock import disasmEngine -from miasm2.arch.aarch64.arch import mn_aarch64 - -cb_aarch64_funcs = [] - - -def cb_aarch64_disasm(*args, **kwargs): - for func in cb_aarch64_funcs: - func(*args, **kwargs) - - -class dis_aarch64b(disasmEngine): - attrib = "b" - def __init__(self, bs=None, **kwargs): - super(dis_aarch64b, self).__init__( - mn_aarch64, 
self.attrib, bs, - dis_block_callback = cb_aarch64_disasm, - **kwargs) - - -class dis_aarch64l(disasmEngine): - attrib = "l" - def __init__(self, bs=None, **kwargs): - super(dis_aarch64l, self).__init__( - mn_aarch64, self.attrib, bs, - dis_block_callback = cb_aarch64_disasm, - **kwargs) diff --git a/miasm2/arch/aarch64/ira.py b/miasm2/arch/aarch64/ira.py deleted file mode 100644 index a895b549..00000000 --- a/miasm2/arch/aarch64/ira.py +++ /dev/null @@ -1,50 +0,0 @@ -#-*- coding:utf-8 -*- - -from miasm2.ir.analysis import ira -from miasm2.arch.aarch64.sem import ir_aarch64l, ir_aarch64b - - -class ir_a_aarch64l_base(ir_aarch64l, ira): - - def __init__(self, loc_db=None): - ir_aarch64l.__init__(self, loc_db) - self.ret_reg = self.arch.regs.X0 - - -class ir_a_aarch64b_base(ir_aarch64b, ira): - - def __init__(self, loc_db=None): - ir_aarch64b.__init__(self, loc_db) - self.ret_reg = self.arch.regs.X0 - - -class ir_a_aarch64l(ir_a_aarch64l_base): - - def __init__(self, loc_db=None): - ir_a_aarch64l_base.__init__(self, loc_db) - self.ret_reg = self.arch.regs.X0 - - def get_out_regs(self, _): - return set([self.ret_reg, self.sp]) - - def sizeof_char(self): - return 8 - - def sizeof_short(self): - return 16 - - def sizeof_int(self): - return 32 - - def sizeof_long(self): - return 32 - - def sizeof_pointer(self): - return 32 - - -class ir_a_aarch64b(ir_a_aarch64b_base, ir_a_aarch64l): - - def __init__(self, loc_db=None): - ir_a_aarch64b_base.__init__(self, loc_db) - self.ret_reg = self.arch.regs.X0 diff --git a/miasm2/arch/aarch64/jit.py b/miasm2/arch/aarch64/jit.py deleted file mode 100644 index 57b896d3..00000000 --- a/miasm2/arch/aarch64/jit.py +++ /dev/null @@ -1,80 +0,0 @@ -from builtins import range -import logging - -from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import pck64, upck64 -from miasm2.arch.aarch64.sem import ir_aarch64b, ir_aarch64l - -log = logging.getLogger('jit_aarch64') 
-hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - -class jitter_aarch64l(Jitter): - max_reg_arg = 8 - - def __init__(self, *args, **kwargs): - Jitter.__init__(self, ir_aarch64l(LocationDB()), *args, **kwargs) - self.vm.set_little_endian() - - def push_uint64_t(self, value): - self.cpu.SP -= 8 - self.vm.set_mem(self.cpu.SP, pck64(value)) - - def pop_uint64_t(self): - value = self.vm.get_u64(self.cpu.SP) - self.cpu.SP += 8 - return value - - def get_stack_arg(self, index): - return self.vm.get_u64(self.cpu.SP + 8 * index) - - # calling conventions - - @named_arguments - def func_args_stdcall(self, n_args): - args = [] - for i in range(min(n_args, self.max_reg_arg)): - args.append(getattr(self.cpu, 'X%d' % i)) - for i in range(max(0, n_args - self.max_reg_arg)): - args.append(self.get_stack_arg(i)) - ret_ad = self.cpu.LR - return ret_ad, args - - def func_ret_stdcall(self, ret_addr, ret_value=None): - self.pc = self.cpu.PC = ret_addr - if ret_value is not None: - self.cpu.X0 = ret_value - return True - - def get_arg_n_stdcall(self, index): - if index < self.max_reg_arg: - arg = self.cpu.get_gpreg()['X%d' % index] - else: - arg = self.get_stack_arg(index - self.max_reg_arg) - return arg - - def func_prepare_stdcall(self, ret_addr, *args): - for index in range(min(len(args), 4)): - setattr(self.cpu, 'X%d' % index, args[index]) - for index in range(4, len(args)): - self.vm.set_mem(self.cpu.SP + 8 * (index - 4), pck64(args[index])) - self.cpu.LR = ret_addr - - func_args_systemv = func_args_stdcall - func_ret_systemv = func_ret_stdcall - get_arg_n_systemv = get_arg_n_stdcall - func_prepare_systemv = func_prepare_stdcall - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.PC = self.pc - - -class jitter_aarch64b(jitter_aarch64l): - - def __init__(self, *args, **kwargs): - Jitter.__init__(self, ir_aarch64b(LocationDB()), *args, 
**kwargs) - self.vm.set_big_endian() diff --git a/miasm2/arch/aarch64/regs.py b/miasm2/arch/aarch64/regs.py deleted file mode 100644 index 7ddcc0b8..00000000 --- a/miasm2/arch/aarch64/regs.py +++ /dev/null @@ -1,120 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range -from miasm2.expression.expression import ExprId -from miasm2.core.cpu import gen_reg, gen_regs - -exception_flags = ExprId('exception_flags', 32) -interrupt_num = ExprId('interrupt_num', 32) - - -gpregs32_str = ["W%d" % i for i in range(0x1f)] + ["WSP"] -gpregs32_expr, gpregs32_init, gpregs32_info = gen_regs( - gpregs32_str, globals(), 32) - -gpregs64_str = ["X%d" % i for i in range(0x1E)] + ["LR", "SP"] -gpregs64_expr, gpregs64_init, gpregs64_info = gen_regs( - gpregs64_str, globals(), 64) - - -gpregsz32_str = ["W%d" % i for i in range(0x1f)] + ["WZR"] -gpregsz32_expr, gpregsz32_init, gpregsz32_info = gen_regs( - gpregsz32_str, globals(), 32) - -gpregsz64_str = ["X%d" % i for i in range(0x1e)] + ["LR", "XZR"] -gpregsz64_expr, gpregsz64_init, gpregsz64_info = gen_regs( - gpregsz64_str, globals(), 64) - -cr_str = ["c%d" % i for i in range(0xf)] -cr_expr, cr_init, cr_info = gen_regs(cr_str, globals(), 32) - - -simd08_str = ["B%d" % i for i in range(0x20)] -simd08_expr, simd08_init, simd08_info = gen_regs(simd08_str, globals(), 8) - -simd16_str = ["H%d" % i for i in range(0x20)] -simd16_expr, simd16_init, simd16_info = gen_regs(simd16_str, globals(), 16) - -simd32_str = ["S%d" % i for i in range(0x20)] -simd32_expr, simd32_init, simd32_info = gen_regs(simd32_str, globals(), 32) - -simd64_str = ["D%d" % i for i in range(0x20)] -simd64_expr, simd64_init, simd64_info = gen_regs(simd64_str, globals(), 64) - -simd128_str = ["Q%d" % i for i in range(0x20)] -simd128_expr, simd128_init, simd128_info = gen_regs( - simd128_str, globals(), 128) - - -PC, _ = gen_reg("PC", 64) -WZR, _ = gen_reg("WZR", 32) -XZR, _ = gen_reg("XZR", 64) - -PC_init = ExprId("PC_init", 64) -WZR_init = ExprId("WZR_init", 32) 
-XZR_init = ExprId("XZR_init", 64) - -reg_zf = 'zf' -reg_nf = 'nf' -reg_of = 'of' -reg_cf = 'cf' - -zf = ExprId(reg_zf, size=1) -nf = ExprId(reg_nf, size=1) -of = ExprId(reg_of, size=1) -cf = ExprId(reg_cf, size=1) - -zf_init = ExprId("zf_init", size=1) -nf_init = ExprId("nf_init", size=1) -of_init = ExprId("of_init", size=1) -cf_init = ExprId("cf_init", size=1) - - -all_regs_ids = [ - B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, B15, B16, - B17, B18, B19, B20, B21, B22, B23, B24, B25, B26, B27, B28, B29, B30, B31, - - H0, H1, H2, H3, H4, H5, H6, H7, H8, H9, H10, H11, H12, H13, H14, H15, H16, - H17, H18, H19, H20, H21, H22, H23, H24, H25, H26, H27, H28, H29, H30, H31, - - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, - S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31, - - D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, - D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, - - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, Q16, - Q17, Q18, Q19, Q20, Q21, Q22, Q23, Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31, - - W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, W16, - W17, W18, W19, W20, W21, W22, W23, W24, W25, W26, W27, W28, W29, W30, WSP, - - X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, - X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, LR, SP, - - exception_flags, - interrupt_num, - PC, - WZR, - XZR, - zf, nf, of, cf, - -] - - -all_regs_ids_no_alias = all_regs_ids - -attrib_to_regs = { - 'l': all_regs_ids_no_alias, - 'b': all_regs_ids_no_alias, -} - -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) - -all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] - -regs_init = {} -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] - -regs_flt_expr = [] diff --git a/miasm2/arch/aarch64/sem.py 
b/miasm2/arch/aarch64/sem.py deleted file mode 100644 index a840f6b6..00000000 --- a/miasm2/arch/aarch64/sem.py +++ /dev/null @@ -1,1502 +0,0 @@ -from builtins import range -from future.utils import viewitems - -from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAssign -from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock -from miasm2.arch.aarch64.arch import mn_aarch64, conds_expr, replace_regs -from miasm2.arch.aarch64.regs import * -from miasm2.core.sembuilder import SemBuilder -from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO, EXCEPT_INT_XX - - -# CPSR: N Z C V - - -def update_flag_zf(a): - return [ExprAssign(zf, ExprOp("FLAG_EQ", a))] - - -def update_flag_zf_eq(a, b): - return [ExprAssign(zf, ExprOp("FLAG_EQ_CMP", a, b))] - - -def update_flag_nf(arg): - return [ - ExprAssign( - nf, - ExprOp("FLAG_SIGN_SUB", arg, ExprInt(0, arg.size)) - ) - ] - - -def update_flag_zn(a): - e = [] - e += update_flag_zf(a) - e += update_flag_nf(a) - return e - - -def check_ops_msb(a, b, c): - if not a or not b or not c or a != b or a != c: - raise ValueError('bad ops size %s %s %s' % (a, b, c)) - - -def update_flag_add_cf(op1, op2): - "Compute cf in @op1 + @op2" - return [ExprAssign(cf, ExprOp("FLAG_ADD_CF", op1, op2))] - - -def update_flag_add_of(op1, op2): - "Compute of in @op1 + @op2" - return [ExprAssign(of, ExprOp("FLAG_ADD_OF", op1, op2))] - - -def update_flag_sub_cf(op1, op2): - "Compote CF in @op1 - @op2" - return [ExprAssign(cf, ExprOp("FLAG_SUB_CF", op1, op2) ^ ExprInt(1, 1))] - - -def update_flag_sub_of(op1, op2): - "Compote OF in @op1 - @op2" - return [ExprAssign(of, ExprOp("FLAG_SUB_OF", op1, op2))] - - -def update_flag_arith_add_co(arg1, arg2): - e = [] - e += update_flag_add_cf(arg1, arg2) - e += update_flag_add_of(arg1, arg2) - return e - - -def update_flag_arith_add_zn(arg1, arg2): - """ - Compute zf and nf flags for (arg1 + arg2) - """ - e = [] - e += update_flag_zf_eq(arg1, 
-arg2) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] - return e - - -def update_flag_arith_sub_co(arg1, arg2): - """ - Compute cf and of flags for (arg1 - arg2) - """ - e = [] - e += update_flag_sub_cf(arg1, arg2) - e += update_flag_sub_of(arg1, arg2) - return e - - -def update_flag_arith_sub_zn(arg1, arg2): - """ - Compute zf and nf flags for (arg1 - arg2) - """ - e = [] - e += update_flag_zf_eq(arg1, arg2) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, arg2))] - return e - - - - -def update_flag_zfaddwc_eq(arg1, arg2, arg3): - return [ExprAssign(zf, ExprOp("FLAG_EQ_ADDWC", arg1, arg2, arg3))] - -def update_flag_zfsubwc_eq(arg1, arg2, arg3): - return [ExprAssign(zf, ExprOp("FLAG_EQ_SUBWC", arg1, arg2, arg3))] - - -def update_flag_arith_addwc_zn(arg1, arg2, arg3): - """ - Compute znp flags for (arg1 + arg2 + cf) - """ - e = [] - e += update_flag_zfaddwc_eq(arg1, arg2, arg3) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_ADDWC", arg1, arg2, arg3))] - return e - - -def update_flag_arith_subwc_zn(arg1, arg2, arg3): - """ - Compute znp flags for (arg1 - (arg2 + cf)) - """ - e = [] - e += update_flag_zfsubwc_eq(arg1, arg2, arg3) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUBWC", arg1, arg2, arg3))] - return e - - -def update_flag_addwc_cf(op1, op2, op3): - "Compute cf in @res = @op1 + @op2 + @op3" - return [ExprAssign(cf, ExprOp("FLAG_ADDWC_CF", op1, op2, op3))] - - -def update_flag_addwc_of(op1, op2, op3): - "Compute of in @res = @op1 + @op2 + @op3" - return [ExprAssign(of, ExprOp("FLAG_ADDWC_OF", op1, op2, op3))] - - -def update_flag_arith_addwc_co(arg1, arg2, arg3): - e = [] - e += update_flag_addwc_cf(arg1, arg2, arg3) - e += update_flag_addwc_of(arg1, arg2, arg3) - return e - - - -def update_flag_subwc_cf(op1, op2, op3): - "Compute cf in @res = @op1 + @op2 + @op3" - return [ExprAssign(cf, ExprOp("FLAG_SUBWC_CF", op1, op2, op3) ^ ExprInt(1, 1))] - - -def update_flag_subwc_of(op1, op2, op3): - "Compute of in @res = @op1 + @op2 + @op3" - return 
[ExprAssign(of, ExprOp("FLAG_SUBWC_OF", op1, op2, op3))] - - -def update_flag_arith_subwc_co(arg1, arg2, arg3): - e = [] - e += update_flag_subwc_cf(arg1, arg2, arg3) - e += update_flag_subwc_of(arg1, arg2, arg3) - return e - - -cond2expr = {'EQ': ExprOp("CC_EQ", zf), - 'NE': ExprOp("CC_NE", zf), - 'CS': ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), # inv cf - 'CC': ExprOp("CC_U<", cf ^ ExprInt(1, 1)), # inv cf - 'MI': ExprOp("CC_NEG", nf), - 'PL': ExprOp("CC_POS", nf), - 'VS': ExprOp("CC_sOVR", of), - 'VC': ExprOp("CC_sNOOVR", of), - 'HI': ExprOp("CC_U>", cf ^ ExprInt(1, 1), zf), # inv cf - 'LS': ExprOp("CC_U<=", cf ^ ExprInt(1, 1), zf), # inv cf - 'GE': ExprOp("CC_S>=", nf, of), - 'LT': ExprOp("CC_S<", nf, of), - 'GT': ExprOp("CC_S>", nf, of, zf), - 'LE': ExprOp("CC_S<=", nf, of, zf), - 'AL': ExprInt(1, 1), - 'NV': ExprInt(0, 1) - } - - -def extend_arg(dst, arg): - if not isinstance(arg, ExprOp): - return arg - - op, (reg, shift) = arg.op, arg.args - if op == "SXTB": - base = reg[:8].signExtend(dst.size) - op = "<<" - elif op == "SXTH": - base = reg[:16].signExtend(dst.size) - op = "<<" - elif op == 'SXTW': - base = reg[:32].signExtend(dst.size) - op = "<<" - elif op == "SXTX": - base = reg.signExtend(dst.size) - op = "<<" - - elif op == "UXTB": - base = reg[:8].zeroExtend(dst.size) - op = "<<" - elif op == "UXTH": - base = reg[:16].zeroExtend(dst.size) - op = "<<" - elif op == 'UXTW': - base = reg[:32].zeroExtend(dst.size) - op = "<<" - elif op == "UXTX": - base = reg.zeroExtend(dst.size) - op = "<<" - - elif op in ['<<', '>>', '<>', '<<<', '>>>']: - base = reg.zeroExtend(dst.size) - else: - raise NotImplementedError('Unknown shifter operator') - - out = ExprOp(op, base, (shift.zeroExtend(dst.size) - & ExprInt(dst.size - 1, dst.size))) - return out - - -# SemBuilder context -ctx = {"PC": PC, - "LR": LR, - "nf": nf, - "zf": zf, - "cf": cf, - "of": of, - "cond2expr": cond2expr, - "extend_arg": extend_arg, - "ExprId":ExprId, - "exception_flags": exception_flags, - 
"interrupt_num": interrupt_num, - "EXCEPT_DIV_BY_ZERO": EXCEPT_DIV_BY_ZERO, - "EXCEPT_INT_XX": EXCEPT_INT_XX, - } - -sbuild = SemBuilder(ctx) - - -# instruction definition ############## - -@sbuild.parse -def add(arg1, arg2, arg3): - arg1 = arg2 + extend_arg(arg2, arg3) - - -@sbuild.parse -def sub(arg1, arg2, arg3): - arg1 = arg2 - extend_arg(arg2, arg3) - - -@sbuild.parse -def neg(arg1, arg2): - arg1 = - arg2 - - -@sbuild.parse -def and_l(arg1, arg2, arg3): - arg1 = arg2 & extend_arg(arg2, arg3) - - -@sbuild.parse -def eor(arg1, arg2, arg3): - arg1 = arg2 ^ extend_arg(arg2, arg3) - - -@sbuild.parse -def eon(arg1, arg2, arg3): - arg1 = arg2 ^ (~extend_arg(arg2, arg3)) - - -@sbuild.parse -def orr(arg1, arg2, arg3): - arg1 = arg2 | extend_arg(arg2, arg3) - - -@sbuild.parse -def orn(arg1, arg2, arg3): - arg1 = arg2 | (~extend_arg(arg2, arg3)) - - -@sbuild.parse -def bic(arg1, arg2, arg3): - arg1 = arg2 & (~extend_arg(arg2, arg3)) - - -def bics(ir, instr, arg1, arg2, arg3): - e = [] - tmp1, tmp2 = arg2, (~extend_arg(arg2, arg3)) - res = tmp1 & tmp2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', tmp1, tmp2))] - e += update_flag_nf(res) - - e.append(ExprAssign(arg1, res)) - return e, [] - - -@sbuild.parse -def mvn(arg1, arg2): - arg1 = (~extend_arg(arg1, arg2)) - - -def adds(ir, instr, arg1, arg2, arg3): - e = [] - arg3 = extend_arg(arg2, arg3) - res = arg2 + arg3 - - e += update_flag_arith_add_zn(arg2, arg3) - e += update_flag_arith_add_co(arg2, arg3) - - e.append(ExprAssign(arg1, res)) - - return e, [] - - -def subs(ir, instr, arg1, arg2, arg3): - e = [] - arg3 = extend_arg(arg2, arg3) - res = arg2 - arg3 - - - e += update_flag_arith_sub_zn(arg2, arg3) - e += update_flag_arith_sub_co(arg2, arg3) - - e.append(ExprAssign(arg1, res)) - return e, [] - - -def cmp(ir, instr, arg1, arg2): - e = [] - arg2 = extend_arg(arg1, arg2) - - e += update_flag_arith_sub_zn(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2) - - return e, [] - - -def cmn(ir, instr, arg1, arg2): - e = 
[] - arg2 = extend_arg(arg1, arg2) - - e += update_flag_arith_add_zn(arg1, arg2) - e += update_flag_arith_add_co(arg1, arg2) - - return e, [] - - -def ands(ir, instr, arg1, arg2, arg3): - e = [] - arg3 = extend_arg(arg2, arg3) - res = arg2 & arg3 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg2, arg3))] - e += update_flag_nf(res) - - e.append(ExprAssign(arg1, res)) - return e, [] - -def tst(ir, instr, arg1, arg2): - e = [] - arg2 = extend_arg(arg1, arg2) - res = arg1 & arg2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] - e += update_flag_nf(res) - - return e, [] - - -@sbuild.parse -def lsl(arg1, arg2, arg3): - arg1 = arg2 << (arg3 & ExprInt(arg3.size - 1, arg3.size)) - - -@sbuild.parse -def lsr(arg1, arg2, arg3): - arg1 = arg2 >> (arg3 & ExprInt(arg3.size - 1, arg3.size)) - - -@sbuild.parse -def asr(arg1, arg2, arg3): - arg1 = ExprOp( - 'a>>', arg2, (arg3 & ExprInt(arg3.size - 1, arg3.size))) - - -@sbuild.parse -def mov(arg1, arg2): - arg1 = arg2 - - -def movk(ir, instr, arg1, arg2): - e = [] - if isinstance(arg2, ExprOp): - assert(arg2.op == 'slice_at' and - isinstance(arg2.args[0], ExprInt) and - isinstance(arg2.args[1], ExprInt)) - value, shift = int(arg2.args[0].arg), int(arg2.args[1]) - e.append( - ExprAssign(arg1[shift:shift + 16], ExprInt(value, 16))) - else: - e.append(ExprAssign(arg1[:16], ExprInt(int(arg2), 16))) - - return e, [] - - -@sbuild.parse -def movz(arg1, arg2): - arg1 = arg2 - - -@sbuild.parse -def movn(arg1, arg2): - arg1 = ~arg2 - - -@sbuild.parse -def bl(arg1): - PC = arg1 - ir.IRDst = arg1 - LR = ExprInt(instr.offset + instr.l, 64) - -@sbuild.parse -def csel(arg1, arg2, arg3, arg4): - cond_expr = cond2expr[arg4.name] - arg1 = arg2 if cond_expr else arg3 - -def ccmp(ir, instr, arg1, arg2, arg3, arg4): - e = [] - if(arg2.is_int()): - arg2=ExprInt(arg2.arg.arg,arg1.size) - default_nf = arg3[0:1] - default_zf = arg3[1:2] - default_cf = arg3[2:3] - default_of = arg3[3:4] - cond_expr = cond2expr[arg4.name] - res = arg1 - arg2 - 
new_nf = nf - new_zf = update_flag_zf(res)[0].src - new_cf = update_flag_sub_cf(arg1, arg2)[0].src - new_of = update_flag_sub_of(arg1, arg2)[0].src - - e.append(ExprAssign(nf, ExprCond(cond_expr, - new_nf, - default_nf))) - e.append(ExprAssign(zf, ExprCond(cond_expr, - new_zf, - default_zf))) - e.append(ExprAssign(cf, ExprCond(cond_expr, - new_cf, - default_cf))) - e.append(ExprAssign(of, ExprCond(cond_expr, - new_of, - default_of))) - return e, [] - - -def csinc(ir, instr, arg1, arg2, arg3, arg4): - e = [] - cond_expr = cond2expr[arg4.name] - e.append( - ExprAssign( - arg1, - ExprCond( - cond_expr, - arg2, - arg3 + ExprInt(1, arg3.size) - ) - ) - ) - return e, [] - - -def csinv(ir, instr, arg1, arg2, arg3, arg4): - e = [] - cond_expr = cond2expr[arg4.name] - e.append( - ExprAssign( - arg1, - ExprCond( - cond_expr, - arg2, - ~arg3) - ) - ) - return e, [] - - -def csneg(ir, instr, arg1, arg2, arg3, arg4): - e = [] - cond_expr = cond2expr[arg4.name] - e.append( - ExprAssign( - arg1, - ExprCond( - cond_expr, - arg2, - -arg3) - ) - ) - return e, [] - - -def cset(ir, instr, arg1, arg2): - e = [] - cond_expr = cond2expr[arg2.name] - e.append( - ExprAssign( - arg1, - ExprCond( - cond_expr, - ExprInt(1, arg1.size), - ExprInt(0, arg1.size) - ) - ) - ) - return e, [] - - -def csetm(ir, instr, arg1, arg2): - e = [] - cond_expr = cond2expr[arg2.name] - e.append( - ExprAssign( - arg1, - ExprCond( - cond_expr, - ExprInt(-1, arg1.size), - ExprInt(0, arg1.size) - ) - ) - ) - return e, [] - - -def get_mem_access(mem): - updt = None - if isinstance(mem, ExprOp): - if mem.op == 'preinc': - addr = mem.args[0] + mem.args[1] - elif mem.op == 'segm': - base = mem.args[0] - op, (reg, shift) = mem.args[1].op, mem.args[1].args - if op == 'SXTW': - off = reg.signExtend(base.size) << shift.zeroExtend(base.size) - addr = base + off - elif op == 'UXTW': - off = reg.zeroExtend(base.size) << shift.zeroExtend(base.size) - addr = base + off - elif op == 'LSL': - if isinstance(shift, ExprInt) and 
int(shift) == 0: - addr = base + reg.zeroExtend(base.size) - else: - addr = base + \ - (reg.zeroExtend(base.size) - << shift.zeroExtend(base.size)) - else: - raise NotImplementedError('bad op') - elif mem.op == "postinc": - addr, off = mem.args - updt = ExprAssign(addr, addr + off) - elif mem.op == "preinc_wb": - base, off = mem.args - addr = base + off - updt = ExprAssign(base, base + off) - else: - raise NotImplementedError('bad op') - else: - raise NotImplementedError('bad op') - return addr, updt - - - -def ldr(ir, instr, arg1, arg2): - e = [] - addr, updt = get_mem_access(arg2) - e.append(ExprAssign(arg1, ExprMem(addr, arg1.size))) - if updt: - e.append(updt) - return e, [] - - -def ldr_size(ir, instr, arg1, arg2, size): - e = [] - addr, updt = get_mem_access(arg2) - e.append( - ExprAssign(arg1, ExprMem(addr, size).zeroExtend(arg1.size))) - if updt: - e.append(updt) - return e, [] - - -def ldrb(ir, instr, arg1, arg2): - return ldr_size(ir, instr, arg1, arg2, 8) - - -def ldrh(ir, instr, arg1, arg2): - return ldr_size(ir, instr, arg1, arg2, 16) - - -def ldrs_size(ir, instr, arg1, arg2, size): - e = [] - addr, updt = get_mem_access(arg2) - e.append( - ExprAssign(arg1, ExprMem(addr, size).signExtend(arg1.size))) - if updt: - e.append(updt) - return e, [] - - -def ldrsb(ir, instr, arg1, arg2): - return ldrs_size(ir, instr, arg1, arg2, 8) - - -def ldrsh(ir, instr, arg1, arg2): - return ldrs_size(ir, instr, arg1, arg2, 16) - - -def ldrsw(ir, instr, arg1, arg2): - return ldrs_size(ir, instr, arg1, arg2, 32) - -def ldaxrb(ir, instr, arg1, arg2): - # TODO XXX no memory lock implemented - assert arg2.is_op('preinc') - assert len(arg2.args) == 1 - ptr = arg2.args[0] - e = [] - e.append(ExprAssign(arg1, ExprMem(ptr, 8).zeroExtend(arg1.size))) - return e, [] - - -def stlxrb(ir, instr, arg1, arg2, arg3): - assert arg3.is_op('preinc') - assert len(arg3.args) == 1 - ptr = arg3.args[0] - e = [] - e.append(ExprAssign(ExprMem(ptr, 8), arg2[:8])) - # TODO XXX here, force update 
success - e.append(ExprAssign(arg1, ExprInt(0, arg1.size))) - return e, [] - - -def l_str(ir, instr, arg1, arg2): - e = [] - addr, updt = get_mem_access(arg2) - e.append(ExprAssign(ExprMem(addr, arg1.size), arg1)) - if updt: - e.append(updt) - return e, [] - - -def strb(ir, instr, arg1, arg2): - e = [] - addr, updt = get_mem_access(arg2) - e.append(ExprAssign(ExprMem(addr, 8), arg1[:8])) - if updt: - e.append(updt) - return e, [] - - -def strh(ir, instr, arg1, arg2): - e = [] - addr, updt = get_mem_access(arg2) - e.append(ExprAssign(ExprMem(addr, 16), arg1[:16])) - if updt: - e.append(updt) - return e, [] - - -def stp(ir, instr, arg1, arg2, arg3): - e = [] - addr, updt = get_mem_access(arg3) - e.append(ExprAssign(ExprMem(addr, arg1.size), arg1)) - e.append( - ExprAssign(ExprMem(addr + ExprInt(arg1.size // 8, addr.size), arg2.size), arg2)) - if updt: - e.append(updt) - return e, [] - - -def ldp(ir, instr, arg1, arg2, arg3): - e = [] - addr, updt = get_mem_access(arg3) - e.append(ExprAssign(arg1, ExprMem(addr, arg1.size))) - e.append( - ExprAssign(arg2, ExprMem(addr + ExprInt(arg1.size // 8, addr.size), arg2.size))) - if updt: - e.append(updt) - return e, [] - - -def sbfm(ir, instr, arg1, arg2, arg3, arg4): - e = [] - rim, sim = int(arg3.arg), int(arg4) + 1 - if sim > rim: - res = arg2[rim:sim].signExtend(arg1.size) - else: - shift = ExprInt(arg2.size - rim, arg2.size) - res = (arg2[:sim].signExtend(arg1.size) << shift) - e.append(ExprAssign(arg1, res)) - return e, [] - - -def ubfm(ir, instr, arg1, arg2, arg3, arg4): - e = [] - rim, sim = int(arg3.arg), int(arg4) + 1 - if sim != arg1.size - 1 and rim == sim: - # Simple case: lsl - value = int(rim) - assert value < arg1.size - e.append(ExprAssign(arg1, arg2 << (ExprInt(arg1.size - value, arg2.size)))) - return e, [] - if sim == arg1.size: - # Simple case: lsr - value = int(rim) - assert value < arg1.size - e.append(ExprAssign(arg1, arg2 >> (ExprInt(value, arg2.size)))) - return e, [] - - if sim > rim: - res = 
arg2[rim:sim].zeroExtend(arg1.size) - else: - shift = ExprInt(arg2.size - rim, arg2.size) - res = (arg2[:sim].zeroExtend(arg1.size) << shift) - e.append(ExprAssign(arg1, res)) - return e, [] - -def bfm(ir, instr, arg1, arg2, arg3, arg4): - e = [] - rim, sim = int(arg3.arg), int(arg4) + 1 - if sim > rim: - res = arg2[rim:sim] - e.append(ExprAssign(arg1[:sim-rim], res)) - else: - shift_i = arg2.size - rim - shift = ExprInt(shift_i, arg2.size) - res = arg2[:sim] - e.append(ExprAssign(arg1[shift_i:shift_i+sim], res)) - return e, [] - - - -def mrs(ir, insr, arg1, arg2, arg3, arg4, arg5): - e = [] - if arg2.is_int(3) and arg3.is_id("c4") and arg4.is_id("c2") and arg5.is_int(0): - out = [] - out.append(ExprInt(0x0, 28)) - out.append(of) - out.append(cf) - out.append(zf) - out.append(nf) - e.append(ExprAssign(arg1, ExprCompose(*out).zeroExtend(arg1.size))) - else: - raise NotImplementedError("MRS not implemented") - return e, [] - -def msr(ir, instr, arg1, arg2, arg3, arg4, arg5): - - e = [] - if arg1.is_int(3) and arg2.is_id("c4") and arg3.is_id("c2") and arg4.is_int(0): - e.append(ExprAssign(nf, arg5[31:32])) - e.append(ExprAssign(zf, arg5[30:31])) - e.append(ExprAssign(cf, arg5[29:30])) - e.append(ExprAssign(of, arg5[28:29])) - else: - raise NotImplementedError("MSR not implemented") - return e, [] - - - -def adc(ir, instr, arg1, arg2, arg3): - arg3 = extend_arg(arg2, arg3) - e = [] - r = arg2 + arg3 + cf.zeroExtend(arg3.size) - e.append(ExprAssign(arg1, r)) - return e, [] - - -def adcs(ir, instr, arg1, arg2, arg3): - arg3 = extend_arg(arg2, arg3) - e = [] - r = arg2 + arg3 + cf.zeroExtend(arg3.size) - e.append(ExprAssign(arg1, r)) - e += update_flag_arith_addwc_zn(arg2, arg3, cf) - e += update_flag_arith_addwc_co(arg2, arg3, cf) - return e, [] - - -def sbc(ir, instr, arg1, arg2, arg3): - arg3 = extend_arg(arg2, arg3) - e = [] - r = arg2 - (arg3 + (~cf).zeroExtend(arg3.size)) - e.append(ExprAssign(arg1, r)) - return e, [] - - -def sbcs(ir, instr, arg1, arg2, arg3): - 
arg3 = extend_arg(arg2, arg3) - e = [] - r = arg2 - (arg3 + (~cf).zeroExtend(arg3.size)) - e.append(ExprAssign(arg1, r)) - e += update_flag_arith_subwc_zn(arg2, arg3, ~cf) - e += update_flag_arith_subwc_co(arg2, arg3, ~cf) - return e, [] - - -@sbuild.parse -def madd(arg1, arg2, arg3, arg4): - arg1 = arg2 * arg3 + arg4 - - -@sbuild.parse -def msub(arg1, arg2, arg3, arg4): - arg1 = arg4 - (arg2 * arg3) - - -@sbuild.parse -def udiv(arg1, arg2, arg3): - if arg3: - arg1 = ExprOp('udiv', arg2, arg3) - else: - exception_flags = ExprInt(EXCEPT_DIV_BY_ZERO, - exception_flags.size) - -@sbuild.parse -def sdiv(arg1, arg2, arg3): - if arg3: - arg1 = ExprOp('sdiv', arg2, arg3) - else: - exception_flags = ExprInt(EXCEPT_DIV_BY_ZERO, - exception_flags.size) - - - -@sbuild.parse -def smaddl(arg1, arg2, arg3, arg4): - arg1 = arg2.signExtend(arg1.size) * arg3.signExtend(arg1.size) + arg4 - - -@sbuild.parse -def cbz(arg1, arg2): - dst = ExprLoc(ir.get_next_loc_key(instr), 64) if arg1 else arg2 - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def cbnz(arg1, arg2): - dst = arg2 if arg1 else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def tbz(arg1, arg2, arg3): - bitmask = ExprInt(1, arg1.size) << arg2 - dst = ExprLoc( - ir.get_next_loc_key(instr), - 64 - ) if arg1 & bitmask else arg3 - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def tbnz(arg1, arg2, arg3): - bitmask = ExprInt(1, arg1.size) << arg2 - dst = arg3 if arg1 & bitmask else ExprLoc( - ir.get_next_loc_key(instr), - 64 - ) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_ne(arg1): - cond = cond2expr['NE'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_eq(arg1): - cond = cond2expr['EQ'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_ge(arg1): - cond = cond2expr['GE'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = 
dst - ir.IRDst = dst - - -@sbuild.parse -def b_mi(arg1): - cond = cond2expr['MI'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_pl(arg1): - cond = cond2expr['PL'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_gt(arg1): - cond = cond2expr['GT'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_cc(arg1): - cond = cond2expr['CC'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_cs(arg1): - cond = cond2expr['CS'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_hi(arg1): - cond = cond2expr['HI'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_le(arg1): - cond = cond2expr['LE'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_ls(arg1): - cond = cond2expr['LS'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def b_lt(arg1): - cond = cond2expr['LT'] - dst = arg1 if cond else ExprLoc(ir.get_next_loc_key(instr), 64) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def ret(arg1): - PC = arg1 - ir.IRDst = arg1 - - -@sbuild.parse -def adrp(arg1, arg2): - arg1 = (PC & ExprInt(0xfffffffffffff000, 64)) + arg2 - - -@sbuild.parse -def adr(arg1, arg2): - arg1 = PC + arg2 - - -@sbuild.parse -def b(arg1): - PC = arg1 - ir.IRDst = arg1 - - -@sbuild.parse -def br(arg1): - PC = arg1 - ir.IRDst = arg1 - -@sbuild.parse -def blr(arg1): - PC = arg1 - ir.IRDst = arg1 - LR = ExprLoc(ir.get_next_loc_key(instr), 64) - -@sbuild.parse -def nop(): - """Do nothing""" - - -def rev(ir, instr, arg1, arg2): - out = [] - for i in range(0, 
arg2.size, 8): - out.append(arg2[i:i+8]) - out.reverse() - e = [] - result = ExprCompose(*out) - e.append(ExprAssign(arg1, result)) - return e, [] - - -def rev16(ir, instr, arg1, arg2): - out = [] - for i in range(0, arg2.size // 8): - index = (i & ~1) + (1 - (i & 1)) - out.append(arg2[index * 8:(index + 1) * 8]) - e = [] - result = ExprCompose(*out) - e.append(ExprAssign(arg1, result)) - return e, [] - - -@sbuild.parse -def extr(arg1, arg2, arg3, arg4): - compose = ExprCompose(arg2, arg3) - arg1 = compose[int(arg4.arg):int(arg4)+arg1.size] - - -@sbuild.parse -def svc(arg1): - exception_flags = ExprInt(EXCEPT_INT_XX, exception_flags.size) - interrupt_num = ExprInt(int(arg1), interrupt_num.size) - - -def fmov(ir, instr, arg1, arg2): - if arg2.is_int(): - # Transform int to signed floating-point constant with 3-bit exponent - # and normalized 4 bits of precision - # VFPExpandImm() of ARM Architecture Reference Manual - imm8 = int(arg2) - N = arg1.size - assert N in [32, 64] - E = 8 if N == 32 else 11 - F = N - E - 1; - # sign = imm8<7>; - sign = (imm8 >> 7) & 1; - # exp = NOT(imm8<6>):Replicate(imm8<6>,E-3):imm8<5:4>; - exp = (((imm8 >> 6) & 1) ^ 1) << (E - 3 + 2) - if (imm8 >> 6) & 1: - tmp = (1 << (E - 3)) - 1 - else: - tmp = 0 - exp |= tmp << 2 - exp |= (imm8 >> 4) & 3 - # frac = imm8<3:0>:Zeros(F-4); - frac = (imm8 & 0xf) << (F - 4) - value = frac - value |= exp << (4 + F - 4) - value |= sign << (4 + F - 4 + 1 + E - 3 + 2) - arg2 = ExprInt(value, N) - e = [ExprAssign(arg1, arg2)] - return e, [] - - -def fadd(ir, instr, arg1, arg2, arg3): - e = [] - e.append(ExprAssign(arg1, ExprOp('fadd', arg2, arg3))) - return e, [] - - -def fsub(ir, instr, arg1, arg2, arg3): - e = [] - e.append(ExprAssign(arg1, ExprOp('fsub', arg2, arg3))) - return e, [] - - -def fmul(ir, instr, arg1, arg2, arg3): - e = [] - e.append(ExprAssign(arg1, ExprOp('fmul', arg2, arg3))) - return e, [] - - -def fdiv(ir, instr, arg1, arg2, arg3): - e = [] - e.append(ExprAssign(arg1, ExprOp('fdiv', arg2, 
arg3))) - return e, [] - - -def fabs(ir, instr, arg1, arg2): - e = [] - e.append(ExprAssign(arg1, ExprOp('fabs', arg2))) - return e, [] - - -def fmadd(ir, instr, arg1, arg2, arg3, arg4): - e = [] - e.append( - ExprAssign( - arg1, - ExprOp( - 'fadd', - arg4, - ExprOp('fmul', arg2, arg3) - ) - ) - ) - return e, [] - - -def fmsub(ir, instr, arg1, arg2, arg3, arg4): - e = [] - e.append( - ExprAssign( - arg1, - ExprOp( - 'fsub', - arg4, - ExprOp('fmul', arg2, arg3) - ) - ) - ) - return e, [] - - -def fcvt(ir, instr, arg1, arg2): - # XXX TODO: rounding - e = [] - src = ExprOp('fpconvert_fp%d' % arg1.size, arg2) - e.append(ExprAssign(arg1, src)) - return e, [] - - -def scvtf(ir, instr, arg1, arg2): - # XXX TODO: rounding - e = [] - src = ExprOp('sint_to_fp', arg2) - if arg1.size != src.size: - src = ExprOp('fpconvert_fp%d' % arg1.size, src) - e.append(ExprAssign(arg1, src)) - return e, [] - - -def ucvtf(ir, instr, arg1, arg2): - # XXX TODO: rounding - e = [] - src = ExprOp('uint_to_fp', arg2) - if arg1.size != src.size: - src = ExprOp('fpconvert_fp%d' % arg1.size, src) - e.append(ExprAssign(arg1, src)) - return e, [] - - -def fcvtzs(ir, instr, arg1, arg2): - # XXX TODO: rounding - e = [] - e.append( - ExprAssign( - arg1, - ExprOp('fp_to_sint%d' % arg1.size, - ExprOp('fpround_towardszero', arg2) - ) - ) - ) - return e, [] - - -def fcvtzu(ir, instr, arg1, arg2): - # XXX TODO: rounding - e = [] - e.append( - ExprAssign( - arg1, - ExprOp('fp_to_uint%d' % arg1.size, - ExprOp('fpround_towardszero', arg2) - ) - ) - ) - return e, [] - - -def fcmpe(ir, instr, arg1, arg2): - e = [] - e.append( - ExprAssign( - nf, - ExprOp('fcom_c0', arg1, arg2) - ) - ) - e.append( - ExprAssign( - cf, - ~ExprOp('fcom_c0', arg1, arg2) - ) - ) - e.append( - ExprAssign( - zf, - ExprOp('fcom_c3', arg1, arg2) - ) - ) - e.append(ExprAssign(of, ExprInt(0, 1))) - return e, [] - - -def clz(ir, instr, arg1, arg2): - e = [] - e.append(ExprAssign(arg1, ExprOp('cntleadzeros', arg2))) - return e, [] - -def 
casp(ir, instr, arg1, arg2, arg3): - # XXX TODO: memory barrier - e = [] - if arg1.size == 32: - regs = gpregs32_expr - else: - regs = gpregs64_expr - index1 = regs.index(arg1) - index2 = regs.index(arg2) - - # TODO endianness - comp_value = ExprCompose(regs[index1], regs[index1 + 1]) - new_value = ExprCompose(regs[index2], regs[index2 + 1]) - assert arg3.is_op('preinc') - ptr = arg3.args[0] - data = ExprMem(ptr, comp_value.size) - - loc_store = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_do = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("FLAG_EQ_CMP", data, comp_value), loc_do, loc_store))) - - e_store = [] - e_store.append(ExprAssign(data, new_value)) - e_store.append(ExprAssign(ir.IRDst, loc_do)) - blk_store = IRBlock(loc_store.loc_key, [AssignBlock(e_store, instr)]) - - e_do = [] - e_do.append(ExprAssign(regs[index1], data[:data.size // 2])) - e_do.append(ExprAssign(regs[index1 + 1], data[data.size // 2:])) - e_do.append(ExprAssign(ir.IRDst, loc_next)) - blk_do = IRBlock(loc_do.loc_key, [AssignBlock(e_do, instr)]) - - return e, [blk_store, blk_do] - - -@sbuild.parse -def umaddl(arg1, arg2, arg3, arg4): - arg1 = arg2.zeroExtend(arg1.size) * arg3.zeroExtend(arg1.size) + arg4 - - -@sbuild.parse -def umsubbl(arg1, arg2, arg3, arg4): - arg1 = arg2.zeroExtend(arg1.size) * arg3.zeroExtend(arg1.size) + arg4 - - -@sbuild.parse -def umull(arg1, arg2, arg3): - arg1 = (arg2.zeroExtend(64) * arg3.zeroExtend(64)) - - -@sbuild.parse -def umulh(arg1, arg2, arg3): - arg1 = (arg2.zeroExtend(128) * arg3.zeroExtend(128))[64:] - - -@sbuild.parse -def smulh(arg1, arg2, arg3): - arg1 = (arg2.signExtend(128) * arg3.signExtend(128))[64:] - - -@sbuild.parse -def smull(arg1, arg2, arg3): - arg1 = (arg2.signExtend(64) * arg3.signExtend(64))[64:] - - - -mnemo_func = sbuild.functions -mnemo_func.update({ - 'and': and_l, - 'adds': adds, - 'ands': ands, - 'tst': 
tst, - 'subs': subs, - 'cmp': cmp, - 'cmn': cmn, - 'movk': movk, - 'ccmp': ccmp, - 'csinc': csinc, - 'csinv': csinv, - 'csneg': csneg, - 'cset': cset, - 'csetm': csetm, - - 'b.ne': b_ne, - 'b.eq': b_eq, - 'b.ge': b_ge, - 'b.mi': b_mi, - 'b.pl': b_pl, - 'b.gt': b_gt, - 'b.cc': b_cc, - 'b.cs': b_cs, - 'b.hi': b_hi, - 'b.le': b_le, - 'b.ls': b_ls, - 'b.lt': b_lt, - - 'bics': bics, - - 'ret': ret, - 'stp': stp, - 'ldp': ldp, - - 'ldr': ldr, - 'ldrb': ldrb, - 'ldrh': ldrh, - - 'ldur': ldr, - 'ldurb': ldrb, - 'ldursb': ldrsb, - 'ldurh': ldrh, - 'ldursh': ldrsh, - 'ldursw': ldrsw, - - 'ldrsb': ldrsb, - 'ldrsh': ldrsh, - 'ldrsw': ldrsw, - - 'ldaxrb': ldaxrb, - 'stlxrb': stlxrb, - - 'str': l_str, - 'strb': strb, - 'strh': strh, - - 'stur': l_str, - 'sturb': strb, - 'sturh': strh, - - - 'bfm': bfm, - 'sbfm': sbfm, - 'ubfm': ubfm, - - 'extr': extr, - 'rev': rev, - 'rev16': rev16, - - 'msr': msr, - 'mrs': mrs, - - 'adc': adc, - 'adcs': adcs, - 'sbc': sbc, - 'sbcs': sbcs, - - 'fmov': fmov, - 'fadd': fadd, - 'fsub': fsub, - 'fmul': fmul, - 'fdiv': fdiv, - 'fabs': fabs, - 'fmadd': fmadd, - 'fmsub': fmsub, - 'fcvt': fcvt, - 'scvtf': scvtf, - 'ucvtf': ucvtf, - 'fcvtzs': fcvtzs, - 'fcvtzu': fcvtzu, - 'fcmpe': fcmpe, - 'clz': clz, - - # XXX TODO: memory barrier - 'casp':casp, - 'caspl':casp, - 'caspa':casp, - 'caspal':casp, - - -}) - - -def get_mnemo_expr(ir, instr, *args): - if not instr.name.lower() in mnemo_func: - raise NotImplementedError('unknown mnemo %s' % instr) - instr, extra_ir = mnemo_func[instr.name.lower()](ir, instr, *args) - return instr, extra_ir - - -class aarch64info(object): - mode = "aarch64" - # offset - - -class ir_aarch64l(IntermediateRepresentation): - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_aarch64, "l", loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 64) - self.addrsize = 64 - - def get_ir(self, instr): - args = instr.args - if len(args) and isinstance(args[-1], ExprOp): - if (args[-1].op in 
['<<', '>>', '<>', '<<<', '>>>'] and - isinstance(args[-1].args[-1], ExprId)): - args[-1] = ExprOp(args[-1].op, - args[-1].args[0], - args[-1].args[-1][:8].zeroExtend(32)) - instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) - self.mod_pc(instr, instr_ir, extra_ir) - instr_ir, extra_ir = self.del_dst_zr(instr, instr_ir, extra_ir) - return instr_ir, extra_ir - - def expr_fix_regs_for_mode(self, e): - return e.replace_expr(replace_regs) - - def expraff_fix_regs_for_mode(self, e): - dst = self.expr_fix_regs_for_mode(e.dst) - src = self.expr_fix_regs_for_mode(e.src) - return ExprAssign(dst, src) - - def irbloc_fix_regs_for_mode(self, irblock, mode=64): - irs = [] - for assignblk in irblock: - new_assignblk = dict(assignblk) - for dst, src in viewitems(assignblk): - del(new_assignblk[dst]) - # Special case for 64 bits: - # If destination is a 32 bit reg, zero extend the 64 bit reg - if (isinstance(dst, ExprId) and - dst.size == 32 and - dst in replace_regs): - src = src.zeroExtend(64) - dst = replace_regs[dst].arg - - dst = self.expr_fix_regs_for_mode(dst) - src = self.expr_fix_regs_for_mode(src) - new_assignblk[dst] = src - irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(irblock.loc_key, irs) - - def mod_pc(self, instr, instr_ir, extra_ir): - "Replace PC by the instruction's offset" - cur_offset = ExprInt(instr.offset, 64) - pc_fixed = {self.pc: cur_offset} - for i, expr in enumerate(instr_ir): - dst, src = expr.dst, expr.src - if dst != self.pc: - dst = dst.replace_expr(pc_fixed) - src = src.replace_expr(pc_fixed) - instr_ir[i] = ExprAssign(dst, src) - - for idx, irblock in enumerate(extra_ir): - extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ - if expr != self.pc else expr, - lambda expr: expr.replace_expr(pc_fixed)) - - - def del_dst_zr(self, instr, instr_ir, extra_ir): - "Writes to zero register are discarded" - regs_to_fix = [WZR, XZR] - instr_ir = [expr for expr in instr_ir if expr.dst not in 
regs_to_fix] - - new_irblocks = [] - for irblock in extra_ir: - irs = [] - for assignblk in irblock: - new_dsts = { - dst:src for dst, src in viewitems(assignblk) - if dst not in regs_to_fix - } - irs.append(AssignBlock(new_dsts, assignblk.instr)) - new_irblocks.append(IRBlock(irblock.loc_key, irs)) - - return instr_ir, new_irblocks - - -class ir_aarch64b(ir_aarch64l): - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_aarch64, "b", loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 64) diff --git a/miasm2/arch/arm/__init__.py b/miasm2/arch/arm/__init__.py deleted file mode 100644 index bbad893b..00000000 --- a/miasm2/arch/arm/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py deleted file mode 100644 index 3cafad59..00000000 --- a/miasm2/arch/arm/arch.py +++ /dev/null @@ -1,3299 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range -from future.utils import viewitems - -import logging -from pyparsing import * -from miasm2.expression.expression import * -from miasm2.core.cpu import * -from collections import defaultdict -from miasm2.core.bin_stream import bin_stream -import miasm2.arch.arm.regs as regs_module -from miasm2.arch.arm.regs import * -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp - -# A1 encoding - -log = logging.getLogger("armdis") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.DEBUG) - -# arm regs ############## -reg_dum = ExprId('DumReg', 32) - -PC, _ = gen_reg('PC') - -# GP -regs_str = ['R%d' % r for r in range(0x10)] -regs_str[13] = 'SP' -regs_str[14] = 'LR' -regs_str[15] = 'PC' -regs_expr = [ExprId(x, 32) for x in regs_str] - -gpregs = reg_info(regs_str, regs_expr) - -gpregs_pc = reg_info(regs_str[-1:], regs_expr[-1:]) -gpregs_sp = 
reg_info(regs_str[13:14], regs_expr[13:14]) - -gpregs_nosppc = reg_info(regs_str[:13] + [str(reg_dum), regs_str[14]], - regs_expr[:13] + [reg_dum, regs_expr[14]]) - -gpregs_nopc = reg_info(regs_str[:14], - regs_expr[:14]) - -gpregs_nosp = reg_info(regs_str[:13] + [str(reg_dum), regs_str[14], regs_str[15]], - regs_expr[:13] + [reg_dum, regs_expr[14], regs_expr[15]]) - - -# psr -sr_flags = "cxsf" -cpsr_regs_str = [] -spsr_regs_str = [] -for i in range(0x10): - o = "" - for j in range(4): - if i & (1 << j): - o += sr_flags[j] - cpsr_regs_str.append("CPSR_%s" % o) - spsr_regs_str.append("SPSR_%s" % o) - -# psr_regs_str = ['CPSR', 'SPSR'] -# psr_regs_expr = [ExprId(x, 32) for x in psr_regs_str] - -# psr_regs = reg_info(psr_regs_str, psr_regs_expr) - -cpsr_regs_expr = [ExprId(x, 32) for x in cpsr_regs_str] -spsr_regs_expr = [ExprId(x, 32) for x in spsr_regs_str] - -cpsr_regs = reg_info(cpsr_regs_str, cpsr_regs_expr) -spsr_regs = reg_info(spsr_regs_str, spsr_regs_expr) - -# CP -cpregs_str = ['c%d' % r for r in range(0x10)] -cpregs_expr = [ExprId(x, 32) for x in cpregs_str] - -cp_regs = reg_info(cpregs_str, cpregs_expr) - -# P -pregs_str = ['p%d' % r for r in range(0x10)] -pregs_expr = [ExprId(x, 32) for x in pregs_str] - -p_regs = reg_info(pregs_str, pregs_expr) - -conditional_branch = ["BEQ", "BNE", "BCS", "BCC", "BMI", "BPL", "BVS", - "BVC", "BHI", "BLS", "BGE", "BLT", "BGT", "BLE"] - -unconditional_branch = ["B", "BX", "BL", "BLX"] - -barrier_expr = { - 0b1111: ExprId("SY", 32), - 0b1110: ExprId("ST", 32), - 0b1101: ExprId("LD", 32), - 0b1011: ExprId("ISH", 32), - 0b1010: ExprId("ISHST", 32), - 0b1001: ExprId("ISHLD", 32), - 0b0111: ExprId("NSH", 32), - 0b0110: ExprId("NSHST", 32), - 0b0011: ExprId("OSH", 32), - 0b0010: ExprId("OSHST", 32), - 0b0001: ExprId("OSHLD", 32), -} - -barrier_info = reg_info_dct(barrier_expr) - - - -# parser helper ########### - -def cb_tok_reg_duo(tokens): - tokens = tokens[0] - i1 = gpregs.expr.index(tokens[0].name) - i2 = 
gpregs.expr.index(tokens[1].name) - o = [] - for i in range(i1, i2 + 1): - o.append(AstId(gpregs.expr[i])) - return o - -LPARENTHESIS = Literal("(") -RPARENTHESIS = Literal(")") - -LACC = Suppress(Literal("{")) -RACC = Suppress(Literal("}")) -MINUS = Suppress(Literal("-")) -CIRCUNFLEX = Literal("^") - - -def check_bounds(left_bound, right_bound, value): - if left_bound <= value and value <= right_bound: - return AstInt(value) - else: - raise ValueError('shift operator immediate value out of bound') - - -def check_values(values, value): - if value in values: - return AstInt(value) - else: - raise ValueError('shift operator immediate value out of bound') - -int_1_31 = str_int.copy().setParseAction(lambda v: check_bounds(1, 31, v[0])) -int_1_32 = str_int.copy().setParseAction(lambda v: check_bounds(1, 32, v[0])) - -int_8_16_24 = str_int.copy().setParseAction(lambda v: check_values([8, 16, 24], v[0])) - - -def cb_reglistparse(tokens): - tokens = tokens[0] - if tokens[-1] == "^": - return AstOp('sbit', AstOp('reglist', *tokens[:-1])) - return AstOp('reglist', *tokens) - - -allshifts = ['<<', '>>', 'a>>', '>>>', 'rrx'] -allshifts_armt = ['<<', '>>', 'a>>', '>>>', 'rrx'] - -shift2expr_dct = {'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', - 'ROR': ">>>", 'RRX': "rrx"} - -expr2shift_dct = dict((value, key) for key, value in viewitems(shift2expr_dct)) - - -def op_shift2expr(tokens): - return shift2expr_dct[tokens[0]] - -reg_duo = Group(gpregs.parser + MINUS + - gpregs.parser).setParseAction(cb_tok_reg_duo) -reg_or_duo = reg_duo | gpregs.parser -gpreg_list = Group(LACC + delimitedList( - reg_or_duo, delim=',') + RACC + Optional(CIRCUNFLEX)) -gpreg_list.setParseAction(cb_reglistparse) - -LBRACK = Suppress("[") -RBRACK = Suppress("]") -COMMA = Suppress(",") -all_binaryop_1_31_shifts_t = literal_list( - ['LSL', 'ROR']).setParseAction(op_shift2expr) -all_binaryop_1_32_shifts_t = literal_list( - ['LSR', 'ASR']).setParseAction(op_shift2expr) -all_unaryop_shifts_t = 
literal_list(['RRX']).setParseAction(op_shift2expr) - -ror_shifts_t = literal_list(['ROR']).setParseAction(op_shift2expr) - - -allshifts_t_armt = literal_list( - ['LSL', 'LSR', 'ASR', 'ROR', 'RRX']).setParseAction(op_shift2expr) - -gpreg_p = gpregs.parser - -psr_p = cpsr_regs.parser | spsr_regs.parser - - -def cb_shift(tokens): - if len(tokens) == 1: - ret = tokens[0] - elif len(tokens) == 2: - ret = AstOp(tokens[1], tokens[0]) - elif len(tokens) == 3: - ret = AstOp(tokens[1], tokens[0], tokens[2]) - else: - raise ValueError("Bad arg") - return ret - -shift_off = (gpregs.parser + Optional( - (all_unaryop_shifts_t) | - (all_binaryop_1_31_shifts_t + (gpregs.parser | int_1_31)) | - (all_binaryop_1_32_shifts_t + (gpregs.parser | int_1_32)) -)).setParseAction(cb_shift) -shift_off |= base_expr - - -rot2_expr = (gpregs.parser + Optional( - (ror_shifts_t + (int_8_16_24)) -)).setParseAction(cb_shift) - - -OP_LSL = Suppress("LSL") - -def cb_deref_reg_reg(tokens): - if len(tokens) != 2: - raise ValueError("Bad mem format") - return AstMem(AstOp('+', tokens[0], tokens[1]), 8) - -def cb_deref_reg_reg_lsl_1(tokens): - if len(tokens) != 3: - raise ValueError("Bad mem format") - reg1, reg2, index = tokens - if not isinstance(index, AstInt) or index.value != 1: - raise ValueError("Bad index") - ret = AstMem(AstOp('+', reg1, AstOp('<<', reg2, index)), 16) - return ret - - -deref_reg_reg = (LBRACK + gpregs.parser + COMMA + gpregs.parser + RBRACK).setParseAction(cb_deref_reg_reg) -deref_reg_reg_lsl_1 = (LBRACK + gpregs.parser + COMMA + gpregs.parser + OP_LSL + base_expr + RBRACK).setParseAction(cb_deref_reg_reg_lsl_1) - - - -(gpregs.parser + Optional( - (ror_shifts_t + (int_8_16_24)) -)).setParseAction(cb_shift) - - - -reg_or_base = gpregs.parser | base_expr - -def deref2expr_nooff(tokens): - tokens = tokens[0] - # XXX default - return ExprOp("preinc", tokens[0], ExprInt(0, 32)) - - -def cb_deref_preinc(tokens): - tokens = tokens[0] - if len(tokens) == 1: - return AstOp("preinc", 
tokens[0], AstInt(0)) - elif len(tokens) == 2: - return AstOp("preinc", tokens[0], tokens[1]) - else: - raise NotImplementedError('len(tokens) > 2') - - -def cb_deref_pre_mem(tokens): - tokens = tokens[0] - if len(tokens) == 1: - return AstMem(AstOp("preinc", tokens[0], AstInt(0)), 32) - elif len(tokens) == 2: - return AstMem(AstOp("preinc", tokens[0], tokens[1]), 32) - else: - raise NotImplementedError('len(tokens) > 2') - - -def cb_deref_post(tokens): - tokens = tokens[0] - return AstOp("postinc", tokens[0], tokens[1]) - - -def cb_deref_wb(tokens): - tokens = tokens[0] - if tokens[-1] == '!': - return AstMem(AstOp('wback', *tokens[:-1]), 32) - return AstMem(tokens[0], 32) - -# shift_off.setParseAction(deref_off) -deref_nooff = Group( - LBRACK + gpregs.parser + RBRACK).setParseAction(deref2expr_nooff) -deref_pre = Group(LBRACK + gpregs.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(cb_deref_preinc) -deref_post = Group(LBRACK + gpregs.parser + RBRACK + - COMMA + shift_off).setParseAction(cb_deref_post) -deref = Group((deref_post | deref_pre | deref_nooff) - + Optional('!')).setParseAction(cb_deref_wb) - - -def cb_gpreb_wb(tokens): - assert len(tokens) == 1 - tokens = tokens[0] - if tokens[-1] == '!': - return AstOp('wback', *tokens[:-1]) - return tokens[0] - -gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(cb_gpreb_wb) - - -cond_list_full = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', - 'HI', 'LS', 'GE', 'LT', 'GT', 'LE', 'NV'] - - -cond_list = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', - 'HI', 'LS', 'GE', 'LT', 'GT', 'LE', ''] # , 'NV'] -cond_dct = dict([(x[1], x[0]) for x in enumerate(cond_list)]) -bm_cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) - - - -cond_dct_barmt = dict([(x[0], x[1]) for x in enumerate(cond_list) if x[0] & 0b1110 != 0b1110]) -bm_cond_barmt = bs_mod_name(l=4, fname='cond', mn_mod=cond_dct_barmt) - - - -def permut_args(order, args): - l = [] - for i, x in enumerate(order): - 
l.append((x.__class__, i)) - l = dict(l) - out = [None for x in range(len(args))] - for a in args: - out[l[a.__class__]] = a - return out - - -class additional_info(object): - - def __init__(self): - self.except_on_instr = False - self.lnk = None - self.cond = None - - -class instruction_arm(instruction): - __slots__ = [] - delayslot = 0 - - def __init__(self, *args, **kargs): - super(instruction_arm, self).__init__(*args, **kargs) - - @staticmethod - def arg2str(expr, index=None, loc_db=None): - wb = False - if expr.is_id() or expr.is_int(): - return str(expr) - elif expr.is_loc(): - if loc_db is not None: - return loc_db.pretty_str(expr.loc_key) - else: - return str(expr) - if isinstance(expr, ExprOp) and expr.op in expr2shift_dct: - if len(expr.args) == 1: - return '%s %s' % (expr.args[0], expr2shift_dct[expr.op]) - elif len(expr.args) == 2: - return '%s %s %s' % (expr.args[0], expr2shift_dct[expr.op], expr.args[1]) - else: - raise NotImplementedError('zarb arg2str') - - - sb = False - if isinstance(expr, ExprOp) and expr.op == "sbit": - sb = True - expr = expr.args[0] - if isinstance(expr, ExprOp) and expr.op == "reglist": - o = [gpregs.expr.index(x) for x in expr.args] - out = reglist2str(o) - if sb: - out += "^" - return out - - - if isinstance(expr, ExprOp) and expr.op == 'wback': - wb = True - expr = expr.args[0] - if isinstance(expr, ExprId): - out = str(expr) - if wb: - out += "!" 
- return out - - if not isinstance(expr, ExprMem): - return str(expr) - - expr = expr.ptr - if isinstance(expr, ExprOp) and expr.op == 'wback': - wb = True - expr = expr.args[0] - - - if isinstance(expr, ExprId): - r, s = expr, None - elif len(expr.args) == 1 and isinstance(expr.args[0], ExprId): - r, s = expr.args[0], None - elif isinstance(expr.args[0], ExprId): - r, s = expr.args[0], expr.args[1] - else: - r, s = expr.args[0].args - if isinstance(s, ExprOp) and s.op in expr2shift_dct: - s = ' '.join( - str(x) - for x in (s.args[0], expr2shift_dct[s.op], s.args[1]) - ) - - if isinstance(expr, ExprOp) and expr.op == 'postinc': - o = '[%s]' % r - if s and not (isinstance(s, ExprInt) and s.arg == 0): - o += ', %s' % s - else: - if s and not (isinstance(s, ExprInt) and s.arg == 0): - o = '[%s, %s]' % (r, s) - else: - o = '[%s]' % (r) - - - if wb: - o += "!" - return o - - - def dstflow(self): - if self.is_subcall(): - return True - return self.name in conditional_branch + unconditional_branch - - def dstflow2label(self, loc_db): - expr = self.args[0] - if not isinstance(expr, ExprInt): - return - if self.name == 'BLX': - addr = expr.arg + self.offset - else: - addr = expr.arg + self.offset - loc_key = loc_db.get_or_create_offset_location(addr) - self.args[0] = ExprLoc(loc_key, expr.size) - - def breakflow(self): - if self.is_subcall(): - return True - if self.name in conditional_branch + unconditional_branch: - return True - if self.name.startswith("LDM") and PC in self.args[1].args: - return True - if self.args and PC in self.args[0].get_r(): - return True - return False - - def is_subcall(self): - if self.name == 'BLX': - return True - return self.additional_info.lnk - - def getdstflow(self, loc_db): - return [self.args[0]] - - def splitflow(self): - if self.additional_info.lnk: - return True - if self.name == 'BLX': - return True - if self.name == 'BX': - return False - return self.breakflow() and self.additional_info.cond != 14 - - def get_symbol_size(self, 
symbol, loc_db): - return 32 - - def fixDstOffset(self): - e = self.args[0] - if self.offset is None: - raise ValueError('symbol not resolved %s' % l) - if not isinstance(e, ExprInt): - log.debug('dyn dst %r', e) - return - off = e.arg - self.offset - if int(off % 4): - raise ValueError('strange offset! %r' % off) - self.args[0] = ExprInt(off, 32) - - def get_args_expr(self): - args = [a for a in self.args] - return args - - def get_asm_offset(self, expr): - # LDR XXX, [PC, offset] => PC is self.offset+8 - return ExprInt(self.offset+8, expr.size) - -class instruction_armt(instruction_arm): - __slots__ = [] - delayslot = 0 - - def __init__(self, *args, **kargs): - super(instruction_armt, self).__init__(*args, **kargs) - - def dstflow(self): - if self.name in ["CBZ", "CBNZ"]: - return True - return self.name in conditional_branch + unconditional_branch - - def dstflow2label(self, loc_db): - if self.name in ["CBZ", "CBNZ"]: - expr = self.args[1] - else: - expr = self.args[0] - if not isinstance(expr, ExprInt): - return - if self.name == 'BLX': - addr = expr.arg + (self.offset & 0xfffffffc) - elif self.name == 'BL': - addr = expr.arg + self.offset - elif self.name.startswith('BP'): - addr = expr.arg + self.offset - elif self.name.startswith('CB'): - addr = expr.arg + self.offset + self.l + 2 - else: - addr = expr.arg + self.offset - - loc_key = loc_db.get_or_create_offset_location(addr) - dst = ExprLoc(loc_key, expr.size) - - if self.name in ["CBZ", "CBNZ"]: - self.args[1] = dst - else: - self.args[0] = dst - - def breakflow(self): - if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: - return True - if self.name.startswith("LDM") and PC in self.args[1].args: - return True - if self.args and PC in self.args[0].get_r(): - return True - return False - - def getdstflow(self, loc_db): - if self.name in ['CBZ', 'CBNZ']: - return [self.args[1]] - return [self.args[0]] - - def splitflow(self): - if self.name in conditional_branch + ['BL', 
'BLX', 'CBZ', 'CBNZ']: - return True - return False - - def is_subcall(self): - return self.name in ['BL', 'BLX'] - - def fixDstOffset(self): - e = self.args[0] - if self.offset is None: - raise ValueError('symbol not resolved %s' % l) - if not isinstance(e, ExprInt): - log.debug('dyn dst %r', e) - return - # The first +2 is to compensate instruction len, but strangely, 32 bits - # thumb2 instructions len is 2... For the second +2, didn't find it in - # the doc. - off = e.arg - self.offset - if int(off % 2): - raise ValueError('strange offset! %r' % off) - self.args[0] = ExprInt(off, 32) - - def get_asm_offset(self, expr): - # ADR XXX, PC, imm => PC is 4 aligned + imm - new_offset = ((self.offset + self.l) // 4) * 4 - return ExprInt(new_offset, expr.size) - - -class mn_arm(cls_mn): - delayslot = 0 - name = "arm" - regs = regs_module - bintree = {} - num = 0 - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - pc = {'l':PC, 'b':PC} - sp = {'l':SP, 'b':SP} - instruction = instruction_arm - max_instruction_len = 4 - alignment = 4 - - @classmethod - def getpc(cls, attrib = None): - return PC - - @classmethod - def getsp(cls, attrib = None): - return SP - - def additional_info(self): - info = additional_info() - info.lnk = False - if hasattr(self, "lnk"): - info.lnk = self.lnk.value != 0 - if hasattr(self, "cond"): - info.cond = self.cond.value - else: - info.cond = None - return info - - @classmethod - def getbits(cls, bs, attrib, start, n): - if not n: - return 0 - o = 0 - if n > bs.getlen() * 8: - raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) - while n: - offset = start // 8 - n_offset = cls.endian_offset(attrib, offset) - c = cls.getbytes(bs, n_offset, 1) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def endian_offset(cls, attrib, offset): - if 
attrib == "l": - return (offset & ~3) + 3 - offset % 4 - elif attrib == "b": - return offset - else: - raise NotImplementedError('bad attrib') - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l == 32, "len %r" % l - - @classmethod - def getmn(cls, name): - return name.upper() - - @classmethod - def mod_fields(cls, fields): - l = sum([x.l for x in fields]) - if l == 32: - return fields - return [bm_cond] + fields - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def value(self, mode): - v = super(mn_arm, self).value(mode) - if mode == 'l': - return [x[::-1] for x in v] - elif mode == 'b': - return [x for x in v] - else: - raise NotImplementedError('bad attrib') - - - def get_symbol_size(self, symbol, loc_db, mode): - return 32 - - -class mn_armt(cls_mn): - name = "armt" - regs = regs_module - delayslot = 0 - bintree = {} - num = 0 - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - pc = PC - sp = SP - instruction = instruction_armt - max_instruction_len = 4 - alignment = 4 - - @classmethod - def getpc(cls, attrib = None): - return PC - - @classmethod - def getsp(cls, attrib = None): - return SP - - def additional_info(self): - info = additional_info() - info.lnk = False - if hasattr(self, "lnk"): - info.lnk = self.lnk.value != 0 - info.cond = 14 # COND_ALWAYS - return info - - - @classmethod - def getbits(cls, bs, attrib, start, n): - if not n: - return 0 - o = 0 - if n > bs.getlen() * 8: - raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) - while n: - offset = start // 8 - n_offset = cls.endian_offset(attrib, offset) - c = cls.getbytes(bs, n_offset, 1) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def 
endian_offset(cls, attrib, offset): - if attrib == "l": - return (offset & ~1) + 1 - offset % 2 - elif attrib == "b": - return offset - else: - raise NotImplementedError('bad attrib') - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l in [16, 32], "len %r" % l - - @classmethod - def getmn(cls, name): - return name.upper() - - @classmethod - def mod_fields(cls, fields): - return list(fields) - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def value(self, mode): - v = super(mn_armt, self).value(mode) - if mode == 'l': - out = [] - for x in v: - if len(x) == 2: - out.append(x[::-1]) - elif len(x) == 4: - out.append(x[:2][::-1] + x[2:4][::-1]) - return out - elif mode == 'b': - return [x for x in v] - else: - raise NotImplementedError('bad attrib') - - def get_args_expr(self): - args = [a.expr for a in self.args] - return args - - def get_symbol_size(self, symbol, loc_db, mode): - return 32 - - -class arm_arg(m_arg): - def asm_ast_to_expr(self, arg, loc_db): - if isinstance(arg, AstId): - if isinstance(arg.name, ExprId): - return arg.name - if arg.name in gpregs.str: - return None - loc_key = loc_db.get_or_create_name_location(arg.name.encode()) - return ExprLoc(loc_key, 32) - if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] - if None in args: - return None - return ExprOp(arg.op, *args) - if isinstance(arg, AstInt): - return ExprInt(arg.value, 32) - if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, loc_db) - if ptr is None: - return None - return ExprMem(ptr, arg.size) - return None - - -class arm_reg(reg_noarg, arm_arg): - pass - - -class arm_gpreg_noarg(reg_noarg): - reg_info = gpregs - parser = reg_info.parser - - -class arm_gpreg(arm_reg): - reg_info = gpregs - parser = reg_info.parser - - -class arm_reg_wb(arm_reg): - reg_info = gpregs - parser = gpregs_wb - - def 
decode(self, v): - v = v & self.lmask - e = self.reg_info.expr[v] - if self.parent.wback.value: - e = ExprOp('wback', e) - self.expr = e - return True - - def encode(self): - e = self.expr - self.parent.wback.value = 0 - if isinstance(e, ExprOp) and e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - if isinstance(e, ExprId): - self.value = self.reg_info.expr.index(e) - else: - self.parent.wback.value = 1 - self.value = self.reg_info.expr.index(e.args[0]) - return True - - -class arm_psr(arm_arg): - parser = psr_p - - def decode(self, v): - v = v & self.lmask - if self.parent.psr.value == 0: - e = cpsr_regs.expr[v] - else: - e = spsr_regs.expr[v] - self.expr = e - return True - - def encode(self): - e = self.expr - if e in spsr_regs.expr: - self.parent.psr.value = 1 - v = spsr_regs.expr.index(e) - elif e in cpsr_regs.expr: - self.parent.psr.value = 0 - v = cpsr_regs.expr.index(e) - else: - return False - self.value = v - return True - - -class arm_cpreg(arm_reg): - reg_info = cp_regs - parser = reg_info.parser - - -class arm_preg(arm_reg): - reg_info = p_regs - parser = reg_info.parser - - -class arm_imm(imm_noarg, arm_arg): - parser = base_expr - - -class arm_offs(arm_imm): - parser = base_expr - - def int2expr(self, v): - if v & ~self.intmask != 0: - return None - return ExprInt(v, self.intsize) - - def decodeval(self, v): - v <<= 2 - # Add pipeline offset - v += 8 - return v - - def encodeval(self, v): - if v%4 != 0: - return False - # Remove pipeline offset - v -= 8 - return v >> 2 - - def decode(self, v): - v = v & self.lmask - if (1 << (self.l - 1)) & v: - v |= ~0 ^ self.lmask - v = self.decodeval(v) - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if (1 << (self.l - 1)) & v: - v = -((0xffffffff ^ v) + 1) - v = self.encodeval(v) - if v is False: - return False - self.value = (v & 0xffffffff) & self.lmask - return True - - -class arm_imm8_12(arm_arg): - 
parser = deref - - def decode(self, v): - v = v & self.lmask - if self.parent.updown.value: - e = ExprInt(v << 2, 32) - else: - e = ExprInt(-v << 2, 32) - if self.parent.ppi.value: - e = ExprOp('preinc', self.parent.rn.expr, e) - else: - e = ExprOp('postinc', self.parent.rn.expr, e) - if self.parent.wback.value == 1: - e = ExprOp('wback', e) - self.expr = ExprMem(e, 32) - return True - - def encode(self): - self.parent.updown.value = 1 - e = self.expr - if not isinstance(e, ExprMem): - return False - e = e.ptr - if isinstance(e, ExprOp) and e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - else: - self.parent.wback.value = 0 - if e.op == "postinc": - self.parent.ppi.value = 0 - elif e.op == "preinc": - self.parent.ppi.value = 1 - else: - # XXX default - self.parent.ppi.value = 1 - self.parent.rn.expr = e.args[0] - if len(e.args) == 1: - self.value = 0 - return True - e = e.args[1] - if not isinstance(e, ExprInt): - log.debug('should be int %r', e) - return False - v = int(e) - if v < 0 or v & (1 << 31): - self.parent.updown.value = 0 - v = -v & 0xFFFFFFFF - if v & 0x3: - log.debug('arg should be 4 aligned') - return False - v >>= 2 - self.value = v - return True - - -class arm_imm_4_12(arm_arg): - parser = reg_or_base - - def decode(self, v): - v = v & self.lmask - imm = (self.parent.imm4.value << 12) | v - self.expr = ExprInt(imm, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if v > 0xffff: - return False - self.parent.imm4.value = v >> 12 - self.value = v & 0xfff - return True - - -class arm_imm_12_4(arm_arg): - parser = base_expr - - def decode(self, v): - v = v & self.lmask - imm = (self.parent.imm.value << 4) | v - self.expr = ExprInt(imm, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if v > 0xffff: - return False - self.parent.imm.value = (v >> 4) & 0xfff - self.value = v & 0xf - return True - - 
-class arm_op2(arm_arg): - parser = shift_off - - def str_to_imm_rot_form(self, s, neg=False): - if neg: - s = -s & 0xffffffff - for i in range(0, 32, 2): - v = myrol32(s, i) - if 0 <= v < 0x100: - return ((i // 2) << 8) | v - return None - - def decode(self, v): - val = v & self.lmask - if self.parent.immop.value: - rot = val >> 8 - imm = val & 0xff - imm = myror32(imm, rot * 2) - self.expr = ExprInt(imm, 32) - return True - rm = val & 0xf - shift = val >> 4 - shift_kind = shift & 1 - shift_type = (shift >> 1) & 3 - shift >>= 3 - if shift_kind: - # shift kind is reg - if shift & 1: - return False - rs = shift >> 1 - if rs == 0xf: - return False - shift_op = regs_expr[rs] - else: - # shift kind is imm - amount = shift - shift_op = ExprInt(amount, 32) - a = regs_expr[rm] - if shift_op == ExprInt(0, 32): - if shift_type == 3: - self.expr = ExprOp(allshifts[4], a) - else: - self.expr = a - else: - self.expr = ExprOp(allshifts[shift_type], a, shift_op) - return True - - def encode(self): - e = self.expr - # pure imm - if isinstance(e, ExprInt): - val = self.str_to_imm_rot_form(int(e)) - if val is None: - return False - self.parent.immop.value = 1 - self.value = val - return True - - self.parent.immop.value = 0 - # pure reg - if isinstance(e, ExprId): - rm = gpregs.expr.index(e) - shift_kind = 0 - shift_type = 0 - amount = 0 - self.value = ( - ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm - return True - # rot reg - if not isinstance(e, ExprOp): - log.debug('bad reg rot1 %r', e) - return False - rm = gpregs.expr.index(e.args[0]) - shift_type = allshifts.index(e.op) - if e.op == 'rrx': - shift_kind = 0 - amount = 0 - shift_type = 3 - elif isinstance(e.args[1], ExprInt): - shift_kind = 0 - amount = int(e.args[1]) - # LSR/ASR of 32 => 0 - if amount == 32 and e.op in ['>>', 'a>>']: - amount = 0 - else: - shift_kind = 1 - amount = gpregs.expr.index(e.args[1]) << 1 - self.value = ( - ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm - return True - 
-# op2imm + rn - - -class arm_op2imm(arm_imm8_12): - parser = deref - - def str_to_imm_rot_form(self, s, neg=False): - if neg: - s = -s & 0xffffffff - if 0 <= s < (1 << 12): - return s - return None - - def decode(self, v): - val = v & self.lmask - if self.parent.immop.value == 0: - imm = val - if self.parent.updown.value == 0: - imm = -imm - if self.parent.ppi.value: - e = ExprOp('preinc', self.parent.rn.expr, ExprInt(imm, 32)) - else: - e = ExprOp('postinc', self.parent.rn.expr, ExprInt(imm, 32)) - if self.parent.wback.value == 1: - e = ExprOp('wback', e) - self.expr = ExprMem(e, 32) - return True - rm = val & 0xf - shift = val >> 4 - shift_kind = shift & 1 - shift_type = (shift >> 1) & 3 - shift >>= 3 - # print self.parent.immop.value, hex(shift), hex(shift_kind), - # hex(shift_type) - if shift_kind: - # log.debug('error in disasm xx') - return False - else: - # shift kind is imm - amount = shift - shift_op = ExprInt(amount, 32) - a = regs_expr[rm] - if shift_op == ExprInt(0, 32): - pass - else: - a = ExprOp(allshifts[shift_type], a, shift_op) - if self.parent.ppi.value: - e = ExprOp('preinc', self.parent.rn.expr, a) - else: - e = ExprOp('postinc', self.parent.rn.expr, a) - if self.parent.wback.value == 1: - e = ExprOp('wback', e) - self.expr = ExprMem(e, 32) - return True - - def encode(self): - self.parent.immop.value = 1 - self.parent.updown.value = 1 - - e = self.expr - assert(isinstance(e, ExprMem)) - e = e.ptr - if e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - else: - self.parent.wback.value = 0 - if e.op == "postinc": - self.parent.ppi.value = 0 - elif e.op == "preinc": - self.parent.ppi.value = 1 - else: - # XXX default - self.parent.ppi.value = 1 - - # if len(v) <1: - # raise ValueError('cannot parse', s) - self.parent.rn.expr = e.args[0] - if len(e.args) == 1: - self.parent.immop.value = 0 - self.value = 0 - return True - # pure imm - if isinstance(e.args[1], ExprInt): - self.parent.immop.value = 0 - val = 
self.str_to_imm_rot_form(int(e.args[1])) - if val is None: - val = self.str_to_imm_rot_form(int(e.args[1]), True) - if val is None: - log.debug('cannot encode inm') - return False - self.parent.updown.value = 0 - self.value = val - return True - # pure reg - if isinstance(e.args[1], ExprId): - rm = gpregs.expr.index(e.args[1]) - shift_kind = 0 - shift_type = 0 - amount = 0 - self.value = ( - ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm - return True - # rot reg - if not isinstance(e.args[1], ExprOp): - log.debug('bad reg rot2 %r', e) - return False - e = e.args[1] - rm = gpregs.expr.index(e.args[0]) - shift_type = allshifts.index(e.op) - if isinstance(e.args[1], ExprInt): - shift_kind = 0 - amount = int(e.args[1]) - else: - shift_kind = 1 - amount = gpregs.expr.index(e.args[1]) << 1 - self.value = ( - ((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm - return True - - -def reglist2str(rlist): - out = [] - i = 0 - while i < len(rlist): - j = i + 1 - while j < len(rlist) and rlist[j] < 13 and rlist[j] == rlist[j - 1] + 1: - j += 1 - j -= 1 - if j < i + 2: - out.append(regs_str[rlist[i]]) - i += 1 - else: - out.append(regs_str[rlist[i]] + '-' + regs_str[rlist[j]]) - i = j + 1 - return "{" + ", ".join(out) + '}' - - -class arm_rlist(arm_arg): - parser = gpreg_list - - def encode(self): - self.parent.sbit.value = 0 - e = self.expr - if isinstance(e, ExprOp) and e.op == "sbit": - e = e.args[0] - self.parent.sbit.value = 1 - rlist = [gpregs.expr.index(x) for x in e.args] - v = 0 - for r in rlist: - v |= 1 << r - self.value = v - return True - - def decode(self, v): - v = v & self.lmask - out = [] - for i in range(0x10): - if 1 << i & v: - out.append(gpregs.expr[i]) - if not out: - return False - e = ExprOp('reglist', *out) - if self.parent.sbit.value == 1: - e = ExprOp('sbit', e) - self.expr = e - return True - - -class updown_b_nosp_mn(bs_mod_name): - mn_mod = ['D', 'I'] - - def modname(self, name, f_i): - return name + 
self.args['mn_mod'][f_i] - - -class ppi_b_nosp_mn(bs_mod_name): - prio = 5 - mn_mod = ['A', 'B'] - - -class updown_b_sp_mn(bs_mod_name): - mn_mod = ['A', 'D'] - - def modname(self, name, f_i): - if name.startswith("STM"): - f_i = [1, 0][f_i] - return name + self.args['mn_mod'][f_i] - - -class ppi_b_sp_mn(bs_mod_name): - mn_mod = ['F', 'E'] - - def modname(self, name, f_i): - if name.startswith("STM"): - f_i = [1, 0][f_i] - return name + self.args['mn_mod'][f_i] - - -class arm_reg_wb_nosp(arm_reg_wb): - - def decode(self, v): - v = v & self.lmask - if v == 13: - return False - e = self.reg_info.expr[v] - if self.parent.wback.value: - e = ExprOp('wback', e) - self.expr = e - return True - - -class arm_offs_blx(arm_imm): - - def decode(self, v): - v = v & self.lmask - v = (v << 2) + (self.parent.lowb.value << 1) - v = sign_ext(v, 26, 32) - # Add pipeline offset - v += 8 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - # Remove pipeline offset - v = int(self.expr.arg - 8) - if v & 0x80000000: - v &= (1 << 26) - 1 - self.parent.lowb.value = (v >> 1) & 1 - self.value = v >> 2 - return True - - -class bs_lnk(bs_mod_name): - - def modname(self, name, i): - return name[:1] + self.args['mn_mod'][i] + name[1:] - - -class armt_rm_cp(bsi): - - def decode(self, v): - if v != gpregs.expr.index(self.parent.rm.expr): - return False - return True - - def encode(self): - self.value = gpregs.expr.index(self.parent.rm.expr) - return True - - -accum = bs(l=1) -scc = bs_mod_name(l=1, fname='scc', mn_mod=['', 'S']) -dumscc = bs("1") -rd = bs(l=4, cls=(arm_gpreg,)) -rdl = bs(l=4, cls=(arm_gpreg,)) - -rn = bs(l=4, cls=(arm_gpreg,), fname="rn") -rs = bs(l=4, cls=(arm_gpreg,)) -rm = bs(l=4, cls=(arm_gpreg,), fname='rm') -ra = bs(l=4, cls=(arm_gpreg,)) -rt = bs(l=4, cls=(arm_gpreg,), fname='rt') -rt2 = bs(l=4, cls=(arm_gpreg,)) - -rm_cp = bs(l=4, cls=(armt_rm_cp,)) - -op2 = bs(l=12, cls=(arm_op2,)) -lnk = bs_lnk(l=1, 
fname='lnk', mn_mod=['', 'L']) -offs = bs(l=24, cls=(arm_offs,), fname="offs") - -rn_noarg = bs(l=4, cls=(arm_gpreg_noarg,), fname="rn") -rm_noarg = bs(l=4, cls=(arm_gpreg_noarg,), fname="rm", order = -1) - -immop = bs(l=1, fname='immop') -dumr = bs(l=4, default_val="0000", fname="dumr") -# psr = bs(l=1, cls=(arm_psr,), fname="psr") - -psr = bs(l=1, fname="psr") -psr_field = bs(l=4, cls=(arm_psr,)) - -ppi = bs(l=1, fname='ppi') -updown = bs(l=1, fname='updown') -trb = bs_mod_name(l=1, fname='trb', mn_mod=['', 'B']) -wback = bs_mod_name(l=1, fname="wback", mn_mod=['', 'T']) -wback_no_t = bs(l=1, fname="wback") - -op2imm = bs(l=12, cls=(arm_op2imm,)) - -updown_b_nosp = updown_b_nosp_mn(l=1, mn_mod=['D', 'I'], fname='updown') -ppi_b_nosp = ppi_b_nosp_mn(l=1, mn_mod=['A', 'B'], fname='ppi') -updown_b_sp = updown_b_sp_mn(l=1, mn_mod=['A', 'D'], fname='updown') -ppi_b_sp = ppi_b_sp_mn(l=1, mn_mod=['F', 'E'], fname='ppi') - -sbit = bs(l=1, fname="sbit") -rn_sp = bs("1101", cls=(arm_reg_wb,), fname='rnsp') -rn_wb = bs(l=4, cls=(arm_reg_wb_nosp,), fname='rn') -rlist = bs(l=16, cls=(arm_rlist,), fname='rlist') - -swi_i = bs(l=24, cls=(arm_imm,), fname="swi_i") - -opc = bs(l=4, cls=(arm_imm, m_arg), fname='opc') -crn = bs(l=4, cls=(arm_cpreg,), fname='crn') -crd = bs(l=4, cls=(arm_cpreg,), fname='crd') -crm = bs(l=4, cls=(arm_cpreg,), fname='crm') -cpnum = bs(l=4, cls=(arm_preg,), fname='cpnum') -cp = bs(l=3, cls=(arm_imm, m_arg), fname='cp') - -imm8_12 = bs(l=8, cls=(arm_imm8_12, m_arg), fname='imm') -tl = bs_mod_name(l=1, fname="tl", mn_mod=['', 'L']) - -cpopc = bs(l=3, cls=(arm_imm, m_arg), fname='cpopc') -imm20 = bs(l=20, cls=(arm_imm, m_arg)) -imm4 = bs(l=4, cls=(arm_imm, m_arg)) -imm12 = bs(l=12, cls=(arm_imm, m_arg)) -imm16 = bs(l=16, cls=(arm_imm, m_arg)) - -imm12_off = bs(l=12, fname="imm") - -imm2_noarg = bs(l=2, fname="imm") -imm4_noarg = bs(l=4, fname="imm4") - - -imm_4_12 = bs(l=12, cls=(arm_imm_4_12,)) - -imm12_noarg = bs(l=12, fname="imm") -imm_12_4 = bs(l=4, 
cls=(arm_imm_12_4,)) - -lowb = bs(l=1, fname='lowb') -offs_blx = bs(l=24, cls=(arm_offs_blx,), fname="offs") - -fix_cond = bs("1111", fname="cond") - -class mul_part_x(bs_mod_name): - prio = 5 - mn_mod = ['B', 'T'] - -class mul_part_y(bs_mod_name): - prio = 6 - mn_mod = ['B', 'T'] - -mul_x = mul_part_x(l=1, fname='x', mn_mod=['B', 'T']) -mul_y = mul_part_y(l=1, fname='y', mn_mod=['B', 'T']) - -class arm_immed(arm_arg): - parser = deref - - def decode(self, v): - if self.parent.immop.value == 1: - imm = ExprInt((self.parent.immedH.value << 4) | v, 32) - else: - imm = gpregs.expr[v] - if self.parent.updown.value == 0: - imm = -imm - if self.parent.ppi.value: - e = ExprOp('preinc', self.parent.rn.expr, imm) - else: - e = ExprOp('postinc', self.parent.rn.expr, imm) - if self.parent.wback.value == 1: - e = ExprOp('wback', e) - self.expr = ExprMem(e, 32) - - return True - - def encode(self): - self.parent.immop.value = 1 - self.parent.updown.value = 1 - e = self.expr - if not isinstance(e, ExprMem): - return False - e = e.ptr - if isinstance(e, ExprOp) and e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - else: - self.parent.wback.value = 0 - if e.op == "postinc": - self.parent.ppi.value = 0 - elif e.op == "preinc": - self.parent.ppi.value = 1 - else: - # XXX default - self.parent.ppi.value = 1 - self.parent.rn.expr = e.args[0] - if len(e.args) == 1: - self.value = 0 - self.parent.immedH.value = 0 - return True - e = e.args[1] - if isinstance(e, ExprInt): - v = int(e) - if v < 0 or v & (1 << 31): - self.parent.updown.value = 0 - v = (-v) & 0xFFFFFFFF - if v > 0xff: - log.debug('cannot encode imm XXX') - return False - self.value = v & 0xF - self.parent.immedH.value = v >> 4 - return True - - self.parent.immop.value = 0 - if isinstance(e, ExprOp) and len(e.args) == 1 and e.op == "-": - self.parent.updown.value = 0 - e = e.args[0] - if e in gpregs.expr: - self.value = gpregs.expr.index(e) - self.parent.immedH.value = 0x0 - return True - else: - raise 
ValueError('e should be int: %r' % e) - -immedH = bs(l=4, fname='immedH') -immedL = bs(l=4, cls=(arm_immed, m_arg), fname='immedL') -hb = bs(l=1) - - -class armt2_rot_rm(arm_arg): - parser = shift_off - def decode(self, v): - r = self.parent.rm.expr - if v == 00: - e = r - else: - raise NotImplementedError('rotation') - self.expr = e - return True - def encode(self): - e = self.expr - if isinstance(e, ExprId): - self.value = 0 - else: - raise NotImplementedError('rotation') - return True - -rot_rm = bs(l=2, cls=(armt2_rot_rm,), fname="rot_rm") - - -class arm_mem_rn_imm(arm_arg): - parser = deref - def decode(self, v): - value = self.parent.imm.value - if self.parent.rw.value == 0: - value = -value - imm = ExprInt(value, 32) - reg = gpregs.expr[v] - if value: - expr = ExprMem(reg + imm, 32) - else: - expr = ExprMem(reg, 32) - self.expr = expr - return True - - def encode(self): - self.parent.add_imm.value = 1 - self.parent.imm.value = 0 - expr = self.expr - if not isinstance(expr, ExprMem): - return False - ptr = expr.ptr - if ptr in gpregs.expr: - self.value = gpregs.expr.index(ptr) - elif (isinstance(ptr, ExprOp) and - len(ptr.args) == 2 and - ptr.op == 'preinc'): - reg, imm = ptr.args - if not reg in gpregs.expr: - return False - self.value = gpregs.expr.index(reg) - if not isinstance(imm, ExprInt): - return False - value = int(imm) - if value & 0x80000000: - value = -value - self.parent.add_imm.value = 0 - self.parent.imm.value = value - else: - return False - return True - -mem_rn_imm = bs(l=4, cls=(arm_mem_rn_imm,), order=1) - -def armop(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_arm,), dct) - - -def armtop(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_armt,), dct) - - -op_list = ['AND', 'EOR', 'SUB', 'RSB', 'ADD', 'ADC', 'SBC', 'RSC', - 'TST', 'TEQ', 
'CMP', 'CMN', 'ORR', 'MOV', 'BIC', 'MVN'] -data_mov_name = {'MOV': 13, 'MVN': 15} -data_test_name = {'TST': 8, 'TEQ': 9, 'CMP': 10, 'CMN': 11} - -data_name = {} -for i, n in enumerate(op_list): - if n in list(data_mov_name) + list(data_test_name): - continue - data_name[n] = i -bs_data_name = bs_name(l=4, name=data_name) - -bs_data_mov_name = bs_name(l=4, name=data_mov_name) - -bs_data_test_name = bs_name(l=4, name=data_test_name) - - -transfer_name = {'STR': 0, 'LDR': 1} -bs_transfer_name = bs_name(l=1, name=transfer_name) - -transferh_name = {'STRH': 0, 'LDRH': 1} -bs_transferh_name = bs_name(l=1, name=transferh_name) - - -transfer_ldr_name = {'LDRD': 0, 'LDRSB': 1} -bs_transfer_ldr_name = bs_name(l=1, name=transfer_ldr_name) - -btransfer_name = {'STM': 0, 'LDM': 1} -bs_btransfer_name = bs_name(l=1, name=btransfer_name) - -ctransfer_name = {'STC': 0, 'LDC': 1} -bs_ctransfer_name = bs_name(l=1, name=ctransfer_name) - -mr_name = {'MCR': 0, 'MRC': 1} -bs_mr_name = bs_name(l=1, name=mr_name) - - -bs_addi = bs(l=1, fname="add_imm") -bs_rw = bs_mod_name(l=1, fname='rw', mn_mod=['W', '']) - -armop("mul", [bs('000000'), bs('0'), scc, rd, bs('0000'), rs, bs('1001'), rm], [rd, rm, rs]) -armop("umull", [bs('000010'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("umlal", [bs('000010'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("smull", [bs('000011'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("smlal", [bs('000011'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("mla", [bs('000000'), bs('1'), scc, rd, rn, rs, bs('1001'), rm], [rd, rm, rs, rn]) -armop("mrs", [bs('00010'), psr, bs('00'), psr_field, rd, bs('000000000000')], [rd, psr]) -armop("msr", [bs('00010'), psr, bs('10'), psr_field, bs('1111'), bs('0000'), bs('0000'), rm], [psr_field, rm]) -armop("data", [bs('00'), immop, bs_data_name, scc, rn, rd, op2], [rd, rn, op2]) -armop("data_mov", [bs('00'), immop, 
bs_data_mov_name, scc, bs('0000'), rd, op2], [rd, op2]) -armop("data_test", [bs('00'), immop, bs_data_test_name, dumscc, rn, dumr, op2]) -armop("b", [bs('101'), lnk, offs]) - -armop("smul", [bs('00010110'), rd, bs('0000'), rs, bs('1'), mul_y, mul_x, bs('0'), rm], [rd, rm, rs]) - -# TODO TEST -#armop("und", [bs('011'), imm20, bs('1'), imm4]) -armop("transfer", [bs('01'), immop, ppi, updown, trb, wback_no_t, bs_transfer_name, rn_noarg, rd, op2imm], [rd, op2imm]) -armop("transferh", [bs('000'), ppi, updown, immop, wback_no_t, bs_transferh_name, rn_noarg, rd, immedH, bs('1011'), immedL], [rd, immedL]) -armop("ldrd", [bs('000'), ppi, updown, immop, wback_no_t, bs_transfer_ldr_name, rn_noarg, rd, immedH, bs('1101'), immedL], [rd, immedL]) -armop("ldrsh", [bs('000'), ppi, updown, immop, wback_no_t, bs('1'), rn_noarg, rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) -armop("strd", [bs('000'), ppi, updown, immop, wback_no_t, bs('0'), rn_noarg, rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) -armop("btransfersp", [bs('100'), ppi_b_sp, updown_b_sp, sbit, wback_no_t, bs_btransfer_name, rn_sp, rlist]) -armop("btransfer", [bs('100'), ppi_b_nosp, updown_b_nosp, sbit, wback_no_t, bs_btransfer_name, rn_wb, rlist]) -# TODO: TEST -armop("swp", [bs('00010'), trb, bs('00'), rn, rd, bs('0000'), bs('1001'), rm]) -armop("svc", [bs('1111'), swi_i]) -armop("cdp", [bs('1110'), opc, crn, crd, cpnum, cp, bs('0'), crm], [cpnum, opc, crd, crn, crm, cp]) -armop("cdata", [bs('110'), ppi, updown, tl, wback_no_t, bs_ctransfer_name, rn_noarg, crd, cpnum, imm8_12], [cpnum, crd, imm8_12]) -armop("mr", [bs('1110'), cpopc, bs_mr_name, crn, rd, cpnum, cp, bs('1'), crm], [cpnum, cpopc, rd, crn, crm, cp]) -armop("bkpt", [bs('00010010'), imm12_noarg, bs('0111'), imm_12_4]) -armop("bx", [bs('000100101111111111110001'), rn]) -armop("mov", [bs('00110000'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) -armop("movt", [bs('00110100'), imm4_noarg, rd, imm_4_12], [rd, 
imm_4_12]) -armop("blx", [bs('00010010'), bs('1111'), bs('1111'), bs('1111'), bs('0011'), rm], [rm]) -armop("blx", [fix_cond, bs('101'), lowb, offs_blx], [offs_blx]) -armop("clz", [bs('00010110'), bs('1111'), rd, bs('1111'), bs('0001'), rm], [rd, rm]) -armop("qadd", [bs('00010000'), rn, rd, bs('0000'), bs('0101'), rm], [rd, rm, rn]) - -armop("uxtb", [bs('01101110'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) -armop("uxth", [bs('01101111'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) -armop("sxtb", [bs('01101010'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) -armop("sxth", [bs('01101011'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) - -armop("rev", [bs('01101011'), bs('1111'), rd, bs('1111'), bs('0011'), rm]) -armop("rev16", [bs('01101011'), bs('1111'), rd, bs('1111'), bs('1011'), rm]) - -armop("pld", [bs8(0xF5), bs_addi, bs_rw, bs('01'), mem_rn_imm, bs('1111'), imm12_off]) - -armop("isb", [bs8(0xF5), bs8(0x7F), bs8(0xF0), bs8(0x6F)]) -armop("nop", [bs8(0xE3), bs8(0x20), bs8(0xF0), bs8(0)]) - -class arm_widthm1(arm_imm, m_arg): - def decode(self, v): - self.expr = ExprInt(v+1, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) + -1 - if v > self.lmask: - return False - self.value = v - return True - - -class arm_rm_rot2(arm_arg): - parser = rot2_expr - def decode(self, v): - expr = gpregs.expr[v] - shift_value = self.parent.rot2.value - if shift_value: - expr = ExprOp(allshifts[3], expr, ExprInt(shift_value * 8, 32)) - self.expr = expr - return True - def encode(self): - if self.expr in gpregs.expr: - self.value = gpregs.expr.index(self.expr) - self.parent.rot2.value = 0 - elif (isinstance(self.expr, ExprOp) and - self.expr.op == allshifts[3]): - reg, value = self.expr.args - if reg not in gpregs.expr: - return False - self.value = gpregs.expr.index(reg) - if not isinstance(value, ExprInt): - return False - value = int(value) - if not value in [8, 
16, 24]: - return False - self.parent.rot2.value = value // 8 - return True - -class arm_gpreg_nopc(reg_noarg): - reg_info = gpregs_nopc - parser = reg_info.parser - - - def decode(self, v): - ret = super(arm_gpreg_nopc, self).decode(v) - if ret is False: - return False - if self.expr == reg_dum: - return False - return True - - -class arm_gpreg_nosp(reg_noarg): - reg_info = gpregs_nosp - parser = reg_info.parser - - def decode(self, v): - ret = super(arm_gpreg_nosp, self).decode(v) - if ret is False: - return False - if self.expr == reg_dum: - return False - return True - - -rm_rot2 = bs(l=4, cls=(arm_rm_rot2,), fname="rm") -rot2 = bs(l=2, fname="rot2") - -widthm1 = bs(l=5, cls=(arm_widthm1, m_arg)) -lsb = bs(l=5, cls=(arm_imm, m_arg)) - -rd_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rd") -rn_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rn") -ra_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="ra") -rt_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rt") - -rn_nosp = bs(l=4, cls=(arm_gpreg_nosp, arm_arg), fname="rn") - -rn_nopc_noarg = bs(l=4, cls=(arm_gpreg_nopc,), fname="rn") - -armop("ubfx", [bs('0111111'), widthm1, rd, lsb, bs('101'), rn], [rd, rn, lsb, widthm1]) - -armop("bfc", [bs('0111110'), widthm1, rd, lsb, bs('001'), bs('1111')], [rd, lsb, widthm1]) - -armop("uxtab", [bs('01101110'), rn_nopc, rd, rot2, bs('000111'), rm_rot2], [rd, rn_nopc, rm_rot2]) - - - -# -# thumnb ####################### -# -# ARM7-TDMI-manual-pt3 -gpregs_l = reg_info(regs_str[:8], regs_expr[:8]) -gpregs_h = reg_info(regs_str[8:], regs_expr[8:]) - -gpregs_sppc = reg_info(regs_str[-1:] + regs_str[13:14], - regs_expr[-1:] + regs_expr[13:14]) - -deref_reg_imm = Group(LBRACK + gpregs.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) -deref_low = Group(LBRACK + gpregs_l.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) -deref_pc = Group(LBRACK + gpregs_pc.parser + Optional( - COMMA + shift_off) + 
RBRACK).setParseAction(cb_deref_pre_mem) -deref_sp = Group(LBRACK + gpregs_sp.parser + COMMA + - shift_off + RBRACK).setParseAction(cb_deref_pre_mem) - -gpregs_l_wb = Group( - gpregs_l.parser + Optional('!')).setParseAction(cb_gpreb_wb) - - -gpregs_l_13 = reg_info(regs_str[:13], regs_expr[:13]) - - -class arm_offreg(arm_arg): - parser = deref_pc - - def decodeval(self, v): - return v - - def encodeval(self, v): - return v - - def decode(self, v): - v = v & self.lmask - v = self.decodeval(v) - if v: - self.expr = self.off_reg + ExprInt(v, 32) - else: - self.expr = self.off_reg - - e = self.expr - if isinstance(e, ExprOp) and e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - return True - - def encode(self): - e = self.expr - if not (isinstance(e, ExprOp) and e.op == "preinc"): - log.debug('cannot encode %r', e) - return False - if e.args[0] != self.off_reg: - log.debug('cannot encode reg %r', e.args[0]) - return False - v = int(e.args[1]) - v = self.encodeval(v) - self.value = v - return True - - -class arm_offpc(arm_offreg): - off_reg = regs_expr[15] - - def decode(self, v): - v = v & self.lmask - v <<= 2 - if v: - self.expr = ExprMem(self.off_reg + ExprInt(v, 32), 32) - else: - self.expr = ExprMem(self.off_reg, 32) - - e = self.expr.ptr - if isinstance(e, ExprOp) and e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - return True - - def encode(self): - e = self.expr - if not isinstance(e, ExprMem): - return False - e = e.ptr - if not (isinstance(e, ExprOp) and e.op == "preinc"): - log.debug('cannot encode %r', e) - return False - if e.args[0] != self.off_reg: - log.debug('cannot encode reg %r', e.args[0]) - return False - v = int(e.args[1]) - if v & 3: - return False - v >>= 2 - if v > self.lmask: - return False - self.value = v - return True - - - - -class arm_offsp(arm_offpc): - parser = deref_sp - off_reg = regs_expr[13] - - -class arm_offspc(arm_offs): - - def decodeval(self, v): - v = v << 1 - # Add pipeline offset - v += 2 + 2 - 
return v - - def encodeval(self, v): - # Remove pipeline offset - v -= 2 + 2 - if v % 2 != 0: - return False - if v > (1 << (self.l - 1)) - 1: - return False - return v >> 1 - - -class arm_off8sppc(arm_imm): - - def decodeval(self, v): - return v << 2 - - def encodeval(self, v): - return v >> 2 - - -class arm_off7(arm_imm): - - def decodeval(self, v): - return v << 2 - - def encodeval(self, v): - return v >> 2 - -class arm_deref_reg_imm(arm_arg): - parser = deref_reg_imm - - def decode(self, v): - v = v & self.lmask - rbase = regs_expr[v] - e = ExprOp('preinc', rbase, self.parent.off.expr) - self.expr = ExprMem(e, 32) - return True - - def encode(self): - self.parent.off.expr = None - e = self.expr - if not isinstance(e, ExprMem): - return False - e = e.ptr - if not (isinstance(e, ExprOp) and e.op == 'preinc'): - log.debug('cannot encode %r', e) - return False - off = e.args[1] - if isinstance(off, ExprId): - self.parent.off.expr = off - elif isinstance(off, ExprInt): - self.parent.off.expr = off - else: - log.debug('cannot encode off %r', off) - return False - self.value = gpregs.expr.index(e.args[0]) - if self.value >= 1 << self.l: - log.debug('cannot encode reg %r', off) - return False - return True - -class arm_derefl(arm_deref_reg_imm): - parser = deref_low - - -class arm_offbw(imm_noarg): - - def decode(self, v): - v = v & self.lmask - if self.parent.trb.value == 0: - v <<= 2 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if self.parent.trb.value == 0: - if v & 3: - log.debug('off must be aligned %r', v) - return False - v >>= 2 - if v > self.lmask: - return False - self.value = v - return True - - - -class arm_off(imm_noarg): - - def decode(self, v): - v = v & self.lmask - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if v > self.lmask: - return False - self.value = v 
- return True - - -class arm_offh(imm_noarg): - - def decode(self, v): - v = v & self.lmask - v <<= 1 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if v & 1: - log.debug('off must be aligned %r', v) - return False - v >>= 1 - if v > self.lmask: - return False - self.value = v - return True - - -class armt_rlist(arm_arg): - parser = gpreg_list - - def encode(self): - e = self.expr - rlist = [gpregs_l.expr.index(x) for x in e.args] - v = 0 - for r in rlist: - v |= 1 << r - self.value = v - return True - - def decode(self, v): - v = v & self.lmask - out = [] - for i in range(0x10): - if 1 << i & v: - out.append(gpregs.expr[i]) - if not out: - return False - e = ExprOp('reglist', *out) - self.expr = e - return True - - -class armt_rlist13(armt_rlist): - parser = gpreg_list - - def encode(self): - e = self.expr - rlist = [] - reg_l = list(e.args) - - self.parent.pclr.value = 0 - if self.parent.name.startswith('PUSH'): - if regs_expr[14] in reg_l: - reg_l.remove(regs_expr[14]) - self.parent.pclr.value = 1 - else: - if regs_expr[15] in reg_l: - reg_l.remove(regs_expr[15]) - self.parent.pclr.value = 1 - - for reg in reg_l: - if reg not in gpregs_l_13.expr: - return False - rlist.append(gpregs_l_13.expr.index(reg)) - v = 0 - for r in rlist: - v |= 1 << r - self.value = v - return True - - def decode(self, v): - v = v & self.lmask - out = [] - for i in range(13): - if 1 << i & v: - out.append(gpregs_l_13.expr[i]) - - if self.parent.pclr.value == 1: - if self.parent.name.startswith("PUSH"): - out += [regs_expr[14]] - else: - out += [regs_expr[15]] - - if not out: - return False - e = ExprOp('reglist', *out) - self.expr = e - return True - - - -class armt_rlist13_pc_lr(armt_rlist): - parser = gpreg_list - - def encode(self): - e = self.expr - rlist = [] - reg_l = list(e.args) - - self.parent.pc_in.value = 0 - self.parent.lr_in.value = 0 - if regs_expr[14] in reg_l: - 
reg_l.remove(regs_expr[14]) - self.parent.lr_in.value = 1 - if regs_expr[15] in reg_l: - reg_l.remove(regs_expr[15]) - self.parent.pc_in.value = 1 - - for reg in reg_l: - if reg not in gpregs_l_13.expr: - return False - rlist.append(gpregs_l_13.expr.index(reg)) - v = 0 - for r in rlist: - v |= 1 << r - self.value = v - return True - - def decode(self, v): - v = v & self.lmask - out = [] - for i in range(13): - if 1 << i & v: - out.append(gpregs_l_13.expr[i]) - - if self.parent.lr_in.value == 1: - out += [regs_expr[14]] - if self.parent.pc_in.value == 1: - out += [regs_expr[15]] - - if not out: - return False - e = ExprOp('reglist', *out) - self.expr = e - return True - - - -class armt_rlist_pclr(armt_rlist): - - def encode(self): - e = self.expr - reg_l = list(e.args) - self.parent.pclr.value = 0 - if self.parent.pp.value == 0: - if regs_expr[14] in reg_l: - reg_l.remove(regs_expr[14]) - self.parent.pclr.value = 1 - else: - if regs_expr[15] in reg_l: - reg_l.remove(regs_expr[15]) - self.parent.pclr.value = 1 - rlist = [gpregs.expr.index(x) for x in reg_l] - v = 0 - for r in rlist: - v |= 1 << r - if v > self.lmask: - return False - self.value = v - return True - - def decode(self, v): - v = v & self.lmask - out = [] - for i in range(0x10): - if 1 << i & v: - out.append(gpregs.expr[i]) - - if self.parent.pclr.value == 1: - if self.parent.pp.value == 0: - out += [regs_expr[14]] - else: - out += [regs_expr[15]] - if not out: - return False - e = ExprOp('reglist', *out) - self.expr = e - return True - - -class armt_reg_wb(arm_reg_wb): - reg_info = gpregs_l - parser = gpregs_l_wb - - def decode(self, v): - v = v & self.lmask - e = self.reg_info.expr[v] - if not e in self.parent.trlist.expr.args: - e = ExprOp('wback', e) - self.expr = e - return True - - def encode(self): - e = self.expr - if isinstance(e, ExprOp): - if e.op != 'wback': - return False - e = e.args[0] - self.value = self.reg_info.expr.index(e) - return True - - -class arm_gpreg_l(arm_reg): - reg_info = 
gpregs_l - parser = reg_info.parser - - -class arm_gpreg_h(arm_reg): - reg_info = gpregs_h - parser = reg_info.parser - - -class arm_gpreg_l_noarg(arm_gpreg_noarg): - reg_info = gpregs_l - parser = reg_info.parser - - -class arm_sppc(arm_reg): - reg_info = gpregs_sppc - parser = reg_info.parser - - -class arm_sp(arm_reg): - reg_info = gpregs_sp - parser = reg_info.parser - - -off5 = bs(l=5, cls=(arm_imm,), fname="off") -off3 = bs(l=3, cls=(arm_imm,), fname="off") -off8 = bs(l=8, cls=(arm_imm,), fname="off") -off7 = bs(l=7, cls=(arm_off7,), fname="off") - -rdl = bs(l=3, cls=(arm_gpreg_l,), fname="rd") -rnl = bs(l=3, cls=(arm_gpreg_l,), fname="rn") -rsl = bs(l=3, cls=(arm_gpreg_l,), fname="rs") -rml = bs(l=3, cls=(arm_gpreg_l,), fname="rm") -rol = bs(l=3, cls=(arm_gpreg_l,), fname="ro") -rbl = bs(l=3, cls=(arm_gpreg_l,), fname="rb") -rbl_deref = bs(l=3, cls=(arm_derefl,), fname="rb") -dumrh = bs(l=3, default_val="000") - -rdh = bs(l=3, cls=(arm_gpreg_h,), fname="rd") -rsh = bs(l=3, cls=(arm_gpreg_h,), fname="rs") - -offpc8 = bs(l=8, cls=(arm_offpc,), fname="offs") -offsp8 = bs(l=8, cls=(arm_offsp,), fname="offs") -rol_noarg = bs(l=3, cls=(arm_gpreg_l_noarg,), fname="off") - -off5bw = bs(l=5, cls=(arm_offbw,), fname="off") -off5h = bs(l=5, cls=(arm_offh,), fname="off") -sppc = bs(l=1, cls=(arm_sppc,)) - -off12 = bs(l=12, cls=(arm_off,), fname="off", order=-1) -rn_deref = bs(l=4, cls=(arm_deref_reg_imm,), fname="rt") - - - -pclr = bs(l=1, fname='pclr', order=-2) - - -pc_in = bs(l=1, fname='pc_in', order=-2) -lr_in = bs(l=1, fname='lr_in', order=-2) - - -sp = bs(l=0, cls=(arm_sp,)) - - -off8s = bs(l=8, cls=(arm_offs,), fname="offs") -trlistpclr = bs(l=8, cls=(armt_rlist_pclr,)) -trlist = bs(l=8, cls=(armt_rlist,), fname="trlist", order = -1) -trlist13 = bs(l=13, cls=(armt_rlist13,), fname="trlist", order = -1) -trlist13pclr = bs(l=13, cls=(armt_rlist13_pc_lr,), fname="trlist", order = -1) - - -rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') - -offs8 = bs(l=8, 
cls=(arm_offspc,), fname="offs") -offs11 = bs(l=11, cls=(arm_offspc,), fname="offs") - -hl = bs(l=1, prio=default_prio + 1, fname='hl') -off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off") - -imm8_d1 = bs(l=8, default_val="00000001") -imm8 = bs(l=8, cls=(arm_imm,), default_val = "00000001") - - -mshift_name = {'LSLS': 0, 'LSRS': 1, 'ASRS': 2} -bs_mshift_name = bs_name(l=2, name=mshift_name) - - -addsub_name = {'ADDS': 0, 'SUBS': 1} -bs_addsub_name = bs_name(l=1, name=addsub_name) - -mov_cmp_add_sub_name = {'MOVS': 0, 'CMP': 1, 'ADDS': 2, 'SUBS': 3} -bs_mov_cmp_add_sub_name = bs_name(l=2, name=mov_cmp_add_sub_name) - -alu_name = {'ANDS': 0, 'EORS': 1, 'LSLS': 2, 'LSRS': 3, - 'ASRS': 4, 'ADCS': 5, 'SBCS': 6, 'RORS': 7, - 'TST': 8, 'NEGS': 9, 'CMP': 10, 'CMN': 11, - 'ORRS': 12, 'MULS': 13, 'BICS': 14, 'MVNS': 15} -bs_alu_name = bs_name(l=4, name=alu_name) - -hiregop_name = {'ADDS': 0, 'CMP': 1, 'MOV': 2} -bs_hiregop_name = bs_name(l=2, name=hiregop_name) - -ldr_str_name = {'STR': 0, 'LDR': 1} -bs_ldr_str_name = bs_name(l=1, name=ldr_str_name) - -ldrh_strh_name = {'STRH': 0, 'LDRH': 1} -bs_ldrh_strh_name = bs_name(l=1, name=ldrh_strh_name) - -ldstsp_name = {'STR': 0, 'LDR': 1} -bs_ldstsp_name = bs_name(l=1, name=ldstsp_name) - -addsubsp_name = {'ADD': 0, 'SUB': 1} -bs_addsubsp_name = bs_name(l=1, name=addsubsp_name) - -pushpop_name = {'PUSH': 0, 'POP': 1} -bs_pushpop_name = bs_name(l=1, name=pushpop_name, fname='pp') - -tbtransfer_name = {'STMIA': 0, 'LDMIA': 1} -bs_tbtransfer_name = bs_name(l=1, name=tbtransfer_name) - -br_name = {'BEQ': 0, 'BNE': 1, 'BCS': 2, 'BCC': 3, 'BMI': 4, - 'BPL': 5, 'BVS': 6, 'BVC': 7, 'BHI': 8, 'BLS': 9, - 'BGE': 10, 'BLT': 11, 'BGT': 12, 'BLE': 13} -bs_br_name = bs_name(l=4, name=br_name) - - -armtop("mshift", [bs('000'), bs_mshift_name, off5, rsl, rdl], [rdl, rsl, off5]) -armtop("addsubr", [bs('000110'), bs_addsub_name, rnl, rsl, rdl], [rdl, rsl, rnl]) -armtop("addsubi", [bs('000111'), bs_addsub_name, off3, rsl, rdl], [rdl, rsl, off3]) 
-armtop("mcas", [bs('001'), bs_mov_cmp_add_sub_name, rnl, off8]) -armtop("alu", [bs('010000'), bs_alu_name, rsl, rdl], [rdl, rsl]) - # should not be used ?? -armtop("hiregop00", [bs('010001'), bs_hiregop_name, bs('00'), rsl, rdl], [rdl, rsl]) -armtop("hiregop01", [bs('010001'), bs_hiregop_name, bs('01'), rsh, rdl], [rdl, rsh]) -armtop("hiregop10", [bs('010001'), bs_hiregop_name, bs('10'), rsl, rdh], [rdh, rsl]) -armtop("hiregop11", [bs('010001'), bs_hiregop_name, bs('11'), rsh, rdh], [rdh, rsh]) -armtop("bx", [bs('010001'), bs('11'), bs('00'), rsl, dumrh]) -armtop("bx", [bs('010001'), bs('11'), bs('01'), rsh, dumrh]) -armtop("ldr", [bs('01001'), rdl, offpc8]) -armtop("ldrstr", [bs('0101'), bs_ldr_str_name, trb, bs('0'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("strh", [bs('0101'), bs('00'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldrh", [bs('0101'), bs('10'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldsb", [bs('0101'), bs('01'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldsh", [bs('0101'), bs('11'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldst", [bs('011'), trb, bs_ldr_str_name, off5bw, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldhsth", [bs('1000'), bs_ldrh_strh_name, off5h, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldstsp", [bs('1001'), bs_ldstsp_name, rdl, offsp8], [rdl, offsp8]) -armtop("add", [bs('1010'), sppc, rdl, off8sppc], [rdl, sppc, off8sppc]) -armtop("addsp", [bs('10110000'), bs_addsubsp_name, sp, off7], [sp, off7]) -armtop("pushpop", [bs('1011'), bs_pushpop_name, bs('10'), pclr, trlistpclr], [trlistpclr]) -armtop("btransfersp", [bs('1100'), bs_tbtransfer_name, rbl_wb, trlist]) -armtop("br", [bs('1101'), bs_br_name, offs8]) -armtop("blx", [bs("01000111"), bs('1'), rm, bs('000')]) -armtop("svc", [bs('11011111'), imm8]) -armtop("b", [bs('11100'), offs11]) -armtop("und", [bs('1101'), bs('1110'), imm8_d1]) - -armtop("rev", [bs('10111010'), bs('00'), 
rsl, rdl], [rdl, rsl]) -armtop("rev16", [bs('10111010'), bs('01'), rsl, rdl], [rdl, rsl]) - -armtop("uxtb", [bs('10110010'), bs('11'), rml, rdl], [rdl, rml]) -armtop("uxth", [bs('10110010'), bs('10'), rml, rdl], [rdl, rml]) -armtop("sxtb", [bs('10110010'), bs('01'), rml, rdl], [rdl, rml]) -armtop("sxth", [bs('10110010'), bs('00'), rml, rdl], [rdl, rml]) - -armtop("uxtab", [bs('111110100'), bs('101'), rn_nopc, bs('1111'), rd, bs('10'), rot2, rm_rot2], [rd, rn_nopc, rm_rot2]) -armtop("uxtah", [bs('111110100'), bs('001'), rn_nopc, bs('1111'), rd, bs('10'), rot2, rm_rot2], [rd, rn_nopc, rm_rot2]) - -# thumb2 ###################### -# -# ARM Architecture Reference Manual Thumb-2 Supplement - -armt_gpreg_shift_off = (gpregs_nosppc.parser + allshifts_t_armt + (gpregs.parser | int_1_31)).setParseAction(cb_shift) - - -armt_gpreg_shift_off |= gpregs_nosppc.parser - - -class arm_gpreg_nosppc(arm_reg): - reg_info = gpregs_nosppc - parser = reg_info.parser - - def decode(self, v): - ret = super(arm_gpreg_nosppc, self).decode(v) - if ret is False: - return False - if self.expr == reg_dum: - return False - return True - - -class armt_gpreg_rm_shift_off(arm_reg): - parser = armt_gpreg_shift_off - - def decode(self, v): - v = v & self.lmask - if v >= len(gpregs_nosppc.expr): - return False - r = gpregs_nosppc.expr[v] - if r == reg_dum: - return False - - i = int(self.parent.imm5_3.value) << 2 - i |= int(self.parent.imm5_2.value) - - if self.parent.stype.value < 3 or i != 0: - shift = allshifts_armt[self.parent.stype.value] - else: - shift = allshifts_armt[4] - self.expr = ExprOp(shift, r, ExprInt(i, 32)) - return True - - def encode(self): - e = self.expr - if isinstance(e, ExprId): - if e not in gpregs_nosppc.expr: - return False - self.value = gpregs_nosppc.expr.index(e) - self.parent.stype.value = 0 - self.parent.imm5_3.value = 0 - self.parent.imm5_2.value = 0 - return True - if not e.is_op(): - return False - shift = e.op - r = gpregs_nosppc.expr.index(e.args[0]) - self.value = 
r - i = int(e.args[1]) - if shift == 'rrx': - if i != 1: - log.debug('rrx shift must be 1') - return False - self.parent.imm5_3.value = 0 - self.parent.imm5_2.value = 0 - self.parent.stype.value = 3 - return True - self.parent.stype.value = allshifts_armt.index(shift) - self.parent.imm5_2.value = i & 3 - self.parent.imm5_3.value = i >> 2 - return True - -rn_nosppc = bs(l=4, cls=(arm_gpreg_nosppc,), fname="rn") -rd_nosppc = bs(l=4, cls=(arm_gpreg_nosppc,), fname="rd") -rm_sh = bs(l=4, cls=(armt_gpreg_rm_shift_off,), fname="rm") - - -class armt2_imm12(arm_imm): - - def decode(self, v): - v = v & self.lmask - v |= int(self.parent.imm12_3.value) << 8 - v |= int(self.parent.imm12_1.value) << 11 - - # simple encoding - if 0 <= v < 0x100: - self.expr = ExprInt(v, 32) - return True - # 00XY00XY form - if v >> 8 == 1: - v &= 0xFF - self.expr = ExprInt((v << 16) | v, 32) - return True - # XY00XY00 form - if v >> 8 == 2: - v &= 0xFF - self.expr = ExprInt((v << 24) | (v << 8), 32) - return True - # XYXYXYXY - if v >> 8 == 3: - v &= 0xFF - self.expr = ExprInt((v << 24) | (v << 16) | (v << 8) | v, 32) - return True - r = v >> 7 - v = 0x80 | (v & 0x7F) - self.expr = ExprInt(myror32(v, r), 32) - return True - - def encode(self): - if not self.expr.is_int(): - return False - v = int(self.expr) - value = None - # simple encoding - if 0 <= v < 0x100: - value = v - elif v & 0xFF00FF00 == 0 and v & 0xFF == (v >> 16) & 0xff: - # 00XY00XY form - value = (1 << 8) | (v & 0xFF) - elif v & 0x00FF00FF == 0 and (v >> 8) & 0xff == (v >> 24) & 0xff: - # XY00XY00 form - value = (2 << 8) | ((v >> 8) & 0xff) - elif (v & 0xFF == - (v >> 8) & 0xFF == - (v >> 16) & 0xFF == - (v >> 24) & 0xFF): - # XYXYXYXY form - value = (3 << 8) | ((v >> 16) & 0xff) - else: - # rol encoding - for i in range(32): - o = myrol32(v, i) - if 0x80 <= o <= 0xFF: - value = (i << 7) | (o & 0x7F) - break - if value is None: - log.debug('cannot encode imm12') - return False - self.value = value & self.lmask - 
self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask - self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask - return True - - - - -class armt4_imm12(arm_imm): - - def decode(self, v): - v = v & self.lmask - v |= int(self.parent.imm12_3.value) << 8 - v |= int(self.parent.imm12_1.value) << 11 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not self.expr.is_int(): - return False - value = int(self.expr) - if value >= (1 << 16): - return False - self.value = value & self.lmask - self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask - self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask - return True - - - -class armt2_imm16(arm_imm): - - def decode(self, v): - v = v & self.lmask - v |= int(self.parent.imm16_3.value) << 8 - v |= int(self.parent.imm16_1.value) << 11 - v |= int(self.parent.imm16_4.value) << 12 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not self.expr.is_int(): - return False - value = int(self.expr) - if value >= (1 << 16): - return False - self.value = value & self.lmask - self.parent.imm16_3.value = (value >> 8) & self.parent.imm16_3.lmask - self.parent.imm16_1.value = (value >> 11) & self.parent.imm16_1.lmask - self.parent.imm16_4.value = (value >> 12) & self.parent.imm16_4.lmask - return True - - -class armt2_lsb5(arm_imm): - - def decode(self, v): - v = v & self.lmask - v |= int(self.parent.lsb5_3.value) << 2 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not self.expr.is_int(): - return False - value = int(self.expr) - self.value = value & self.lmask - self.parent.lsb5_3.value = (value >> 2) & self.parent.lsb5_3.lmask - return True - - -class armt_widthm1(arm_imm): - parser = base_expr - - def decodeval(self, v): - return v + 1 - - def encodeval(self, v): - if v <= 0: - return False - return v - 1 - - - - -class armt2_off20(arm_imm): - - def decode(self, v): - v = v & self.lmask - v <<= 1 - v |= 
int(self.parent.off20_6.value) << 12 - v |= int(self.parent.off20_j1.value) << 18 - v |= int(self.parent.off20_j2.value) << 19 - v |= int(self.parent.off20_s.value) << 20 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not self.expr.is_int(): - return False - value = int(self.expr) - if value & 1: - return False - self.value = (value >> 1) & self.lmask - self.parent.off20_6.value = (value >> 12) & self.parent.off20_6.lmask - self.parent.off20_j1.value = (value >> 18) & self.parent.off20_j1.lmask - self.parent.off20_j2.value = (value >> 19) & self.parent.off20_j2.lmask - self.parent.off20_s.value = (value >> 20) & self.parent.off20_s.lmask - return True - - - -class armt2_imm10l(arm_imm): - - def decode(self, v): - v = v & self.lmask - s = self.parent.sign.value - j1 = self.parent.j1.value - j2 = self.parent.j2.value - imm10h = self.parent.imm10h.value - imm10l = v - - i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1 - - v = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10h << 12) | (imm10l << 2) - v = sign_ext(v, 25, 32) - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - s = 0 - if v & 0x80000000: - s = 1 - v &= (1<<26) - 1 - if v >= (1 << 26): - return False - i1, i2, imm10h, imm10l = (v >> 23) & 1, (v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 2) & 0x3ff - j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1 - self.parent.sign.value = s - self.parent.j1.value = j1 - self.parent.j2.value = j2 - self.parent.imm10h.value = imm10h - self.value = imm10l - return True - - -class armt2_imm11l(arm_imm): - - def decode(self, v): - v = v & self.lmask - s = self.parent.sign.value - j1 = self.parent.j1.value - j2 = self.parent.j2.value - imm10h = self.parent.imm10h.value - imm11l = v - - i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1 - - v = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10h << 12) | (imm11l << 1) - v = sign_ext(v, 25, 32) - self.expr = ExprInt(v + 4, 32) - return True - - def encode(self): - if not 
isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - 4 - s = 0 - if v & 0x80000000: - s = 1 - v &= (1<<26) - 1 - if v >= (1 << 26): - return False - if v & 1: - return False - i1, i2, imm10h, imm11l = (v >> 23) & 1, (v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 1) & 0x7ff - j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1 - self.parent.sign.value = s - self.parent.j1.value = j1 - self.parent.j2.value = j2 - self.parent.imm10h.value = imm10h - self.value = imm11l - return True - - - -class armt2_imm6_11l(arm_imm): - - def decode(self, v): - v = v & self.lmask - s = self.parent.sign.value - j1 = self.parent.j1.value - j2 = self.parent.j2.value - imm6h = self.parent.imm6h.value - imm11l = v - - v = (s << 20) | (j2 << 19) | (j1 << 18) | (imm6h << 12) | (imm11l << 1) - v = sign_ext(v, 21, 32) - self.expr = ExprInt(v + 4, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - 4 - s = 0 - if v != sign_ext(v & ((1 << 22) - 1), 21, 32): - return False - if v & 0x80000000: - s = 1 - v &= (1<<22) - 1 - if v & 1: - return False - i2, i1, imm6h, imm11l = (v >> 19) & 1, (v >> 18) & 1, (v >> 12) & 0x3f, (v >> 1) & 0x7ff - self.parent.sign.value = s - self.parent.j1.value = i1 - self.parent.j2.value = i2 - self.parent.imm6h.value = imm6h - self.value = imm11l - return True - - - -imm12_1 = bs(l=1, fname="imm12_1", order=1) -imm12_3 = bs(l=3, fname="imm12_3", order=1) -imm12_8 = bs(l=8, cls=(armt2_imm12,), fname="imm", order=2) - - -imm12_8_t4 = bs(l=8, cls=(armt4_imm12,), fname="imm", order=2) - - -imm16_1 = bs(l=1, fname="imm16_1", order=1) -imm16_3 = bs(l=3, fname="imm16_3", order=1) -imm16_4 = bs(l=4, fname="imm16_4", order=1) -imm16_8 = bs(l=8, cls=(armt2_imm16,), fname="imm", order=2) - - -imm5_3 = bs(l=3, fname="imm5_3") -imm5_2 = bs(l=2, fname="imm5_2") -imm_stype = bs(l=2, fname="stype") - -imm_stype_00 = bs('00', fname="stype") -imm_stype_01 = bs('01', fname="stype") -imm_stype_11 = bs('11', fname="stype") - 
- -imm1 = bs(l=1, fname="imm1") - - - -off20_6 = bs(l=6, fname="off20_6", order=1) -off20_11 = bs(l=11, cls=(armt2_off20,), fname="imm", order=2) - - - -lsb5_3 = bs(l=3, fname="lsb5_3", order=1) -lsb5_2 = bs(l=2, cls=(armt2_lsb5,), fname="imm", order=2) - -widthm1 = bs(l=5, cls=(armt_widthm1,), fname="imm", order=2) - - - -class armt_imm5_1(arm_imm): - - def decode(self, v): - v = ((self.parent.imm1.value << 5) | v) << 1 - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if v & 0x1: - return False - self.parent.imm1.value = (v >> 6) & 1 - self.value = (v >> 1) & 0x1f - return True - -aif_str = ["X", "F", "I", "IF", "A", "AF", "AI", "AIF"] -aif_expr = [ExprId(x, 32) if x != None else None for x in aif_str] - -aif_reg = reg_info(aif_str, aif_expr) - -class armt_aif(reg_noarg, arm_arg): - reg_info = aif_reg - parser = reg_info.parser - - def decode(self, v): - if v == 0: - return False - return super(armt_aif, self).decode(v) - - def encode(self): - ret = super(armt_aif, self).encode() - if not ret: - return ret - return self.value != 0 - - def fromstring(self, text, loc_db, parser_result=None): - start, stop = super(armt_aif, self).fromstring(text, loc_db, parser_result) - if self.expr.name == "X": - return None, None - return start, stop - - -class armt_it_arg(arm_arg): - arg_E = ExprId('E', 1) - arg_NE = ExprId('NE', 1) - - def decode(self, v): - if v: - return self.arg_E - else: - return self.arg_NE - - def encode(self): - if self.expr == self.arg_E: - return 1 - elif self.expr == self.arg_NE: - return 0 - -class armt_itmask(bs_divert): - prio = 2 - - def divert(self, i, candidates): - out = [] - for cls, _, bases, dct, fields in candidates: - for value in range(1, 0x10): - nfields = fields[:] - s = int2bin(value, self.args['l']) - args = dict(self.args) - args.update({'strbits': s}) - f = bs(**args) - nfields[i] = f - inv = nfields[-2].value - ndct = dict(dct) - 
ndct['name'] = self.modname(ndct['name'], value, inv) - out.append((cls, ndct['name'], bases, ndct, nfields)) - return out - - def modname(self, name, value, inv): - count = 0 - while value & (1 << count) == 0: - count += 1 - out = [] - values = ['E', 'T'] - if inv== 1: - values.reverse() - for index in range(3 - count): - if value & (1 << (3 - index)): - out.append(values[0]) - else: - out.append(values[1]) - return name + "".join(out) - - - -class armt_cond_lsb(bs_divert): - prio = 2 - - def divert(self, i, candidates): - out = [] - for cls, _, bases, dct, fields in candidates: - for value in range(2): - nfields = fields[:] - s = int2bin(value, self.args['l']) - args = dict(self.args) - args.update({'strbits': s}) - f = bs(**args) - nfields[i] = f - ndct = dict(dct) - out.append((cls, ndct['name'], bases, ndct, nfields)) - return out - - -cond_expr = [ExprId(x, 32) for x in cond_list_full] -cond_info = reg_info(cond_list_full, cond_expr) - -class armt_cond_arg(arm_arg): - parser = cond_info.parser - - def decode(self, v): - v = (v << 1) | self.parent.condlsb.value - self.expr = ExprId(cond_list_full[v], 32) - return True - - def encode(self): - index = cond_list_full.index(self.expr.name) - self.value = index >> 1 - if index & 1 != self.parent.condlsb.value: - return False - return True - - -class armt_op2imm(arm_imm8_12): - parser = deref - - def str_to_imm_rot_form(self, s, neg=False): - if neg: - s = -s & 0xffffffff - if 0 <= s < (1 << 12): - return s - return None - - def decodeval(self, v): - return v - - def encodeval(self, v): - return v - - def decode(self, v): - val = v & self.lmask - val = self.decodeval(val) - if val is False: - return False - imm = val - if self.parent.updown.value == 0: - imm = -imm - if self.parent.ppi.value == 0 and self.parent.wback.value == 0: - return False - if self.parent.ppi.value: - e = ExprOp('preinc', self.parent.rn.expr, ExprInt(imm, 32)) - if self.parent.wback.value == 1: - e = ExprOp('wback', e) - else: - e = 
ExprOp('postinc', self.parent.rn.expr, ExprInt(imm, 32)) - self.expr = ExprMem(e, 32) - return True - - def encode(self): - self.parent.updown.value = 1 - self.parent.wback.value = 0 - - e = self.expr - assert(isinstance(e, ExprMem)) - e = e.ptr - if e.op == 'wback': - self.parent.wback.value = 1 - e = e.args[0] - if e.op == "postinc": - self.parent.ppi.value = 0 - self.parent.wback.value = 1 - elif e.op == "preinc": - self.parent.ppi.value = 1 - else: - # XXX default - self.parent.ppi.value = 1 - - self.parent.rn.expr = e.args[0] - - if len(e.args) == 1: - self.value = 0 - return True - # pure imm - if isinstance(e.args[1], ExprInt): - val = self.str_to_imm_rot_form(int(e.args[1])) - if val is None: - val = self.str_to_imm_rot_form(int(e.args[1]), True) - if val is None: - log.debug('cannot encode inm') - return False - self.parent.updown.value = 0 - val = self.encodeval(val) - if val is False: - return False - self.value = val - return True - # pure reg - if isinstance(e.args[1], ExprId): - rm = gpregs.expr.index(e.args[1]) - shift_kind = 0 - shift_type = 0 - amount = 0 - val = (((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm - val = self.encodeval(val) - if val is False: - return False - self.value = val - return True - return False - - -class armt_op2imm00(armt_op2imm): - - def decodeval(self, v): - return v << 2 - - def encodeval(self, v): - if v & 3: - return False - return v >> 2 - - -class armt_deref_reg(arm_imm8_12): - parser = deref - - def decode(self, v): - base = self.parent.rn.expr - off = gpregs.expr[v] - if self.parent.imm.value != 0: - off = off << ExprInt(self.parent.imm.value, 32) - e = ExprMem(ExprOp('preinc', base, off), 8) - self.expr = e - return True - - def encode(self): - if not isinstance(self.expr, ExprMem): - return False - ptr = self.expr.ptr - if not ptr.is_op('preinc'): - return False - if len(ptr.args) != 2: - return False - base, off = ptr.args - if base.is_id() and off.is_id(): - self.parent.rn.expr = base - 
self.parent.imm.value = 0 - self.value = gpregs.expr.index(off) - elif off.is_int(): - return False - elif off.is_op('<<'): - if len(off.args) != 2: - return False - reg, off = off.args - self.parent.rn.expr = base - self.parent.imm.value = 0 - self.value = gpregs.expr.index(reg) - off = int(off) - if off > self.parent.imm.lmask: - return False - self.parent.imm.value = off - return True - - -class armt_deref_reg_reg(arm_arg): - parser = deref_reg_reg - reg_info = gpregs - - def decode(self, v): - expr = self.reg_info.expr[v] - expr = ExprMem(self.parent.rn.expr + expr, 8) - self.expr = expr - return True - - def encode(self): - expr = self.expr - if not expr.is_mem(): - return False - ptr = expr.ptr - if not ptr.is_op('+') or len(ptr.args) != 2: - return False - reg1, reg2 = ptr.args - self.parent.rn.expr = reg1 - self.value = self.reg_info.expr.index(reg2) - return True - - -class armt_deref_reg_reg_lsl_1(arm_reg): - parser = deref_reg_reg_lsl_1 - reg_info = gpregs - - def decode(self, v): - expr = self.reg_info.expr[v] - expr = ExprMem(self.parent.rn.expr + (expr << ExprInt(1, 32)), 16) - self.expr = expr - return True - - def encode(self): - expr = self.expr - if not expr.is_mem(): - return False - ptr = expr.ptr - if not ptr.is_op('+') or len(ptr.args) != 2: - return False - reg1, reg_shift = ptr.args - self.parent.rn.expr = reg1 - if not reg_shift.is_op('<<') or len(reg_shift.args) != 2: - return False - if reg_shift.args[1] != ExprInt(1, 32): - return False - self.value = self.reg_info.expr.index(reg_shift.args[0]) - return True - - -aif = bs(l=3, cls=(armt_aif,)) - - -imm5_off = bs(l=5, cls=(armt_imm5_1,), fname="imm5_off") - -tsign = bs(l=1, fname="sign") -tj1 = bs(l=1, fname="j1") -tj2 = bs(l=1, fname="j2") - -timm6h = bs(l=6, fname="imm6h") -timm10H = bs(l=10, fname="imm10h") -timm10L = bs(l=10, cls=(armt2_imm10l,), fname="imm10l") -timm11L = bs(l=11, cls=(armt2_imm11l,), fname="imm11l") - -timm6h11l = bs(l=11, cls=(armt2_imm6_11l,), fname="imm6h11l") - 
-itcond = bs(l=4, fname="itcond") -itmask = armt_itmask(l=4, fname="itmask") -bs_cond_arg_msb = bs(l=3, cls=(armt_cond_arg,)) - - -condlsb = armt_cond_lsb(l=1, fname="condlsb") - -deref_immpuw = bs(l=8, cls=(armt_op2imm,)) -deref_immpuw00 = bs(l=8, cls=(armt_op2imm00,)) - - -rm_deref_reg = bs(l=4, cls=(armt_deref_reg,)) - -bs_deref_reg_reg = bs(l=4, cls=(armt_deref_reg_reg,)) -bs_deref_reg_reg_lsl_1 = bs(l=4, cls=(armt_deref_reg_reg_lsl_1,)) - - -class armt_barrier_option(reg_noarg, arm_arg): - reg_info = barrier_info - parser = reg_info.parser - - def decode(self, v): - v = v & self.lmask - if v not in self.reg_info.dct_expr: - return False - self.expr = self.reg_info.dct_expr[v] - return True - - def encode(self): - if not self.expr in self.reg_info.dct_expr_inv: - log.debug("cannot encode reg %r", self.expr) - return False - self.value = self.reg_info.dct_expr_inv[self.expr] - if self.value > self.lmask: - log.debug("cannot encode field value %x %x", - self.value, self.lmask) - return False - return True - - def check_fbits(self, v): - return v & self.fmask == self.fbits - -barrier_option = bs(l=4, cls=(armt_barrier_option,)) - -armtop("adc", [bs('11110'), imm12_1, bs('0'), bs('1010'), scc, rn_nosppc, bs('0'), imm12_3, rd_nosppc, imm12_8]) -armtop("adc", [bs('11101'), bs('01'), bs('1010'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh]) -armtop("bl", [bs('11110'), tsign, timm10H, bs('11'), tj1, bs('1'), tj2, timm11L]) -armtop("blx", [bs('11110'), tsign, timm10H, bs('11'), tj1, bs('0'), tj2, timm10L, bs('0')]) -armtop("cbz", [bs('101100'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) -armtop("cbnz", [bs('101110'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) - -armtop("bkpt", [bs('1011'), bs('1110'), imm8]) - - -armtop("it", [bs('10111111'), bs_cond_arg_msb, condlsb, itmask]) - - -armtop("nop", [bs8(0xBF),bs8(0x0)]) -armtop("wfi", [bs8(0xBF),bs8(0x30)]) -armtop("cpsid", [bs8(0xB6),bs('0111'), bs('0'), aif], [aif]) -armtop("cpsie", 
[bs8(0xB6),bs('0110'), bs('0'), aif], [aif]) - -armtop("push", [bs('1110100'), bs('10'), bs('0'), bs('1'), bs('0'), bs('1101'), bs('0'), pclr, bs('0'), trlist13], [trlist13]) -armtop("pop", [bs('1110100'), bs('01'), bs('0'), bs('1'), bs('1'), bs('1101'), pc_in, lr_in, bs('0'), trlist13pclr], [trlist13pclr]) -armtop("mov", [bs('11110'), imm12_1, bs('00010'), scc, bs('1111'), bs('0'), imm12_3, rd_nosppc, imm12_8]) -armtop("asr", [bs('11111010'), bs('0100'), rm, bs('1111'), rd, bs('0000'), rs], [rd, rm, rs]) -armtop("lsl", [bs('11111010'), bs('0000'), rm, bs('1111'), rd, bs('0000'), rs], [rd, rm, rs]) -armtop("sel", [bs('11111010'), bs('1010'), rm, bs('1111'), rd, bs('1000'), rs], [rd, rm, rs]) -armtop("rev", [bs('11111010'), bs('1001'), rm, bs('1111'), rd, bs('1000'), rm_cp], [rd, rm]) -armtop("uadd8", [bs('111110101000'), rn, bs('1111'), rd, bs('0100'), rm], [rd, rn, rm]) -armtop("mvn", [bs('11101010011'), scc, bs('11110'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh] ) -armtop("and", [bs('11101010000'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) -armtop("orr", [bs('11101010010'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) -armtop("bic", [bs('11101010001'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) -armtop("add", [bs('11101011000'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) -armtop("sub", [bs('11101011101'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) -armtop("eor", [bs('11101010100'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) -armtop("rsb", [bs('11101011110'), scc, rn, bs('0'), imm5_3, rd, imm5_2, imm_stype, rm_sh], [rd, rn, rm_sh] ) -armtop("orn", [bs('11101010011'), scc, rn_nopc, bs('0'), imm5_3, rd, imm5_2, 
imm_stype, rm_sh], [rd, rn_nopc, rm_sh] ) -# lsl -armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_00, rm_sh], [rd_nosppc, rm_sh] ) -armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_01, rm_sh], [rd_nosppc, rm_sh] ) -armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_11, rm_sh], [rd_nosppc, rm_sh] ) - - -armtop("orr", [bs('11110'), imm12_1, bs('00010'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8] ) -armtop("add", [bs('11110'), imm12_1, bs('01000'), bs('0'), rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) -armtop("adds",[bs('11110'), imm12_1, bs('01000'), bs('1'), rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) -armtop("bic", [bs('11110'), imm12_1, bs('00001'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8], [rd, rn_nosppc, imm12_8]) -armtop("and", [bs('11110'), imm12_1, bs('00000'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) -armtop("sub", [bs('11110'), imm12_1, bs('01101'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) -armtop("eor", [bs('11110'), imm12_1, bs('00100'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) -armtop("add", [bs('11110'), imm12_1, bs('10000'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8_t4], [rd, rn_nosppc, imm12_8_t4]) -armtop("cmp", [bs('11110'), imm12_1, bs('01101'), bs('1'), rn, bs('0'), imm12_3, bs('1111'), imm12_8] ) - -armtop("cmp", [bs('11101011101'), bs('1'), rn, bs('0'), imm5_3, bs('1111'), imm5_2, imm_stype, rm_sh], [rn, rm_sh] ) - -armtop("cmn", [bs('11110'), imm12_1, bs('01000'), bs('1'), rn, bs('0'), imm12_3, bs('1111'), imm12_8], [rn, imm12_8]) - - -armtop("mvn", [bs('11110'), imm12_1, bs('00011'), scc, bs('1111'), bs('0'), imm12_3, rd, imm12_8]) -armtop("rsb", [bs('11110'), imm12_1, bs('01110'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8], [rd, rn_nosppc, imm12_8]) 
-armtop("sub", [bs('11110'), imm12_1, bs('101010'), rn_nosppc, bs('0'), imm12_3, rd, imm12_8_t4], [rd, rn_nosppc, imm12_8_t4]) -armtop("tst", [bs('11110'), imm12_1, bs('000001'), rn, bs('0'), imm12_3, bs('1111'), imm12_8], [rn, imm12_8]) - -armtop("mov", [bs('11110'), imm16_1, bs('100100'), imm16_4, bs('0'), imm16_3, rd, imm16_8] ) -armtop("movt", [bs('11110'), imm16_1, bs('101100'), imm16_4, bs('0'), imm16_3, rd, imm16_8] ) - -armtop("sdiv", [bs('111110111001'), rn, bs('1111'), rd, bs('1111'), rm], [rd, rn, rm] ) -armtop("udiv", [bs('111110111011'), rn, bs('1111'), rd, bs('1111'), rm], [rd, rn, rm] ) -armtop("mls", [bs('111110110000'), rn, ra, rd, bs('0001'), rm], [rd, rn, rm, ra] ) -armtop("mla", [bs('111110110000'), rn, ra_nopc, rd, bs('0000'), rm], [rd, rn, rm, ra_nopc] ) -armtop("mul", [bs('111110110000'), rn, bs('1111'), rd, bs('0000'), rm], [rd, rn, rm] ) - -armtop("smlabb", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('00'), rm], [rd, rn, rm, ra_nopc]) -armtop("smlabt", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('01'), rm], [rd, rn, rm, ra_nopc]) -armtop("smlatb", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('10'), rm], [rd, rn, rm, ra_nopc]) -armtop("smlatt", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('11'), rm], [rd, rn, rm, ra_nopc]) - -armtop("b", [bs('11110'), tsign, bm_cond_barmt, timm6h, bs('10'), tj1, bs('0'), tj2, timm6h11l], [timm6h11l]) -armtop("b", [bs('11110'), tsign, timm10H, bs('10'), tj1, bs('1'), tj2, timm11L], [timm11L]) - -armtop("ubfx", [bs('111100111100'), rn, bs('0'), lsb5_3, rd, lsb5_2, bs('0'), widthm1], [rd, rn, lsb5_2, widthm1]) -armtop("uxth", [bs('111110100001'), bs('1111'), bs('1111'), rd, bs('10'), rot2, rm_rot2], [rd, rm_rot2]) - - - -armtop("str", [bs('111110001100'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("str", [bs('111110000100'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) -armtop("str", [bs('111110000100'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, 
deref_immpuw], [rt, deref_immpuw]) -armtop("strb", [bs('111110001000'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("strb", [bs('111110000000'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) -armtop("strh", [bs('111110001010'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("strh", [bs('111110000010'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) - -armtop("strd", [bs('1110100'), ppi, updown, bs('1'), wback_no_t, bs('0'), rn_nopc_noarg, rt, rt2, deref_immpuw00], [rt, rt2, deref_immpuw00]) -armtop("ldrd", [bs('1110100'), ppi, updown, bs('1'), wback_no_t, bs('1'), rn_nopc_noarg, rt, rt2, deref_immpuw00], [rt, rt2, deref_immpuw00]) - - -armtop("ldr", [bs('111110001101'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("ldr", [bs('111110000101'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) -armtop("ldr", [bs('111110000101'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) -armtop("ldrb", [bs('111110000001'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) -armtop("ldrb", [bs('111110000001'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) -armtop("ldrb", [bs('111110001001'), rn_deref, rt_nopc, off12], [rt_nopc, rn_deref]) -armtop("ldrsb",[bs('111110011001'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("ldrsh",[bs('111110011011'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("ldrh", [bs('111110001011'), rn_deref, rt, off12], [rt, rn_deref]) -armtop("ldrh", [bs('111110000011'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) - -armtop("pld", [bs('111110001001'), rn_deref, bs('1111'), off12], [rn_deref]) -armtop("pldw", [bs('111110001011'), rn_deref, bs('1111'), off12], [rn_deref]) - -armtop("clz", [bs('111110101011'), rm, bs('1111'), rd, bs('1000'), rm_cp], [rd, rm]) -armtop("tbb", [bs('111010001101'), rn_noarg, bs('11110000000'), 
bs('0'), bs_deref_reg_reg], [bs_deref_reg_reg]) -armtop("tbh", [bs('111010001101'), rn_noarg, bs('11110000000'), bs('1'), bs_deref_reg_reg_lsl_1], [bs_deref_reg_reg_lsl_1]) -armtop("dsb", [bs('111100111011'), bs('1111'), bs('1000'), bs('1111'), bs('0100'), barrier_option]) - -armtop("adr", [bs('11110'), imm12_1, bs('100000'), bs('1111'), bs('0'), imm12_3, rd, imm12_8_t4], [rd, imm12_8_t4]) diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py deleted file mode 100644 index 2a443cc2..00000000 --- a/miasm2/arch/arm/disasm.py +++ /dev/null @@ -1,61 +0,0 @@ -from future.utils import viewvalues - -from miasm2.core.asmblock import AsmConstraint, disasmEngine -from miasm2.arch.arm.arch import mn_arm, mn_armt - - -def cb_arm_fix_call(mn, cur_bloc, loc_db, offsets_to_dis, *args, **kwargs): - """ - for arm: - MOV LR, PC - LDR PC, [R5, 0x14] - * is a subcall * - - """ - if len(cur_bloc.lines) < 2: - return - l1 = cur_bloc.lines[-1] - l2 = cur_bloc.lines[-2] - if l1.name != "LDR": - return - if l2.name != "MOV": - return - - values = viewvalues(mn.pc) - if not l1.args[0] in values: - return - if not l2.args[1] in values: - return - loc_key_cst = loc_db.get_or_create_offset_location(l1.offset + 4) - cur_bloc.add_cst(loc_key_cst, AsmConstraint.c_next) - offsets_to_dis.add(l1.offset + 4) - -cb_arm_funcs = [cb_arm_fix_call] - - -def cb_arm_disasm(*args, **kwargs): - for func in cb_arm_funcs: - func(*args, **kwargs) - - -class dis_armb(disasmEngine): - attrib = 'b' - def __init__(self, bs=None, **kwargs): - super(dis_armb, self).__init__(mn_arm, self.attrib, bs, **kwargs) - self.dis_block_callback = cb_arm_disasm - -class dis_arml(disasmEngine): - attrib = 'l' - def __init__(self, bs=None, **kwargs): - super(dis_arml, self).__init__(mn_arm, self.attrib, bs, **kwargs) - self.dis_block_callback = cb_arm_disasm - -class dis_armtb(disasmEngine): - attrib = 'b' - def __init__(self, bs=None, **kwargs): - super(dis_armtb, self).__init__(mn_armt, self.attrib, bs, **kwargs) - 
-class dis_armtl(disasmEngine): - attrib = 'l' - def __init__(self, bs=None, **kwargs): - super(dis_armtl, self).__init__(mn_armt, self.attrib, bs, **kwargs) diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py deleted file mode 100644 index 2214c626..00000000 --- a/miasm2/arch/arm/ira.py +++ /dev/null @@ -1,106 +0,0 @@ -#-*- coding:utf-8 -*- - -from miasm2.ir.analysis import ira -from miasm2.ir.ir import IRBlock -from miasm2.arch.arm.sem import ir_arml, ir_armtl, ir_armb, ir_armtb, tab_cond -from miasm2.expression.expression import ExprAssign, ExprOp, ExprLoc, ExprCond -from miasm2.ir.ir import AssignBlock - -class ir_a_arml_base(ir_arml, ira): - def __init__(self, loc_db=None): - ir_arml.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 - -class ir_a_armb_base(ir_armb, ira): - def __init__(self, loc_db=None): - ir_armb.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 - - -class ir_a_arml(ir_a_arml_base): - - def __init__(self, loc_db=None): - ir_a_arml_base.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 - - def call_effects(self, ad, instr): - call_assignblk = AssignBlock( - [ - ExprAssign( - self.ret_reg, - ExprOp( - 'call_func_ret', - ad, - self.arch.regs.R0, - self.arch.regs.R1, - self.arch.regs.R2, - self.arch.regs.R3, - ) - ), - ExprAssign( - self.sp, - ExprOp('call_func_stack', ad, self.sp) - ), - ], - instr - ) - - - cond = instr.additional_info.cond - if cond == 14: # COND_ALWAYS: - return [call_assignblk], [] - - # Call is a conditional instruction - cond = tab_cond[cond] - - loc_next = self.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 32) - loc_do = self.loc_db.add_location() - loc_do_expr = ExprLoc(loc_do, 32) - dst_cond = ExprCond(cond, loc_do_expr, loc_next_expr) - - call_assignblks = [ - call_assignblk, - AssignBlock([ExprAssign(self.IRDst, loc_next_expr)], instr), - ] - e_do = IRBlock(loc_do, call_assignblks) - assignblks_out = [ - AssignBlock([ExprAssign(self.IRDst, dst_cond)], instr) - ] - return 
assignblks_out, [e_do] - - - def get_out_regs(self, _): - return set([self.ret_reg, self.sp]) - - def sizeof_char(self): - return 8 - - def sizeof_short(self): - return 16 - - def sizeof_int(self): - return 32 - - def sizeof_long(self): - return 32 - - def sizeof_pointer(self): - return 32 - -class ir_a_armb(ir_a_armb_base, ir_a_arml): - - def __init__(self, loc_db=None): - ir_a_armb_base.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 - - -class ir_a_armtl(ir_armtl, ir_a_arml): - def __init__(self, loc_db=None): - ir_armtl.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 - -class ir_a_armtb(ir_a_armtl, ir_armtb, ir_a_armb): - def __init__(self, loc_db=None): - ir_armtb.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py deleted file mode 100644 index 128baffb..00000000 --- a/miasm2/arch/arm/jit.py +++ /dev/null @@ -1,148 +0,0 @@ -from builtins import range -import logging - -from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import pck32, upck32 -from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_inv, tab_cond -from miasm2.jitter.codegen import CGen -from miasm2.expression.expression import ExprId, ExprAssign, ExprCond -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.translators.C import TranslatorC -from miasm2.expression.simplifications import expr_simp_high_to_explicit - -log = logging.getLogger('jit_arm') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - - - -class arm_CGen(CGen): - - def block2assignblks(self, block): - """ - Return the list of irblocks for a native @block - @block: AsmBlock - """ - irblocks_list = [] - index = -1 - while index + 1 < len(block.lines): - index += 1 - instr = block.lines[index] - - if instr.name.startswith("IT"): - 
assignments = [] - label = self.ir_arch.get_instr_label(instr) - irblocks = [] - index, irblocks = self.ir_arch.do_it_block(label, index, block, assignments, True) - irblocks_list += irblocks - continue - - - assignblk_head, assignblks_extra = self.ir_arch.instr2ir(instr) - # Keep result in ordered list as first element is the assignblk head - # The remainings order is not really important - irblock_head = self.assignblk_to_irbloc(instr, assignblk_head) - irblocks = [irblock_head] + assignblks_extra - - - # Simplify high level operators - out = [] - for irblock in irblocks: - new_irblock = irblock.simplify(expr_simp_high_to_explicit)[1] - out.append(new_irblock) - irblocks = out - - - for irblock in irblocks: - assert irblock.dst is not None - irblocks_list.append(irblocks) - return irblocks_list - - -class jitter_arml(Jitter): - C_Gen = arm_CGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_arml(sp), *args, **kwargs) - self.vm.set_little_endian() - - def push_uint32_t(self, value): - self.cpu.SP -= 4 - self.vm.set_mem(self.cpu.SP, pck32(value)) - - def pop_uint32_t(self): - value = self.vm.get_u32(self.cpu.SP) - self.cpu.SP += 4 - return value - - def get_stack_arg(self, index): - return self.vm.get_u32(self.cpu.SP + 4 * index) - - # calling conventions - - @named_arguments - def func_args_stdcall(self, n_args): - args = [self.get_arg_n_stdcall(i) for i in range(n_args)] - ret_ad = self.cpu.LR - return ret_ad, args - - def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None): - self.pc = self.cpu.PC = ret_addr - if ret_value1 is not None: - self.cpu.R0 = ret_value1 - if ret_value2 is not None: - self.cpu.R1 = ret_value2 - return True - - def func_prepare_stdcall(self, ret_addr, *args): - for index in range(min(len(args), 4)): - setattr(self.cpu, 'R%d' % index, args[index]) - for index in reversed(range(4, len(args))): - self.push_uint32_t(args[index]) - self.cpu.LR = ret_addr - - def get_arg_n_stdcall(self, 
index): - if index < 4: - arg = getattr(self.cpu, 'R%d' % index) - else: - arg = self.get_stack_arg(index-4) - return arg - - func_args_systemv = func_args_stdcall - func_ret_systemv = func_ret_stdcall - func_prepare_systemv = func_prepare_stdcall - get_arg_n_systemv = get_arg_n_stdcall - - def syscall_args_systemv(self, n_args): - args = [self.cpu.R0, self.cpu.R1, self.cpu.R2, self.cpu.R3, - self.cpu.R4, self.cpu.R5][:n_args] - return args - - def syscall_ret_systemv(self, value): - self.cpu.R0 = value - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.PC = self.pc - - -class jitter_armb(jitter_arml): - C_Gen = arm_CGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_armb(sp), *args, **kwargs) - self.vm.set_big_endian() - - -class jitter_armtl(jitter_arml): - C_Gen = arm_CGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_armtl(sp), *args, **kwargs) - self.vm.set_little_endian() diff --git a/miasm2/arch/arm/regs.py b/miasm2/arch/arm/regs.py deleted file mode 100644 index f39f2161..00000000 --- a/miasm2/arch/arm/regs.py +++ /dev/null @@ -1,114 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range -from miasm2.expression.expression import * - - -# GP - -regs32_str = ["R%d" % i for i in range(13)] + ["SP", "LR", "PC"] -regs32_expr = [ExprId(x, 32) for x in regs32_str] - -exception_flags = ExprId('exception_flags', 32) -interrupt_num = ExprId('interrupt_num', 32) -bp_num = ExprId('bp_num', 32) - - -R0 = regs32_expr[0] -R1 = regs32_expr[1] -R2 = regs32_expr[2] -R3 = regs32_expr[3] -R4 = regs32_expr[4] -R5 = regs32_expr[5] -R6 = regs32_expr[6] -R7 = regs32_expr[7] -R8 = regs32_expr[8] -R9 = regs32_expr[9] -R10 = regs32_expr[10] -R11 = regs32_expr[11] -R12 = regs32_expr[12] -SP = regs32_expr[13] -LR = regs32_expr[14] -PC = regs32_expr[15] - -R0_init = ExprId("R0_init", 32) -R1_init = ExprId("R1_init", 32) -R2_init = ExprId("R2_init", 32) 
-R3_init = ExprId("R3_init", 32) -R4_init = ExprId("R4_init", 32) -R5_init = ExprId("R5_init", 32) -R6_init = ExprId("R6_init", 32) -R7_init = ExprId("R7_init", 32) -R8_init = ExprId("R8_init", 32) -R9_init = ExprId("R9_init", 32) -R10_init = ExprId("R10_init", 32) -R11_init = ExprId("R11_init", 32) -R12_init = ExprId("R12_init", 32) -SP_init = ExprId("SP_init", 32) -LR_init = ExprId("LR_init", 32) -PC_init = ExprId("PC_init", 32) - - -reg_zf = 'zf' -reg_nf = 'nf' -reg_of = 'of' -reg_cf = 'cf' - -zf = ExprId(reg_zf, size=1) -nf = ExprId(reg_nf, size=1) -of = ExprId(reg_of, size=1) -cf = ExprId(reg_cf, size=1) - -zf_init = ExprId("zf_init", size=1) -nf_init = ExprId("nf_init", size=1) -of_init = ExprId("of_init", size=1) -cf_init = ExprId("cf_init", size=1) - - -reg_ge0 = 'ge0' -reg_ge1 = 'ge1' -reg_ge2 = 'ge2' -reg_ge3 = 'ge3' - -ge0 = ExprId(reg_ge0, size=1) -ge1 = ExprId(reg_ge1, size=1) -ge2 = ExprId(reg_ge2, size=1) -ge3 = ExprId(reg_ge3, size=1) - -ge0_init = ExprId("ge0_init", size=1) -ge1_init = ExprId("ge1_init", size=1) -ge2_init = ExprId("ge2_init", size=1) -ge3_init = ExprId("ge3_init", size=1) - -ge_regs = [ge0, ge1, ge2, ge3] - -all_regs_ids = [ - R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, LR, PC, - zf, nf, of, cf, - ge0, ge1, ge2, ge3, - exception_flags, interrupt_num, bp_num -] - -all_regs_ids_no_alias = all_regs_ids - -attrib_to_regs = { - 'l': all_regs_ids_no_alias, - 'b': all_regs_ids_no_alias, -} - -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) - -all_regs_ids_init = [R0_init, R1_init, R2_init, R3_init, - R4_init, R5_init, R6_init, R7_init, - R8_init, R9_init, R10_init, R11_init, - R12_init, SP_init, LR_init, PC_init, - zf_init, nf_init, of_init, cf_init, - ge0_init, ge1_init, ge2_init, ge3_init, - ExprInt(0, 32), ExprInt(0, 32), ExprInt(0, 32) - ] - -regs_init = {} -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] - -regs_flt_expr = [] diff --git a/miasm2/arch/arm/sem.py 
b/miasm2/arch/arm/sem.py deleted file mode 100644 index 6622a42a..00000000 --- a/miasm2/arch/arm/sem.py +++ /dev/null @@ -1,1902 +0,0 @@ -from builtins import range -from future.utils import viewitems, viewvalues - -from miasm2.expression.expression import * -from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock -from miasm2.arch.arm.arch import mn_arm, mn_armt -from miasm2.arch.arm.regs import * - -from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO, EXCEPT_INT_XX - -# liris.cnrs.fr/~mmrissa/lib/exe/fetch.php?media=armv7-a-r-manual.pdf -EXCEPT_SOFT_BP = (1 << 1) - -EXCEPT_PRIV_INSN = (1 << 17) - -# CPSR: N Z C V - - -def update_flag_zf(a): - return [ExprAssign(zf, ExprOp("FLAG_EQ", a))] - - -def update_flag_zf_eq(a, b): - return [ExprAssign(zf, ExprOp("FLAG_EQ_CMP", a, b))] - - -def update_flag_nf(arg): - return [ - ExprAssign( - nf, - ExprOp("FLAG_SIGN_SUB", arg, ExprInt(0, arg.size)) - ) - ] - - -def update_flag_zn(a): - e = [] - e += update_flag_zf(a) - e += update_flag_nf(a) - return e - - - -# XXX TODO: set cf if ROT imm in argument - - -def check_ops_msb(a, b, c): - if not a or not b or not c or a != b or a != c: - raise ValueError('bad ops size %s %s %s' % (a, b, c)) - -def update_flag_add_cf(op1, op2): - "Compute cf in @op1 + @op2" - return [ExprAssign(cf, ExprOp("FLAG_ADD_CF", op1, op2))] - - -def update_flag_add_of(op1, op2): - "Compute of in @op1 + @op2" - return [ExprAssign(of, ExprOp("FLAG_ADD_OF", op1, op2))] - - -def update_flag_sub_cf(op1, op2): - "Compote CF in @op1 - @op2" - return [ExprAssign(cf, ExprOp("FLAG_SUB_CF", op1, op2) ^ ExprInt(1, 1))] - - -def update_flag_sub_of(op1, op2): - "Compote OF in @op1 - @op2" - return [ExprAssign(of, ExprOp("FLAG_SUB_OF", op1, op2))] - - -def update_flag_arith_add_co(arg1, arg2): - e = [] - e += update_flag_add_cf(arg1, arg2) - e += update_flag_add_of(arg1, arg2) - return e - - -def update_flag_arith_add_zn(arg1, arg2): - """ - Compute zf and nf flags for (arg1 + arg2) - """ - e = [] - e += 
update_flag_zf_eq(arg1, -arg2) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] - return e - - -def update_flag_arith_sub_co(arg1, arg2): - """ - Compute cf and of flags for (arg1 - arg2) - """ - e = [] - e += update_flag_sub_cf(arg1, arg2) - e += update_flag_sub_of(arg1, arg2) - return e - - -def update_flag_arith_sub_zn(arg1, arg2): - """ - Compute zf and nf flags for (arg1 - arg2) - """ - e = [] - e += update_flag_zf_eq(arg1, arg2) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, arg2))] - return e - - - - -def update_flag_zfaddwc_eq(arg1, arg2, arg3): - return [ExprAssign(zf, ExprOp("FLAG_EQ_ADDWC", arg1, arg2, arg3))] - -def update_flag_zfsubwc_eq(arg1, arg2, arg3): - return [ExprAssign(zf, ExprOp("FLAG_EQ_SUBWC", arg1, arg2, arg3))] - - -def update_flag_arith_addwc_zn(arg1, arg2, arg3): - """ - Compute znp flags for (arg1 + arg2 + cf) - """ - e = [] - e += update_flag_zfaddwc_eq(arg1, arg2, arg3) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_ADDWC", arg1, arg2, arg3))] - return e - - -def update_flag_arith_subwc_zn(arg1, arg2, arg3): - """ - Compute znp flags for (arg1 - (arg2 + cf)) - """ - e = [] - e += update_flag_zfsubwc_eq(arg1, arg2, arg3) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUBWC", arg1, arg2, arg3))] - return e - - -def update_flag_addwc_cf(op1, op2, op3): - "Compute cf in @res = @op1 + @op2 + @op3" - return [ExprAssign(cf, ExprOp("FLAG_ADDWC_CF", op1, op2, op3))] - - -def update_flag_addwc_of(op1, op2, op3): - "Compute of in @res = @op1 + @op2 + @op3" - return [ExprAssign(of, ExprOp("FLAG_ADDWC_OF", op1, op2, op3))] - - -def update_flag_arith_addwc_co(arg1, arg2, arg3): - e = [] - e += update_flag_addwc_cf(arg1, arg2, arg3) - e += update_flag_addwc_of(arg1, arg2, arg3) - return e - - - -def update_flag_subwc_cf(op1, op2, op3): - "Compute cf in @res = @op1 + @op2 + @op3" - return [ExprAssign(cf, ExprOp("FLAG_SUBWC_CF", op1, op2, op3) ^ ExprInt(1, 1))] - - -def update_flag_subwc_of(op1, op2, op3): - "Compute of in @res = @op1 + @op2 + 
@op3" - return [ExprAssign(of, ExprOp("FLAG_SUBWC_OF", op1, op2, op3))] - - -def update_flag_arith_subwc_co(arg1, arg2, arg3): - e = [] - e += update_flag_subwc_cf(arg1, arg2, arg3) - e += update_flag_subwc_of(arg1, arg2, arg3) - return e - - - -def get_dst(a): - if a == PC: - return PC - return None - -# instruction definition ############## - - -def adc(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = b + c + cf.zeroExtend(32) - if instr.name == 'ADCS' and a != PC: - e += update_flag_arith_addwc_zn(arg1, arg2, cf) - e += update_flag_arith_addwc_co(arg1, arg2, cf) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def add(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = b + c - if instr.name == 'ADDS' and a != PC: - e += update_flag_arith_add_zn(arg1, arg2) - e += update_flag_arith_add_co(arg1, arg2) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def l_and(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b & c - if instr.name == 'ANDS' and a != PC: - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', b, c))] - e += update_flag_nf(r) - - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def sub(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b - c - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def subs(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = b - c - e += update_flag_arith_sub_zn(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def eor(ir, instr, a, b, c=None): - e 
= [] - if c is None: - b, c = a, b - r = b ^ c - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def eors(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = arg1 ^ arg2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_CMP', arg1, arg2))] - e += update_flag_nf(r) - - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def rsb(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = c, b - r = arg1 - arg2 - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def rsbs(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = c, b - r = arg1 - arg2 - e += update_flag_arith_sub_zn(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def sbc(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = arg1 - (arg2 + (~cf).zeroExtend(32)) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def sbcs(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = arg1 - (arg2 + (~cf).zeroExtend(32)) - - e += update_flag_arith_subwc_zn(arg1, arg2, ~cf) - e += update_flag_arith_subwc_co(arg1, arg2, ~cf) - - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def rsc(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = c, b - r = arg1 - (arg2 + (~cf).zeroExtend(32)) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def rscs(ir, instr, a, b, c=None): - e = [] 
- if c is None: - b, c = a, b - arg1, arg2 = c, b - r = arg1 - (arg2 + (~cf).zeroExtend(32)) - - e += update_flag_arith_subwc_zn(arg1, arg2, ~cf) - e += update_flag_arith_subwc_co(arg1, arg2, ~cf) - - e.append(ExprAssign(a, r)) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def tst(ir, instr, a, b): - e = [] - arg1, arg2 = a, b - r = arg1 & arg2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] - e += update_flag_nf(r) - - return e, [] - - -def teq(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = arg1 ^ arg2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_CMP', arg1, arg2))] - e += update_flag_nf(r) - - return e, [] - - -def l_cmp(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - e += update_flag_arith_sub_zn(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2) - return e, [] - - -def cmn(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - e += update_flag_arith_add_zn(arg1, arg2) - e += update_flag_arith_add_co(arg1, arg2) - return e, [] - - -def orr(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b | c - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def orn(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = ~(b | c) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def orrs(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - arg1, arg2 = b, c - r = arg1 | arg2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] - e += update_flag_nf(r) - - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def mov(ir, instr, a, b): - e = [ExprAssign(a, b)] - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, b)) - 
return e, [] - - -def movt(ir, instr, a, b): - r = a | b << ExprInt(16, 32) - e = [ExprAssign(a, r)] - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def movs(ir, instr, a, b): - e = [] - e.append(ExprAssign(a, b)) - # XXX TODO check - e += [ExprAssign(zf, ExprOp('FLAG_EQ', b))] - e += update_flag_nf(b) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, b)) - return e, [] - - -def mvn(ir, instr, a, b): - r = b ^ ExprInt(-1, 32) - e = [ExprAssign(a, r)] - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def mvns(ir, instr, a, b): - e = [] - r = b ^ ExprInt(-1, 32) - e.append(ExprAssign(a, r)) - # XXX TODO check - e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] - e += update_flag_nf(r) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - - -def mrs(ir, instr, a, b): - e = [] - if b.is_id('CPSR_cxsf'): - out = [] - out.append(ExprInt(0x10, 28)) - out.append(of) - out.append(cf) - out.append(zf) - out.append(nf) - e.append(ExprAssign(a, ExprCompose(*out))) - else: - raise NotImplementedError("MRS not implemented") - return e, [] - -def msr(ir, instr, a, b): - e = [] - if a.is_id('CPSR_cf'): - e.append(ExprAssign(nf, b[31:32])) - e.append(ExprAssign(zf, b[30:31])) - e.append(ExprAssign(cf, b[29:30])) - e.append(ExprAssign(of, b[28:29])) - else: - raise NotImplementedError("MSR not implemented") - return e, [] - - -def neg(ir, instr, a, b): - e = [] - r = - b - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def negs(ir, instr, a, b): - return subs(ir, instr, a, ExprInt(0, b.size), b) - -def bic(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b & (c ^ ExprInt(-1, 32)) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def bics(ir, instr, a, b, 
c=None): - e = [] - if c is None: - b, c = a, b - tmp1, tmp2 = b, ~c - r = tmp1 & tmp2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', tmp1, tmp2))] - e += update_flag_nf(r) - - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def sdiv(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - - loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_except = ExprId(ir.loc_db.add_location(), ir.IRDst.size) - loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - e.append(ExprAssign(ir.IRDst, ExprCond(c, loc_div, loc_except))) - - do_except = [] - do_except.append(ExprAssign(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(ExprAssign(ir.IRDst, loc_next)) - blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) - - - - r = ExprOp("sdiv", b, c) - do_div = [] - do_div.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - do_div.append(ExprAssign(ir.IRDst, r)) - - do_div.append(ExprAssign(ir.IRDst, loc_next)) - blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) - - return e, [blk_div, blk_except] - - -def udiv(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - - - - loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_except = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - e.append(ExprAssign(ir.IRDst, ExprCond(c, loc_div, loc_except))) - - do_except = [] - do_except.append(ExprAssign(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(ExprAssign(ir.IRDst, loc_next)) - blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) - - - r = ExprOp("udiv", b, c) - do_div = [] - do_div.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - do_div.append(ExprAssign(ir.IRDst, r)) - - do_div.append(ExprAssign(ir.IRDst, loc_next)) - 
blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) - - return e, [blk_div, blk_except] - - -def mla(ir, instr, a, b, c, d): - e = [] - r = (b * c) + d - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def mlas(ir, instr, a, b, c, d): - e = [] - r = (b * c) + d - e += update_flag_zn(r) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def mls(ir, instr, a, b, c, d): - e = [] - r = d - (b * c) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def mul(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b * c - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def muls(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b * c - e += update_flag_zn(r) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def umull(ir, instr, a, b, c, d): - e = [] - r = c.zeroExtend(64) * d.zeroExtend(64) - e.append(ExprAssign(a, r[0:32])) - e.append(ExprAssign(b, r[32:64])) - # r15/IRDst not allowed as output - return e, [] - -def umlal(ir, instr, a, b, c, d): - e = [] - r = c.zeroExtend(64) * d.zeroExtend(64) + ExprCompose(a, b) - e.append(ExprAssign(a, r[0:32])) - e.append(ExprAssign(b, r[32:64])) - # r15/IRDst not allowed as output - return e, [] - -def smull(ir, instr, a, b, c, d): - e = [] - r = c.signExtend(64) * d.signExtend(64) - e.append(ExprAssign(a, r[0:32])) - e.append(ExprAssign(b, r[32:64])) - # r15/IRDst not allowed as output - return e, [] - -def smlal(ir, instr, a, b, c, d): - e = [] - r = c.signExtend(64) * d.signExtend(64) + ExprCompose(a, b) - e.append(ExprAssign(a, r[0:32])) - e.append(ExprAssign(b, r[32:64])) - # r15/IRDst not allowed as output - 
return e, [] - -def b(ir, instr, a): - e = [] - e.append(ExprAssign(PC, a)) - e.append(ExprAssign(ir.IRDst, a)) - return e, [] - - -def bl(ir, instr, a): - e = [] - l = ExprInt(instr.offset + instr.l, 32) - e.append(ExprAssign(PC, a)) - e.append(ExprAssign(ir.IRDst, a)) - e.append(ExprAssign(LR, l)) - return e, [] - - -def bx(ir, instr, a): - e = [] - e.append(ExprAssign(PC, a)) - e.append(ExprAssign(ir.IRDst, a)) - return e, [] - - -def blx(ir, instr, a): - e = [] - l = ExprInt(instr.offset + instr.l, 32) - e.append(ExprAssign(PC, a)) - e.append(ExprAssign(ir.IRDst, a)) - e.append(ExprAssign(LR, l)) - return e, [] - - -def st_ld_r(ir, instr, a, a2, b, store=False, size=32, s_ext=False, z_ext=False): - e = [] - wb = False - b = b.copy() - postinc = False - b = b.ptr - if isinstance(b, ExprOp): - if b.op == "wback": - wb = True - b = b.args[0] - if b.op == "postinc": - postinc = True - if isinstance(b, ExprOp) and b.op in ["postinc", 'preinc']: - # XXX TODO CHECK - base, off = b.args[0], b.args[1] # ExprInt(size/8, 32) - else: - base, off = b, ExprInt(0, 32) - if postinc: - ad = base - else: - ad = base + off - - # PC base lookup uses PC 4 byte alignment - ad = ad.replace_expr({PC: PC & ExprInt(0xFFFFFFFC, 32)}) - - dmem = False - if size in [8, 16]: - if store: - a = a[:size] - m = ExprMem(ad, size=size) - elif s_ext: - m = ExprMem(ad, size=size).signExtend(a.size) - elif z_ext: - m = ExprMem(ad, size=size).zeroExtend(a.size) - else: - raise ValueError('unhandled case') - elif size == 32: - m = ExprMem(ad, size=size) - elif size == 64: - assert a2 is not None - m = ExprMem(ad, size=32) - dmem = True - size = 32 - else: - raise ValueError('the size DOES matter') - dst = None - - if store: - e.append(ExprAssign(m, a)) - if dmem: - e.append(ExprAssign(ExprMem(ad + ExprInt(4, 32), size=size), a2)) - else: - if a == PC: - dst = PC - e.append(ExprAssign(ir.IRDst, m)) - e.append(ExprAssign(a, m)) - if dmem: - e.append(ExprAssign(a2, ExprMem(ad + ExprInt(4, 32), 
size=size))) - - # XXX TODO check multiple write cause by wb - if wb or postinc: - e.append(ExprAssign(base, base + off)) - return e, [] - - -def ldr(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=False) - - -def ldrd(ir, instr, a, b, c=None): - if c is None: - a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] - else: - a2 = b - b = c - return st_ld_r(ir, instr, a, a2, b, store=False, size=64) - - -def l_str(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=True) - - -def l_strd(ir, instr, a, b, c=None): - if c is None: - a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] - else: - a2 = b - b = c - return st_ld_r(ir, instr, a, a2, b, store=True, size=64) - -def ldrb(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=False, size=8, z_ext=True) - -def ldrsb(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=False, size=8, s_ext=True, z_ext=False) - -def strb(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=True, size=8) - -def ldrh(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=False, size=16, z_ext=True) - - -def strh(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=True, size=16, z_ext=True) - - -def ldrsh(ir, instr, a, b): - return st_ld_r(ir, instr, a, None, b, store=False, size=16, s_ext=True, z_ext=False) - - -def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): - e = [] - wb = False - dst = None - if isinstance(a, ExprOp) and a.op == 'wback': - wb = True - a = a.args[0] - if isinstance(b, ExprOp) and b.op == 'sbit': - b = b.args[0] - regs = b.args - base = a - if updown: - step = 4 - else: - step = -4 - regs = regs[::-1] - if postinc: - pass - else: - base += ExprInt(step, 32) - for i, r in enumerate(regs): - ad = base + ExprInt(i * step, 32) - if store: - e.append(ExprAssign(ExprMem(ad, 32), r)) - else: - e.append(ExprAssign(r, ExprMem(ad, 32))) - if r == PC: - e.append(ExprAssign(ir.IRDst, 
ExprMem(ad, 32))) - # XXX TODO check multiple write cause by wb - if wb: - if postinc: - e.append(ExprAssign(a, base + ExprInt(len(regs) * step, 32))) - else: - e.append(ExprAssign(a, base + ExprInt((len(regs) - 1) * step, 32))) - if store: - pass - else: - assert(isinstance(b, ExprOp) and b.op == "reglist") - - return e, [] - - -def ldmia(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=False, postinc=True, updown=True) - - -def ldmib(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=True) - - -def ldmda(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=False, postinc=True, updown=False) - - -def ldmdb(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False) - - -def stmia(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=True, postinc=True, updown=True) - - -def stmib(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=True, postinc=False, updown=True) - - -def stmda(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=True, postinc=True, updown=False) - - -def stmdb(ir, instr, a, b): - return st_ld_m(ir, instr, a, b, store=True, postinc=False, updown=False) - - -def svc(ir, instr, a): - e = [] - except_int = EXCEPT_INT_XX - e.append(ExprAssign(exception_flags, ExprInt(except_int, 32))) - e.append(ExprAssign(interrupt_num, a)) - return e, [] - - -def und(ir, instr, a, b): - # XXX TODO implement - e = [] - return e, [] - -# TODO XXX implement correct CF for shifters -def lsr(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b >> c - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def lsrs(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b >> c - e.append(ExprAssign(a, r)) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] - e += update_flag_nf(r) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - 
-def asr(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = ExprOp("a>>", b, c) - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def asrs(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = ExprOp("a>>", b, c) - e.append(ExprAssign(a, r)) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] - e += update_flag_nf(r) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def lsl(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b << c - e.append(ExprAssign(a, r)) - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def lsls(ir, instr, a, b, c=None): - e = [] - if c is None: - b, c = a, b - r = b << c - e.append(ExprAssign(a, r)) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] - e += update_flag_nf(r) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def rors(ir, instr, a, b): - e = [] - r = ExprOp(">>>", a, b) - e.append(ExprAssign(a, r)) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ', r))] - e += update_flag_nf(r) - - dst = get_dst(a) - if dst is not None: - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def push(ir, instr, a): - e = [] - regs = list(a.args) - for i in range(len(regs)): - r = SP + ExprInt(-4 * len(regs) + 4 * i, 32) - e.append(ExprAssign(ExprMem(r, 32), regs[i])) - r = SP + ExprInt(-4 * len(regs), 32) - e.append(ExprAssign(SP, r)) - return e, [] - - -def pop(ir, instr, a): - e = [] - regs = list(a.args) - dst = None - for i in range(len(regs)): - r = SP + ExprInt(4 * i, 32) - e.append(ExprAssign(regs[i], ExprMem(r, 32))) - if regs[i] == ir.pc: - dst = ExprMem(r, 32) - r = SP + ExprInt(4 * len(regs), 32) - e.append(ExprAssign(SP, r)) - if dst is not None: - e.append(ExprAssign(ir.IRDst, dst)) - return e, [] - - -def cbz(ir, instr, a, b): - e = [] - loc_next = ir.get_next_loc_key(instr) - 
loc_next_expr = ExprLoc(loc_next, 32) - e.append(ExprAssign(ir.IRDst, ExprCond(a, loc_next_expr, b))) - return e, [] - - -def cbnz(ir, instr, a, b): - e = [] - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 32) - e.append(ExprAssign(ir.IRDst, ExprCond(a, b, loc_next_expr))) - return e, [] - - -def uxtb(ir, instr, a, b): - e = [] - r = b[:8].zeroExtend(32) - e.append(ExprAssign(a, r)) - dst = None - if PC in a.get_r(): - dst = PC - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def uxth(ir, instr, a, b): - e = [] - r = b[:16].zeroExtend(32) - e.append(ExprAssign(a, r)) - dst = None - if PC in a.get_r(): - dst = PC - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def sxtb(ir, instr, a, b): - e = [] - r = b[:8].signExtend(32) - e.append(ExprAssign(a, r)) - dst = None - if PC in a.get_r(): - dst = PC - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def sxth(ir, instr, a, b): - e = [] - r = b[:16].signExtend(32) - e.append(ExprAssign(a, r)) - dst = None - if PC in a.get_r(): - dst = PC - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def ubfx(ir, instr, a, b, c, d): - e = [] - c = int(c) - d = int(d) - r = b[c:c+d].zeroExtend(32) - e.append(ExprAssign(a, r)) - dst = None - if PC in a.get_r(): - dst = PC - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - -def bfc(ir, instr, a, b, c): - e = [] - start = int(b) - stop = start + int(c) - out = [] - last = 0 - if start: - out.append(a[:start]) - last = start - if stop - start: - out.append(ExprInt(0, 32)[last:stop]) - last = stop - if last < 32: - out.append(a[last:]) - r = ExprCompose(*out) - e.append(ExprAssign(a, r)) - dst = None - if PC in a.get_r(): - dst = PC - e.append(ExprAssign(ir.IRDst, r)) - return e, [] - - -def pld(ir, instr, a): - e = [] - return e, [] - - -def pldw(ir, instr, a): - e = [] - return e, [] - - -def clz(ir, instr, a, b): - e = [] - e.append(ExprAssign(a, ExprOp('cntleadzeros', b))) - return e, [] - -def uxtab(ir, instr, a, b, c): - e = [] - 
e.append(ExprAssign(a, b + (c & ExprInt(0xff, 32)))) - return e, [] - - -def uxtah(ir, instr, a, b, c): - e = [] - e.append(ExprAssign(a, b + (c & ExprInt(0xffff, 32)))) - return e, [] - - -def bkpt(ir, instr, a): - e = [] - e.append(ExprAssign(exception_flags, ExprInt(EXCEPT_SOFT_BP, 32))) - e.append(ExprAssign(bp_num, a)) - return e, [] - - -def _extract_s16(arg, part): - if part == 'B': # bottom 16 bits - return arg[0:16] - elif part == 'T': # top 16 bits - return arg[16:32] - - -def smul(ir, instr, a, b, c): - e = [] - e.append(ExprAssign(a, _extract_s16(b, instr.name[4]).signExtend(32) * _extract_s16(c, instr.name[5]).signExtend(32))) - return e, [] - - -def smulw(ir, instr, a, b, c): - e = [] - prod = b.signExtend(48) * _extract_s16(c, instr.name[5]).signExtend(48) - e.append(ExprAssign(a, prod[16:48])) - return e, [] # signed most significant 32 bits of the 48-bit result - - -def tbb(ir, instr, a): - e = [] - dst = PC + ExprInt(2, 32) * a.zeroExtend(32) - e.append(ExprAssign(PC, dst)) - e.append(ExprAssign(ir.IRDst, dst)) - return e, [] - - -def tbh(ir, instr, a): - e = [] - dst = PC + ExprInt(2, 32) * a.zeroExtend(32) - e.append(ExprAssign(PC, dst)) - e.append(ExprAssign(ir.IRDst, dst)) - return e, [] - - -def smlabb(ir, instr, a, b, c, d): - e = [] - result = (b[:16].signExtend(32) * c[:16].signExtend(32)) + d - e.append(ExprAssign(a, result)) - return e, [] - - -def smlabt(ir, instr, a, b, c, d): - e = [] - result = (b[:16].signExtend(32) * c[16:32].signExtend(32)) + d - e.append(ExprAssign(a, result)) - return e, [] - - -def smlatb(ir, instr, a, b, c, d): - e = [] - result = (b[16:32].signExtend(32) * c[:16].signExtend(32)) + d - e.append(ExprAssign(a, result)) - return e, [] - - -def smlatt(ir, instr, a, b, c, d): - e = [] - result = (b[16:32].signExtend(32) * c[16:32].signExtend(32)) + d - e.append(ExprAssign(a, result)) - return e, [] - - -def uadd8(ir, instr, a, b, c): - e = [] - sums = [] - ges = [] - for i in range(0, 32, 8): - sums.append(b[i:i+8] 
+ c[i:i+8]) - ges.append((b[i:i+8].zeroExtend(9) + c[i:i+8].zeroExtend(9))[8:9]) - - e.append(ExprAssign(a, ExprCompose(*sums))) - - for i, value in enumerate(ges): - e.append(ExprAssign(ge_regs[i], value)) - return e, [] - - -def sel(ir, instr, a, b, c): - e = [] - cond = nf ^ of ^ ExprInt(1, 1) - parts = [] - for i in range(4): - parts.append(ExprCond(ge_regs[i], b[i*8:(i+1)*8], c[i*8:(i+1)*8])) - result = ExprCompose(*parts) - e.append(ExprAssign(a, result)) - return e, [] - - -def rev(ir, instr, a, b): - e = [] - result = ExprCompose(b[24:32], b[16:24], b[8:16], b[:8]) - e.append(ExprAssign(a, result)) - return e, [] - - -def rev16(ir, instr, a, b): - e = [] - result = ExprCompose(b[8:16], b[:8], b[24:32], b[16:24]) - e.append(ExprAssign(a, result)) - return e, [] - - -def nop(ir, instr): - e = [] - return e, [] - - -def dsb(ir, instr, a): - # XXX TODO - e = [] - return e, [] - - -def cpsie(ir, instr, a): - # XXX TODO - e = [] - return e, [] - - -def cpsid(ir, instr, a): - # XXX TODO - e = [] - return e, [] - - -def wfe(ir, instr): - # XXX TODO - e = [] - return e, [] - - -def wfi(ir, instr): - # XXX TODO - e = [] - return e, [] - -def adr(ir, instr, arg1, arg2): - e = [] - e.append(ExprAssign(arg1, (PC & ExprInt(0xfffffffc, 32)) + arg2)) - return e, [] - -COND_EQ = 0 -COND_NE = 1 -COND_CS = 2 -COND_CC = 3 -COND_MI = 4 -COND_PL = 5 -COND_VS = 6 -COND_VC = 7 -COND_HI = 8 -COND_LS = 9 -COND_GE = 10 -COND_LT = 11 -COND_GT = 12 -COND_LE = 13 -COND_AL = 14 -COND_NV = 15 - -cond_dct = { - COND_EQ: "EQ", - COND_NE: "NE", - COND_CS: "CS", - COND_CC: "CC", - COND_MI: "MI", - COND_PL: "PL", - COND_VS: "VS", - COND_VC: "VC", - COND_HI: "HI", - COND_LS: "LS", - COND_GE: "GE", - COND_LT: "LT", - COND_GT: "GT", - COND_LE: "LE", - COND_AL: "AL", - # COND_NV: "NV", -} - -cond_dct_inv = dict((name, num) for num, name in viewitems(cond_dct)) - - -""" -Code Meaning (for cmp or subs) Flags Tested -eq Equal. Z==1 -ne Not equal. Z==0 -cs or hs Unsigned higher or same (or carry set). 
C==1 -cc or lo Unsigned lower (or carry clear). C==0 -mi Negative. The mnemonic stands for "minus". N==1 -pl Positive or zero. The mnemonic stands for "plus". N==0 -vs Signed overflow. The mnemonic stands for "V set". V==1 -vc No signed overflow. The mnemonic stands for "V clear". V==0 -hi Unsigned higher. (C==1) && (Z==0) -ls Unsigned lower or same. (C==0) || (Z==1) -ge Signed greater than or equal. N==V -lt Signed less than. N!=V -gt Signed greater than. (Z==0) && (N==V) -le Signed less than or equal. (Z==1) || (N!=V) -al (or omitted) Always executed. None tested. -""" - -tab_cond = {COND_EQ: ExprOp("CC_EQ", zf), - COND_NE: ExprOp("CC_NE", zf), - COND_CS: ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), # inv cf - COND_CC: ExprOp("CC_U<", cf ^ ExprInt(1, 1)), # inv cf - COND_MI: ExprOp("CC_NEG", nf), - COND_PL: ExprOp("CC_POS", nf), - COND_VS: ExprOp("CC_sOVR", of), - COND_VC: ExprOp("CC_sNOOVR", of), - COND_HI: ExprOp("CC_U>", cf ^ ExprInt(1, 1), zf), # inv cf - COND_LS: ExprOp("CC_U<=", cf ^ ExprInt(1, 1), zf), # inv cf - COND_GE: ExprOp("CC_S>=", nf, of), - COND_LT: ExprOp("CC_S<", nf, of), - COND_GT: ExprOp("CC_S>", nf, of, zf), - COND_LE: ExprOp("CC_S<=", nf, of, zf), - } - - - - - -def is_pc_written(ir, instr_ir): - all_pc = viewvalues(ir.mn.pc) - for ir in instr_ir: - if ir.dst in all_pc: - return True, ir.dst - return False, None - - -def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): - if cond == COND_AL: - return instr_ir, extra_ir - if not cond in tab_cond: - raise ValueError('unknown condition %r' % cond) - cond = tab_cond[cond] - - - - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 32) - loc_do = ir.loc_db.add_location() - loc_do_expr = ExprLoc(loc_do, 32) - - dst_cond = ExprCond(cond, loc_do_expr, loc_next_expr) - assert(isinstance(instr_ir, list)) - - has_irdst = False - for e in instr_ir: - if e.dst == ir.IRDst: - has_irdst = True - break - if not has_irdst: - instr_ir.append(ExprAssign(ir.IRDst, loc_next_expr)) - e_do = 
IRBlock(loc_do, [AssignBlock(instr_ir, instr)]) - e = [ExprAssign(ir.IRDst, dst_cond)] - return e, [e_do] + extra_ir - -mnemo_func = {} -mnemo_func_cond = {} -mnemo_condm0 = {'add': add, - 'sub': sub, - 'eor': eor, - 'and': l_and, - 'rsb': rsb, - 'adc': adc, - 'sbc': sbc, - 'rsc': rsc, - - 'tst': tst, - 'teq': teq, - 'cmp': l_cmp, - 'cmn': cmn, - 'orr': orr, - 'mov': mov, - 'movt': movt, - 'bic': bic, - 'mvn': mvn, - 'neg': neg, - - 'sdiv': sdiv, - 'udiv': udiv, - - 'mul': mul, - 'umull': umull, - 'umlal': umlal, - 'smull': smull, - 'smlal': smlal, - 'mla': mla, - 'ldr': ldr, - 'ldrd': ldrd, - 'ldrsb': ldrsb, - 'str': l_str, - 'strd': l_strd, - 'b': b, - 'bl': bl, - 'svc': svc, - 'und': und, - 'bx': bx, - 'ldrh': ldrh, - 'strh': strh, - 'ldrsh': ldrsh, - 'ldsh': ldrsh, - 'uxtb': uxtb, - 'uxth': uxth, - 'sxtb': sxtb, - 'sxth': sxth, - 'ubfx': ubfx, - 'bfc': bfc, - 'rev': rev, - 'rev16': rev16, - 'clz': clz, - 'uxtab': uxtab, - 'uxtah': uxtah, - 'bkpt': bkpt, - 'smulbb': smul, - 'smulbt': smul, - 'smultb': smul, - 'smultt': smul, - 'smulwt': smulw, - 'smulwb': smulw, - } - -mnemo_condm1 = {'adds': add, - 'subs': subs, - 'eors': eors, - 'ands': l_and, - 'rsbs': rsbs, - 'adcs': adc, - 'sbcs': sbcs, - 'rscs': rscs, - - 'orrs': orrs, - 'movs': movs, - 'bics': bics, - 'mvns': mvns, - - 'mrs': mrs, - 'msr': msr, - - 'negs': negs, - - 'muls': muls, - 'mls': mls, - 'mlas': mlas, - 'blx': blx, - - 'ldrb': ldrb, - 'ldsb': ldrsb, - 'strb': strb, - } - -mnemo_condm2 = {'ldmia': ldmia, - 'ldmib': ldmib, - 'ldmda': ldmda, - 'ldmdb': ldmdb, - - 'ldmfa': ldmda, - 'ldmfd': ldmia, - 'ldmea': ldmdb, - 'ldmed': ldmib, # XXX - - - 'stmia': stmia, - 'stmib': stmib, - 'stmda': stmda, - 'stmdb': stmdb, - - 'stmfa': stmib, - 'stmed': stmda, - 'stmfd': stmdb, - 'stmea': stmia, - } - - -mnemo_nocond = {'lsr': lsr, - 'lsrs': lsrs, - 'lsl': lsl, - 'lsls': lsls, - 'rors': rors, - 'push': push, - 'pop': pop, - 'asr': asr, - 'asrs': asrs, - 'cbz': cbz, - 'cbnz': cbnz, - 'pld': pld, - 'pldw': pldw, 
- 'tbb': tbb, - 'tbh': tbh, - 'nop': nop, - 'dsb': dsb, - 'cpsie': cpsie, - 'cpsid': cpsid, - 'wfe': wfe, - 'wfi': wfi, - 'adr': adr, - 'orn': orn, - 'smlabb': smlabb, - 'smlabt': smlabt, - 'smlatb': smlatb, - 'smlatt': smlatt, - 'uadd8': uadd8, - 'sel': sel, - } - -mn_cond_x = [mnemo_condm0, - mnemo_condm1, - mnemo_condm2] - -for index, mn_base in enumerate(mn_cond_x): - for mn, mf in viewitems(mn_base): - for cond, cn in viewitems(cond_dct): - if cond == COND_AL: - cn = "" - cn = cn.lower() - if index == 0: - mn_mod = mn + cn - else: - mn_mod = mn[:-index] + cn + mn[-index:] - # print mn_mod - mnemo_func_cond[mn_mod] = cond, mf - -for name, mf in viewitems(mnemo_nocond): - mnemo_func_cond[name] = COND_AL, mf - - -def split_expr_dst(ir, instr_ir): - out = [] - dst = None - for i in instr_ir: - if i.dst == ir.pc: - out.append(i) - dst = ir.pc # i.src - else: - out.append(i) - return out, dst - - -def get_mnemo_expr(ir, instr, *args): - if not instr.name.lower() in mnemo_func_cond: - raise ValueError('unknown mnemo %s' % instr) - cond, mf = mnemo_func_cond[instr.name.lower()] - instr_ir, extra_ir = mf(ir, instr, *args) - instr, extra_ir = add_condition_expr(ir, instr, cond, instr_ir, extra_ir) - return instr, extra_ir - -get_arm_instr_expr = get_mnemo_expr - - -class arminfo(object): - mode = "arm" - # offset - - -class ir_arml(IntermediateRepresentation): - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_arm, "l", loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 32) - self.addrsize = 32 - - - - def mod_pc(self, instr, instr_ir, extra_ir): - # fix PC (+8 for arm) - pc_fixed = {self.pc: ExprInt(instr.offset + 8, 32)} - - for i, expr in enumerate(instr_ir): - dst, src = expr.dst, expr.src - if dst != self.pc: - dst = dst.replace_expr(pc_fixed) - src = src.replace_expr(pc_fixed) - instr_ir[i] = ExprAssign(dst, src) - - for idx, irblock in enumerate(extra_ir): - extra_ir[idx] = irblock.modify_exprs(lambda expr: 
expr.replace_expr(pc_fixed) \ - if expr != self.pc else expr, - lambda expr: expr.replace_expr(pc_fixed)) - - def get_ir(self, instr): - args = instr.args - # ir = get_mnemo_expr(self, self.name.lower(), *args) - if len(args) and isinstance(args[-1], ExprOp): - if args[-1].op == 'rrx': - args[-1] = ExprCompose(args[-1].args[0][1:], cf) - elif (args[-1].op in ['<<', '>>', '<>', '<<<', '>>>'] and - isinstance(args[-1].args[-1], ExprId)): - args[-1] = ExprOp(args[-1].op, - args[-1].args[0], - args[-1].args[-1][:8].zeroExtend(32)) - instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) - - self.mod_pc(instr, instr_ir, extra_ir) - return instr_ir, extra_ir - - def parse_itt(self, instr): - name = instr.name - assert name.startswith('IT') - name = name[1:] - out = [] - for hint in name: - if hint == 'T': - out.append(0) - elif hint == "E": - out.append(1) - else: - raise ValueError("IT name invalid %s" % instr) - return out, instr.args[0] - - def do_it_block(self, loc, index, block, assignments, gen_pc_updt): - instr = block.lines[index] - it_hints, it_cond = self.parse_itt(instr) - cond_num = cond_dct_inv[it_cond.name] - cond_eq = tab_cond[cond_num] - - if not index + len(it_hints) <= len(block.lines): - raise NotImplementedError("Split IT block non supported yet") - - ir_blocks_all = [] - - # Gen dummy irblock for IT instr - loc_next = self.get_next_loc_key(instr) - dst = ExprAssign(self.IRDst, ExprLoc(loc_next, 32)) - dst_blk = AssignBlock([dst], instr) - assignments.append(dst_blk) - irblock = IRBlock(loc, assignments) - ir_blocks_all.append([irblock]) - - loc = loc_next - assignments = [] - for hint in it_hints: - irblocks = [] - index += 1 - instr = block.lines[index] - - # Add conditionnal jump to current irblock - loc_do = self.loc_db.add_location() - loc_next = self.get_next_loc_key(instr) - - if hint: - local_cond = ~cond_eq - else: - local_cond = cond_eq - dst = ExprAssign(self.IRDst, ExprCond(local_cond, ExprLoc(loc_do, 32), ExprLoc(loc_next, 32))) - 
dst_blk = AssignBlock([dst], instr) - assignments.append(dst_blk) - irblock = IRBlock(loc, assignments) - - irblocks.append(irblock) - - it_instr_irblocks = [] - assignments = [] - loc = loc_do - - split = self.add_instr_to_current_state( - instr, block, assignments, - it_instr_irblocks, gen_pc_updt - ) - if split: - raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) - - if it_instr_irblocks: - assert len(it_instr_irblocks) == 1 - it_instr_irblocks = it_instr_irblocks.pop() - # Remove flags assignment if instr != [CMP, CMN, TST] - if instr.name not in ["CMP", "CMN", "TST"]: - # Fix assignments - out = [] - for assignment in assignments: - assignment = AssignBlock( - { - dst: src for (dst, src) in viewitems(assignment) - if dst not in [zf, nf, of, cf] - }, - assignment.instr - ) - out.append(assignment) - assignments = out - # Fix extra irblocksx - new_irblocks = [] - for irblock in it_instr_irblocks: - out = [] - for tmp_assignment in irblock: - assignment = AssignBlock( - { - dst: src for (dst, src) in viewitems(assignment) - if dst not in [zf, nf, of, cf] - }, - assignment.instr - ) - out.append(assignment) - new_irblock = IRBlock(irblock.loc_key, out) - new_irblocks.append(new_irblock) - it_instr_irblocks = new_irblocks - - irblocks += it_instr_irblocks - dst = ExprAssign(self.IRDst, ExprLoc(loc_next, 32)) - dst_blk = AssignBlock([dst], instr) - assignments.append(dst_blk) - irblock = IRBlock(loc, assignments) - irblocks.append(irblock) - loc = loc_next - assignments = [] - ir_blocks_all.append(irblocks) - return index, ir_blocks_all - - def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): - """ - Add a native block to the current IR - @block: native assembly block - @gen_pc_updt: insert PC update effects between instructions - """ - - it_hints = None - it_cond = None - label = block.loc_key - assignments = [] - ir_blocks_all = [] - index = -1 - while index + 1 < len(block.lines): - index += 1 - instr = block.lines[index] - if 
label is None: - assignments = [] - label = self.get_loc_key_for_instr(instr) - if instr.name.startswith("IT"): - index, irblocks_it = self.do_it_block(label, index, block, assignments, gen_pc_updt) - for irblocks in irblocks_it: - ir_blocks_all += irblocks - label = None - continue - - split = self.add_instr_to_current_state( - instr, block, assignments, - ir_blocks_all, gen_pc_updt - ) - if split: - ir_blocks_all.append(IRBlock(label, assignments)) - label = None - assignments = [] - if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) - - new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) - for irblock in new_ir_blocks_all: - ircfg.add_irblock(irblock) - return new_ir_blocks_all - - - -class ir_armb(ir_arml): - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_arm, "b", loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 32) - self.addrsize = 32 - - -class ir_armtl(ir_arml): - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_armt, "l", loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 32) - self.addrsize = 32 - - - def mod_pc(self, instr, instr_ir, extra_ir): - # fix PC (+4 for thumb) - pc_fixed = {self.pc: ExprInt(instr.offset + 4, 32)} - - for i, expr in enumerate(instr_ir): - dst, src = expr.dst, expr.src - if dst != self.pc: - dst = dst.replace_expr(pc_fixed) - src = src.replace_expr(pc_fixed) - instr_ir[i] = ExprAssign(dst, src) - - for idx, irblock in enumerate(extra_ir): - extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ - if expr != self.pc else expr, - lambda expr: expr.replace_expr(pc_fixed)) - - -class ir_armtb(ir_armtl): - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_armt, "b", loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 32) - self.addrsize = 32 - diff --git a/miasm2/arch/mep/__init__.py 
b/miasm2/arch/mep/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/miasm2/arch/mep/arch.py b/miasm2/arch/mep/arch.py deleted file mode 100644 index 2266b596..00000000 --- a/miasm2/arch/mep/arch.py +++ /dev/null @@ -1,2052 +0,0 @@ -# Toshiba MeP-c4 - miasm architecture definition -# Guillaume Valadon - -from builtins import range -from miasm2.core.cpu import * -from miasm2.core.utils import Disasm_Exception -from miasm2.expression.expression import Expr, ExprId, ExprInt, ExprLoc, \ - ExprMem, ExprOp -from miasm2.core.asm_ast import AstId, AstMem - -from miasm2.arch.mep.regs import * -import miasm2.arch.mep.regs as mep_regs_module # will be used to set mn_mep.regs - - -# Note: pyparsing is used to alter the way special operands are parsed -from pyparsing import Literal, Group, Word, hexnums - - -# These definitions will help parsing dereferencing instructions (i.e. that uses -# parenthesis) with pyparsing -LPARENTHESIS = Literal("(") -RPARENTHESIS = Literal(")") -PLUSSIGN = Literal("+") -HEX_INTEGER = str_int_pos | str_int_neg - - -def ExprInt2SignedString(expr, pos_fmt="%d", neg_fmt="%d", size=None, offset=0): - """Return the signed string corresponding to an ExprInt - - Note: this function is only useful to mimic objdump output""" - - # Apply a mask to the integer - if size is None: - mask_length = expr.size - else: - mask_length = size - mask = (1 << mask_length) - 1 - value = int(expr.arg) & mask - - # Return a signed integer if necessary - if (value >> mask_length - 1) == 1: - value = offset - ((value ^ mask) + 1) - if value < 0: - return "-" + neg_fmt % -value - else: - value += offset - - return pos_fmt % value - - -class instruction_mep(instruction): - """Generic MeP-c4 instruction - - Notes: - - this object is used to build internal miasm instructions based - on mnemonics - - it must be implemented ! 
- """ - - # Default delay slot - # Note: - # - mandatory for the miasm2 Machine - delayslot = 0 - - @staticmethod - def arg2str(expr, pos=None, loc_db=None): - """Convert mnemonics arguments into readable strings according to the - MeP-c4 architecture manual and their internal types - - Notes: - - it must be implemented ! However, a simple 'return str(expr)' - could do the trick. - - it is used to mimic objdump output - - Args: - expr: argument as a miasm2 expression - pos: position index in the arguments list - """ - - if isinstance(expr, ExprId) or isinstance(expr, ExprInt): - return str(expr) - - elif isinstance(expr, ExprLoc): - if loc_db is not None: - return loc_db.pretty_str(expr.loc_key) - else: - return str(expr) - - elif isinstance(expr, ExprMem) and (isinstance(expr.ptr, ExprId) or isinstance(expr.ptr, ExprInt)): - return "(%s)" % expr.ptr - - elif isinstance(expr, ExprMem) and isinstance(expr.ptr, ExprOp): - return "0x%X(%s)" % (expr.ptr.args[1].arg, expr.ptr.args[0]) - - # Raise an exception if the expression type was not processed - message = "instruction_mep.arg2str(): don't know what \ - to do with a '%s' instance." % type(expr) - raise Disasm_Exception(message) - - def __str__(self): - """Return the mnemonic as a string. - - Note: - - it is not mandatory as the instruction class already implement - it. It used to get rid of the padding between the opcode and the - arguments. 
- - most of this code is copied from miasm2/core/cpu.py - """ - - o = "%s" % self.name - - if self.name == "SSARB": - # The first operand is displayed in decimal, not in hex - o += " %d" % self.args[0].arg - o += self.arg2str(self.args[1]) - - elif self.name in ["MOV", "ADD"] and isinstance(self.args[1], ExprInt): - # The second operand is displayed in decimal, not in hex - o += " " + self.arg2str(self.args[0]) - o += ", %s" % ExprInt2SignedString(self.args[1].arg) - - elif "CPI" in self.name: - # The second operand ends with the '+' sign - o += " " + self.arg2str(self.args[0]) - deref_reg_str = self.arg2str(self.args[1]) - o += ", %s+)" % deref_reg_str[:-1] # GV: looks ugly - - elif self.name[0] in ["S", "L"] and self.name[-3:] in ["CPA", "PM0", "PM1"]: - # The second operand ends with the '+' sign - o += " " + self.arg2str(self.args[0]) - deref_reg_str = self.arg2str(self.args[1]) - o += ", %s+)" % deref_reg_str[:-1] # GV: looks ugly - # The third operand is displayed in decimal, not in hex - o += ", %s" % ExprInt2SignedString(self.args[2].arg) - - elif len(self.args) == 2 and self.name in ["SB", "SH", "LBU", "LB", "LH", "LW"] and \ - isinstance(self.args[1], ExprMem) and isinstance(self.args[1].ptr, ExprOp): # Major Opcodes #12 - # The second operand is an offset to a register - o += " " + self.arg2str(self.args[0]) - o += ", %s" % ExprInt2SignedString(self.args[1].ptr.args[1], "0x%X") - o += "(%s)" % self.arg2str(self.args[1].ptr.args[0]) - - elif len(self.args) == 2 and self.name in ["SWCP", "LWCP", "SMCP", "LMCP"] \ - and isinstance(self.args[1], ExprMem) and isinstance(self.args[1].ptr, ExprOp): # Major Opcodes #12 - # The second operand is an offset to a register - o += " " + self.arg2str(self.args[0]) - o += ", %s" % ExprInt2SignedString(self.args[1].ptr.args[1]) - o += "(%s)" % self.arg2str(self.args[1].ptr.args[0]) - - elif self.name == "SLL" and isinstance(self.args[1], ExprInt): # Major Opcodes #6 - # The second operand is displayed in hex, not in 
decimal - o += " " + self.arg2str(self.args[0]) - o += ", 0x%X" % self.args[1].arg - - elif self.name in ["ADD3", "SLT3"] and isinstance(self.args[2], ExprInt): - o += " %s" % self.arg2str(self.args[0]) - o += ", %s" % self.arg2str(self.args[1]) - # The third operand is displayed in decimal, not in hex - o += ", %s" % ExprInt2SignedString(self.args[2].arg, pos_fmt="0x%X") - - elif self.name == "(RI)": - return o - - else: - args = [] - if self.args: - o += " " - for i, arg in enumerate(self.args): - if not isinstance(arg, Expr): - raise ValueError('zarb arg type') - x = self.arg2str(arg, pos=i) - args.append(x) - o += self.gen_args(args) - - return o - - def breakflow(self): - """Instructions that stop a basic bloc.""" - - if self.name in ["BRA", "BEQZ", "BNEZ", "BEQI", "BNEI", "BLTI", "BGEI", "BEQ", "BNE", "BSR"]: - return True - - if self.name in ["JMP", "JSR", "RET"]: - return True - - if self.name in ["RETI", "HALT", "SLEEP"]: - return True - - return False - - def splitflow(self): - """Instructions that splits a basic bloc, i.e. the CPU can go somewhere else.""" - - if self.name in ["BEQZ", "BNEZ", "BEQI", "BNEI", "BLTI", "BGEI", "BEQ", "BNE", "BSR"]: - return True - - return False - - def dstflow(self): - """Instructions that explicitly provide the destination.""" - - if self.name in ["BRA", "BEQZ", "BNEZ", "BEQI", "BNEI", "BLTI", "BGEI", "BEQ", "BNE", "BSR"]: - return True - - if self.name in ["JMP"]: - return True - - return False - - def dstflow2label(self, loc_db): - """Set the label for the current destination. 
- - Note: it is used at disassembly""" - - if self.name == "JMP" and isinstance(self.args[0], ExprId): - # 'JMP RM' does not provide the destination - return - - # Compute the correct address - num = self.get_dst_num() - addr = self.args[num].arg - if not self.name == "JMP": - addr += self.offset - - # Get a new label at the address - label = loc_db.get_or_create_offset_location(addr) - - # Assign the label to the correct instruction argument - self.args[num] = ExprLoc(label, self.args[num].size) - - def get_dst_num(self): - """Get the index of the argument that points to the instruction destination.""" - - if self.name[-1] == "Z": - num = 1 - elif self.name in ["BEQI", "BNEI", "BLTI", "BGEI", "BEQ", "BNE"]: - num = 2 - else: - num = 0 - - return num - - def getdstflow(self, loc_db): - """Get the argument that points to the instruction destination.""" - - num = self.get_dst_num() - return [self.args[num]] - - def is_subcall(self): - """Instructions used to call sub functions.""" - - return self.name in ["JSR", "BSR"] - - def fixDstOffset(self): - """Fix/correct the instruction immediate according to the current offset - - Note: - it is used at assembly - - code inspired by miasm2/arch/mips32/arch.py""" - - if self.name == "JMP" and isinstance(self.args[0], ExprInt): - # 'JMP IMMEDIATE' does not need to be fixed - return - - # Get the argument that needs to be fixed - if not len(self.args): - return - num = self.get_dst_num() - expr = self.args[num] - - # Check that the argument can be fixed - if self.offset is None: - raise ValueError("Symbol not resolved %s" % self.l) - if not isinstance(expr, ExprInt): - return - - # Adjust the immediate according to the current instruction offset - off = expr.arg - self.offset - if int(off % 2): - raise ValueError("Strange offset! 
%r" % off) - self.args[num] = ExprInt(off, 32) - - -class mep_additional_info(object): - """Additional MeP instructions information - """ - - def __init__(self): - self.except_on_instr = False - - -class mn_mep(cls_mn): - """Toshiba MeP-c4 disassembler & assembler - """ - - # Define variables that stores information used to disassemble & assemble - # Notes: - theses variables are mandatory - # - they could be moved to the cls_mn class - - num = 0 # holds the number of mnemonics - - all_mn = list() # list of mnenomnics, converted to metamn objects - - all_mn_mode = defaultdict(list) # mneomnics, converted to metamn objects - # Note: - # - the key is the mode # GV: what is it ? - # - the data is a list of mnemonics - - all_mn_name = defaultdict(list) # mnenomnics strings - # Note: - # - the key is the mnemonic string - # - the data is the corresponding - # metamn object - - all_mn_inst = defaultdict(list) # mnemonics objects - # Note: - # - the key is the mnemonic Python class - # - the data is an instantiated object - - bintree = dict() # Variable storing internal values used to guess a - # mnemonic during disassembly - - # Defines the instruction set that will be used - instruction = instruction_mep - - # Python module that stores registers information - regs = mep_regs_module - - # Default delay slot - # Note: - # - mandatory for the miasm2 Machine - delayslot = 0 - - # Architecture name - name = "mep" - - # PC name depending on architecture attributes (here, l or b) - pc = {'l': PC, 'b': PC} - - def additional_info(self): - """Define instruction side effects # GV: not fully understood yet - - When used, it must return an object that implements specific - variables, such as except_on_instr. - - Notes: - - it must be implemented ! 
- - it could be moved to the cls_mn class - """ - - return mep_additional_info() - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - """Ease populating internal variables used to disassemble & assemble, such - as self.all_mn_mode, self.all_mn_name and self.all_mn_inst - - Notes: - - it must be implemented ! - - it could be moved to the cls_mn class. All miasm architectures - use the same code - - Args: - cls: ? - sublcs: - name: mnemonic name - bases: ? - dct: ? - fields: ? - - Returns: - a list of ? - - """ - - dct["mode"] = None - return [(subcls, name, bases, dct, fields)] - - @classmethod - def getmn(cls, name): - """Get the mnemonic name - - Notes: - - it must be implemented ! - - it could be moved to the cls_mn class. Most miasm architectures - use the same code - - Args: - cls: the mnemonic class - name: the mnemonic string - """ - - return name.upper() - - @classmethod - def getpc(cls, attrib=None): - """"Return the ExprId that represents the Program Counter. - - Notes: - - mandatory for the symbolic execution - - PC is defined in regs.py - - Args: - attrib: architecture dependent attributes (here, l or b) - """ - - return PC - - @classmethod - def getsp(cls, attrib=None): - """"Return the ExprId that represents the Stack Pointer. 
- - Notes: - - mandatory for the symbolic execution - - SP is defined in regs.py - - Args: - attrib: architecture dependent attributes (here, l or b) - """ - - return SP - - @classmethod - def getbits(cls, bitstream, attrib, start, n): - """Return an integer of n bits at the 'start' offset - - Note: code from miasm2/arch/mips32/arch.py - """ - - # Return zero if zero bits are requested - if not n: - return 0 - - o = 0 # the returned value - while n: - # Get a byte, the offset is adjusted according to the endianness - offset = start // 8 # the offset in bytes - n_offset = cls.endian_offset(attrib, offset) # the adjusted offset - c = cls.getbytes(bitstream, n_offset, 1) - if not c: - raise IOError - - # Extract the bits value - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - - return o - - @classmethod - def endian_offset(cls, attrib, offset): - """Adjust the byte offset according to the endianness""" - - if attrib == "l": # Little Endian - if offset % 2: - return offset - 1 - else: - return offset + 1 - - elif attrib == "b": # Big Endian - return offset - - else: - raise NotImplementedError("Bad MeP endianness") - - def value(self, mode): - """Adjust the assembled instruction based on the endianness - - Note: code inspired by miasm2/arch/mips32/arch.py - """ - - # Get the candidated - candidates = super(mn_mep, self).value(mode) - - if mode == "l": - # Invert bytes per 16-bits - for i in range(len(candidates)): - tmp = candidates[i][1] + candidates[i][0] - if len(candidates[i]) == 4: - tmp += candidates[i][3] + candidates[i][2] - candidates[i] = tmp - return candidates - - elif mode == "b": - return candidates - - else: - raise NotImplementedError("Bad MeP endianness (%s)" % mode) - - -def addop(name, fields, args=None, alias=False): - """Dynamically create the "name" object - - Notes: - - it could be moved to a generic function such as: - addop(name, fields, cls_mn, args=None, 
alias=False). - - most architectures use the same code - - Args: - name: the mnemonic name - fields: used to fill the object.__dict__'fields' attribute # GV: not understood yet - args: used to fill the object.__dict__'fields' attribute # GV: not understood yet - alias: used to fill the object.__dict__'fields' attribute # GV: not understood yet - """ - - namespace = {"fields": fields, "alias": alias} - - if args is not None: - namespace["args"] = args - - # Dynamically create the "name" object - type(name, (mn_mep,), namespace) - - -# Define specific operand parsers & converters - -def deref2expr(s, l, parse_results): - """Convert a parsed dereferenced register to an ExprMem""" - - # Only use the first results - parse_results = parse_results[0] - - if type(parse_results[0]) == AstInt and isinstance(parse_results[2], AstId): - return AstMem(parse_results[2] + parse_results[0], 32) # 1 == "(" and 3 == ")" - - elif type(parse_results[0]) == int and isinstance(parse_results[2], AstId): - return AstMem(parse_results[2] + AstOp('-', AstInt(-parse_results[0])), 32) # 1 == "(" and 3 == ")" - - else: - return AstMem(parse_results[1], 32) # 0 == "(" and 2 == ")" - - -deref_reg_parser = Group(LPARENTHESIS + gpr_infos.parser + RPARENTHESIS).setParseAction(deref2expr) -deref_inc_reg_parser = Group(LPARENTHESIS + gpr_infos.parser + PLUSSIGN + RPARENTHESIS).setParseAction(deref2expr) -abs24_deref_parser = Group(LPARENTHESIS + HEX_INTEGER + RPARENTHESIS).setParseAction(deref2expr) -offset_deref_reg_parser = Group(HEX_INTEGER + LPARENTHESIS + gpr_infos.parser + RPARENTHESIS).setParseAction(deref2expr) - -# Define registers decoders and encoders - -class mep_arg(m_arg): - def asm_ast_to_expr(self, arg, loc_db): - """Convert AST to expressions - - Note: - code inspired by miasm2/arch/mips32/arch.py""" - - if isinstance(arg, AstId): - if isinstance(arg.name, ExprId): - return arg.name - if isinstance(arg.name, str) and arg.name in gpr_names: - return None # GV: why? 
- loc_key = loc_db.get_or_create_name_location(arg.name.encode()) - return ExprLoc(loc_key, 32) - - elif isinstance(arg, AstMem): - addr = self.asm_ast_to_expr(arg.ptr, loc_db) - if addr is None: - return None - return ExprMem(addr, 32) - - elif isinstance(arg, AstInt): - return ExprInt(arg.value, 32) - - elif isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] - if None in args: - return None - return ExprOp(arg.op, *args) - - # Raise an exception if the argument was not processed - message = "mep_arg.asm_ast_to_expr(): don't know what \ - to do with a '%s' instance." % type(arg) - raise Exception(message) - -class mep_reg(reg_noarg, mep_arg): - """Generic Toshiba MeP-c4 register - - Note: - - the register size will be set using bs() - """ - reg_info = gpr_infos # the list of MeP-c4 registers defined in regs.py - parser = reg_info.parser # GV: not understood yet - - -class mep_deref_reg(mep_arg): - """Generic Toshiba MeP-c4 dereferenced register - - Note: - - the arg2str() method could be defined to change the output string - """ - parser = deref_reg_parser - - def decode(self, v): - """Transform the decoded value to a ExprMem(ExprId()) expression""" - r = gpr_infos.expr[v] # get the ExprId, i.e. the register expression - self.expr = ExprMem(r, 32) - return True - - def encode(self): - """Ensure that we have a ExprMem(ExprId()) expression, and return the - register value.""" - - if not isinstance(self.expr, ExprMem): - return False - if not isinstance(self.expr.ptr, ExprId): - return False - - # Get the ExprId index, i.e. its value - self.value = gpr_exprs.index(self.expr.ptr) - return True - - -class mep_reg_sp(mep_reg): - """Dummy Toshiba MeP-c4 register that represents SP. It is used in - instructions that implicitly use SP, such as ADD3. 
- """ - implicit_reg = SP - - def decode(self, v): - """Always return 'implicit_reg.""" - self.expr = self.implicit_reg - return True - - def encode(self): - """Do nothing""" - return True - - -class mep_reg_tp(mep_reg_sp): - """Dummy Toshiba MeP-c4 register that represents TP. - """ - implicit_reg = TP - - -class mep_deref_reg_offset(mep_arg): - """Toshiba MeP-c4 dereferenced register that represents SP, plus an - offset. - """ - parser = offset_deref_reg_parser - - def decode(self, v): - """Modify the decoded value using the previously decoded - register id. - """ - - # Apply the immediate mask - se = sign_ext(v & 0xFFFF, 16, 32) # GV: might not belong here - int_id = ExprInt(se, 32) - - # Get the register expression - reg_id = gpr_infos.expr[self.parent.reg04_deref.value] - - # Build the internal expression - self.expr = ExprMem(reg_id + int_id, 32) - - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in reg04_deref. - """ - - # Verify the expression - if not isinstance(self.expr, ExprMem): - return False - if not isinstance(self.expr.ptr, ExprOp): - return False - - # Get the integer and check the upper bound - v = int(self.expr.ptr.args[1].arg & 0xFFFF) - - # Encode the values - self.parent.reg04_deref.value = gpr_exprs.index(self.expr.ptr.args[0]) - self.value = v & 0xFFFF - return True - - -class mep_deref_sp_offset(mep_deref_reg): - """Dummy Toshiba MeP-c4 dereferenced register that represents SP, plus an - offset. - Note: it is as generic as possible to ease its use in different instructions - """ - implicit_reg = SP - parser = offset_deref_reg_parser - - def decode(self, v): - """Modify the decoded value using the previously decoded - immediate. 
- """ - - immediate = None - if getattr(self.parent, "imm7_align4", False): - # Apply the immediate mask - v = self.parent.imm7_align4.value & 0x1F - - # Shift value such as: - # imm7=iii_ii||00 - immediate = v << 2 - - elif getattr(self.parent, "imm7", False): - # Apply the immediate mask - immediate = self.parent.imm7.value & 0x7F - - elif getattr(self.parent, "disp7_align2", False): - # Apply the immediate mask - disp7_align2 = self.parent.disp7_align2.value & 0x3F - - # Shift value such as: - # disp7 = ddd_ddd||0 - immediate = disp7_align2 << 1 - - if immediate is not None: - self.expr = ExprMem(self.implicit_reg + ExprInt(immediate, 32), 32) - return True - else: - return False - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in a parent immediate. - """ - - # Verify the expression - if not isinstance(self.expr, ExprMem): - return False - if not isinstance(self.expr.ptr, ExprOp): - return False - if self.expr.ptr.args[0] != self.implicit_reg: - return False - - if getattr(self.parent, "imm7_align4", False): - - # Get the integer and check the upper bound - v = int(self.expr.ptr.args[1].arg) - if v > 0x80: - return False - - # Encode the value - self.parent.imm7_align4.value = v >> 2 - - return True - - elif getattr(self.parent, "imm7", False): - - # Get the integer and check the upper bound - v = int(self.expr.ptr.args[1].arg) - if v > 0x80: - return False - - # Encode the value - self.parent.imm7.value = v - - return True - - elif getattr(self.parent, "disp7_align2", False): - - # Get the integer and check the upper bound - v = int(self.expr.ptr.args[1].arg) - if v > 0x80: - return False - - # Encode the value - self.parent.disp7_align2.value = v >> 1 - - return True - - return False - - -class mep_deref_tp_offset(mep_deref_sp_offset): - """Dummy Toshiba MeP-c4 dereferenced register that represents TP, plus an - offset. 
- """ - implicit_reg = TP - - -class mep_copro_reg(reg_noarg, mep_arg): - """Generic Toshiba MeP-c4 coprocessor register - """ - reg_info = copro_gpr_infos # the list of MeP-c4 coprocessor registers defined in regs.py - parser = reg_info.parser # GV: not understood yet - - -class mep_copro_reg_split(mep_copro_reg): - """Generic Toshiba MeP-c4 coprocessor register encode into different fields - """ - - def decode(self, v): - """Modify the decoded value using the previously decoded imm4_noarg. - """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift values such as: - # CRn=NNnnnn - crn = (v << 4) + (self.parent.imm4.value & 0xF) - - # Build the internal expression - self.expr = ExprId("C%d" % crn, 32) - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in imm4_noarg. - """ - - if not isinstance(self.expr, ExprId): - return False - - # Get the register and check the upper bound - reg_name = self.expr.name - if reg_name[0] != "C": - return False - reg_value = copro_gpr_names.index(reg_name) - if reg_value > 0x3f: - return False - - # Encode the value into two parts - self.parent.imm4.value = (reg_value & 0xF) - self.value = (reg_value >> 4) & 0x3 - return True - - -class mep_deref_inc_reg(mep_deref_reg): - """Generic Toshiba MeP-c4 coprocess dereferenced & incremented register - """ - parser = deref_inc_reg_parser - - -# Immediate decoders and encoders - -class mep_int32_noarg(int32_noarg): - """Generic Toshiba MeP-c4 signed immediate - - Note: encode() is copied from int32_noarg.encode() and modified to allow - small (< 32 bits) signed immediate to be manipulated. 
- - """ - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr.arg) - # Note: the following lines were commented on purpose - #if sign_ext(v & self.lmask, self.l, self.intsize) != v: - # return False - v = self.encodeval(v & self.lmask) - self.value = v & self.lmask - return True - - -class mep_imm(imm_noarg, mep_arg): - """Generic Toshiba MeP-c4 immediate - - Note: - - the immediate size will be set using bs() - """ - parser = base_expr - - -class mep_imm6(mep_int32_noarg): - """Toshiba MeP-c4 signed 6 bits immediate.""" - parser = base_expr - intsize = 6 - intmask = (1 << intsize) - 1 - int2expr = lambda self, x: ExprInt(sign_ext(x, self.l, 32), 32) - - -class mep_imm8(mep_int32_noarg): - """Toshiba MeP-c4 signed 8 bits immediate.""" - parser = base_expr - intsize = 8 - intmask = (1 << intsize) - 1 - int2expr = lambda self, x: ExprInt(sign_ext(x, self.l, 32), 32) - - -class mep_imm16(mep_int32_noarg): - """Toshiba MeP-c4 16 bits immediate.""" - parser = base_expr - intsize = 16 - intmask = (1 << intsize) - 1 - int2expr = lambda self, x: ExprInt(x, 32) - - -class mep_imm16_signed(mep_int32_noarg): - """Toshiba MeP-c4 signed 16 bits immediate.""" - parser = base_expr - intsize = 16 - intmask = (1 << intsize) - 1 - int2expr = lambda self, x: ExprInt(sign_ext(x, self.l, 32), 32) - - -class mep_target24(mep_imm): - """Toshiba MeP-c4 target24 immediate, as used in JMP - """ - - def decode(self, v): - """Modify the decoded value using the previously decoded imm7. - """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift values such as: - # target24=tttt_tttt_tttt_tttt||TTT_TTTT||0 - target24 = (v << 8) + ((self.parent.imm7.value & 0x7F) << 1) - - # Build the internal expression - self.expr = ExprInt(target24, 32) - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in imm7. 
- """ - - if not isinstance(self.expr, ExprInt): - return False - - # Get the integer and apply a mask - v = int(self.expr.arg) & 0x00FFFFFF - - # Encode the value into two parts - self.parent.imm7.value = (v & 0xFF) >> 1 - self.value = v >> 8 - return True - - -class mep_target24_signed(mep_target24): - """Toshiba MeP-c4 target24 signed immediate, as used in BSR - """ - - def decode(self, v): - """Perform sign extension - """ - - mep_target24.decode(self, v) - v = int(self.expr.arg) - self.expr = ExprInt(sign_ext(v, 24, 32), 32) - - return True - - -class mep_code20(mep_imm): - """Toshiba MeP-c4 code20 immediate, as used in DSP1 - """ - - def decode(self, v): - """Modify the decoded value using the previously decoded imm4_noarg. - """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift values such as: - # code20=mmmm_cccc_cccc_cccc_cccc - code20 = v + ((self.parent.imm4.value & 0xFF) << 16) - - # Build the internal expression - self.expr = ExprInt(code20, 32) - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in imm4_noarg. - """ - - if not isinstance(self.expr, ExprInt): - return False - - # Get the integer and check the upper bound - v = int(self.expr.arg) - if v > 0xffffff: - return False - - # Encode the value into two parts - self.parent.imm4 = ((v >> 16) & 0xFF) - self.value = v - return True - - -class mep_code24(mep_imm): - """Toshiba MeP-c4 code24 immediate, as used in CP - """ - - def decode(self, v): - """Modify the decoded value using the previously decoded imm8_CCCC_CCCC. - """ - - # Shift values such as: - # code24=CCCC_CCCC||cccc_cccc_cccc_cccc - code24 = v + ((self.parent.imm8_CCCC_CCCC.value & 0xFF) << 16) - - # Build the internal expression - self.expr = ExprInt(code24, 32) - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in imm8_CCCC_CCCC. 
- """ - - if not isinstance(self.expr, ExprInt): - return False - - # Get the integer and check the upper bound - v = int(self.expr.arg) - if v > 0xFFFFFF: - return False - - # Encode the value into two parts - self.parent.imm8_CCCC_CCCC.value = ((v >> 16) & 0xFF) - self.value = v & 0xFFFF - return True - - -class mep_imm7_align4(mep_imm): - """Toshiba MeP-c4 imm7.align4 immediate, as used in Major #4 opcodes - """ - - def decode(self, v): - """Modify the decoded value. - """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift value such as: - # imm7=iii_ii||00 - imm7_align4 = v << 2 - - # Build the internal expression - self.expr = ExprInt(imm7_align4, 32) - return True - - def encode(self): - """Modify the encoded value. - """ - - if not isinstance(self.expr, ExprInt): - return False - - # Get the integer and check the upper bound - v = int(self.expr.arg) - if v > 0x80: - return False - - # Encode the value - self.value = v >> 2 - return True - - -class mep_imm5_Iiiii (mep_imm): - """Toshiba MeP-c4 imm5 immediate, as used in STC & LDC. It encodes a - control/special register. - """ - - reg_info = csr_infos # the list of MeP-c4 control/special registers defined in regs.py - parser = reg_info.parser # GV: not understood yet - - def decode(self, v): - """Modify the decoded value using the previously decoded imm4_iiii - """ - - # Apply the immediate mask - I = v & self.lmask - - # Shift values such as: - # imm5=I||iiii - imm5 = (I << 4) + (self.parent.imm4_iiii.value & 0xF) - - # Build the internal register expression - self.expr = ExprId(csr_names[imm5], 32) - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in imm4_iiii. 
- """ - - if not isinstance(self.expr, ExprId): - return False - - # Get the register number and check the upper bound - v = csr_names.index(self.expr.name) - if v > 0x1F: - return False - - # Encode the value into two parts - self.parent.imm4_iiii.value = v & 0xF # iiii - self.value = (v >> 4) & 0b1 # I - return True - - -class mep_disp7_align2(mep_imm): - """Toshiba MeP-c4 disp7.align2 immediate, as used in Major #8 opcodes - """ - upper_bound = 0x7F - bits_shift = 1 - - def decode(self, v): - """Modify the decoded value. - """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift value such as: - # disp7 = ddd_ddd||0 - disp7_align2 = (v << self.bits_shift) - - # Sign extension - disp7_align2 = sign_ext(disp7_align2, self.l + self.bits_shift, 32) - - # Build the internal expression - self.expr = ExprInt(disp7_align2, 32) - return True - - def encode(self): - """Modify the encoded value. - """ - - if not isinstance(self.expr, ExprInt): - return False - - # Get the integer - v = int(self.expr.arg) & self.upper_bound - - # Encode the value - self.value = (v >> self.bits_shift) & self.upper_bound - self.value = (v & self.upper_bound) >> self.bits_shift - return True - - -class mep_disp8_align2(mep_disp7_align2): - upper_bound = 0xFF - - -class mep_disp8_align4(mep_disp7_align2): - upper_bound = 0xFF - bits_shift = 2 - - -class mep_imm8_align8(mep_disp7_align2): - upper_bound = 0xFF - bits_shift = 3 - - -class mep_disp12_align2(mep_disp7_align2): - upper_bound = 0xFFF - - -class mep_disp12_align2_signed(mep_disp12_align2): - - def decode(self, v): - """Perform sign extension. - """ - mep_disp12_align2.decode(self, v) - v = int(self.expr.arg) - - self.expr = ExprInt(sign_ext(v, 12, 32), 32) - return True - - -class mep_disp17(mep_disp7_align2): - upper_bound = 0x1FFFF - - -class mep_imm24(mep_imm): - """Toshiba MeP-c4 imm24 immediate, as used in MOVU - """ - - def decode(self, v): - """Modify the decoded value. 
- """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift values such as: - # imm24=iiii_iiii_iiii_iiii||IIII_IIIII - imm24 = ((v & 0xFFFF) << 8) + ((v & 0xFF0000) >> 16) - - # Build the internal expression - self.expr = ExprInt(imm24, 32) - return True - - def encode(self): - """Modify the encoded value. - """ - - if not isinstance(self.expr, ExprInt): - return False - - # Get the integer and check the upper bound - v = int(self.expr.arg) - if v > 0xFFFFFF: - return False - - # Encode the value - self.value = ((v & 0xFFFF00) >> 8) + ((v & 0xFF) << 16) - return True - - -class mep_abs24(mep_imm): - """Toshiba MeP-c4 abs24 immediate - """ - parser = abs24_deref_parser - - def decode(self, v): - """Modify the decoded value using the previously decoded imm6. - """ - - # Apply the immediate mask - v = v & self.lmask - - # Shift values such as: - # abs24=dddd_dddd_dddd_dddd||DDDD_DD||00 - abs24 = (v << 8) + ((self.parent.imm6.value & 0x3F) << 2) - - # Build the internal expression - self.expr = ExprMem(ExprInt(abs24, 32), 32) - return True - - def encode(self): - """Modify the encoded value. One part is stored in this object, and - the other one in imm6. 
- """ - - if not (isinstance(self.expr, ExprMem) and isinstance(self.expr.ptr, ExprInt)): - return False - - # Get the integer and check the upper bound - v = int(self.expr.ptr.arg) - if v > 0xffffff: - return False - - # Encode the value into two parts - self.parent.imm6.value = (v & 0xFF) >> 2 - self.value = v >> 8 - return True - - -# Define MeP-c4 assembly operands - -reg04 = bs(l=4, # length in bits - cls=(mep_reg, )) # class implementing decoding & encoding - -reg04_l = bs(l=4, cls=(mep_reg, )) - -reg04_m = bs(l=4, cls=(mep_reg, )) - -reg04_n = bs(l=4, cls=(mep_reg, )) - -reg00 = bs(l=0, cls=(mep_reg, )) - -reg00_sp = bs(l=0, cls=(mep_reg_sp, )) - -reg00_tp = bs(l=0, cls=(mep_reg_tp, )) - -reg00_deref_sp = bs(l=0, cls=(mep_deref_sp_offset, )) - -reg00_deref_tp = bs(l=0, cls=(mep_deref_tp_offset, )) - -reg03 = bs(l=3, cls=(mep_reg, )) - -reg04_deref = bs(l=4, cls=(mep_deref_reg,)) - -reg04_deref_noarg = bs(l=4, fname="reg04_deref") - -reg04_inc_deref = bs(l=4, cls=(mep_deref_inc_reg,)) - -copro_reg04 = bs(l=4, cls=(mep_copro_reg,)) - -copro_reg05 = bs(l=1, cls=(mep_copro_reg_split,)) - -copro_reg06 = bs(l=2, cls=(mep_copro_reg_split,)) - -disp2 = bs(l=2, cls=(mep_imm, )) - -imm2 = disp2 - -imm3 = bs(l=3, cls=(mep_imm, )) - -imm4 = bs(l=4, cls=(mep_imm, )) - -imm4_noarg = bs(l=4, fname="imm4") - -imm4_iiii_noarg = bs(l=4, fname="imm4_iiii") - -imm5 = bs(l=5, cls=(mep_imm, )) - -imm5_Iiiii = bs(l=1, cls=(mep_imm5_Iiiii, )) # it is not an immediate, but a - # control/special register. - -imm6 = bs(l=6, cls=(mep_imm6, mep_arg)) - -imm6_noarg = bs(l=6, fname="imm6") - -imm7 = bs(l=7, cls=(mep_imm, )) - -imm7_noarg = bs(l=7, fname="imm7") # Note: - # - will be decoded as a 7 bits immediate - # - fname is used to set the operand name - # used in mep_target24 to merge operands - # values. 
By default, the bs class fills - # fname with an hex string compute from - # arguments passed to __init__ - -imm7_align4 = bs(l=5, cls=(mep_imm7_align4,)) - -imm7_align4_noarg = bs(l=5, fname="imm7_align4") - -disp7_align2 = bs(l=6, cls=(mep_disp7_align2,)) - -disp7_align2_noarg = bs(l=6, fname="disp7_align2") - -imm8 = bs(l=8, cls=(mep_imm8, mep_arg)) - -imm8_noarg = bs(l=8, fname="imm8_CCCC_CCCC") - -disp8 = bs(l=7, cls=(mep_disp8_align2, )) - -imm8_align2 = bs(l=7, cls=(mep_disp8_align2, )) - -imm8_align4 = bs(l=6, cls=(mep_disp8_align4, )) - -imm8_align8 = bs(l=5, cls=(mep_imm8_align8, )) - -imm12 = bs(l=12, cls=(mep_imm, )) - -disp12_signed = bs(l=11, cls=(mep_disp12_align2_signed, )) - -imm16 = bs(l=16, cls=(mep_imm16, mep_arg)) -imm16_signed = bs(l=16, cls=(mep_imm16_signed, mep_arg)) - -disp16_reg_deref = bs(l=16, cls=(mep_deref_reg_offset,)) - -disp17 = bs(l=16, cls=(mep_disp17, )) - -imm18 = bs(l=19, cls=(mep_imm, )) - -imm_code20 = bs(l=16, cls=(mep_code20, )) - -imm24 = bs(l=24, cls=(mep_imm24, )) - -imm_target24 = bs(l=16, cls=(mep_target24, )) -imm_target24_signed = bs(l=16, cls=(mep_target24_signed, )) - -imm_code24 = bs(l=16, cls=(mep_code24, )) - -abs24 = bs(l=16, cls=(mep_abs24, )) - - -# MeP-c4 mnemonics objects - -### - -# MOV Rn,Rm - 0000_nnnn_mmmm_0000 -addop("MOV", [bs("0000"), reg04, reg04, bs("0000")]) - -# NEG Rn,Rm - 0000_nnnn_mmmm_0001 -addop("NEG", [bs("0000"), reg04, reg04, bs("0001")]) - -# SLT3 R0,Rn,Rm - 0000_nnnn_mmmm_0010 -addop("SLT3", [bs("0000"), reg00, reg04, reg04, bs("0010")]) - -# SLTU3 R0,Rn,Rm - 0000_nnnn_mmmm_0011 -addop("SLTU3", [bs("0000"), reg00, reg04, reg04, bs("0011")]) - -# SUB Rn,Rm - 0000_nnnn_mmmm_0100 -addop("SUB", [bs("0000"), reg04, reg04, bs("0100")]) - -# SBVCK3 R0,Rn,Rm - 0000_nnnn_mmmm_0101 -addop("SBVCK3", [bs("0000"), reg00, reg04, reg04, bs("0101")]) - -# (RI) - 0000_xxxx_xxxx_0110 -addop("(RI)", [bs("0000"), reg04, reg04, bs("0110")]) - -# ADVCK3 R0,Rn,Rm - 0000_nnnn_mmmm_0111 -addop("ADVCK3", 
[bs("0000"), reg00, reg04, reg04, bs("0111")]) - -# SB Rn,(Rm) - 0000_nnnn_mmmm_1000 -addop("SB", [bs("0000"), reg04, reg04_deref, bs("1000")]) - -# SH Rn,(Rm) - 0000_nnnn_mmmm_1001 -addop("SH", [bs("0000"), reg04, reg04_deref, bs("1001")]) - -# SW Rn,(Rm) - 0000_nnnn_mmmm_1010 -addop("SW", [bs("0000"), reg04, reg04_deref, bs("1010")]) - -# LBU Rn,(Rm) - 0000_nnnn_mmmm_1011 -addop("LBU", [bs("0000"), reg04, reg04_deref, bs("1011")]) - -# LB Rn,(Rm) - 0000_nnnn_mmmm_1100 -addop("LB", [bs("0000"), reg04, reg04_deref, bs("1100")]) - -# LH Rn,(Rm) - 0000_nnnn_mmmm_1101 -addop("LH", [bs("0000"), reg04, reg04_deref, bs("1101")]) - -# LW Rn,(Rm) - 0000_nnnn_mmmm_1110 -addop("LW", [bs("0000"), reg04, reg04_deref, bs("1110")]) - -# LHU Rn,(Rm) - 0000_nnnn_mmmm_1111 -addop("LHU", [bs("0000"), reg04, reg04_deref, bs("1111")]) - - -### - -# OR Rn,Rm - 0001_nnnn_mmmm_0000 -addop("OR", [bs("0001"), reg04, reg04, bs("0000")]) - -# AND Rn,Rm - 0001_nnnn_mmmm_0001 -addop("AND", [bs("0001"), reg04, reg04, bs("0001")]) - -# XOR Rn,Rm - 0001_nnnn_mmmm_0010 -addop("XOR", [bs("0001"), reg04, reg04, bs("0010")]) - -# NOR Rn,Rm - 0001_nnnn_mmmm_0011 -addop("NOR", [bs("0001"), reg04, reg04, bs("0011")]) - -# MUL Rn,Rm - 0001_nnnn_mmmm_0100 -addop("MUL", [bs("0001"), reg04, reg04, bs("0100")]) - -# MULU Rn,Rm - 0001_nnnn_mmmm_0101 -addop("MULU", [bs("0001"), reg04, reg04, bs("0101")]) - -# MULR Rn,Rm - 0001_nnnn_mmmm_0110 -addop("MULR", [bs("0001"), reg04, reg04, bs("0110")]) - -# MULRU Rn,Rm - 0001_nnnn_mmmm_0111 -addop("MULRU", [bs("0001"), reg04, reg04, bs("0111")]) - -# DIV Rn,Rm - 0001_nnnn_mmmm_1000 -addop("DIV", [bs("0001"), reg04, reg04, bs("1000")]) - -# DIVU Rn,Rm - 0001_nnnn_mmmm_1001 -addop("DIVU", [bs("0001"), reg04, reg04, bs("1001")]) - -# (RI) - 0001_xxxx_xxxx_1010 -addop("(RI)", [bs("0001"), reg04, reg04, bs("1010")]) - -# (RI) - 0001_xxxx_xxxx_1011 -addop("(RI)", [bs("0001"), reg04, reg04, bs("1011")]) - -# SSARB disp2(Rm) - 0001_00dd_mmmm_1100 -addop("SSARB", 
[bs("000100"), disp2, reg04_deref, bs("1100")]) - -# EXTB Rn - 0001_nnnn_0000_1101 -addop("EXTB", [bs("0001"), reg04, bs("00001101")]) - -# EXTH Rn - 0001_nnnn_0010_1101 -addop("EXTH", [bs("0001"), reg04, bs("00101101")]) - -# EXTUB Rn - 0001_nnnn_1000_1101 -addop("EXTUB", [bs("0001"), reg04, bs("10001101")]) - -# EXTUH Rn - 0001_nnnn_1010_1101 -addop("EXTUH", [bs("0001"), reg04, bs("10101101")]) - -# JMP Rm - 0001_0000_mmmm_1110 -addop("JMP", [bs("00010000"), reg04, bs("1110")]) - -# JSR Rm - 0001_0000_mmmm_1111 -addop("JSR", [bs("00010000"), reg04, bs("1111")]) - -# JSRV Rm - 0001_1000_mmmm_1111 -addop("JSRV", [bs("00011000"), reg04, bs("1111")]) - - -### - -# BSETM (Rm),imm3 - 0010_0iii_mmmm_0000 -addop("BSETM", [bs("00100"), imm3, reg04_deref, bs("0000")], [reg04_deref, imm3]) - -# BCLRM (Rn),imm3 - 0010_0iii_mmmm_0001 -addop("BCLRM", [bs("00100"), imm3, reg04_deref, bs("0001")], [reg04_deref, imm3]) - -# BNOTM (Rm),imm3 - 0010_0iii_mmmm_0010 -addop("BNOTM", [bs("00100"), imm3, reg04_deref, bs("0010")], [reg04_deref, imm3]) - -# BTSTM R0,(Rm),imm3 - 0010_0iii_mmmm_0011 -addop("BTSTM", [bs("00100"), reg00, imm3, reg04_deref, bs("0011")], [reg00, reg04_deref, imm3]) - -# TAS Rn,(Rm) - 0010_nnnn_mmmm_0100 -addop("TAS", [bs("0010"), reg04, reg04_deref, bs("0100")]) - -# (RI) - 0010_xxxx_xxxx_0101 -addop("(RI)", [bs("0010"), reg04, reg04, bs("0101")]) - -# SL1AD3 R0,Rn,Rm - 0010_nnnn_mmmm_0110 -addop("SL1AD3", [bs("0010"), reg00, reg04, reg04, bs("0110")]) - -# SL2AD3 R0,Rn,Rm - 0010_nnnn_mmmm_0111 -addop("SL2AD3", [bs("0010"), reg00, reg04, reg04, bs("0111")]) - -# (RI) - 0010_xxxx_xxxx_1000 -addop("(RI)", [bs("0010"), reg04, reg04, bs("1000")]) - -# (RI) - 0010_xxxx_xxxx_1001 -addop("(RI)", [bs("0010"), reg04, reg04, bs("1001")]) - -# (RI) - 0010_xxxx_xxxx_1010 -addop("(RI)", [bs("0010"), reg04, reg04, bs("1010")]) - -# (RI) - 0010_xxxx_xxxx_1011 -addop("(RI)", [bs("0010"), reg04, reg04, bs("1011")]) - -# SRL Rn,Rm - 0010_nnnn_mmmm_1100 -addop("SRL", [bs("0010"), 
reg04, reg04, bs("1100")]) - -# SRA Rn,Rm - 0010_nnnn_mmmm_1101 -addop("SRA", [bs("0010"), reg04, reg04, bs("1101")]) - -# SLL Rn,Rm - 0010_nnnn_mmmm_1110 -addop("SLL", [bs("0010"), reg04, reg04, bs("1110")]) - -# FSFT Rn,Rm - 0010_nnnn_mmmm_1111 -addop("FSFT", [bs("0010"), reg04, reg04, bs("1111")]) - - -### - -# SWCPI CRn,(Rm+) - 0011_nnnn_mmmm_0000 -addop("SWCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0000")]) - -# LWCPI CRn,(Rm+) - 0011_nnnn_mmmm_0001 -addop("LWCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0001")]) - -# SMCPI CRn,(Rm+) - 0011_nnnn_mmmm_0010 -addop("SMCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0010")]) - -# LMCPI CRn,(Rm+) - 0011_nnnn_mmmm_0011 -addop("LMCPI", [bs("0011"), copro_reg04, reg04_inc_deref, bs("0011")]) - -# SWCP CRn,(Rm) - 0011_nnnn_mmmm_1000 -addop("SWCP", [bs("0011"), copro_reg04, reg04_deref, bs("1000")]) - -# LWCP CRn,(Rm) - 0011_nnnn_mmmm_1001 -addop("LWCP", [bs("0011"), copro_reg04, reg04_deref, bs("1001")]) - -# SMCP CRn,(Rm) - 0011_nnnn_mmmm_1010 -addop("SMCP", [bs("0011"), copro_reg04, reg04_deref, bs("1010")]) - -# LMCP CRn,(Rm) - 0011_nnnn_mmmm_1011 -addop("LMCP", [bs("0011"), copro_reg04, reg04_deref, bs("1011")]) - - -### - -# ADD3 Rn,SP,imm7.align4 - 0100_nnnn_0iii_ii00 -addop("ADD3", [bs("0100"), reg04, reg00_sp, bs("0"), imm7_align4, bs("00")]) - -# SW Rn,disp7.align4(SP) - 0100_nnnn_0ddd_dd10 -# Note: disp7.align4 is the same as imm7.align4 -addop("SW", [bs("0100"), reg04, bs("0"), imm7_align4_noarg, reg00_deref_sp, bs("10")]) - -# LW Rn,disp7.align4(SP) - 0100_nnnn_0ddd_dd11 -addop("LW", [bs("0100"), reg04, bs("0"), imm7_align4_noarg, reg00_deref_sp, bs("11")]) - -# SW Rn[0-7],disp7.align4(TP) - 0100_0nnn_1ddd_dd10 -addop("SW", [bs("01000"), reg03, bs("1"), imm7_align4_noarg, reg00_deref_tp, bs("10")]) - -# LW Rn[0-7],disp7.align4(TP) - 0100_0nnn_1ddd_dd11 -addop("LW", [bs("01000"), reg03, bs("1"), imm7_align4_noarg, reg00_deref_tp, bs("11")]) - -# LBU Rn[0-7],disp7(TP) - 
0100_1nnn_1ddd_dddd -addop("LBU", [bs("01001"), reg03, bs("1"), imm7_noarg, reg00_deref_tp], [reg03, reg00_deref_tp]) - -### - -# MOV Rn,imm8 - 0101_nnnn_iiii_iiii -addop("MOV", [bs("0101"), reg04, imm8]) - - -### - -# ADD Rn,imm6 - 0110_nnnn_iiii_ii00 -addop("ADD", # mnemonic name - [bs("0110"), reg04, imm6, bs("00")]) # mnemonic description - -# SLT3 R0,Rn,imm5 - 0110_nnnn_iiii_i001 -addop("SLT3", [bs("0110"), reg00, reg04, imm5, bs("001")]) - -# SRL Rn,imm5 - 0110_nnnn_iiii_i010 -addop("SRL", [bs("0110"), reg04, imm5, bs("010")]) - -# SRA Rn,imm5 - 0110_nnnn_iiii_i011 -addop("SRA", [bs("0110"), reg04, imm5, bs("011")]) - -# SLTU3 R0,Rn,imm5 - 0110_nnnn_iiii_i101 -addop("SLTU3", [bs("0110"), reg00, reg04, imm5, bs("101")]) - -# SLL Rn,imm5 - 0110_nnnn_iiii_i110 -addop("SLL", [bs("0110"), reg04, imm5, bs("110")]) - -# SLL3 R0,Rn,imm5 - 0110_nnnn_iiii_i111 -addop("SLL3", [bs("0110"), reg00, reg04, imm5, bs("111")]) - - -### - -# DI - 0111_0000_0000_0000 -addop("DI", [bs("0111000000000000")]) - -# EI - 0111_0000_0001_0000 -addop("EI", [bs("0111000000010000")]) - -# SYNCM - 0111_0000_0001_0001 -addop("SYNCM", [bs("0111000000010001")]) - -# SYNCCP - 0111_0000_0010_0001 -addop("SYNCCP", [bs("0111000000100001")]) - -# RET - 0111_0000_0000_0010 -addop("RET", [bs("0111000000000010")]) - -# RETI - 0111_0000_0001_0010 -addop("RETI", [bs("0111000000010010")]) - -# HALT - 0111_0000_0010_0010 -addop("HALT", [bs("0111000000100010")]) - -# BREAK - 0111_0000_0011_0010 -addop("BREAK", [bs("0111000000110010")]) - -# SLEEP - 0111_0000_0110_0010 -addop("SLEEP", [bs("0111000001100010")]) - -# DRET - 0111_0000_0001_0011 -addop("DRET", [bs("0111000000010011")]) - -# DBREAK - 0111_0000_0011_0011 -addop("DBREAK", [bs("0111000000110011")]) - -# CACHE imm4,(Rm) - 0111_iiii_mmmm_0100 -addop("CACHE", [bs("0111"), imm4, reg04_deref, bs("0100")]) - -# (RI) - 0111_xxxx_xxxx_0101 -addop("(RI)", [bs("0111"), reg04, reg04, bs("0101")]) - -# SWI imm2 - 0111_0000_00ii_0110 -addop("SWI", 
[bs("0111000000"), imm2, bs("0110")]) - -# (RI) - 0111_xxxx_xxxx_0111 -addop("(RI)", [bs("0111"), reg04, reg04, bs("0111")]) - -# STC Rn,imm5 - 0111_nnnn_iiii_100I -addop("STC", [bs("0111"), reg04, imm4_iiii_noarg, bs("100"), imm5_Iiiii]) - -# LDC Rn,imm5 - 0111_nnnn_iiii_101I -addop("LDC", [bs("0111"), reg04, imm4_iiii_noarg, bs("101"), imm5_Iiiii]) - -# (RI) - 0111_xxxx_xxxx_1100 -addop("(RI)", [bs("0111"), reg04, reg04, bs("1100")]) - -# (RI) - 0111_xxxx_xxxx_1101 -addop("(RI)", [bs("0111"), reg04, reg04, bs("1101")]) - -# (RI) - 0111_xxxx_xxxx_1110 -addop("(RI)", [bs("0111"), reg04, reg04, bs("1110")]) - -# (RI) - 0111_xxxx_xxxx_1111 -addop("(RI)", [bs("0111"), reg04, reg04, bs("1111")]) - - -### - -# SB Rn[0-7],disp7(TP) - 1000_0nnn_0ddd_dddd -addop("SB", [bs("10000"), reg03, bs("0"), imm7_noarg, reg00_deref_tp]) - -# SH Rn[0-7],disp7.align2(TP) - 1000_0nnn_1ddd_ddd0 -# (disp7.align2 = ddd_ddd||0) -addop("SH", [bs("10000"), reg03, bs("1"), disp7_align2_noarg, bs("0"), reg00_deref_tp]) - -# LB Rn[0-7],disp7(TP) - 1000_1nnn_0ddd_dddd -addop("LB", [bs("10001"), reg03, bs("0"), imm7_noarg, reg00_deref_tp]) - -# LH Rn[0-7],disp7.align2(TP) - 1000_1nnn_1ddd_ddd0 -addop("LH", [bs("10001"), reg03, bs("1"), disp7_align2_noarg, bs("0"), reg00_deref_tp]) - -# LHU Rn[0-7],disp7.align2(TP) - 1000_1nnn_1ddd_ddd1 -addop("LHU", [bs("10001"), reg03, bs("1"), disp7_align2_noarg, bs("1"), reg00_deref_tp]) - - -### - -# ADD3 Rl,Rn,Rm - 1001_nnnn_mmmm_llll -addop("ADD3", [bs("1001"), reg04_n, reg04_m, reg04_l], [reg04_l, reg04_n, reg04_m]) - - -### - -# BEQZ Rn,disp8.align2 - 1010_nnnn_dddd_ddd0 -# (disp8=dddd_ddd||0) -addop("BEQZ", [bs("1010"), reg04, disp8, bs("0")]) - -# BNEZ Rn,disp8.align2 - 1010_nnnn_dddd_ddd1 -addop("BNEZ", [bs("1010"), reg04, disp8, bs("1")]) - - -### - -# BRA disp12.align2 - 1011_dddd_dddd_ddd0 -# (disp12=dddd_dddd_ddd||0) -addop("BRA", [bs("1011"), disp12_signed, bs("0")]) - -# BSR disp12.align2 - 1011_dddd_dddd_ddd1 -addop("BSR", [bs("1011"), 
disp12_signed, bs("1")]) - - -### - -# ADD3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0000 iiii_iiii_iiii_iiii -addop("ADD3", [bs("1100"), reg04, reg04, bs("0000"), imm16_signed]) - -# MOV Rn,imm16 - 1100_nnnn_0000_0001 iiii_iiii_iiii_iiii -addop("MOV", [bs("1100"), reg04, bs("00000001"), imm16]) - -# MOVU Rn,imm16 - 1100_nnnn_0001_0001 iiii_iiii_iiii_iiii -addop("MOVU", [bs("1100"), reg04, bs("00010001"), imm16]) - -# MOVH Rn,imm16 - 1100_nnnn_0010_0001 iiii_iiii_iiii_iiii -addop("MOVH", [bs("1100"), reg04, bs("00100001"), imm16]) - -# SLT3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0010 iiii_iiii_iiii_iiii -addop("SLT3", [bs("1100"), reg04, reg04, bs("0010"), imm16_signed]) - -# SLTU3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0011 iiii_iiii_iiii_iiii -addop("SLTU3", [bs("1100"), reg04, reg04, bs("0011"), imm16]) - -# OR3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0100 iiii_iiii_iiii_iiii -addop("OR3", [bs("1100"), reg04, reg04, bs("0100"), imm16]) - -# AND3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0101 iiii_iiii_iiii_iiii -addop("AND3", [bs("1100"), reg04, reg04, bs("0101"), imm16]) - -# XOR3 Rn,Rm,imm16 - 1100_nnnn_mmmm_0110 iiii_iiii_iiii_iiii -addop("XOR3", [bs("1100"), reg04, reg04, bs("0110"), imm16]) - -# (RI) - 1100_xxxx_xxxx_0111 xxxx_xxxx_xxxx_xxxx -addop("(RI)", [bs("1100"), imm8, bs("0111"), imm16]) - -# SB Rn,disp16(Rm) - 1100_nnnn_mmmm_1000 dddd_dddd_dddd_dddd -addop("SB", [bs("1100"), reg04, reg04_deref_noarg, bs("1000"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# SH Rn,disp16(Rm) - 1100_nnnn_mmmm_1001 dddd_dddd_dddd_dddd -addop("SH", [bs("1100"), reg04, reg04_deref_noarg, bs("1001"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# SW Rn,disp16(Rm) - 1100_nnnn_mmmm_1010 dddd_dddd_dddd_dddd -addop("SW", [bs("1100"), reg04, reg04_deref_noarg, bs("1010"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# LBU Rn,disp16(Rm) - 1100_nnnn_mmmm_1011 dddd_dddd_dddd_dddd -addop("LBU", [bs("1100"), reg04, reg04_deref_noarg, bs("1011"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# LB Rn,disp16(Rm) - 
1100_nnnn_mmmm_1100 dddd_dddd_dddd_dddd -addop("LB", [bs("1100"), reg04, reg04_deref_noarg, bs("1100"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# LH Rn,disp16(Rm) - 1100_nnnn_mmmm_1101 dddd_dddd_dddd_dddd -addop("LH", [bs("1100"), reg04, reg04_deref_noarg, bs("1101"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# LW Rn,disp16(Rm) - 1100_nnnn_mmmm_1110 dddd_dddd_dddd_dddd -addop("LW", [bs("1100"), reg04, reg04_deref_noarg, bs("1110"), disp16_reg_deref], [reg04, disp16_reg_deref]) - -# LHU Rn,disp16(Rm) - 1100_nnnn_mmmm_1111 dddd_dddd_dddd_dddd -addop("LHU", [bs("1100"), reg04, reg04_deref_noarg, bs("1111"), disp16_reg_deref], [reg04, disp16_reg_deref]) - - -### - -# MOVU Rn[0-7],imm24 - 1101_0nnn_IIII_IIII iiii_iiii_iiii_iiii -addop("MOVU", [bs("11010"), reg03, imm24]) - -# BCPEQ cccc,disp17 - 1101_1000_cccc_0100 dddd_dddd_dddd_dddd -addop("BCPEQ", [bs("11011000"), imm4, bs("0100"), disp17]) - -# BCPNE cccc,disp17 - 1101_1000_cccc_0101 dddd_dddd_dddd_dddd -addop("BCPNE", [bs("11011000"), imm4, bs("0101"), disp17]) - -# BCPAT cccc,disp17 - 1101_1000_cccc_0110 dddd_dddd_dddd_dddd -addop("BCPAT", [bs("11011000"), imm4, bs("0110"), disp17]) - -# BCPAF cccc,disp17 - 1101_1000_cccc_0111 dddd_dddd_dddd_dddd -addop("BCPAF", [bs("11011000"), imm4, bs("0111"), disp17]) - -# JMP target24 - 1101_1TTT_TTTT_1000 tttt_tttt_tttt_tttt -addop("JMP", [bs("11011"), imm7_noarg, bs("1000"), imm_target24], - [imm_target24]) # the only interesting operand is imm_target24 - -# BSR disp24 - 1101_1DDD_DDDD_1001 dddd_dddd_dddd_dddd -addop("BSR", [bs("11011"), imm7_noarg, bs("1001"), imm_target24_signed], [imm_target24_signed]) - -# BSRV disp24 1101_1DDD_DDDD_1011 dddd_dddd_dddd_dddd -addop("BSRV", [bs("11011"), imm7_noarg, bs("1011"), imm_target24], [imm_target24]) - - -### - -# BEQI Rn,imm4,disp17 - 1110_nnnn_iiii_0000 dddd_dddd_dddd_dddd -addop("BEQI", [bs("1110"), reg04, imm4, bs("0000"), disp17]) - -# BEQ Rn,Rm,disp17 - 1110_nnnn_mmmm_0001 dddd_dddd_dddd_dddd -addop("BEQ", 
[bs("1110"), reg04, reg04, bs("0001"), disp17]) - -# BNEI Rn,imm4,disp17 - 1110_nnnn_iiii_0100 dddd_dddd_dddd_dddd -addop("BNEI", [bs("1110"), reg04, imm4, bs("0100"), disp17]) - -# BNE Rn,Rm,disp17 - 1110_nnnn_mmmm_0101 dddd_dddd_dddd_dddd -addop("BNE", [bs("1110"), reg04, reg04, bs("0101"), disp17]) - -# BGEI Rn,imm4,disp17 - 1110_nnnn_iiii_1000 dddd_dddd_dddd_dddd -addop("BGEI", [bs("1110"), reg04, imm4, bs("1000"), disp17]) - -# REPEAT Rn,disp17 - 1110_nnnn_0000_1001 dddd_dddd_dddd_dddd -addop("REPEAT", [bs("1110"), reg04, bs("00001001"), disp17]) - -# EREPEAT disp17 - 1110_0000_0001_1001 dddd_dddd_dddd_dddd -addop("EREPEAT", [bs("1110000000011001"), disp17]) - -# BLTI Rn,imm4,disp17 - 1110_nnnn_iiii_1100 dddd_dddd_dddd_dddd -addop("BLTI", [bs("1110"), reg04, imm4, bs("1100"), disp17]) - -# (RI) - 1110_xxxx_xxxx_1101 xxxx_xxxx_xxxx_xxxx -addop("(RI)", [bs("1110"), imm8, bs("1101"), imm16]) - -# SW Rn,(abs24) - 1110_nnnn_DDDD_DD10 dddd_dddd_dddd_dddd -addop("SW", [bs("1110"), reg04, imm6_noarg, bs("10"), abs24]) - -# LW Rn,(abs24) - 1110_nnnn_DDDD_DD11 dddd_dddd_dddd_dddd -addop("LW", [bs("1110"), reg04, imm6_noarg, bs("11"), abs24]) - - -### - -# DSP Rn,Rm,code16 - 1111_nnnn_mmmm_0000 cccc_cccc_cccc_cccc -addop("DSP", [bs("1111"), reg04, reg04, bs("0000"), imm16]) - -# Note: DSP, DSP0 & DSP1 look exactly the same. This is ambiguous, and prevent -# them for being correctly disassembled. DSP0 & DSP1 are arbitrarily -# disabled. 
- -# DSP0 code24 - 1111_nnnn_mmmm_0000 cccc_cccc_cccc_cccc -#addop("DSP0", [bs("1111"), imm8_noarg, bs("0000"), imm_code24], [imm_code24]) - -# DSP1 Rn,code20 - 1111_nnnn_mmmm_0000 cccc_cccc_cccc_cccc -#addop("DSP1", [bs("1111"), reg04, imm4_noarg, bs("0000"), imm_code20]) - -# LDZ Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0000 -addop("LDZ", [bs("1111"), reg04, reg04, bs("00010000000000000000")]) - -# AVE Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0010 -addop("AVE", [bs("1111"), reg04, reg04, bs("00010000000000000010")]) - -# ABS Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0011 -addop("ABS", [bs("1111"), reg04, reg04, bs("00010000000000000011")]) - -# MIN Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0100 -addop("MIN", [bs("1111"), reg04, reg04, bs("00010000000000000100")]) - -# MAX Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0101 -addop("MAX", [bs("1111"), reg04, reg04, bs("00010000000000000101")]) - -# MINU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0110 -addop("MINU", [bs("1111"), reg04, reg04, bs("00010000000000000110")]) - -# MAXU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_0111 -addop("MAXU", [bs("1111"), reg04, reg04, bs("00010000000000000111")]) - -# SADD Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1000 -addop("SADD", [bs("1111"), reg04, reg04, bs("00010000000000001000")]) - -# SADDU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1001 -addop("SADDU", [bs("1111"), reg04, reg04, bs("00010000000000001001")]) - -# SSUB Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1010 -addop("SSUB", [bs("1111"), reg04, reg04, bs("00010000000000001010")]) - -# SSUBU Rn,Rm - 1111_nnnn_mmmm_0001 0000_0000_0000_1011 -addop("SSUBU", [bs("1111"), reg04, reg04, bs("00010000000000001011")]) - -# CLIP Rn,imm5 - 1111_nnnn_0000_0001 0001_0000_iiii_i000 -addop("CLIP", [bs("1111"), reg04, bs("0000000100010000"), imm5, bs("000")]) - -# CLIPU Rn,imm5 - 1111_nnnn_0000_0001 0001_0000_iiii_i001 -addop("CLIPU", [bs("1111"), reg04, bs("0000000100010000"), imm5, bs("001")]) - -# (RI) - 1111_xxxx_xxxx_0001 
0010_xxxx_xxxx_xxxx -addop("(RI)", [bs("1111"), imm8, bs("00010010"), imm12]) - -# MADD Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0100 -addop("MADD", [bs("1111"), reg04, reg04, bs("00010011000000000100")]) - -# MADDU Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0101 -addop("MADDU", [bs("1111"), reg04, reg04, bs("00010011000000000101")]) - -# MADDR Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0110 -addop("MADDR", [bs("1111"), reg04, reg04, bs("00010011000000000110")]) - -# MADDRU Rn,Rm - 1111_nnnn_mmmm_0001 0011_0000_0000_0111 -addop("MADDRU", [bs("1111"), reg04, reg04, bs("00010011000000000111")]) - -# UCI Rn,Rm,code16 - 1111_nnnn_mmmm_0010 cccc_cccc_cccc_cccc -addop("UCI", [bs("1111"), reg04, reg04, bs("0010"), imm16]) - -# (RI) - 1111_xxxx_xxxx_0011 xxxx_xxxx_xxxx_xxxx -addop("(RI)", [bs("1111"), imm8, bs("0011"), imm16]) - -# STCB Rn,abs16 - 1111_nnnn_0000_0100 aaaa_aaaa_aaaa_aaaa -addop("STCB", [bs("1111"), reg04, bs("00000100"), imm16]) - -# LDCB Rn,abs16 - 1111_nnnn_0001_0100 aaaa_aaaa_aaaa_aaaa -addop("LDCB", [bs("1111"), reg04, bs("00010100"), imm16]) - -# SBCPA CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0000_0000_iiii_iiii -addop("SBCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100000000"), imm8]) - -# SHCPA CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0001_0000_iiii_iii0 -addop("SHCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100010000"), imm8_align2, bs("0")]) - -# SWCPA CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0010_0000_iiii_ii00 -addop("SWCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100100000"), imm8_align4, bs("00")]) - -# SMCPA CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0011_0000_iiii_i000 -addop("SMCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100110000"), imm8_align8, bs("000")]) - -# LBCPA CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0100_0000_iiii_iiii -addop("LBCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101000000"), imm8]) - -# LHCPA CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0101_0000_iiii_iii0 
-addop("LHCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101010000"), imm8_align2, bs("0")]) - -# LWCPA CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0110_0000_iiii_ii00 -addop("LWCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101100000"), imm8_align4, bs("00")]) - -# LMCPA CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0111_0000_iiii_i000 -addop("LMCPA", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101110000"), imm8_align8, bs("000")]) - -# SBCPM0 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0000_1000_iiii_iiii -addop("SBCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100001000"), imm8]) - -# SHCPM0 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0001_1000_iiii_iii0 -addop("SHCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100011000"), imm8_align2, bs("0")]) - -# SWCPM0 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0010_1000_iiii_ii00 -addop("SWCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100101000"), imm8_align4, bs("00")]) - -# SMCPM0 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0011_1000_iiii_i000 -addop("SMCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100111000"), imm8_align8, bs("000")]) - -# LBCPM0 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0100_1000_iiii_iiii -addop("LBCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101001000"), imm8]) - -# LHCPM0 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0101_1000_iiii_iii0 -addop("LHCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101011000"), imm8_align2, bs("0")]) - -# LWCPM0 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0110_1000_iiii_ii00 -addop("LWCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101101000"), imm8_align4, bs("00")]) - -# LMCPM0 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0111_1000_iiii_i000 -addop("LMCPM0", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101111000"), imm8_align8, bs("000")]) - -# SBCPM1 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0000_1100_iiii_iiii -addop("SBCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100001100"), 
imm8]) - -# SHCPM1 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0001_1100_iiii_iii0 -addop("SHCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100011100"), imm8_align2, bs("0")]) - -# SWCPM1 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0010_1100_iiii_ii00 -addop("SWCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100101100"), imm8_align4, bs("00")]) - -# SMCPM1 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0011_1100_iiii_i000 -addop("SMCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010100111100"), imm8_align8, bs("000")]) - -# LBCPM1 CRn,(Rm+),imm8 - 1111_nnnn_mmmm_0101 0100_1100_iiii_iiii -addop("LBCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101001100"), imm8]) - -# LHCPM1 CRn,(Rm+),imm8.align2 - 1111_nnnn_mmmm_0101 0101_1100_iiii_iii0 -addop("LHCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101011100"), imm8_align2, bs("0")]) - -# LWCPM1 CRn,(Rm+),imm8.align4 - 1111_nnnn_mmmm_0101 0110_1100_iiii_ii00 -addop("LWCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101101100"), imm8_align4, bs("00")]) - -# LMCPM1 CRn,(Rm+),imm8.align8 - 1111_nnnn_mmmm_0101 0111_1100_iiii_i000 -addop("LMCPM1", [bs("1111"), copro_reg04, reg04_inc_deref, bs("010101111100"), imm8_align8, bs("000")]) - -# (RI) - 1111_xxxx_xxxx_0110 xxxx_xxxx_xxxx_xxxx -addop("(RI)", [bs("1111"), imm8, bs("0110"), imm16]) - -# CP code24 - 1111_CCCC_CCCC_0111 cccc_cccc_cccc_cccc -#addop("CP", [bs("1111"), imm8_noarg, bs("0111"), imm_code24], [imm_code24]) -# Note: CP & CMOV* look exactly the same. This is ambiguous, and prevent -# them for being correctly disassembled. CP was arbitrarily disabled. 
- -# CP code56 - 1111_CCCC_CCCC_0111 cccc_cccc_cccc_cccc cccc_cccc_cccc_cccc -# 64-bit VLIW operation mode - not implemented - -# CMOV CRn,Rm - 1111_nnnn_mmmm_0111 1111_0000_0000_0000 -#addop("CMOV", [bs("1111"), copro_reg04, reg04, bs("01111111000000000000")]) - -# CMOV Rm,CRn - 1111_nnnn_mmmm_0111 1111_0000_0000_0001 -#addop("CMOV", [bs("1111"), copro_reg04, reg04, bs("01111111000000000001")], [reg04, copro_reg04]) - -# CMOVC CCRn,Rm - 1111_nnnn_mmmm_0111 1111_0000_0000_NN10 -# CRn=NNnnnn -addop("CMOVC", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg06, bs("10")], [copro_reg06, reg04]) - -# CMOVC Rm,CCRn - 1111_nnnn_mmmm_0111 1111_0000_0000_NN11 -# CRn=NNnnnn -addop("CMOVC", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg06, bs("11")], [reg04, copro_reg06]) - -# CMOVH CRn,Rm - 1111_nnnn_mmmm_0111 1111_0001_0000_0000 -#addop("CMOVH", [bs("1111"), copro_reg04, reg04, bs("01111111000100000000")]) - -# CMOVH Rm,CRn - 1111_nnnn_mmmm_0111 1111_0001_0000_0001 -#addop("CMOVH", [bs("1111"), copro_reg04, reg04, bs("01111111000100000001")], [reg04, copro_reg04]) - -# Note: the following CMOV* instructions are extensions used when the processor -# has more than 16 coprocessor general-purpose registers. They can be -# used to assemble and disassemble both CMOV* instructuons sets. 
- -# CMOV CRn,Rm - 1111_nnnn_mmmm_0111 1111_0000_0000_N000 -# CRn=Nnnnn -addop("CMOV", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg05, bs("000")], [copro_reg05, reg04]) - -# CMOV Rm,CRn - 1111_nnnn_mmmm_0111 1111_0000_0000_N001 -addop("CMOV", [bs("1111"), imm4_noarg, reg04, bs("0111111100000000"), copro_reg05, bs("001")], [reg04, copro_reg05]) - -# CMOVH CRn,Rm - 1111_nnnn_mmmm_0111 1111_0001_0000_N000 -addop("CMOVH", [bs("1111"), imm4_noarg, reg04, bs("0111111100010000"), copro_reg05, bs("000")], [copro_reg05, reg04]) - -# CMOVH Rm,CRn - 1111_nnnn_mmmm_0111 1111_0001_0000_N001 -addop("CMOVH", [bs("1111"), imm4_noarg, reg04, bs("0111111100010000"), copro_reg05, bs("001")], [reg04, copro_reg05]) - -# (RI) - 1111_xxxx_xxxx_10xx xxxx_xxxx_xxxx_xxxx -addop("(RI)", [bs("1111"), imm8, bs("10"), imm18]) - -# SWCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1100 dddd_dddd_dddd_dddd -addop("SWCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1100"), disp16_reg_deref], [copro_reg04, disp16_reg_deref]) - -# LWCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1101 dddd_dddd_dddd_dddd -addop("LWCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1101"), disp16_reg_deref], [copro_reg04, disp16_reg_deref, reg04_deref]) - -# SMCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1110 dddd_dddd_dddd_dddd -addop("SMCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1110"), disp16_reg_deref], [copro_reg04, disp16_reg_deref, reg04_deref]) - -# LMCP CRn,disp16(Rm) - 1111_nnnn_mmmm_1111 dddd_dddd_dddd_dddd -addop("LMCP", [bs("1111"), copro_reg04, reg04_deref_noarg, bs("1111"), disp16_reg_deref], [copro_reg04, disp16_reg_deref]) diff --git a/miasm2/arch/mep/disasm.py b/miasm2/arch/mep/disasm.py deleted file mode 100644 index 2ad73036..00000000 --- a/miasm2/arch/mep/disasm.py +++ /dev/null @@ -1,23 +0,0 @@ -# Toshiba MeP-c4 - miasm disassembly engine -# Guillaume Valadon - -from miasm2.core.asmblock import disasmEngine -from miasm2.arch.mep.arch import mn_mep - - -class dis_mepb(disasmEngine): - """MeP 
miasm disassembly engine - Big Endian - - Notes: - - its is mandatory to call the miasm Machine - """ - - attrib = "b" - - def __init__(self, bs=None, **kwargs): - super(dis_mepb, self).__init__(mn_mep, self.attrib, bs, **kwargs) - - -class dis_mepl(dis_mepb): - """MeP miasm disassembly engine - Little Endian""" - attrib = "l" diff --git a/miasm2/arch/mep/ira.py b/miasm2/arch/mep/ira.py deleted file mode 100644 index 34808656..00000000 --- a/miasm2/arch/mep/ira.py +++ /dev/null @@ -1,45 +0,0 @@ -# Toshiba MeP-c4 - miasm IR analysis -# Guillaume Valadon - -from miasm2.arch.mep.sem import ir_mepb, ir_mepl -from miasm2.ir.analysis import ira - - -class ir_a_mepb(ir_mepb, ira): - """MeP high level IR manipulations - Big Endian - - Notes: - - it is mandatory for symbolic execution. - """ - - def __init__(self, loc_db=None): - ir_mepb.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R0 - - # Note: the following are abstract method and must be implemented - def sizeof_char(self): - "Return the size of a char in bits" - return 8 - - def sizeof_short(self): - "Return the size of a short in bits" - return 16 - - def sizeof_int(self): - "Return the size of an int in bits" - return 32 - - def sizeof_long(self): - "Return the size of a long in bits" - return 32 - - def sizeof_pointer(self): - "Return the size of a void* in bits" - return 32 - - -class ir_a_mepl(ir_mepl, ir_a_mepb): - """MeP high level IR manipulations - Little Endian""" - - def __init__(self, loc_db=None): - ir_a_mepb.__init__(self, loc_db) diff --git a/miasm2/arch/mep/jit.py b/miasm2/arch/mep/jit.py deleted file mode 100644 index 6c0e6ff5..00000000 --- a/miasm2/arch/mep/jit.py +++ /dev/null @@ -1,115 +0,0 @@ -# Toshiba MeP-c4 - miasm jitter -# Guillaume Valadon -# Note: inspiration from msp430/jit.py - -from miasm2.jitter.jitload import Jitter -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import * -from miasm2.jitter.codegen import CGen -from miasm2.ir.translators.C import 
TranslatorC -from miasm2.arch.mep.sem import ir_mepl, ir_mepb - -import logging - -log = logging.getLogger("jit_mep") -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - - -class mep_CGen(CGen): - """ - Translate a bloc containing MeP instructions to C - - Note: it is used to emulate the *REPEAT instructions - """ - - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = self.ir_arch.arch.regs.PC - self.translator = TranslatorC(self.ir_arch.loc_db) - self.init_arch_C() - - def gen_pre_code(self, attrib): - """Generate C code inserted before the current bloc""" - - # Call the base class method - out = super(mep_CGen, self).gen_pre_code(attrib) - - # Set the PC register value explicitly - out.append("mycpu->PC = 0x%X;" % attrib.instr.offset) - out.append("mycpu->last_addr = mycpu->PC;"); - - return out - - def gen_post_code(self, attrib, pc_value): - """Generate C code inserted after the current bloc""" - - # Call the base class method - out = super(mep_CGen, self).gen_post_code(attrib, pc_value) - - # Implement the *REPEAT instructions logics - tmp = r""" - /* *REPEAT instructions logic */ - { - uint32_t is_repeat_end = mycpu->is_repeat_end; - mycpu->is_repeat_end = !!(mycpu->last_addr == (mycpu->RPE&~0x1)); - - if (is_repeat_end && !mycpu->take_jmp && - (mycpu->in_erepeat || mycpu->RPC)) { - if (mycpu->RPC) - mycpu->RPC --; - - //printf("Go repeat %X\n", mycpu->RPB); - DST_value = mycpu->RPB; - BlockDst->address = mycpu->RPB; - return JIT_RET_NO_EXCEPTION; - } - } - """ - - out += tmp.split('`\n') - return out - - -class jitter_mepl(Jitter): - - C_Gen = mep_CGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_mepl(sp), *args, **kwargs) - self.vm.set_little_endian() - self.ir_arch.jit_pc = self.ir_arch.arch.regs.PC - - def push_uint16_t(self, v): - regs = self.cpu.get_gpreg() - regs["SP"] -= 2 - 
self.cpu.set_gpreg(regs) - self.vm.set_mem(regs["SP"], pck16(v)) - - def pop_uint16_t(self): - regs = self.cpu.get_gpreg() - x = self.vm.get_u16(regs["SP"]) - regs["SP"] += 2 - self.cpu.set_gpreg(regs) - return x - - def get_stack_arg(self, n): - regs = self.cpu.get_gpreg() - x = self.vm.get_u16(regs["SP"] + 2 * n) - return x - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.PC = self.pc - - -class jitter_mepb(jitter_mepl): - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_mepb(sp), *args, **kwargs) - self.vm.set_big_endian() - self.ir_arch.jit_pc = self.ir_arch.arch.regs.PC diff --git a/miasm2/arch/mep/regs.py b/miasm2/arch/mep/regs.py deleted file mode 100644 index 88248823..00000000 --- a/miasm2/arch/mep/regs.py +++ /dev/null @@ -1,91 +0,0 @@ -# Toshiba MeP-c4 - miasm registers definition -# Guillaume Valadon - -from builtins import range -from miasm2.expression.expression import ExprId -from miasm2.core.cpu import reg_info, gen_reg, gen_regs - -# Used by internal miasm exceptions -exception_flags = ExprId("exception_flags", 32) -exception_flags_init = ExprId("exception_flags_init", 32) - -is_repeat_end = ExprId("is_repeat_end", 32) -is_repeat_end_init = ExprId("is_repeat_end_init", 32) -last_addr = ExprId("last_addr", 32) -last_addr_init = ExprId("last_addr_init", 32) -take_jmp = ExprId("take_jmp", 32) -take_jmp_init = ExprId("take_jmp_init", 32) -in_erepeat = ExprId("in_erepeat", 32) -in_erepeat_init = ExprId("take_jmp_init", 32) - - -# General-purpose registers (R0 to R15) names -gpr_names = ["R%d" % r for r in range(13)] # register names -gpr_names += ["TP", "GP", "SP"] # according to the manual GP does not exist -gpr_exprs, gpr_inits, gpr_infos = gen_regs(gpr_names, globals()) # sz=32 bits (default) - -# Notes: -# - gpr_exprs: register ExprIds on 32 bits. The size is important for -# symbolic execution. -# - gpr_inits: register initial values. 
-# - gpr_infos: object that binds names & ExprIds - -# Define aliases to general-purpose registers -TP = gpr_exprs[13] # Tiny data area Pointer -GP = gpr_exprs[14] # Global Pointer -SP = gpr_exprs[15] # Stack Pointer - - -# Control/special registers name -csr_names = ["PC", "LP", "SAR", "S3", "RPB", "RPE", "RPC", "HI", "LO", - "S9", "S10", "S11", "MB0", "ME0", "MB1", "ME1", "PSW", - "ID", "TMP", "EPC", "EXC", "CFG", "S22", "NPC", "DBG", - "DEPC", "OPT", "RCFG", "CCFG", "S29", "S30", "S31", "S32"] -csr_exprs, csr_inits, csr_infos = gen_regs(csr_names, globals()) - -# Define aliases to control/special registers -PC = csr_exprs[0] # Program Conter. On MeP, it is the special register R0 -LP = csr_exprs[1] # Link Pointer. On MeP, it is the special register R1 -SAR = csr_exprs[2] # Shift Amount Register. On MeP, it is the special register R2 -RPB = csr_exprs[4] # Repeat Begin. On MeP, it is the special register R4 -RPE = csr_exprs[5] # Repeat End. On MeP, it is the special register R5 -RPC = csr_exprs[6] # Repeat Counter. On MeP, it is the special register R6 - - -# Coprocesssor general-purpose registers (C0 to C15) names -# Note: a processor extension allows up to 32 coprocessor general-purpose registers -copro_gpr_names = ["C%d" % r for r in range(32)] # register names -copro_gpr_exprs, copro_gpr_inits, copro_gpr_infos = gen_regs(copro_gpr_names, globals()) - - -# Set registers initial values -all_regs_ids = gpr_exprs + csr_exprs + copro_gpr_exprs + [ - exception_flags, take_jmp, last_addr, is_repeat_end, - in_erepeat -] - -all_regs_ids_init = gpr_inits + csr_inits + copro_gpr_inits + [ - exception_flags_init, take_jmp_init, last_addr_init, is_repeat_end_init, - in_erepeat_init -] - -all_regs_ids_no_alias = all_regs_ids[:] # GV: not understood yet ! 
-all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) - - -float_st0 = ExprId("float_st0", 64) -float_st1 = ExprId("float_st1", 64) -float_st2 = ExprId("float_st2", 64) -float_st3 = ExprId("float_st3", 64) -float_st4 = ExprId("float_st4", 64) -float_st5 = ExprId("float_st5", 64) -float_st6 = ExprId("float_st6", 64) -float_st7 = ExprId("float_st7", 64) - -regs_flt_expr = [float_st0, float_st1, float_st2, float_st3, - float_st4, float_st5, float_st6, float_st7] - - -regs_init = dict() # mandatory name -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/mep/sem.py b/miasm2/arch/mep/sem.py deleted file mode 100644 index c346c535..00000000 --- a/miasm2/arch/mep/sem.py +++ /dev/null @@ -1,1179 +0,0 @@ -# Toshiba MeP-c4 - miasm instructions side effects -# Guillaume Valadon - -from miasm2.core.sembuilder import SemBuilder -from miasm2.ir.ir import IntermediateRepresentation -from miasm2.arch.mep.arch import mn_mep -from miasm2.arch.mep.regs import PC, SP, LP, SAR, TP, RPB, RPE, RPC, EPC, NPC, \ - take_jmp, in_erepeat -from miasm2.arch.mep.regs import EXC, HI, LO, PSW, DEPC, DBG -from miasm2.expression.expression import ExprId, ExprInt, ExprOp, TOK_EQUAL -from miasm2.expression.expression import ExprAssign, ExprCond, ExprMem -from miasm2.core.cpu import sign_ext -from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO - -from miasm2.arch.mep.regs import exception_flags - - -def compute_s_inf(arg1, arg2): - """Signed comparison operator""" - return ((arg1 - arg2) ^ ((arg1 ^ arg2) & ((arg1 - arg2) ^ arg1))).msb() - -def compute_u_inf(x, y): - """Unsigned comparison operator""" - result = (((x - y) ^ ((x ^ y) & ((x - y) ^ x))) ^ x ^ y).msb() - return result - - -# SemBuilder context -ctx = {"PC": PC, "SP": SP, "LP": LP, "SAR": SAR, "TP": TP, - "RPB": RPB, "RPE": RPE, "RPC": RPC, "EPC": EPC, "NPC": NPC, - "EXC": EXC, "HI": HI, "LO": LO, "PSW": PSW, "DEPC": DEPC, "DBG": DBG, - "exception_flags": exception_flags, 
"compute_s_inf": compute_s_inf, - "compute_u_inf": compute_u_inf, "take_jmp": take_jmp, - "in_erepeat": in_erepeat, "EXCEPT_DIV_BY_ZERO": EXCEPT_DIV_BY_ZERO} -sbuild = SemBuilder(ctx) - - -# Functions used to get an instruction IR -manual_functions = dict() - - -@sbuild.parse -def mep_nop(): - """Dummy instruction""" - - -@sbuild.parse -def mep_nop_2_args(arg1, arg2): - """Dummy instruction with two arguments""" - - -### Load/Store instructions - -# Register indirect addressing mode - -@sbuild.parse -def sb(reg_src, deref_dst): - """SB - Store Byte into memory""" - - # MemByte(Rm31..0) <- Rn7..0 - # MemByte((ZeroExt(disp7)+TP)31..0)) <- Rn7..0 - # MemByte((SignExt(disp16)+Rm)31..0) <- Rn7..0 - mem8[deref_dst.ptr] = reg_src[:8] - - -@sbuild.parse -def sh(reg_src, deref_dst): - """SH - Store Halfword into memory""" - - # MemHword(Rm31..1||0) <- Rn15..0 - # MemHword((ZeroExt((disp7)6..1||0)+TP)31..1||0)) <- Rn15..0 - # MemHword((SignExt(disp16)+Rm)31..1||0) <- Rn15..0 - mem16[deref_dst.ptr & i32(0xFFFFFFFE)] = reg_src[:16] - - -@sbuild.parse -def sw(reg_src, deref_dst): - """SW - Store Word into memory""" - - # MemWord(Rm31..2||00) <- Rn31..0 - # MemWord((ZeroExt((disp7)6..2||00)+SP)31..2||00)) <- Rn31..0 - # MemWord((ZeroExt((disp7)6..2||00)+TP)31..2||00)) <- Rn31..0 - # MemWord((SignExt(disp16)+Rm)31..2||00) <- Rn31..0 - # MemWord(ZeroExt((abs24)23..2||00)) - Rn31..0 - - mem32[deref_dst.ptr & i32(0xFFFFFFFC)] = reg_src - -# Without the sembuilder -#def sw(ir, instr, reg_src, deref_reg_or_imm, deref_reg=None): -# """SW - store Word into memory. 
-# -# Note: there are three variants to get the memory address: -# - from a register -# - relatively to SP -# - relatively to TP""" -# -# if isinstance(deref_reg_or_imm, ExprMem): -# # MemWord(Rm31..2||00) <- Rn31..0 -# dst = deref_reg_or_imm -# -# elif isinstance(deref_reg_or_imm, ExprInt) and deref_reg: -# # MemWord((ZeroExt((disp7)6..2||00)+SP)31..2||00)) <- Rn31..0 -# # MemWord((ZeroExt((disp7)6..2||00)+TP)31..2||00)) <- Rn31..0 -# -# imm = deref_reg_or_imm.zeroExtend(32) -# dst = ExprMem(ExprOp("+", imm, deref_reg.arg)) -# -# return [ExprAssign(dst, reg_src)], [] - - -@sbuild.parse -def lb(reg_dst, deref_dst): - """LB - Load Byte from memory""" - - # Rn <- SignExt(MemByte(Rm31..0)) - # Rn <- SignExt(MemByte((ZeroExt(disp7)+TP)31..0)) - # Rn <- SignExt(MemByte((SignExt(disp16)+Rm)31..0) - reg_dst = mem8[deref_dst.ptr].signExtend(32) - - -@sbuild.parse -def lh(reg_dst, deref_dst): - """LH - Load Halfword from memory""" - - # Rn <- SignExt(MemHword(Rm31..1||0)) - # Rn <- SignExt(MemHword((ZeroExt((disp7)6..1||0)+TP)31..1||0) - # Rn <- SignExt(MemHword((SignExt(disp16)+Rm)31..1||0)) - reg_dst = mem16[deref_dst.ptr & i32(0xFFFFFFFE)].signExtend(32) - - -@sbuild.parse -def lw(reg_dst, deref_dst): - """LW - Load Word from memory""" - - # Rn <- MemWord(Rm31..2||00) - # Rn <- MemWord((ZeroExt((disp7)6..2||00)+TP)31..2||00) - # Rn <- MemWord((SignExt(disp16)+Rm)31..2||00) - # Rn <- MemWord(ZeroExt((abs24)23..2||00)) - reg_dst = mem32[deref_dst.ptr & i32(0xFFFFFFFC)] - - -@sbuild.parse -def lbu(reg_dst, deref_dst): - """LBU - Load an unsigned Byte from memory""" - - # Rn <- ZeroExt(MemByte(Rm31..0)) - # Rn <- ZeroExt(MemByte((ZeroExt(disp7)+TP)31..0)) - # Rn <- ZeroExt(MemByte((SignExt(disp16)+Rm)31..0)) - reg_dst = mem8[deref_dst.ptr].zeroExtend(32) - - -@sbuild.parse -def lhu(reg_dst, deref_dst): - """LHU - Load an unsigned Halfword from memory""" - - # Rn <- ZeroExt(MemHword(Rm31..1||0)) - # Rn <- ZeroExt(MemHword((SignExt(disp16)+Rm)31..1||0)) - # Rn <- 
ZeroExt(MemHword((ZeroExt((disp7)6..1||0)+TP)31..1||0)) - reg_dst = mem16[deref_dst.ptr & i32(0xFFFFFFFE)].zeroExtend(32) - - -### Byte/Halfword extension instructions - -@sbuild.parse -def extb(reg): - """EXTB - Sign extend a byte""" - - # Rn <- SignExt(Rn7..0) - reg = reg[:8].signExtend(32) - - -@sbuild.parse -def exth(reg): - """EXTH - Sign extend a word""" - - # Rn <- ZeroExt(Rn15..0) - reg = reg[:16].signExtend(32) - - -@sbuild.parse -def extub(reg): - """EXUTB - Zero extend a byte""" - - # Rn <- SignExt(Rn7..0) - reg = reg[:8].zeroExtend(32) - - -@sbuild.parse -def extuh(reg): - """EXTUH - Zero extend a word""" - - # Rn <- ZeroExt(Rn15..0) - reg = reg[:16].zeroExtend(32) - - -### Shift amount manipulation instructions - -#@sbuild.parse -#def ssarb(deref_reg): - - -### Move instructions - -@sbuild.parse -def mov(reg, value): - """MOV - Copy 'value' to a register. The three alternatives are handled.""" - - # Rn <- Rm - # Rn <- SignExt(imm8) - # Rn <- SignExt(imm16) - reg = value.signExtend(32) - - -@sbuild.parse -def movu(reg, value): - """MOV - Copy 'value' to a register. 
The two alternatives are handled.""" - - # Rn[0-7] <- ZeroExt(imm24) - # Rn <- ZeroExt(imm16) - reg = value.zeroExtend(32) - - -@sbuild.parse -def movh(reg, imm16): - """MOVH - Copy a shifted imm16 to a register.""" - - # Rn <- imm16 <<16 - reg = imm16.zeroExtend(32) << i32(16) - - -### Arithmetic instructions - -def add3(ir, instr, reg_dst, reg_src, reg_or_imm): - """ADD3 - Add two register and store the result to a register, or - add a register and an immediate and store the result to a register""" - - if isinstance(reg_or_imm, ExprId): - # Rl <- Rn + Rm - result = ExprOp("+", reg_src, reg_or_imm) - else: - # Rn <- Rm + SignExt(imm16) - value = int(reg_or_imm.arg) - result = ExprOp("+", reg_src, ExprInt(value, 32)) - - return [ExprAssign(reg_dst, result)], [] - -manual_functions["add3"] = add3 - - -@sbuild.parse -def add(arg1, arg2): - """ADD - Add a register and an immediate.""" - - # Rn <- Rn + SignExt(imm6) - arg1 = arg1 + arg2.signExtend(32) - - -@sbuild.parse -def advck3(r0, rn, rm): - """ADVCK3 - Check addition overflow.""" - - # if(Overflow(Rn+Rm)) R0<-1 else R0<-0 (Signed) - r0 = i32(1) if compute_u_inf(i64(0xFFFFFFFF), rn.zeroExtend(64) + rm.zeroExtend(64)) else i32(0) - - -@sbuild.parse -def sub(reg1, reg2): - """SUB - Subtract one register to another.""" - - # Rn <- Rn - Rm - reg1 = reg1 - reg2 - - -def sbvck3(ir, instr, r0, rn, rm): - """SBVCK3 - Check subtraction overflow""" - - # if(Overflow(Rn-Rm)) R0<-1 else R0<-0 (Signed) - - # Subtract registers - reg_sub = ExprOp("+", rn, rm) - - # Get the register storing the highest value - max_rn_rm = ExprCond(ExprOp(">", rn, rm), rn, rm) - - # Check for an overflow - overflow_test = ExprOp(">", reg_sub, max_rn_rm) - - # Return the result - condition = ExprCond(overflow_test, ExprInt(1, 32), ExprInt(0, 32)) - return [ExprAssign(r0, condition)], [] - -manual_functions["sbvck3"] = sbvck3 - - -@sbuild.parse -def neg(reg1, reg2): - """NEG - Negate one register.""" - - # Rn <- - Rm - reg1 = - reg2 - - 
-@sbuild.parse -def slt3(r0, rn, rm_or_imm5): - """SLT3 - Set on less than (signed).""" - - # if (Rn> i32(31) - - # rn is positive and rm negative, return 1 - r0_mixed = i32(1) if sign_rn else i32(0) - - # rn & rm are both positives, test and return 1 or 0 - r0_pos = (i32(1) if "<"(rn, rm_ext) else i32(0)) if are_both_pos else r0_mixed - - # rn & rm are both negatives, test and return 0 or 1 - r0 = (i32(0) if "<"(rn, rm_ext) else i32(1)) if are_both_neg else r0_pos - - -@sbuild.parse -def sltu3(r0, rn, rm_or_imm5): - """SLTU3 - Set on less than (unsigned).""" - - # if (Rn> Rm4..0 - # Rn <- (Signed) Rn >> imm5 - - # Unsigned result - shift_u = rn >> rm_or_imm5 - - # Signed result - shift_mask = i32(32) - rm_or_imm5 - mask = (i32(0xFFFFFFFF) >> shift_mask) << shift_mask - shift_s = shift_u | mask - - rn = shift_s if rn.msb() else shift_u - - -@sbuild.parse -def srl(rn, rm_or_imm5): - """SRL - Shift Right unsigned.""" - - # Rn <- (Unsigned) Rn >> Rm4..0 - # Rn <- (Unsigned) Rn >> imm5 - rn = rn >> rm_or_imm5 - - -@sbuild.parse -def sll(rn, rm_or_imm5): - """SLL - Shift Left unsigned.""" - - # Rn <- (Unsigned) Rn >> Rm4..0 - # Rn <- (Unsigned) Rn << imm5 - rn = rn << rm_or_imm5 - - -@sbuild.parse -def sll3(r0, rn, imm5): - """SLL3 - Shift Left unsigned, with 3 arguments.""" - - # R0 <- (Unsigned) Rn << imm5 - r0 = rn << imm5 - - -@sbuild.parse -def fsft(rn, rm): - "FSFT - Funnel shift.""" - - # Rn <- ((Rn||Rm)<> (i32(32) - sar) # Shift Rm in the reverse order - rn = tmp_rn | tmp_rm # Concatenate registers - - -## Branch/Jump instructions - -@sbuild.parse -def bra(disp12): - """BRA - Branch to an address.""" - - # PC <- PC + SignExt((disp12)11..1||0) - dst = disp12 - PC = dst - take_jmp = ExprInt(1, 32) - ir.IRDst = dst - - -@sbuild.parse -def beqz(reg_test, disp8): - """BEQZ - Branch if the register stores zero.""" - - # if(Rn==0) PC <- PC +SignExt((disp8)7..1||0) - dst = ExprLoc(ir.get_next_break_loc_key(instr), 32) if reg_test else disp8 - take_jmp = ExprInt(0, 32) 
if reg_test else ExprInt(1, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def bnez(reg_test, disp8): - """BNEZ - Branch if the register does not store zero.""" - - # if(Rn!=0) PC <- PC + SignExt((disp8)7..1||0) - dst = disp8 if reg_test else ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) if reg_test else ExprInt(0, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def beqi(reg_test, imm4, disp16): - """BEQI - Branch if the register stores imm4.""" - - # if(Rn==ZeroExt(imm4)) PC <- PC +SignExt((disp17)16..1||0) - dst = ExprLoc(ir.get_next_break_loc_key(instr), 32) if (reg_test - imm4) else disp16 - take_jmp = ExprInt(0, 32) if (reg_test - imm4) else ExprInt(1, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def bnei(reg_test, imm4, disp16): - """BNEI - Branch if the register does not store imm4.""" - - # if(Rn!=ZeroExt(imm4)) PC <- PC+SignExt((disp17)16..1||0) - dst = disp16 if (reg_test - imm4) else ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) if (reg_test - imm4) else ExprInt(0, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def blti(reg_test, imm4, disp16): - """BLTI - Branch if the register is lower than imm4.""" - - # if(Rn< ZeroExt(imm4)) PC <- PC +SignExt((disp17)16..1||0) - (Signed comparison) - dst = disp16 if compute_s_inf(reg_test, imm4) else ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) if compute_s_inf(reg_test, imm4) else ExprInt(0, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def bgei(reg_test, imm4, disp16): - """BGEI - Branch if the register is greater or equal to imm4.""" - - # if(Rn>=ZeroExt(imm4)) PC <- PC +SignExt((disp17)16..1||0) - (Signed comparison) - cond = i32(1) if ExprOp(TOK_EQUAL, reg_test, imm4) else compute_s_inf(imm4, reg_test).zeroExtend(32) - dst = disp16 if cond else ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) if cond else ExprInt(0, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def 
beq(rn, rm, disp16): - """BEQ - Branch if the two registers are equal.""" - - # if(Rn==Rm) PC <- PC +SignExt((disp17)16..1||0) - dst = ExprLoc(ir.get_next_break_loc_key(instr), 32) if (rn - rm) else disp16 - take_jmp = ExprInt(0, 32) if (rn - rm) else ExprInt(1, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def bne(rn, rm, disp16): - """BNE - Branch if the two registers are not equal.""" - - # if(Rn!=Rm) PC <- PC +SignExt((disp17)16..1||0) - dst = disp16 if (rn - rm) else ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) if (rn - rm) else ExprInt(0, 32) - PC = dst - ir.IRDst = dst - - -@sbuild.parse -def bsr(disp): - """BSR - Branch to an address, and store the return address.""" - - # 16-bit variant: LP <- PC + 2; PC <- PC +SignExt((disp12)11..1||0) - # 32-bit variant: LP <- PC + 4; PC <- PC +SignExt((disp24)23..1||0) - - # Set LP - LP = ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) - - # Set PC according to the immediate size - dst = disp - PC = dst - ir.IRDst = dst - - -def jmp(ir, instr, reg_or_imm): - """JMP - Change PC to a register content or an immediate. - Note: the behavior in VLIW mode is not implemented""" - - take_jmp = ExprInt(1, 32) - - if isinstance(reg_or_imm, ExprId): - # PC <- Rm31..1||0 - new_PC = ExprAssign(PC, reg_or_imm) - else: - # PC <- PC31..28||0000||(target24)23..1||0 - new_PC = ExprAssign(PC, ExprOp("+", ExprOp("&", PC, ExprInt(0xF0000000, 32)), reg_or_imm)) - - return [new_PC, ExprAssign(ir.IRDst, new_PC)], [] - -manual_functions["jmp"] = jmp - - -@sbuild.parse -def jsr(reg): - """JSR - Jump to the register, and store the return address.""" - - # LP <- PC + 2; PC <- Rm31..1||0 - LP = ExprLoc(ir.get_next_break_loc_key(instr), 32) - take_jmp = ExprInt(1, 32) - PC = reg - ir.IRDst = reg - - -@sbuild.parse -def ret(): - """RET - Return from a function call. 
- Note: the behavior in VLIW mode is not implemented""" - - # PC <- LP31..1||0 - dst = LP - PC = dst - ir.IRDst = dst - - -# Repeat instructions - -@sbuild.parse -def repeat(rn, disp17): - """REPEAT - This instruction repeats an instruction block. It sets the RPB, - RPE and RPC control registers.""" - - # RPB <- pc+4 // Repeat Begin - RPB = PC + i32(4) - # RPE <- pc+SignExt((disp17)16..1||0)) // Repeat End - RPE = PC + i32(disp17.arg & 0xFFFFFFFE) - # RPC <- Rn - RPC = rn - in_erepeat = ExprInt(0, 32) - - -@sbuild.parse -def erepeat(disp17): - """EREPEAT - This instruction repeats an instruction block. It sets the RPB - and RPE control registers. To distinguish from the repeat instruction, - the least significant bit in the RPE register (ELR) is set to 1.""" - - # RPB <- pc+4 // Repeat Begin - RPB = PC + i32(4) - # RPE <- pc+SignExt((disp17)16..1||1)) (EREPEAT) - RPE = PC + i32(disp17.arg + 1) - # RPC <- undefined - in_erepeat = ExprInt(1, 32) - - -## Control Instructions - -@sbuild.parse -def stc(reg, control_reg): - """STC - Copy a general-purpose register into a control register.""" - - # ControlReg(imm5) <- Rn - control_reg = reg - - -@sbuild.parse -def ldc(reg, control_reg): - """LDC - Copy a control register into a general-purpose register.""" - - # Rn <- ControlReg(imm5) - reg = control_reg - - -@sbuild.parse -def di(): - """DI - Disable Interrupt""" - - # PSW.IEC<-0 - PSW = PSW & i32(0xFFFFFFFE) # PSW.IEC: bit 0 - - -@sbuild.parse -def ei(): - """EI - Enable Interrupt""" - - # PSW.IEC<-1 - PSW = PSW ^ i32(0b1) # PSW.IEC: bit 0 - - -@sbuild.parse -def reti(): - """RETI - Return from the exception/interrupt handler. 
- Note: the behavior in VLIW mode is not implemented""" - - #if (PSW.NMI==1) { - # PC <- NPC31..1 || 0; PSW.NMI<-0; - #} else { - # PC <- EPC31..1 || 0; - # PSW.UMC <- PSW.UMP; PSW.IEC <- PSW.IEP - #} - - # PSW.NMI == bit 9 - NMI_mask = i32(1 << 9) - - # PSW.UMP == bit 3 - # PSW.IEP == bit 1 - UMP_IEP_mask = i32((1 << 3) ^ (1 << 1)) - - # PSW.UMC == bit 2 - # PSW.IEC == bit 0 - UMC_IEC_mask = (PSW & UMP_IEP_mask) >> i32(1) - - # Get PSW.NMI - PSW_NMI = (PSW & NMI_mask) >> i32(9) - - # Set PC - dst = NPC & i32(0xFFFFFFFE) if PSW_NMI else EPC & i32(0xFFFFFFFE) - PC = dst - - # Set flags - PSW = PSW ^ NMI_mask if PSW_NMI else PSW ^ UMC_IEC_mask - - ir.IRDst = dst - - -@sbuild.parse -def swi(imm2): - """SWI - Software Interrupt""" - - # if(imm2==0) EXC.SIP0 <- 1 - # else if (imm2==1) EXC.SIP1 <- 1 - # else if (imm2==2) EXC.SIP2 <- 1 - # else if (imm2==3) EXC.SIP3 <- 1 - - # EXC.SIP0 == bit 4 - # EXC.SIP1 == bit 5 - # EXC.SIP2 == bit 6 - # EXC.SIP3 == bit 7 - - EXC = EXC ^ (i32(1) << (i32(4) + imm2)) - - -# Note: the following instructions can't be implemented -manual_functions["halt"] = mep_nop -manual_functions["sleep"] = mep_nop -manual_functions["break"] = mep_nop -manual_functions["syncm"] = mep_nop -manual_functions["stcb"] = mep_nop_2_args -manual_functions["ldcb"] = mep_nop_2_args - - -### Bit manipulation instruction option - -@sbuild.parse -def bsetm(rm_deref, imm3): - """BSETM - Bit Set Memory""" - - # MemByte(Rm) <- MemByte(Rm) or (1< reversed_rm = 0b1110 - - # Test bits individually - b3 = (reversed_rm & i32(2**3)) >> i32(3) if reversed_rm else i32(0) - -> b3 = (0b1110 & 0b1000 >> 3) = 1 - - b2 = (reversed_rm & i32(2**2)) >> i32(2) if b3 else i32(0) - -> b2 = (0b1110 & 0b0100 >> 2) = 1 - - b1 = (reversed_rm & i32(2**1)) >> i32(1) if b2 else i32(0) - -> b1 = (0b1110 & 0b0010 >> 1) = 1 - - b0 = (reversed_rm & i32(2**0)) >> i32(0) if b1 else i32(0) - -> b0 = (0b1110 & 0b0001 >> 0) = 0 - - # Sum all partial results - rn = b3 + b2 + b1 + b0 - -> rn = 1 + 1 + 1 + 
0 = 3 - """ - - # Rn <- LeadingZeroDetect(Rm) - - # Invert the value - reversed_rm = ~rm - - # Test bits individually - b31 = (reversed_rm & i32(2**31)) >> i32(31) if reversed_rm else i32(0) - b30 = (reversed_rm & i32(2**30)) >> i32(30) if b31 else i32(0) - b29 = (reversed_rm & i32(2**29)) >> i32(29) if b30 else i32(0) - b28 = (reversed_rm & i32(2**28)) >> i32(28) if b29 else i32(0) - b27 = (reversed_rm & i32(2**27)) >> i32(27) if b28 else i32(0) - b26 = (reversed_rm & i32(2**26)) >> i32(26) if b27 else i32(0) - b25 = (reversed_rm & i32(2**25)) >> i32(25) if b26 else i32(0) - b24 = (reversed_rm & i32(2**24)) >> i32(24) if b25 else i32(0) - b23 = (reversed_rm & i32(2**23)) >> i32(23) if b24 else i32(0) - b22 = (reversed_rm & i32(2**22)) >> i32(22) if b23 else i32(0) - b21 = (reversed_rm & i32(2**21)) >> i32(21) if b22 else i32(0) - b20 = (reversed_rm & i32(2**20)) >> i32(20) if b21 else i32(0) - b19 = (reversed_rm & i32(2**19)) >> i32(19) if b20 else i32(0) - b18 = (reversed_rm & i32(2**18)) >> i32(18) if b19 else i32(0) - b17 = (reversed_rm & i32(2**17)) >> i32(17) if b18 else i32(0) - b16 = (reversed_rm & i32(2**16)) >> i32(16) if b17 else i32(0) - b15 = (reversed_rm & i32(2**15)) >> i32(15) if b16 else i32(0) - b14 = (reversed_rm & i32(2**14)) >> i32(14) if b15 else i32(0) - b13 = (reversed_rm & i32(2**13)) >> i32(13) if b14 else i32(0) - b12 = (reversed_rm & i32(2**12)) >> i32(12) if b13 else i32(0) - b11 = (reversed_rm & i32(2**11)) >> i32(11) if b12 else i32(0) - b10 = (reversed_rm & i32(2**10)) >> i32(10) if b11 else i32(0) - b09 = (reversed_rm & i32(2 ** 9)) >> i32(9) if b10 else i32(0) - b08 = (reversed_rm & i32(2 ** 8)) >> i32(8) if b09 else i32(0) - b07 = (reversed_rm & i32(2 ** 7)) >> i32(7) if b08 else i32(0) - b06 = (reversed_rm & i32(2 ** 6)) >> i32(6) if b07 else i32(0) - b05 = (reversed_rm & i32(2 ** 5)) >> i32(5) if b06 else i32(0) - b04 = (reversed_rm & i32(2 ** 4)) >> i32(4) if b05 else i32(0) - b03 = (reversed_rm & i32(2 ** 3)) >> i32(3) if b04 
else i32(0) - b02 = (reversed_rm & i32(2 ** 2)) >> i32(2) if b03 else i32(0) - b01 = (reversed_rm & i32(2 ** 1)) >> i32(1) if b02 else i32(0) - b00 = (reversed_rm & i32(2 ** 0)) >> i32(0) if b01 else i32(0) - - # Sum all partial results - rn = b31 + b30 + b29 + b28 + b27 + b26 + b25 + b24 + b23 + b22 + b21 + b20 \ - + b19 + b18 + b17 + b16 + b15 + b14 + b13 + b12 + b11 + b10 + b09 + b08 \ - + b07 + b06 + b05 + b04 + b03 + b02 + b01 + b00 - - -### Coprocessor option - -# Note: these instructions are implemented when needed - -# SWCP - Store Word to memory from a coprocessor register -# MemWord(Rm31..2||00) <- CRn 31..0 -manual_functions["swcp"] = sw - - -# LWCP - Load Word from memory to a coprocessor register -# CRn <- MemWord(Rm31..2||00) -manual_functions["lwcp"] = lw - - -@sbuild.parse -def smcp(reg_src, deref_dst): - """SMCP - Store Word to memory from a coprocessor register""" - - # MemDword(Rm31..3||000) <- CRn - mem32[deref_dst.ptr & i32(0xFFFFFFF8)] = reg_src - - -@sbuild.parse -def lmcp(reg_dst, deref_src): - """LMCP - Load Word from memory to a coprocessor register""" - - # CRn <- MemDword(Rm31..3||000) - reg_dst = mem32[deref_src.ptr & i32(0xFFFFFFF8)] - - -@sbuild.parse -def swcpi(reg_src, deref_dst): - """SWCPI - Store Word to memory, and increment the address""" - - # MemWord(Rm31..2||00) <- CRn 31..0; Rm<-Rm+4 - mem32[deref_dst.ptr & i32(0xFFFFFFFC)] = reg_src - deref_dst.ptr = deref_dst.ptr + i32(4) - - -@sbuild.parse -def lwcpi(reg_dst, deref_src): - """LWCPI - Load Word from memory, and increment the address""" - - # CRn <- MemWord(Rm31..2||00); Rm<-Rm+4 - reg_dst = mem32[deref_src.ptr & i32(0xFFFFFFFC)] - deref_src.ptr = deref_src.ptr + i32(4) - - -@sbuild.parse -def smcpi(reg_src, deref_dst): - """SMCPI - Store Word to memory, and increment the address""" - - # MemDword(Rm31..3||000) <- CRn; Rm<-Rm+8 - mem32[deref_dst.ptr & i32(0xFFFFFFF8)] = reg_src - deref_dst.ptr = deref_dst.ptr + i32(8) - - -@sbuild.parse -def lmcpi(reg_dst, deref_src): - 
"""LMCPI - Load Word from memory, and increment the address""" - - # CRn <- MemDword(Rm31..3||000); Rm<-Rm+8 - reg_dst = mem32[deref_src.ptr & i32(0xFFFFFFFC)] - deref_src.ptr = deref_src.ptr + i32(8) - - -### IR MeP definitions - -def get_mnemo_expr(ir, instr, *args): - """Simplify getting the IR from a miasm instruction.""" - - if instr.name.lower() in sbuild.functions: - mnemo_func = sbuild.functions[instr.name.lower()] - else: - mnemo_func = manual_functions[instr.name.lower()] - - ir, extra_ir = mnemo_func(ir, instr, *args) - return ir, extra_ir - - -class ir_mepb(IntermediateRepresentation): - """Toshiba MeP miasm IR - Big Endian - - It transforms an instructon into an IR. - """ - - addrsize = 32 - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_mep, "b", loc_db) - self.pc = mn_mep.getpc() - self.sp = mn_mep.getsp() - self.IRDst = ExprId("IRDst", 32) - - def get_ir(self, instr): - """Get the IR from a miasm instruction.""" - - instr_ir, extra_ir = get_mnemo_expr(self, instr, *instr.args) - - return instr_ir, extra_ir - - def get_next_break_loc_key(self, instr): - """Returns a new label that identifies where the instruction is going. 
- - Note: it eases linking IR blocs - """ - - l = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) - return l - - -class ir_mepl(ir_mepb): - """Toshiba MeP miasm IR - Little Endian""" - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_mep, "l", loc_db) - self.pc = mn_mep.getpc() - self.sp = mn_mep.getsp() - self.IRDst = ExprId("IRDst", 32) diff --git a/miasm2/arch/mips32/__init__.py b/miasm2/arch/mips32/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py deleted file mode 100644 index 6046b12c..00000000 --- a/miasm2/arch/mips32/arch.py +++ /dev/null @@ -1,755 +0,0 @@ -#-*- coding:utf-8 -*- - -import logging -from collections import defaultdict - -from pyparsing import Literal, Optional - -from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp, ExprLoc -from miasm2.core.bin_stream import bin_stream -import miasm2.arch.mips32.regs as regs -import miasm2.core.cpu as cpu - -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp - -log = logging.getLogger("mips32dis") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.DEBUG) - - -gpregs = cpu.reg_info(regs.regs32_str, regs.regs32_expr) - - -LPARENTHESIS = Literal("(") -RPARENTHESIS = Literal(")") - -def cb_deref(tokens): - if len(tokens) != 4: - raise NotImplementedError("TODO") - return AstMem(tokens[2] + tokens[0], 32) - -def cb_deref_nooff(tokens): - if len(tokens) != 3: - raise NotImplementedError("TODO") - return AstMem(tokens[1], 32) - -base_expr = cpu.base_expr - -deref_off = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref) -deref_nooff = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_nooff) -deref = deref_off | deref_nooff - - -class additional_info(object): - def 
__init__(self): - self.except_on_instr = False - -br_0 = ['B', 'J', 'JR', 'BAL', 'JAL', 'JALR'] -br_1 = ['BGEZ', 'BLTZ', 'BGTZ', 'BLEZ', 'BC1T', 'BC1F'] -br_2 = ['BEQ', 'BEQL', 'BNE'] - - -class instruction_mips32(cpu.instruction): - __slots__ = [] - delayslot = 1 - - def __init__(self, *args, **kargs): - super(instruction_mips32, self).__init__(*args, **kargs) - - - @staticmethod - def arg2str(expr, index=None, loc_db=None): - if expr.is_id() or expr.is_int(): - return str(expr) - elif expr.is_loc(): - if loc_db is not None: - return loc_db.pretty_str(expr.loc_key) - else: - return str(expr) - assert(isinstance(expr, ExprMem)) - arg = expr.ptr - if isinstance(arg, ExprId): - return "(%s)"%arg - assert(len(arg.args) == 2 and arg.op == '+') - return "%s(%s)"%(arg.args[1], arg.args[0]) - - def dstflow(self): - if self.name == 'BREAK': - return False - if self.name in br_0 + br_1 + br_2: - return True - return False - - def get_dst_num(self): - if self.name in br_0: - i = 0 - elif self.name in br_1: - i = 1 - elif self.name in br_2: - i = 2 - else: - raise NotImplementedError("TODO %s"%self) - return i - - def dstflow2label(self, loc_db): - if self.name in ["J", 'JAL']: - expr = self.args[0].arg - addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr - loc_key = loc_db.get_or_create_offset_location(addr) - self.args[0] = ExprLoc(loc_key, expr.size) - return - - ndx = self.get_dst_num() - expr = self.args[ndx] - - if not isinstance(expr, ExprInt): - return - addr = expr.arg + self.offset - loc_key = loc_db.get_or_create_offset_location(addr) - self.args[ndx] = ExprLoc(loc_key, expr.size) - - def breakflow(self): - if self.name == 'BREAK': - return False - if self.name in br_0 + br_1 + br_2: - return True - return False - - def is_subcall(self): - if self.name in ['JAL', 'JALR', 'BAL']: - return True - return False - - def getdstflow(self, loc_db): - if self.name in br_0: - return [self.args[0]] - elif self.name in br_1: - return [self.args[1]] - elif self.name in 
br_2: - return [self.args[2]] - elif self.name in ['JAL', 'JALR', 'JR', 'J']: - return [self.args[0]] - else: - raise NotImplementedError("fix mnemo %s"%self.name) - - def splitflow(self): - if self.name in ["B", 'JR', 'J']: - return False - if self.name in br_0: - return True - if self.name in br_1: - return True - if self.name in br_2: - return True - if self.name in ['JAL', 'JALR']: - return True - return False - - def get_symbol_size(self, symbol, loc_db): - return 32 - - def fixDstOffset(self): - ndx = self.get_dst_num() - e = self.args[ndx] - if self.offset is None: - raise ValueError('symbol not resolved %s' % self.l) - if not isinstance(e, ExprInt): - return - off = e.arg - self.offset - if int(off % 4): - raise ValueError('strange offset! %r' % off) - self.args[ndx] = ExprInt(off, 32) - - def get_args_expr(self): - args = [a for a in self.args] - return args - - -class mn_mips32(cpu.cls_mn): - delayslot = 1 - name = "mips32" - regs = regs - bintree = {} - num = 0 - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - pc = {'l':regs.PC, 'b':regs.PC} - sp = {'l':regs.SP, 'b':regs.SP} - instruction = instruction_mips32 - max_instruction_len = 4 - - @classmethod - def getpc(cls, attrib = None): - return regs.PC - - @classmethod - def getsp(cls, attrib = None): - return regs.SP - - def additional_info(self): - info = additional_info() - return info - - @classmethod - def getbits(cls, bitstream, attrib, start, n): - if not n: - return 0 - o = 0 - while n: - offset = start // 8 - n_offset = cls.endian_offset(attrib, offset) - c = cls.getbytes(bitstream, n_offset, 1) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def endian_offset(cls, attrib, offset): - if attrib == "l": - return (offset & ~3) + 3 - offset % 4 - elif attrib == "b": - return offset - else: - raise 
NotImplementedError('bad attrib') - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l == 32, "len %r" % l - - @classmethod - def getmn(cls, name): - return name.upper() - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def value(self, mode): - v = super(mn_mips32, self).value(mode) - if mode == 'l': - return [x[::-1] for x in v] - elif mode == 'b': - return [x for x in v] - else: - raise NotImplementedError('bad attrib') - - - -def mips32op(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_mips32,), dct) - #type(name, (mn_mips32b,), dct) - -class mips32_arg(cpu.m_arg): - def asm_ast_to_expr(self, arg, loc_db): - if isinstance(arg, AstId): - if isinstance(arg.name, ExprId): - return arg.name - if arg.name in gpregs.str: - return None - loc_key = loc_db.get_or_create_name_location(arg.name.encode()) - return ExprLoc(loc_key, 32) - if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] - if None in args: - return None - return ExprOp(arg.op, *args) - if isinstance(arg, AstInt): - return ExprInt(arg.value, 32) - if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, loc_db) - if ptr is None: - return None - return ExprMem(ptr, arg.size) - return None - - -class mips32_reg(cpu.reg_noarg, mips32_arg): - pass - -class mips32_gpreg(mips32_reg): - reg_info = gpregs - parser = reg_info.parser - -class mips32_fltpreg(mips32_reg): - reg_info = regs.fltregs - parser = reg_info.parser - - -class mips32_fccreg(mips32_reg): - reg_info = regs.fccregs - parser = reg_info.parser - -class mips32_imm(cpu.imm_noarg): - parser = base_expr - - -class mips32_s16imm_noarg(mips32_imm): - def decode(self, v): - v = v & self.lmask - v = cpu.sign_ext(v, 16, 32) - self.expr = ExprInt(v, 32) - return True - - def 
encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if v & 0x80000000: - nv = v & ((1 << 16) - 1) - assert( v == cpu.sign_ext(nv, 16, 32)) - v = nv - self.value = v - return True - -class mips32_soff_noarg(mips32_imm): - def decode(self, v): - v = v & self.lmask - v <<= 2 - v = cpu.sign_ext(v, 16+2, 32) - # Add pipeline offset - self.expr = ExprInt(v + 4, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - # Remove pipeline offset - v = int(self.expr.arg - 4) - if v & 0x80000000: - nv = v & ((1 << 16+2) - 1) - assert( v == cpu.sign_ext(nv, 16+2, 32)) - v = nv - self.value = v>>2 - return True - - -class mips32_s16imm(mips32_s16imm_noarg, mips32_arg): - pass - -class mips32_soff(mips32_soff_noarg, mips32_arg): - pass - - -class mips32_instr_index(mips32_imm, mips32_arg): - def decode(self, v): - v = v & self.lmask - self.expr = ExprInt(v<<2, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if v & 3: - return False - v>>=2 - if v > (1<>=2 - self.parent.cpr0.value = index - return True - -rs = cpu.bs(l=5, cls=(mips32_gpreg,)) -rt = cpu.bs(l=5, cls=(mips32_gpreg,)) -rd = cpu.bs(l=5, cls=(mips32_gpreg,)) -ft = cpu.bs(l=5, cls=(mips32_fltpreg,)) -fs = cpu.bs(l=5, cls=(mips32_fltpreg,)) -fd = cpu.bs(l=5, cls=(mips32_fltpreg,)) - -s16imm = cpu.bs(l=16, cls=(mips32_s16imm,)) -u16imm = cpu.bs(l=16, cls=(mips32_u16imm,)) -sa = cpu.bs(l=5, cls=(mips32_u16imm,)) -base = cpu.bs(l=5, cls=(mips32_dreg_imm,)) -soff = cpu.bs(l=16, cls=(mips32_soff,)) - -cpr0 = cpu.bs(l=5, cls=(mips32_imm,), fname="cpr0") -cpr = cpu.bs(l=3, cls=(mips32_cpr,)) - - -s16imm_noarg = cpu.bs(l=16, cls=(mips32_s16imm_noarg,), fname="imm", - order=-1) - -hint = cpu.bs(l=5, default_val="00000") -fcc = cpu.bs(l=3, cls=(mips32_fccreg,)) - -sel = cpu.bs(l=3, cls=(mips32_u16imm,)) - -code = cpu.bs(l=20, cls=(mips32_u16imm,)) - -esize = cpu.bs(l=5, 
cls=(mips32_esize,)) -epos = cpu.bs(l=5, cls=(mips32_u16imm,), fname="epos", - order=-1) - -eposh = cpu.bs(l=5, cls=(mips32_eposh,)) - -instr_index = cpu.bs(l=26, cls=(mips32_instr_index,)) -bs_fmt = cpu.bs_mod_name(l=5, fname='fmt', mn_mod={0x10: '.S', 0x11: '.D', - 0x14: '.W', 0x15: '.L', - 0x16: '.PS'}) -class bs_cond(cpu.bs_mod_name): - mn_mod = ['.F', '.UN', '.EQ', '.UEQ', - '.OLT', '.ULT', '.OLE', '.ULE', - '.SF', '.NGLE', '.SEQ', '.NGL', - '.LT', '.NGE', '.LE', '.NGT' - ] - - def modname(self, name, f_i): - raise NotImplementedError("Not implemented") - - -class bs_cond_name(cpu.bs_divert): - prio = 2 - mn_mod = [['.F', '.UN', '.EQ', '.UEQ', - '.OLT', '.ULT', '.OLE', '.ULE'], - ['.SF', '.NGLE', '.SEQ', '.NGL', - '.LT', '.NGE', '.LE', '.NGT'] - ] - - def divert(self, index, candidates): - out = [] - for candidate in candidates: - cls, name, bases, dct, fields = candidate - cond1 = [f for f in fields if f.fname == "cond1"] - assert(len(cond1) == 1) - cond1 = cond1.pop() - mm = self.mn_mod[cond1.value] - for value, new_name in enumerate(mm): - nfields = fields[:] - s = cpu.int2bin(value, self.args['l']) - args = dict(self.args) - args.update({'strbits': s}) - f = cpu.bs(**args) - nfields[index] = f - ndct = dict(dct) - ndct['name'] = name + new_name - out.append((cls, new_name, bases, ndct, nfields)) - return out - - - -class bs_cond_mod(cpu.bs_mod_name): - prio = 1 - -bs_cond = bs_cond_mod(l=4, - mn_mod = ['.F', '.UN', '.EQ', '.UEQ', - '.OLT', '.ULT', '.OLE', '.ULE', - '.SF', '.NGLE', '.SEQ', '.NGL', - '.LT', '.NGE', '.LE', '.NGT']) - - - -bs_arith = cpu.bs_name(l=6, name={'ADDU':0b100001, - 'SUBU':0b100011, - 'OR':0b100101, - 'AND':0b100100, - 'SLTU':0b101011, - 'XOR':0b100110, - 'SLT':0b101010, - 'SUBU':0b100011, - 'NOR':0b100111, - 'MOVN':0b001011, - 'MOVZ':0b001010, - }) - -bs_shift = cpu.bs_name(l=6, name={'SLL':0b000000, - 'SRL':0b000010, - 'SRA':0b000011, - }) - -bs_shift1 = cpu.bs_name(l=6, name={'SLLV':0b000100, - 'SRLV':0b000110, - 'SRAV':0b000111, - 
}) - - -bs_arithfmt = cpu.bs_name(l=6, name={'ADD':0b000000, - 'SUB':0b000001, - 'MUL':0b000010, - 'DIV':0b000011, - }) - -bs_s_l = cpu.bs_name(l=6, name = {"SW": 0b101011, - "SH": 0b101001, - "SB": 0b101000, - "LW": 0b100011, - "LH": 0b100001, - "LB": 0b100000, - "LHU": 0b100101, - "LBU": 0b100100, - "LWL": 0b100010, - "LWR": 0b100110, - - "SWL": 0b101010, - "SWR": 0b101110, - }) - - -bs_oax = cpu.bs_name(l=6, name = {"ORI": 0b001101, - "ANDI": 0b001100, - "XORI": 0b001110, - }) - -bs_bcc = cpu.bs_name(l=5, name = {"BGEZ": 0b00001, - "BGEZL": 0b00011, - "BGEZAL": 0b10001, - "BGEZALL": 0b10011, - "BLTZ": 0b00000, - "BLTZL": 0b00010, - "BLTZAL": 0b10000, - "BLTZALL": 0b10010, - }) - - -bs_code = cpu.bs(l=10) - - -mips32op("addi", [cpu.bs('001000'), rs, rt, s16imm], [rt, rs, s16imm]) -mips32op("addiu", [cpu.bs('001001'), rs, rt, s16imm], [rt, rs, s16imm]) -mips32op("nop", [cpu.bs('0'*32)], alias = True) -mips32op("lui", [cpu.bs('001111'), cpu.bs('00000'), rt, u16imm]) -mips32op("oax", [bs_oax, rs, rt, u16imm], [rt, rs, u16imm]) - -mips32op("arith", [cpu.bs('000000'), rs, rt, rd, cpu.bs('00000'), bs_arith], - [rd, rs, rt]) -mips32op("shift1", [cpu.bs('000000'), rs, rt, rd, cpu.bs('00000'), bs_shift1], - [rd, rt, rs]) - -mips32op("shift", [cpu.bs('000000'), cpu.bs('00000'), rt, rd, sa, bs_shift], - [rd, rt, sa]) - -mips32op("rotr", [cpu.bs('000000'), cpu.bs('00001'), rt, rd, sa, - cpu.bs('000010')], [rd, rt, sa]) - -mips32op("mul", [cpu.bs('011100'), rs, rt, rd, cpu.bs('00000'), - cpu.bs('000010')], [rd, rs, rt]) -mips32op("div", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), - cpu.bs('011010')]) - -mips32op("s_l", [bs_s_l, base, rt, s16imm_noarg], [rt, base]) - -#mips32op("mfc0", [bs('010000'), bs('00000'), rt, rd, bs('00000000'), sel]) -mips32op("mfc0", [cpu.bs('010000'), cpu.bs('00000'), rt, cpr0, - cpu.bs('00000000'), cpr]) -mips32op("mfc1", [cpu.bs('010001'), cpu.bs('00000'), rt, fs, - cpu.bs('00000000000')]) - -mips32op("ldc1", [cpu.bs('110101'), base, ft, 
s16imm_noarg], [ft, base]) - -mips32op("mov", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, - cpu.bs('000110')], [fd, fs]) - -mips32op("add", [cpu.bs('010001'), bs_fmt, ft, fs, fd, bs_arithfmt], - [fd, fs, ft]) - -mips32op("divu", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), - cpu.bs('011011')]) -mips32op("mult", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), - cpu.bs('011000')]) -mips32op("multu", [cpu.bs('000000'), rs, rt, cpu.bs('0000000000'), - cpu.bs('011001')]) -mips32op("mflo", [cpu.bs('000000'), cpu.bs('0000000000'), rd, - cpu.bs('00000'), cpu.bs('010010')]) -mips32op("mfhi", [cpu.bs('000000'), cpu.bs('0000000000'), rd, - cpu.bs('00000'), cpu.bs('010000')]) - - -mips32op("b", [cpu.bs('000100'), cpu.bs('00000'), cpu.bs('00000'), soff], - alias = True) -mips32op("bne", [cpu.bs('000101'), rs, rt, soff]) -mips32op("beq", [cpu.bs('000100'), rs, rt, soff]) - -mips32op("blez", [cpu.bs('000110'), rs, cpu.bs('00000'), soff]) - -mips32op("bcc", [cpu.bs('000001'), rs, bs_bcc, soff]) - -mips32op("bgtz", [cpu.bs('000111'), rs, cpu.bs('00000'), soff]) -mips32op("bal", [cpu.bs('000001'), cpu.bs('00000'), cpu.bs('10001'), soff], - alias = True) - - -mips32op("slti", [cpu.bs('001010'), rs, rt, s16imm], [rt, rs, s16imm]) -mips32op("sltiu", [cpu.bs('001011'), rs, rt, s16imm], [rt, rs, s16imm]) - - -mips32op("j", [cpu.bs('000010'), instr_index]) -mips32op("jal", [cpu.bs('000011'), instr_index]) -mips32op("jalr", [cpu.bs('000000'), rs, cpu.bs('00000'), rd, hint, - cpu.bs('001001')]) -mips32op("jr", [cpu.bs('000000'), rs, cpu.bs('0000000000'), hint, - cpu.bs('001000')]) - -mips32op("lwc1", [cpu.bs('110001'), base, ft, s16imm_noarg], [ft, base]) - -#mips32op("mtc0", [bs('010000'), bs('00100'), rt, rd, bs('00000000'), sel]) -mips32op("mtc0", [cpu.bs('010000'), cpu.bs('00100'), rt, cpr0, - cpu.bs('00000000'), cpr]) -mips32op("mtc1", [cpu.bs('010001'), cpu.bs('00100'), rt, fs, - cpu.bs('00000000000')]) - -# XXXX TODO CFC1 -mips32op("cfc1", [cpu.bs('010001'), 
cpu.bs('00010'), rt, fs, - cpu.bs('00000000000')]) -# XXXX TODO CTC1 -mips32op("ctc1", [cpu.bs('010001'), cpu.bs('00110'), rt, fs, - cpu.bs('00000000000')]) - -mips32op("break", [cpu.bs('000000'), code, cpu.bs('001101')]) -mips32op("syscall", [cpu.bs('000000'), code, cpu.bs('001100')]) - - -mips32op("c", [cpu.bs('010001'), bs_fmt, ft, fs, fcc, cpu.bs('0'), - cpu.bs('0'), cpu.bs('11'), bs_cond], [fcc, fs, ft]) - - -mips32op("bc1t", [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('0'), - cpu.bs('1'), soff]) -mips32op("bc1f", [cpu.bs('010001'), cpu.bs('01000'), fcc, cpu.bs('0'), - cpu.bs('0'), soff]) - -mips32op("swc1", [cpu.bs('111001'), base, ft, s16imm_noarg], [ft, base]) - -mips32op("cvt.d", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, - cpu.bs('100001')], [fd, fs]) -mips32op("cvt.w", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, - cpu.bs('100100')], [fd, fs]) -mips32op("cvt.s", [cpu.bs('010001'), bs_fmt, cpu.bs('00000'), fs, fd, - cpu.bs('100000')], [fd, fs]) - -mips32op("ext", [cpu.bs('011111'), rs, rt, esize, epos, cpu.bs('000000')], - [rt, rs, epos, esize]) -mips32op("ins", [cpu.bs('011111'), rs, rt, eposh, epos, cpu.bs('000100')], - [rt, rs, epos, eposh]) - -mips32op("seb", [cpu.bs('011111'), cpu.bs('00000'), rt, rd, cpu.bs('10000'), - cpu.bs('100000')], [rd, rt]) -mips32op("seh", [cpu.bs('011111'), cpu.bs('00000'), rt, rd, cpu.bs('11000'), - cpu.bs('100000')], [rd, rt]) -mips32op("wsbh", [cpu.bs('011111'), cpu.bs('00000'), rt, rd, cpu.bs('00010'), - cpu.bs('100000')], [rd, rt]) - -mips32op("di", [cpu.bs('010000'), cpu.bs('01011'), rt, cpu.bs('01100'), - cpu.bs('00000'), cpu.bs('0'), cpu.bs('00'), cpu.bs('000')]) -mips32op("ei", [cpu.bs('010000'), cpu.bs('01011'), rt, cpu.bs('01100'), - cpu.bs('00000'), cpu.bs('1'), cpu.bs('00'), cpu.bs('000')]) - - -mips32op("tlbp", [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19), - cpu.bs('001000')]) -mips32op("tlbwi", [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19), - cpu.bs('000010')]) - - -mips32op("teq", 
[cpu.bs('000000'), rs, rt, bs_code, cpu.bs('110100')], - [rs, rt]) diff --git a/miasm2/arch/mips32/disasm.py b/miasm2/arch/mips32/disasm.py deleted file mode 100644 index bdd800d5..00000000 --- a/miasm2/arch/mips32/disasm.py +++ /dev/null @@ -1,16 +0,0 @@ -from miasm2.core.asmblock import disasmEngine -from miasm2.arch.mips32.arch import mn_mips32 - - - -class dis_mips32b(disasmEngine): - attrib = 'b' - def __init__(self, bs=None, **kwargs): - super(dis_mips32b, self).__init__(mn_mips32, self.attrib, bs, **kwargs) - - -class dis_mips32l(disasmEngine): - attrib = "l" - def __init__(self, bs=None, **kwargs): - super(dis_mips32l, self).__init__(mn_mips32, self.attrib, bs, **kwargs) - diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py deleted file mode 100644 index 90558708..00000000 --- a/miasm2/arch/mips32/ira.py +++ /dev/null @@ -1,104 +0,0 @@ -#-*- coding:utf-8 -*- - -from miasm2.expression.expression import ExprAssign, ExprOp -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.analysis import ira -from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b - -class ir_a_mips32l(ir_mips32l, ira): - def __init__(self, loc_db=None): - ir_mips32l.__init__(self, loc_db) - self.ret_reg = self.arch.regs.V0 - - def call_effects(self, ad, instr): - call_assignblk = AssignBlock( - [ - ExprAssign( - self.ret_reg, - ExprOp( - 'call_func_ret', - ad, - self.arch.regs.A0, - self.arch.regs.A1, - self.arch.regs.A2, - self.arch.regs.A3, - ) - ), - ], - instr - ) - - return [call_assignblk], [] - - - def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): - """ - Add a native block to the current IR - @block: native assembly block - @ircfg: IRCFG instance - @gen_pc_updt: insert PC update effects between instructions - """ - loc_key = block.loc_key - ir_blocks_all = [] - - assignments = [] - for index, instr in enumerate(block.lines): - if loc_key is None: - assignments = [] - loc_key = self.get_loc_key_for_instr(instr) - if instr.is_subcall(): - 
assert index == len(block.lines) - 2 - - # Add last instruction first (before call) - split = self.add_instr_to_current_state( - block.lines[-1], block, assignments, - ir_blocks_all, gen_pc_updt - ) - assert not split - # Add call effects after the delay splot - split = self.add_instr_to_current_state( - instr, block, assignments, - ir_blocks_all, gen_pc_updt - ) - assert split - break - split = self.add_instr_to_current_state( - instr, block, assignments, - ir_blocks_all, gen_pc_updt - ) - if split: - ir_blocks_all.append(IRBlock(loc_key, assignments)) - loc_key = None - assignments = [] - if loc_key is not None: - ir_blocks_all.append(IRBlock(loc_key, assignments)) - - new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) - for irblock in new_ir_blocks_all: - ircfg.add_irblock(irblock) - return new_ir_blocks_all - - def get_out_regs(self, _): - return set([self.ret_reg, self.sp]) - - def sizeof_char(self): - return 8 - - def sizeof_short(self): - return 16 - - def sizeof_int(self): - return 32 - - def sizeof_long(self): - return 32 - - def sizeof_pointer(self): - return 32 - - - -class ir_a_mips32b(ir_mips32b, ir_a_mips32l): - def __init__(self, loc_db=None): - ir_mips32b.__init__(self, loc_db) - self.ret_reg = self.arch.regs.V0 diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py deleted file mode 100644 index 04690a3e..00000000 --- a/miasm2/arch/mips32/jit.py +++ /dev/null @@ -1,151 +0,0 @@ -from builtins import range -import logging - -from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import pck32, upck32 -from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b -from miasm2.jitter.codegen import CGen -from miasm2.ir.ir import AssignBlock, IRBlock -import miasm2.expression.expression as m2_expr - -log = logging.getLogger('jit_mips32') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) 
-log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - - -class mipsCGen(CGen): - CODE_INIT = CGen.CODE_INIT + r""" - unsigned int branch_dst_pc; - unsigned int branch_dst_irdst; - unsigned int branch_dst_set=0; - """ - - CODE_RETURN_NO_EXCEPTION = r""" - %s: - if (branch_dst_set) { - %s = %s; - BlockDst->address = %s; - } else { - BlockDst->address = %s; - } - return JIT_RET_NO_EXCEPTION; - """ - - def __init__(self, ir_arch): - super(mipsCGen, self).__init__(ir_arch) - self.delay_slot_dst = m2_expr.ExprId("branch_dst_irdst", 32) - self.delay_slot_set = m2_expr.ExprId("branch_dst_set", 32) - - def block2assignblks(self, block): - irblocks_list = super(mipsCGen, self).block2assignblks(block) - for irblocks in irblocks_list: - for blk_idx, irblock in enumerate(irblocks): - has_breakflow = any(assignblock.instr.breakflow() for assignblock in irblock) - if not has_breakflow: - continue - - irs = [] - for assignblock in irblock: - if self.ir_arch.pc not in assignblock: - irs.append(AssignBlock(assignments, assignblock.instr)) - continue - assignments = dict(assignblock) - # Add internal branch destination - assignments[self.delay_slot_dst] = assignblock[ - self.ir_arch.pc] - assignments[self.delay_slot_set] = m2_expr.ExprInt(1, 32) - # Replace IRDst with next instruction - dst_loc_key = self.ir_arch.get_next_instr(assignblock.instr) - assignments[self.ir_arch.IRDst] = m2_expr.ExprLoc(dst_loc_key, 32) - irs.append(AssignBlock(assignments, assignblock.instr)) - irblocks[blk_idx] = IRBlock(irblock.loc_key, irs) - - return irblocks_list - - def gen_finalize(self, block): - """ - Generate the C code for the final block instruction - """ - - loc_key = self.get_block_post_label(block) - offset = self.ir_arch.loc_db.get_location_offset(loc_key) - out = (self.CODE_RETURN_NO_EXCEPTION % (loc_key, - self.C_PC, - m2_expr.ExprId('branch_dst_irdst', 32), - m2_expr.ExprId('branch_dst_irdst', 32), - self.id_to_c(m2_expr.ExprInt(offset, 32))) - ).split('\n') - return out - - -class 
jitter_mips32l(Jitter): - - C_Gen = mipsCGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_mips32l(sp), *args, **kwargs) - self.vm.set_little_endian() - - def push_uint32_t(self, value): - self.cpu.SP -= 4 - self.vm.set_mem(self.cpu.SP, pck32(value)) - - def pop_uint32_t(self): - value = self.vm.get_u32(self.cpu.SP) - self.cpu.SP += 4 - return value - - def get_stack_arg(self, index): - return self.vm.get_u32(self.cpu.SP + 4 * index) - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.PC = self.pc - - # calling conventions - - @named_arguments - def func_args_stdcall(self, n_args): - args = [self.get_arg_n_stdcall(i) for i in range(n_args)] - ret_ad = self.cpu.RA - return ret_ad, args - - def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None): - self.pc = self.cpu.PC = ret_addr - if ret_value1 is not None: - self.cpu.V0 = ret_value1 - if ret_value2 is not None: - self.cpu.V1 = ret_value2 - return True - - def func_prepare_stdcall(self, ret_addr, *args): - for index in range(min(len(args), 4)): - setattr(self.cpu, 'A%d' % index, args[index]) - for index in range(4, len(args)): - self.vm.set_mem(self.cpu.SP + 4 * (index - 4), pck32(args[index])) - self.cpu.RA = ret_addr - - def get_arg_n_stdcall(self, index): - if index < 4: - arg = getattr(self.cpu, 'A%d' % index) - else: - arg = self.get_stack_arg(index-4) - return arg - - - func_args_systemv = func_args_stdcall - func_ret_systemv = func_ret_stdcall - func_prepare_systemv = func_prepare_stdcall - get_arg_n_systemv = get_arg_n_stdcall - - -class jitter_mips32b(jitter_mips32l): - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_mips32b(sp), *args, **kwargs) - self.vm.set_big_endian() diff --git a/miasm2/arch/mips32/regs.py b/miasm2/arch/mips32/regs.py deleted file mode 100644 index d1d14bdc..00000000 --- a/miasm2/arch/mips32/regs.py +++ /dev/null @@ -1,73 +0,0 @@ -#-*- coding:utf-8 
-*- - -from builtins import range -from miasm2.expression.expression import ExprId -from miasm2.core.cpu import gen_reg, gen_regs - - -PC, _ = gen_reg('PC') -PC_FETCH, _ = gen_reg('PC_FETCH') - -R_LO, _ = gen_reg('R_LO') -R_HI, _ = gen_reg('R_HI') - -exception_flags = ExprId('exception_flags', 32) - -PC_init = ExprId("PC_init", 32) -PC_FETCH_init = ExprId("PC_FETCH_init", 32) - -regs32_str = ["ZERO", 'AT', 'V0', 'V1'] +\ - ['A%d'%i for i in range(4)] +\ - ['T%d'%i for i in range(8)] +\ - ['S%d'%i for i in range(8)] +\ - ['T%d'%i for i in range(8, 10)] +\ - ['K0', 'K1'] +\ - ['GP', 'SP', 'FP', 'RA'] - -regs32_expr = [ExprId(x, 32) for x in regs32_str] -ZERO = regs32_expr[0] - -regs_flt_str = ['F%d'%i for i in range(0x20)] - -regs_fcc_str = ['FCC%d'%i for i in range(8)] - -R_LO = ExprId('R_LO', 32) -R_HI = ExprId('R_HI', 32) - -R_LO_init = ExprId('R_LO_init', 32) -R_HI_init = ExprId('R_HI_init', 32) - - -cpr0_str = ["CPR0_%d"%x for x in range(0x100)] -cpr0_str[0] = "INDEX" -cpr0_str[16] = "ENTRYLO0" -cpr0_str[24] = "ENTRYLO1" -cpr0_str[40] = "PAGEMASK" -cpr0_str[72] = "COUNT" -cpr0_str[80] = "ENTRYHI" -cpr0_str[104] = "CAUSE" -cpr0_str[112] = "EPC" -cpr0_str[128] = "CONFIG" -cpr0_str[152] = "WATCHHI" - -regs_cpr0_expr, regs_cpr0_init, regs_cpr0_info = gen_regs(cpr0_str, globals()) - -gpregs_expr, gpregs_init, gpregs = gen_regs(regs32_str, globals()) -regs_flt_expr, regs_flt_init, fltregs = gen_regs(regs_flt_str, globals(), sz=64) -regs_fcc_expr, regs_fcc_init, fccregs = gen_regs(regs_fcc_str, globals()) - - -all_regs_ids = [PC, PC_FETCH, R_LO, R_HI, exception_flags] + gpregs_expr + regs_flt_expr + \ - regs_fcc_expr + regs_cpr0_expr -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) -all_regs_ids_init = [ExprId("%s_init" % reg.name, reg.size) for reg in all_regs_ids] -all_regs_ids_no_alias = all_regs_ids[:] - -attrib_to_regs = { - 'l': all_regs_ids_no_alias, - 'b': all_regs_ids_no_alias, -} - -regs_init = {} -for i, r in enumerate(all_regs_ids): - 
regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py deleted file mode 100644 index 62a85355..00000000 --- a/miasm2/arch/mips32/sem.py +++ /dev/null @@ -1,520 +0,0 @@ -import miasm2.expression.expression as m2_expr -from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock -from miasm2.arch.mips32.arch import mn_mips32 -from miasm2.arch.mips32.regs import R_LO, R_HI, PC, RA, ZERO, exception_flags -from miasm2.core.sembuilder import SemBuilder -from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO - - -# SemBuilder context -ctx = { - "R_LO": R_LO, - "R_HI": R_HI, - "PC": PC, - "RA": RA, - "m2_expr": m2_expr -} - -sbuild = SemBuilder(ctx) - - -@sbuild.parse -def addiu(arg1, arg2, arg3): - """Adds a register @arg3 and a sign-extended immediate value @arg2 and - stores the result in a register @arg1""" - arg1 = arg2 + arg3 - -@sbuild.parse -def lw(arg1, arg2): - "A word is loaded into a register @arg1 from the specified address @arg2." - arg1 = arg2 - -@sbuild.parse -def sw(arg1, arg2): - "The contents of @arg2 is stored at the specified address @arg1." 
- arg2 = arg1 - -@sbuild.parse -def jal(arg1): - "Jumps to the calculated address @arg1 and stores the return address in $RA" - PC = arg1 - ir.IRDst = arg1 - RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size) - -@sbuild.parse -def jalr(arg1, arg2): - """Jump to an address stored in a register @arg1, and store the return - address in another register @arg2""" - PC = arg1 - ir.IRDst = arg1 - arg2 = ExprLoc(ir.get_next_break_loc_key(instr), arg2.size) - -@sbuild.parse -def bal(arg1): - PC = arg1 - ir.IRDst = arg1 - RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size) - -@sbuild.parse -def l_b(arg1): - PC = arg1 - ir.IRDst = arg1 - -@sbuild.parse -def lbu(arg1, arg2): - """A byte is loaded (unsigned extended) into a register @arg1 from the - specified address @arg2.""" - arg1 = mem8[arg2.ptr].zeroExtend(32) - -@sbuild.parse -def lhu(arg1, arg2): - """A word is loaded (unsigned extended) into a register @arg1 from the - specified address @arg2.""" - arg1 = mem16[arg2.ptr].zeroExtend(32) - -@sbuild.parse -def lb(arg1, arg2): - "A byte is loaded into a register @arg1 from the specified address @arg2." 
- arg1 = mem8[arg2.ptr].signExtend(32) - -@sbuild.parse -def beq(arg1, arg2, arg3): - "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq" - dst = arg3 if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) - PC = dst - ir.IRDst = dst - -@sbuild.parse -def bgez(arg1, arg2): - """Branches on @arg2 if the quantities of register @arg1 is greater than or - equal to zero""" - dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if ExprOp(m2_expr.TOK_INF_SIGNED, arg1, ExprInt(0, arg1.size)) else arg2 - PC = dst - ir.IRDst = dst - -@sbuild.parse -def bne(arg1, arg2, arg3): - """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT - equal""" - dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if ExprOp(m2_expr.TOK_EQUAL, arg1, arg2) else arg3 - PC = dst - ir.IRDst = dst - -@sbuild.parse -def lui(arg1, arg2): - """The immediate value @arg2 is shifted left 16 bits and stored in the - register @arg1. 
The lower 16 bits are zeroes.""" - arg1 = ExprCompose(i16(0), arg2[:16]) - -@sbuild.parse -def nop(): - """Do nothing""" - -@sbuild.parse -def j(arg1): - """Jump to an address @arg1""" - PC = arg1 - ir.IRDst = arg1 - -@sbuild.parse -def l_or(arg1, arg2, arg3): - """Bitwise logical ors two registers @arg2, @arg3 and stores the result in a - register @arg1""" - arg1 = arg2 | arg3 - -@sbuild.parse -def nor(arg1, arg2, arg3): - """Bitwise logical Nors two registers @arg2, @arg3 and stores the result in - a register @arg1""" - arg1 = (arg2 | arg3) ^ i32(-1) - -@sbuild.parse -def l_and(arg1, arg2, arg3): - """Bitwise logical ands two registers @arg2, @arg3 and stores the result in - a register @arg1""" - arg1 = arg2 & arg3 - -@sbuild.parse -def ext(arg1, arg2, arg3, arg4): - pos = int(arg3) - size = int(arg4) - arg1 = arg2[pos:pos + size].zeroExtend(32) - -@sbuild.parse -def mul(arg1, arg2, arg3): - """Multiplies @arg2 by $arg3 and stores the result in @arg1.""" - arg1 = 'imul'(arg2, arg3) - -@sbuild.parse -def sltu(arg1, arg2, arg3): - """If @arg2 is less than @arg3 (unsigned), @arg1 is set to one. It gets zero - otherwise.""" - arg1 = ExprCond( - ExprOp(m2_expr.TOK_INF_UNSIGNED, arg2, arg3), - ExprInt(1, arg1.size), - ExprInt(0, arg1.size) - ) - -@sbuild.parse -def slt(arg1, arg2, arg3): - """If @arg2 is less than @arg3 (signed), @arg1 is set to one. 
It gets zero - otherwise.""" - arg1 = ExprCond( - ExprOp(m2_expr.TOK_INF_SIGNED, arg2, arg3), - ExprInt(1, arg1.size), - ExprInt(0, arg1.size) - ) - - -@sbuild.parse -def l_sub(arg1, arg2, arg3): - arg1 = arg2 - arg3 - -@sbuild.parse -def sb(arg1, arg2): - """The least significant byte of @arg1 is stored at the specified address - @arg2.""" - mem8[arg2.ptr] = arg1[:8] - -@sbuild.parse -def sh(arg1, arg2): - mem16[arg2.ptr] = arg1[:16] - -@sbuild.parse -def movn(arg1, arg2, arg3): - if arg3: - arg1 = arg2 - -@sbuild.parse -def movz(arg1, arg2, arg3): - if not arg3: - arg1 = arg2 - -@sbuild.parse -def srl(arg1, arg2, arg3): - """Shifts arg1 register value @arg2 right by the shift amount @arg3 and - places the value in the destination register @arg1. - Zeroes are shifted in.""" - arg1 = arg2 >> arg3 - -@sbuild.parse -def sra(arg1, arg2, arg3): - """Shifts arg1 register value @arg2 right by the shift amount @arg3 and - places the value in the destination register @arg1. The sign bit is shifted - in.""" - arg1 = 'a>>'(arg2, arg3) - -@sbuild.parse -def srav(arg1, arg2, arg3): - arg1 = 'a>>'(arg2, arg3 & i32(0x1F)) - -@sbuild.parse -def sll(arg1, arg2, arg3): - arg1 = arg2 << arg3 - -@sbuild.parse -def srlv(arg1, arg2, arg3): - """Shifts a register value @arg2 right by the amount specified in @arg3 and - places the value in the destination register @arg1. - Zeroes are shifted in.""" - arg1 = arg2 >> (arg3 & i32(0x1F)) - -@sbuild.parse -def sllv(arg1, arg2, arg3): - """Shifts a register value @arg2 left by the amount specified in @arg3 and - places the value in the destination register @arg1. 
- Zeroes are shifted in.""" - arg1 = arg2 << (arg3 & i32(0x1F)) - -@sbuild.parse -def l_xor(arg1, arg2, arg3): - """Exclusive ors two registers @arg2, @arg3 and stores the result in a - register @arg3""" - arg1 = arg2 ^ arg3 - -@sbuild.parse -def seb(arg1, arg2): - arg1 = arg2[:8].signExtend(32) - -@sbuild.parse -def seh(arg1, arg2): - arg1 = arg2[:16].signExtend(32) - -@sbuild.parse -def bltz(arg1, arg2): - """Branches on @arg2 if the register @arg1 is less than zero""" - dst_o = arg2 if ExprOp(m2_expr.TOK_INF_SIGNED, arg1, ExprInt(0, arg1.size)) else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) - PC = dst_o - ir.IRDst = dst_o - -@sbuild.parse -def blez(arg1, arg2): - """Branches on @arg2 if the register @arg1 is less than or equal to zero""" - cond = ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size)) - dst_o = arg2 if cond else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) - PC = dst_o - ir.IRDst = dst_o - -@sbuild.parse -def bgtz(arg1, arg2): - """Branches on @arg2 if the register @arg1 is greater than zero""" - cond = ExprOp(m2_expr.TOK_INF_EQUAL_SIGNED, arg1, ExprInt(0, arg1.size)) - dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if cond else arg2 - PC = dst_o - ir.IRDst = dst_o - -@sbuild.parse -def wsbh(arg1, arg2): - arg1 = ExprCompose(arg2[8:16], arg2[0:8], arg2[24:32], arg2[16:24]) - -@sbuild.parse -def rotr(arg1, arg2, arg3): - arg1 = '>>>'(arg2, arg3) - -@sbuild.parse -def add_d(arg1, arg2, arg3): - # XXX TODO check - arg1 = 'fadd'(arg2, arg3) - -@sbuild.parse -def sub_d(arg1, arg2, arg3): - # XXX TODO check - arg1 = 'fsub'(arg2, arg3) - -@sbuild.parse -def div_d(arg1, arg2, arg3): - # XXX TODO check - arg1 = 'fdiv'(arg2, arg3) - -@sbuild.parse -def mul_d(arg1, arg2, arg3): - # XXX TODO check - arg1 = 'fmul'(arg2, arg3) - -@sbuild.parse -def mov_d(arg1, arg2): - # XXX TODO check - arg1 = arg2 - -@sbuild.parse -def mfc0(arg1, arg2): - arg1 = arg2 - -@sbuild.parse -def mfc1(arg1, arg2): - arg1 = arg2 
- -@sbuild.parse -def mtc0(arg1, arg2): - arg2 = arg1 - -@sbuild.parse -def mtc1(arg1, arg2): - arg2 = arg1 - -@sbuild.parse -def tlbwi(): - "TODO XXX" - -@sbuild.parse -def tlbp(): - "TODO XXX" - -def ins(ir, instr, a, b, c, d): - e = [] - pos = int(c) - l = int(d) - - my_slices = [] - if pos != 0: - my_slices.append((a[:pos], 0, pos)) - if l != 0: - my_slices.append((b[:l], pos, pos+l)) - if pos + l != 32: - my_slices.append((a[pos+l:], pos+l, 32)) - r = m2_expr.ExprCompose(my_slices) - e.append(m2_expr.ExprAssign(a, r)) - return e, [] - - -@sbuild.parse -def lwc1(arg1, arg2): - arg1 = ('mem_%.2d_to_single' % arg2.size)(arg2) - -@sbuild.parse -def swc1(arg1, arg2): - arg2 = ('single_to_mem_%.2d' % arg1.size)(arg1) - -@sbuild.parse -def c_lt_d(arg1, arg2, arg3): - arg1 = 'fcomp_lt'(arg2, arg3) - -@sbuild.parse -def c_eq_d(arg1, arg2, arg3): - arg1 = 'fcomp_eq'(arg2, arg3) - -@sbuild.parse -def c_le_d(arg1, arg2, arg3): - arg1 = 'fcomp_le'(arg2, arg3) - -@sbuild.parse -def bc1t(arg1, arg2): - dst_o = arg2 if arg1 else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) - PC = dst_o - ir.IRDst = dst_o - -@sbuild.parse -def bc1f(arg1, arg2): - dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 else arg2 - PC = dst_o - ir.IRDst = dst_o - -@sbuild.parse -def cvt_d_w(arg1, arg2): - # TODO XXX - arg1 = 'flt_d_w'(arg2) - -@sbuild.parse -def mult(arg1, arg2): - """Multiplies (signed) @arg1 by @arg2 and stores the result in $R_HI:$R_LO""" - size = arg1.size - result = arg1.signExtend(size * 2) * arg2.signExtend(size * 2) - R_LO = result[:32] - R_HI = result[32:] - -@sbuild.parse -def multu(arg1, arg2): - """Multiplies (unsigned) @arg1 by @arg2 and stores the result in $R_HI:$R_LO""" - size = arg1.size - result = arg1.zeroExtend(size * 2) * arg2.zeroExtend(size * 2) - R_LO = result[:32] - R_HI = result[32:] - -@sbuild.parse -def div(arg1, arg2): - """Divide (signed) @arg1 by @arg2 and stores the remaining/result in $R_HI/$R_LO""" - R_LO = 
ExprOp('sdiv' ,arg1, arg2) - R_HI = ExprOp('smod', arg1, arg2) - -@sbuild.parse -def divu(arg1, arg2): - """Divide (unsigned) @arg1 by @arg2 and stores the remaining/result in $R_HI/$R_LO""" - R_LO = ExprOp('udiv', arg1, arg2) - R_HI = ExprOp('umod', arg1, arg2) - -@sbuild.parse -def mfhi(arg1): - "The contents of register $R_HI are moved to the specified register @arg1." - arg1 = R_HI - -@sbuild.parse -def mflo(arg1): - "The contents of register R_LO are moved to the specified register @arg1." - arg1 = R_LO - -@sbuild.parse -def di(arg1): - "NOP" - -@sbuild.parse -def ei(arg1): - "NOP" - -@sbuild.parse -def ehb(arg1): - "NOP" - - -def teq(ir, instr, arg1, arg2): - e = [] - - loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - - do_except = [] - do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( - EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - blk_except = IRBlock(loc_except.index, [AssignBlock(do_except, instr)]) - - cond = arg1 - arg2 - - - e = [] - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(cond, loc_next_expr, loc_except_expr))) - - return e, [blk_except] - - -mnemo_func = sbuild.functions -mnemo_func.update({ - 'add.d': add_d, - 'addu': addiu, - 'addi': addiu, - 'and': l_and, - 'andi': l_and, - 'b': l_b, - 'c.eq.d': c_eq_d, - 'c.le.d': c_le_d, - 'c.lt.d': c_lt_d, - 'cvt.d.w': cvt_d_w, - 'div.d': div_d, - 'ins': ins, - 'jr': j, - 'mov.d': mov_d, - 'mul.d': mul_d, - 'or': l_or, - 'ori': l_or, - 'slti': slt, - 'sltiu': sltu, - 'sub.d': sub_d, - 'subu': l_sub, - 'xor': l_xor, - 'xori': l_xor, - 'teq': teq, -}) - -def get_mnemo_expr(ir, instr, *args): - instr, extra_ir = mnemo_func[instr.name.lower()](ir, instr, *args) - return instr, extra_ir - -class ir_mips32l(IntermediateRepresentation): - - def __init__(self, loc_db=None): - 
IntermediateRepresentation.__init__(self, mn_mips32, 'l', loc_db) - self.pc = mn_mips32.getpc() - self.sp = mn_mips32.getsp() - self.IRDst = m2_expr.ExprId('IRDst', 32) - self.addrsize = 32 - - def get_ir(self, instr): - args = instr.args - instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) - - fixed_regs = { - self.pc: m2_expr.ExprInt(instr.offset + 4, 32), - ZERO: m2_expr.ExprInt(0, 32) - } - - instr_ir = [m2_expr.ExprAssign(expr.dst, expr.src.replace_expr(fixed_regs)) - for expr in instr_ir] - - new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fixed_regs)) - for irblock in extra_ir] - return instr_ir, new_extra_ir - - def get_next_instr(self, instr): - return self.loc_db.get_or_create_offset_location(instr.offset + 4) - - def get_next_break_loc_key(self, instr): - return self.loc_db.get_or_create_offset_location(instr.offset + 8) - -class ir_mips32b(ir_mips32l): - def __init__(self, loc_db=None): - self.addrsize = 32 - IntermediateRepresentation.__init__(self, mn_mips32, 'b', loc_db) - self.pc = mn_mips32.getpc() - self.sp = mn_mips32.getsp() - self.IRDst = m2_expr.ExprId('IRDst', 32) diff --git a/miasm2/arch/msp430/__init__.py b/miasm2/arch/msp430/__init__.py deleted file mode 100644 index bbad893b..00000000 --- a/miasm2/arch/msp430/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py deleted file mode 100644 index fd31f6c4..00000000 --- a/miasm2/arch/msp430/arch.py +++ /dev/null @@ -1,587 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range - -import logging -from pyparsing import * -from miasm2.expression.expression import * -from miasm2.core.cpu import * -from collections import defaultdict -from miasm2.core.bin_stream import bin_stream -import miasm2.arch.msp430.regs as regs_module -from miasm2.arch.msp430.regs import * -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp - -log = 
logging.getLogger("msp430dis") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.DEBUG) - -conditional_branch = ['jnz', 'jz', 'jnc', 'jc', - 'jn', 'jge', 'jl'] -unconditional_branch = ['jmp'] - -def cb_deref_nooff(tokens): - assert len(tokens) == 1 - result = AstMem(tokens[0], 16) - return result - - -def cb_deref_pinc(tokens): - assert len(tokens) == 1 - - result = AstOp('autoinc', *tokens) - return result - - -def cb_deref_off(tokens): - assert len(tokens) == 2 - result = AstMem(tokens[1] + tokens[0], 16) - return result - - -def cb_expr(tokens): - assert(len(tokens) == 1) - result = tokens[0] - return result - - -ARO = Suppress("@") -LPARENT = Suppress("(") -RPARENT = Suppress(")") - -PINC = Suppress("+") - -deref_nooff = (ARO + base_expr).setParseAction(cb_deref_nooff) -deref_pinc = (ARO + base_expr + PINC).setParseAction(cb_deref_pinc) -deref_off = (base_expr + LPARENT + gpregs.parser + RPARENT).setParseAction(cb_deref_off) -sreg_p = (deref_pinc | deref_nooff | deref_off | base_expr).setParseAction(cb_expr) - - - -class msp430_arg(m_arg): - def asm_ast_to_expr(self, value, loc_db): - if isinstance(value, AstId): - name = value.name - if isinstance(name, Expr): - return name - assert isinstance(name, str) - if name in gpregs.str: - index = gpregs.str.index(name) - reg = gpregs.expr[index] - return reg - loc_key = loc_db.get_or_create_name_location(value.name.encode()) - return ExprLoc(loc_key, 16) - if isinstance(value, AstOp): - args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in value.args] - if None in args: - return None - return ExprOp(value.op, *args) - if isinstance(value, AstInt): - return ExprInt(value.value, 16) - if isinstance(value, AstMem): - ptr = self.asm_ast_to_expr(value.ptr, loc_db) - if ptr is None: - return None - return ExprMem(ptr, value.size) - return None - - -class additional_info(object): - - def 
__init__(self): - self.except_on_instr = False - - -class instruction_msp430(instruction): - __slots__ = [] - delayslot = 0 - - def dstflow(self): - if self.name.startswith('j'): - return True - return self.name in ['call'] - - @staticmethod - def arg2str(expr, index=None, loc_db=None): - if isinstance(expr, ExprId): - o = str(expr) - elif isinstance(expr, ExprInt): - o = str(expr) - elif expr.is_loc(): - if loc_db is not None: - return loc_db.pretty_str(expr.loc_key) - else: - return str(expr) - elif isinstance(expr, ExprOp) and expr.op == "autoinc": - o = "@%s+" % str(expr.args[0]) - elif isinstance(expr, ExprMem): - if isinstance(expr.ptr, ExprId): - if index == 0: - o = "@%s" % expr.ptr - else: - o = "0x0(%s)" % expr.ptr - elif isinstance(expr.ptr, ExprInt): - o = "@%s" % expr.ptr - elif isinstance(expr.ptr, ExprOp): - o = "%s(%s)" % (expr.ptr.args[1], expr.ptr.args[0]) - else: - raise NotImplementedError('unknown instance expr = %s' % type(expr)) - return o - - - def dstflow2label(self, loc_db): - expr = self.args[0] - if not isinstance(expr, ExprInt): - return - if self.name == "call": - addr = expr.arg - else: - addr = expr.arg + int(self.offset) - - loc_key = loc_db.get_or_create_offset_location(addr) - self.args[0] = ExprLoc(loc_key, expr.size) - - def breakflow(self): - if self.name in conditional_branch + unconditional_branch: - return True - if self.name.startswith('ret'): - return True - if self.name.startswith('int'): - return True - if self.name.startswith('mov') and self.args[1] == PC: - return True - return self.name in ['call'] - - def splitflow(self): - if self.name in conditional_branch: - return True - if self.name in unconditional_branch: - return False - return self.name in ['call'] - - def setdstflow(self, a): - return - - def is_subcall(self): - return self.name in ['call'] - - def getdstflow(self, loc_db): - return [self.args[0]] - - def get_symbol_size(self, symbol, loc_db): - return 16 - - def fixDstOffset(self): - e = self.args[0] - if 
self.offset is None: - raise ValueError('symbol not resolved %s' % l) - if not isinstance(e, ExprInt): - # raise ValueError('dst must be int or label') - log.warning('dynamic dst %r', e) - return - - # Call argument is an absolute offset - # Other offsets are relative to instruction offset - if self.name != "call": - self.args[0] = ExprInt(int(e) - self.offset, 16) - - def get_info(self, c): - pass - - def __str__(self): - o = super(instruction_msp430, self).__str__() - return o - - def get_args_expr(self): - args = [] - for a in self.args: - args.append(a) - return args - - -mode_msp430 = None - - -class mn_msp430(cls_mn): - name = "msp430" - regs = regs_module - all_mn = [] - bintree = {} - num = 0 - delayslot = 0 - pc = {None: PC} - sp = {None: SP} - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - instruction = instruction_msp430 - max_instruction_len = 8 - - @classmethod - def getpc(cls, attrib): - return PC - - @classmethod - def getsp(cls, attrib): - return SP - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l % 16 == 00, "len %r" % l - - @classmethod - def getbits(cls, bs, attrib, start, n): - if not n: - return 0 - o = 0 - if n > bs.getlen() * 8: - raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) - while n: - i = start // 8 - c = cls.getbytes(bs, i) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def getbytes(cls, bs, offset, l=1): - out = b"" - for _ in range(l): - n_offset = (offset & ~1) + 1 - offset % 2 - out += bs.getbytes(n_offset, 1) - offset += 1 - return out - - def decoded2bytes(self, result): - tmp = super(mn_msp430, self).decoded2bytes(result) - out = [] - for x in tmp: - o = b"" - while x: - o += x[:2][::-1] - x = x[2:] - out.append(o) - return out - - @classmethod - def gen_modes(cls, subcls, name, 
bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def additional_info(self): - info = additional_info() - return info - - @classmethod - def getmn(cls, name): - return name.upper() - - def reset_class(self): - super(mn_msp430, self).reset_class() - - def getnextflow(self, loc_db): - raise NotImplementedError('not fully functional') - - -def addop(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_msp430,), dct) - - -class bw_mn(bs_mod_name): - prio = 5 - mn_mod = ['.w', '.b'] - - -class msp430_sreg_arg(reg_noarg, msp430_arg): - prio = default_prio + 1 - reg_info = gpregs - parser = sreg_p - - def decode(self, v): - size = 16 - if hasattr(self.parent, 'size'): - size = [16, 8][self.parent.size.value] - v = v & self.lmask - e = self.reg_info.expr[v] - if self.parent.a_s.value == 0b00: - if e == R3: - self.expr = ExprInt(0, size) - else: - self.expr = e - elif self.parent.a_s.value == 0b01: - if e == SR: - self.expr = ExprMem(ExprInt(self.parent.off_s.value, 16), size) - elif e == R3: - self.expr = ExprInt(1, size) - else: - self.expr = ExprMem( - e + ExprInt(self.parent.off_s.value, 16), size) - elif self.parent.a_s.value == 0b10: - if e == SR: - self.expr = ExprInt(4, size) - elif e == R3: - self.expr = ExprInt(2, size) - else: - self.expr = ExprMem(e, size) - elif self.parent.a_s.value == 0b11: - if e == SR: - self.expr = ExprInt(8, size) - elif e == R3: - if self.parent.size.value == 0: - self.expr = ExprInt(0xffff, size) - else: - self.expr = ExprInt(0xff, size) - elif e == PC: - self.expr = ExprInt(self.parent.off_s.value, size) - else: - self.expr = ExprOp('autoinc', e) - else: - raise NotImplementedError( - "unknown value self.parent.a_s.value = " + - "%d" % self.parent.a_s.value) - return True - - def encode(self): - e = self.expr - if e in self.reg_info.expr: - self.parent.a_s.value = 0 - self.value = 
self.reg_info.expr.index(e) - elif isinstance(e, ExprInt): - v = int(e) - if v == 0xffff and self.parent.size.value == 0: - self.parent.a_s.value = 0b11 - self.value = 3 - elif v == 0xff and self.parent.size.value == 1: - self.parent.a_s.value = 0b11 - self.value = 3 - elif v == 2: - self.parent.a_s.value = 0b10 - self.value = 3 - elif v == 1: - self.parent.a_s.value = 0b01 - self.value = 3 - elif v == 8: - self.parent.a_s.value = 0b11 - self.value = 2 - elif v == 4: - self.parent.a_s.value = 0b10 - self.value = 2 - elif v == 0: - self.parent.a_s.value = 0b00 - self.value = 3 - else: - self.parent.a_s.value = 0b11 - self.value = 0 - self.parent.off_s.value = v - elif isinstance(e, ExprMem): - if isinstance(e.ptr, ExprId): - self.parent.a_s.value = 0b10 - self.value = self.reg_info.expr.index(e.ptr) - elif isinstance(e.ptr, ExprInt): - self.parent.a_s.value = 0b01 - self.value = self.reg_info.expr.index(SR) - self.parent.off_s.value = int(e.ptr) - elif isinstance(e.ptr, ExprOp): - self.parent.a_s.value = 0b01 - self.value = self.reg_info.expr.index(e.ptr.args[0]) - self.parent.off_s.value = int(e.ptr.args[1]) - else: - raise NotImplementedError( - 'unknown instance e.ptr = %s' % type(e.ptr)) - elif isinstance(e, ExprOp) and e.op == "autoinc": - self.parent.a_s.value = 0b11 - self.value = self.reg_info.expr.index(e.args[0]) - else: - raise NotImplementedError('unknown instance e = %s' % type(e)) - return True - - -class msp430_dreg_arg(msp430_sreg_arg): - prio = default_prio + 1 - reg_info = gpregs - parser = sreg_p - - def decode(self, v): - if hasattr(self.parent, 'size'): - size = [16, 8][self.parent.size.value] - else: - size = 16 - - v = v & self.lmask - e = self.reg_info.expr[v] - if self.parent.a_d.value == 0: - self.expr = e - elif self.parent.a_d.value == 1: - if e == SR: - x = ExprInt(self.parent.off_d.value, 16) - else: - x = e + ExprInt(self.parent.off_d.value, 16) - self.expr = ExprMem(x, size) - else: - raise NotImplementedError( - "unknown value 
self.parent.a_d.value = " + - "%d" % self.parent.a_d.value) - return True - - def encode(self): - e = self.expr - if e in self.reg_info.expr: - self.parent.a_d.value = 0 - self.value = self.reg_info.expr.index(e) - elif isinstance(e, ExprMem): - if isinstance(e.ptr, ExprId): - r, i = e.ptr, ExprInt(0, 16) - elif isinstance(e.ptr, ExprOp): - r, i = e.ptr.args[0], e.ptr.args[1] - elif isinstance(e.ptr, ExprInt): - r, i = SR, e.ptr - else: - raise NotImplementedError( - 'unknown instance e.arg = %s' % type(e.ptr)) - self.parent.a_d.value = 1 - self.value = self.reg_info.expr.index(r) - self.parent.off_d.value = int(i) - else: - raise NotImplementedError('unknown instance e = %s' % type(e)) - return True - -class bs_cond_off_s(bs_cond): - - @classmethod - def flen(cls, mode, v): - if v['a_s'] == 0b00: - return None - elif v['a_s'] == 0b01: - if v['sreg'] in [3]: - return None - else: - return 16 - elif v['a_s'] == 0b10: - return None - elif v['a_s'] == 0b11: - """ - if v['sreg'] in [2, 3]: - return None - else: - return 16 - """ - if v['sreg'] in [0]: - return 16 - else: - return None - else: - raise NotImplementedError("unknown value v[a_s] = %d" % v['a_s']) - - def encode(self): - return super(bs_cond_off_s, self).encode() - - def decode(self, v): - if self.l == 0: - self.value = None - self.value = v - return True - - -class bs_cond_off_d(bs_cond_off_s): - - @classmethod - def flen(cls, mode, v): - if v['a_d'] == 0: - return None - elif v['a_d'] == 1: - return 16 - else: - raise NotImplementedError("unknown value v[a_d] = %d" % v['a_d']) - - -class msp430_offs(imm_noarg, msp430_arg): - parser = base_expr - - def int2expr(self, v): - if v & ~self.intmask != 0: - return None - return ExprInt(v, 16) - - def decodeval(self, v): - v <<= 1 - v += self.parent.l - return v - - def encodeval(self, v): - plen = self.parent.l + self.l - assert(plen % 8 == 0) - v -= plen // 8 - if v % 2 != 0: - return False - return v >> 1 - - def decode(self, v): - v = v & self.lmask - if (1 
<< (self.l - 1)) & v: - v |= ~0 ^ self.lmask - v = self.decodeval(v) - self.expr = ExprInt(v, 16) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if (1 << (self.l - 1)) & v: - v = -((0xffff ^ v) + 1) - v = self.encodeval(v) - self.value = (v & 0xffff) & self.lmask - return True - - -off_s = bs(l=16, order=-10, cls=(bs_cond_off_s,), fname = "off_s") -off_d = bs(l=16, order=-10, cls=(bs_cond_off_d,), fname = "off_d") - -a_s = bs(l=2, order=-4, fname='a_s') -a_d = bs(l=1, order=-6, fname='a_d') - -a_d2 = bs(l=2, order=-2, fname='a_d') - -sreg = bs(l=4, order=-3, cls=(msp430_sreg_arg,), fname='sreg') -dreg = bs(l=4, order=-5, cls=(msp430_dreg_arg,), fname='dreg') - -bw = bw_mn(l=1, order=-10, mn_mod=['.w', '.b'], fname='size') - -bs_f1 = bs_name( - l=4, name={ - 'mov': 4, 'add': 5, 'addc': 6, 'subc': 7, 'sub': 8, 'cmp': 9, - 'dadd': 10, 'bit': 11, 'bic': 12, 'bis': 13, 'xor': 14, 'and': 15}) -addop("f1", [bs_f1, sreg, a_d, bw, a_s, dreg, off_s, off_d]) - -bs_f2 = bs_name(l=3, name={'rrc': 0, 'rra': 2, - 'push': 4}) -addop("f2_1", [bs('000100'), bs_f2, bw, a_s, sreg, off_s]) - - -bs_f2_nobw = bs_name(l=3, name={'swpb': 1, 'sxt': 3, - 'call': 5}) -addop("f2_2", [bs('000100'), bs_f2_nobw, bs('0'), a_s, sreg, off_s]) - -# Offset must be decoded in last position to have final instruction len -offimm = bs(l=10, cls=(msp430_offs,), fname="offs", order=-1) - -bs_f2_jcc = bs_name(l=3, name={'jnz': 0, 'jz': 1, 'jnc': 2, 'jc': 3, 'jn': 4, - 'jge': 5, 'jl': 6, 'jmp': 7}) -addop("f2_3", [bs('001'), bs_f2_jcc, offimm]) - diff --git a/miasm2/arch/msp430/ctype.py b/miasm2/arch/msp430/ctype.py deleted file mode 100644 index 464adaf8..00000000 --- a/miasm2/arch/msp430/ctype.py +++ /dev/null @@ -1,68 +0,0 @@ -from miasm2.core.objc import CLeafTypes, ObjCDecl, PADDING_TYPE_NAME -from miasm2.core.ctypesmngr import CTypeId, CTypePtr - - -class CTypeMSP430_unk(CLeafTypes): - """Define C types sizes/alignment for msp430 
architecture""" - - obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 1/align 1 - - obj_char = ObjCDecl("char", 1, 1) - obj_short = ObjCDecl("short", 2, 2) - obj_int = ObjCDecl("int", 2, 2) - obj_long = ObjCDecl("long", 2, 2) - - obj_uchar = ObjCDecl("uchar", 1, 1) - obj_ushort = ObjCDecl("ushort", 2, 2) - obj_uint = ObjCDecl("uint", 2, 2) - obj_ulong = ObjCDecl("ulong", 2, 2) - - obj_void = ObjCDecl("void", 1, 1) - - obj_enum = ObjCDecl("enum", 2, 2) - - obj_float = ObjCDecl("float", 4, 4) - obj_double = ObjCDecl("double", 8, 8) - obj_ldouble = ObjCDecl("ldouble", 16, 16) - - def __init__(self): - self.types = { - CTypeId(PADDING_TYPE_NAME): self.obj_pad, - - CTypeId('char'): self.obj_char, - CTypeId('short'): self.obj_short, - CTypeId('int'): self.obj_int, - CTypeId('void'): self.obj_void, - CTypeId('long',): self.obj_long, - CTypeId('float'): self.obj_float, - CTypeId('double'): self.obj_double, - - CTypeId('signed', 'char'): self.obj_char, - CTypeId('unsigned', 'char'): self.obj_uchar, - - CTypeId('short', 'int'): self.obj_short, - CTypeId('signed', 'short'): self.obj_short, - CTypeId('signed', 'short', 'int'): self.obj_short, - CTypeId('unsigned', 'short'): self.obj_ushort, - CTypeId('unsigned', 'short', 'int'): self.obj_ushort, - - CTypeId('unsigned', ): self.obj_uint, - CTypeId('unsigned', 'int'): self.obj_uint, - CTypeId('signed', 'int'): self.obj_int, - - CTypeId('long', 'int'): self.obj_long, - CTypeId('long', 'long'): self.obj_long, - CTypeId('long', 'long', 'int'): self.obj_long, - CTypeId('signed', 'long', 'long'): self.obj_long, - CTypeId('unsigned', 'long', 'long'): self.obj_ulong, - CTypeId('signed', 'long', 'long', 'int'): self.obj_long, - CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, - - CTypeId('signed', 'long'): self.obj_long, - CTypeId('unsigned', 'long'): self.obj_ulong, - CTypeId('signed', 'long', 'int'): self.obj_long, - CTypeId('unsigned', 'long', 'int'): self.obj_ulong, - - CTypeId('long', 'double'): 
self.obj_ldouble, - CTypePtr(CTypeId('void')): self.obj_uint, - } diff --git a/miasm2/arch/msp430/disasm.py b/miasm2/arch/msp430/disasm.py deleted file mode 100644 index 849cd675..00000000 --- a/miasm2/arch/msp430/disasm.py +++ /dev/null @@ -1,8 +0,0 @@ -from miasm2.core.asmblock import disasmEngine -from miasm2.arch.msp430.arch import mn_msp430 - - -class dis_msp430(disasmEngine): - - def __init__(self, bs=None, **kwargs): - super(dis_msp430, self).__init__(mn_msp430, None, bs, **kwargs) diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py deleted file mode 100644 index 5b19956e..00000000 --- a/miasm2/arch/msp430/ira.py +++ /dev/null @@ -1,31 +0,0 @@ -#-*- coding:utf-8 -*- - -from miasm2.ir.analysis import ira -from miasm2.arch.msp430.sem import ir_msp430 -from miasm2.ir.ir import AssignBlock -from miasm2.expression.expression import * - -class ir_a_msp430_base(ir_msp430, ira): - - def __init__(self, loc_db=None): - ir_msp430.__init__(self, loc_db) - self.ret_reg = self.arch.regs.R15 - - def call_effects(self, addr, instr): - call_assignblk = AssignBlock( - [ - ExprAssign(self.ret_reg, ExprOp('call_func_ret', addr, self.sp, self.arch.regs.R15)), - ExprAssign(self.sp, ExprOp('call_func_stack', addr, self.sp)) - ], - instr - ) - return [call_assignblk], [] - -class ir_a_msp430(ir_a_msp430_base): - - def __init__(self, loc_db=None): - ir_a_msp430_base.__init__(self, loc_db) - - def get_out_regs(self, _): - return set([self.ret_reg, self.sp]) - diff --git a/miasm2/arch/msp430/jit.py b/miasm2/arch/msp430/jit.py deleted file mode 100644 index e4d04f9f..00000000 --- a/miasm2/arch/msp430/jit.py +++ /dev/null @@ -1,42 +0,0 @@ -from miasm2.jitter.jitload import Jitter -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import pck16, upck16 -from miasm2.arch.msp430.sem import ir_msp430 - -import logging - -log = logging.getLogger('jit_msp430') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) 
-log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - -class jitter_msp430(Jitter): - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_msp430(sp), *args, **kwargs) - self.vm.set_little_endian() - - def push_uint16_t(self, value): - regs = self.cpu.get_gpreg() - regs['SP'] -= 2 - self.cpu.set_gpreg(regs) - self.vm.set_mem(regs['SP'], pck16(value)) - - def pop_uint16_t(self): - regs = self.cpu.get_gpreg() - value = self.vm.get_u16(regs['SP']) - regs['SP'] += 2 - self.cpu.set_gpreg(regs) - return value - - def get_stack_arg(self, index): - regs = self.cpu.get_gpreg() - value = self.vm.get_u16(regs['SP'] + 2 * index) - return value - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.PC = self.pc - diff --git a/miasm2/arch/msp430/regs.py b/miasm2/arch/msp430/regs.py deleted file mode 100644 index a3e714b2..00000000 --- a/miasm2/arch/msp430/regs.py +++ /dev/null @@ -1,116 +0,0 @@ -from builtins import range -from miasm2.expression.expression import * -from miasm2.core.cpu import reg_info - - -# GP - -regs16_str = ["PC", "SP", "SR"] + ["R%d" % i for i in range(3, 16)] -regs16_expr = [ExprId(x, 16) for x in regs16_str] - -exception_flags = ExprId('exception_flags', 32) - -gpregs = reg_info(regs16_str, regs16_expr) - -PC = regs16_expr[0] -SP = regs16_expr[1] -SR = regs16_expr[2] -R3 = regs16_expr[3] -R4 = regs16_expr[4] -R5 = regs16_expr[5] -R6 = regs16_expr[6] -R7 = regs16_expr[7] -R8 = regs16_expr[8] -R9 = regs16_expr[9] -R10 = regs16_expr[10] -R11 = regs16_expr[11] -R12 = regs16_expr[12] -R13 = regs16_expr[13] -R14 = regs16_expr[14] -R15 = regs16_expr[15] - -PC_init = ExprId("PC_init", 16) -SP_init = ExprId("SP_init", 16) -SR_init = ExprId("SR_init", 16) -R3_init = ExprId("R3_init", 16) -R4_init = ExprId("R4_init", 16) -R5_init = ExprId("R5_init", 16) -R6_init = ExprId("R6_init", 16) -R7_init = ExprId("R7_init", 16) -R8_init = ExprId("R8_init", 16) -R9_init = ExprId("R9_init", 16) -R10_init = 
ExprId("R10_init", 16) -R11_init = ExprId("R11_init", 16) -R12_init = ExprId("R12_init", 16) -R13_init = ExprId("R13_init", 16) -R14_init = ExprId("R14_init", 16) -R15_init = ExprId("R15_init", 16) - - -reg_zf = 'zf' -reg_nf = 'nf' -reg_of = 'of' -reg_cf = 'cf' -reg_cpuoff = 'cpuoff' -reg_gie = 'gie' -reg_osc = 'osc' -reg_scg0 = 'scg0' -reg_scg1 = 'scg1' -reg_res = 'res' - -zf = ExprId(reg_zf, size=1) -nf = ExprId(reg_nf, size=1) -of = ExprId(reg_of, size=1) -cf = ExprId(reg_cf, size=1) - -cpuoff = ExprId(reg_cpuoff, size=1) -gie = ExprId(reg_gie, size=1) -osc = ExprId(reg_osc, size=1) -scg0 = ExprId(reg_scg0, size=1) -scg1 = ExprId(reg_scg1, size=1) -res = ExprId(reg_res, size=7) - - -zf_init = ExprId("zf_init", size=1) -nf_init = ExprId("nf_init", size=1) -of_init = ExprId("of_init", size=1) -cf_init = ExprId("cf_init", size=1) - - -cpuoff_init = ExprId("cpuoff_init", size=1) -gie_init = ExprId("gie_init", size=1) -osc_init = ExprId("osc_init", size=1) -scg0_init = ExprId("scg0_init", size=1) -scg1_init = ExprId("scg1_init", size=1) -res_init = ExprId("res_init", size=7) - - -all_regs_ids = [ - PC, SP, SR, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, - zf, nf, of, cf, - cpuoff, gie, osc, scg0, scg1, res, -] - -all_regs_ids_no_alias = all_regs_ids - -attrib_to_regs = { - 'l': all_regs_ids_no_alias, - 'b': all_regs_ids_no_alias, -} - -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) - -all_regs_ids_init = [PC_init, SP_init, SR_init, R3_init, - R4_init, R5_init, R6_init, R7_init, - R8_init, R9_init, R10_init, R11_init, - R12_init, R13_init, R14_init, R15_init, - zf_init, nf_init, of_init, cf_init, - cpuoff_init, gie_init, osc_init, - scg0_init, scg1_init, res_init, - ] - -regs_init = {} -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] - -regs_flt_expr = [] diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py deleted file mode 100644 index b939c224..00000000 --- a/miasm2/arch/msp430/sem.py +++ 
/dev/null @@ -1,509 +0,0 @@ -#-*- coding:utf-8 -*- - -from miasm2.expression.expression import * -from miasm2.arch.msp430.regs import * -from miasm2.arch.msp430.arch import mn_msp430 -from miasm2.ir.ir import IntermediateRepresentation - - -# Utils -def hex2bcd(val): - "Return val as BCD" - try: - return int("%x" % val, 10) - except ValueError: - raise NotImplementedError("Not defined behaviour") - - -def bcd2hex(val): - "Return the hex value of a BCD" - try: - return int("0x%d" % val, 16) - except ValueError: - raise NotImplementedError("Not defined behaviour") - - -def reset_sr_res(): - return [ExprAssign(res, ExprInt(0, 7))] - - -def update_flag_cf_inv_zf(a): - return [ExprAssign(cf, ExprCond(a, ExprInt(1, 1), ExprInt(0, 1)))] - - -def update_flag_zf_eq(a, b): - return [ExprAssign(zf, ExprOp("FLAG_EQ_CMP", a, b))] - - -def update_flag_zf(a): - return [ExprAssign(zf, ExprOp("FLAG_EQ", a))] - - -def update_flag_nf(arg): - return [ - ExprAssign( - nf, - ExprOp("FLAG_SIGN_SUB", arg, ExprInt(0, arg.size)) - ) - ] - - -def update_flag_add_cf(op1, op2, res): - "Compute cf in @res = @op1 + @op2" - return [ExprAssign(cf, ExprOp("FLAG_ADD_CF", op1, op2))] - - -def update_flag_add_of(op1, op2, res): - "Compute of in @res = @op1 + @op2" - return [ExprAssign(of, ExprOp("FLAG_ADD_OF", op1, op2))] - - -# checked: ok for sbb add because b & c before +cf -def update_flag_sub_cf(op1, op2, res): - "Compote CF in @op1 - @op2" - return [ExprAssign(cf, ExprOp("FLAG_SUB_CF", op1, op2) ^ ExprInt(1, 1))] - - -def update_flag_sub_of(op1, op2, res): - "Compote OF in @res = @op1 - @op2" - return [ExprAssign(of, ExprOp("FLAG_SUB_OF", op1, op2))] - - -def update_flag_arith_sub_zn(arg1, arg2): - """ - Compute znp flags for (arg1 - arg2) - """ - e = [] - e += update_flag_zf_eq(arg1, arg2) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, arg2))] - return e - - -def update_flag_arith_add_zn(arg1, arg2): - """ - Compute zf and nf flags for (arg1 + arg2) - """ - e = [] - e += 
update_flag_zf_eq(arg1, -arg2) - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] - return e - - - -def mng_autoinc(a, b, size): - e = [] - if not (isinstance(a, ExprOp) and a.op == "autoinc"): - return e, a, b - - a_r = a.args[0] - e.append(ExprAssign(a_r, a_r + ExprInt(size // 8, a_r.size))) - a = ExprMem(a_r, size) - if isinstance(b, ExprMem) and a_r in b.arg: - b = ExprMem(b.arg + ExprInt(size // 8, 16), b.size) - return e, a, b - -# Mnemonics - - -def mov_b(ir, instr, a, b): - e, a, b = mng_autoinc(a, b, 8) - if isinstance(b, ExprMem): - b = ExprMem(b.arg, 8) - a = a[:8] - else: - a = a[:8].zeroExtend(16) - e.append(ExprAssign(b, a)) - return e, [] - - -def mov_w(ir, instr, a, b): - e, a, b = mng_autoinc(a, b, 16) - e.append(ExprAssign(b, a)) - if b == ir.pc: - e.append(ExprAssign(ir.IRDst, a)) - return e, [] - - -def and_b(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 8) - arg1, arg2 = arg1[:8], arg2[:8] - res = arg1 & arg2 - e.append(ExprAssign(b, res.zeroExtend(16))) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", res, ExprInt(0, res.size)))] - e += reset_sr_res() - e += update_flag_cf_inv_zf(res) - e += [ExprAssign(of, ExprInt(0, 1))] - - return e, [] - - -def and_w(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 16) - res = arg1 & arg2 - e.append(ExprAssign(arg2, res)) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", res, ExprInt(0, res.size)))] - e += reset_sr_res() - e += update_flag_cf_inv_zf(res) - e += [ExprAssign(of, ExprInt(0, 1))] - - return e, [] - - -def bic_b(ir, instr, a, b): - e, a, b = mng_autoinc(a, b, 8) - c = (a[:8] ^ ExprInt(0xff, 8)) & b[:8] - c = c.zeroExtend(b.size) - e.append(ExprAssign(b, c)) - return e, [] - - -def bic_w(ir, instr, a, b): - e, a, b = mng_autoinc(a, b, 16) - if b == SR: - # Special case - if a.is_int(1): - # cf - e.append(ExprAssign(cf, ExprInt(0, 1))) - return e, [] - c = 
(a ^ ExprInt(0xffff, 16)) & b - e.append(ExprAssign(b, c)) - return e, [] - - -def bis_w(ir, instr, a, b): - e, a, b = mng_autoinc(a, b, 16) - c = a | b - e.append(ExprAssign(b, c)) - return e, [] - - -def bit_w(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 16) - res = arg1 & arg2 - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_AND', arg1, arg2))] - e += [ExprAssign(nf, ExprOp("FLAG_SIGN_SUB", res, ExprInt(0, res.size)))] - e += reset_sr_res() - e += update_flag_cf_inv_zf(res) - e += [ExprAssign(of, ExprInt(0, 1))] - - return e, [] - - -def sub_w(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 16) - res = arg2 - arg1 - - e.append(ExprAssign(b, res)) - - e += update_flag_arith_sub_zn(arg2, arg1) - e += update_flag_sub_cf(arg2, arg1, res) - e += update_flag_sub_of(arg2, arg1, res) - e += reset_sr_res() - - # micrcorruption - # e += update_flag_sub_of(a, b, c) - # e += update_flag_sub_of(b, a, c) - return e, [] - - -def add_b(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 8) - if isinstance(arg2, ExprMem): - arg2 = ExprMem(arg2.arg, 8) - else: - arg2 = arg2[:8] - arg1 = arg1[:8] - res = arg2 + arg1 - e.append(ExprAssign(b, res)) - - e += update_flag_arith_add_zn(arg2, arg1) - e += update_flag_add_cf(arg2, arg1, res) - e += update_flag_add_of(arg2, arg1, res) - e += reset_sr_res() - - return e, [] - - -def add_w(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 16) - res = arg2 + arg1 - e.append(ExprAssign(b, res)) - - e += update_flag_arith_add_zn(arg2, arg1) - e += update_flag_add_cf(arg2, arg1, res) - e += update_flag_add_of(arg2, arg1, res) - e += reset_sr_res() - - return e, [] - - -def dadd_w(ir, instr, a, b): - e, a, b = mng_autoinc(a, b, 16) - # TODO: microcorruption no carryflag - c = ExprOp("bcdadd", b, a) # +zeroExtend(cf, 16)) - - e.append(ExprAssign(b, c)) - - # micrcorruption - e += update_flag_zf(a) - # e += update_flag_nf(a) - e += reset_sr_res() - - e.append(ExprAssign(cf, ExprOp("bcdadd_cf", b, a))) # +zeroExtend(cf, 16)))) - - # of 
: undefined - return e, [] - - -def xor_w(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 16) - res = arg2 ^ arg1 - e.append(ExprAssign(b, res)) - - e += [ExprAssign(zf, ExprOp('FLAG_EQ_CMP', arg2, arg1))] - e += update_flag_nf(res) - e += reset_sr_res() - e += update_flag_cf_inv_zf(c) - e.append(ExprAssign(of, arg2.msb() & arg1.msb())) - - return e, [] - - -def push_w(ir, instr, a): - e = [] - e.append(ExprAssign(ExprMem(SP - ExprInt(2, 16), 16), a)) - e.append(ExprAssign(SP, SP - ExprInt(2, 16))) - return e, [] - - -def call(ir, instr, a): - e, a, dummy = mng_autoinc(a, None, 16) - - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - - e.append(ExprAssign(ExprMem(SP - ExprInt(2, 16), 16), loc_next_expr)) - e.append(ExprAssign(SP, SP - ExprInt(2, 16))) - e.append(ExprAssign(PC, a)) - e.append(ExprAssign(ir.IRDst, a)) - return e, [] - - -def swpb(ir, instr, a): - e = [] - x, y = a[:8], a[8:16] - e.append(ExprAssign(a, ExprCompose(y, x))) - return e, [] - - -def cmp_w(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 16) - res = arg2 - arg1 - - e += update_flag_arith_sub_zn(arg2, arg1) - e += update_flag_sub_cf(arg2, arg1, res) - e += update_flag_sub_of(arg2, arg1, res) - e += reset_sr_res() - - return e, [] - - -def cmp_b(ir, instr, a, b): - e, arg1, arg2 = mng_autoinc(a, b, 8) - arg1, arg2 = arg1[:8], arg2[:8] - res = arg2 - arg1 - - e += update_flag_arith_sub_zn(arg2, arg1) - e += update_flag_sub_cf(arg2, arg1, res) - e += update_flag_sub_of(arg2, arg1, res) - e += reset_sr_res() - - return e, [] - - -def jz(ir, instr, a): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - e = [] - e.append(ExprAssign(PC, ExprCond(ExprOp("CC_EQ", zf), a, loc_next_expr))) - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_EQ", zf), a, loc_next_expr))) - return e, [] - - -def jnz(ir, instr, a): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - e = [] - e.append(ExprAssign(PC, 
ExprCond(ExprOp("CC_EQ", zf), loc_next_expr, a))) - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_EQ", zf), loc_next_expr, a))) - return e, [] - - -def jl(ir, instr, a): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - e = [] - e.append(ExprAssign(PC, ExprCond(ExprOp("CC_S<", nf, of), a, loc_next_expr))) - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_S<", nf, of), a, loc_next_expr))) - return e, [] - - -def jc(ir, instr, a): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - e = [] - e.append(ExprAssign(PC, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), a, loc_next_expr))) - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), a, loc_next_expr))) - return e, [] - - -def jnc(ir, instr, a): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - e = [] - e.append(ExprAssign(PC, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), loc_next_expr, a))) - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_U>=", cf ^ ExprInt(1, 1)), loc_next_expr, a))) - return e, [] - - -def jge(ir, instr, a): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = ExprLoc(loc_next, 16) - e = [] - e.append(ExprAssign(PC, ExprCond(ExprOp("CC_S>=", nf, of), a, loc_next_expr))) - e.append(ExprAssign(ir.IRDst, ExprCond(ExprOp("CC_S>=", nf, of), a, loc_next_expr))) - return e, [] - - -def jmp(ir, instr, a): - e = [] - e.append(ExprAssign(PC, a)) - e.append(ExprAssign(ir.IRDst, a)) - return e, [] - - -def rrc_w(ir, instr, a): - e = [] - c = ExprCompose(a[1:16], cf) - e.append(ExprAssign(a, c)) - e.append(ExprAssign(cf, a[:1])) - - # micrcorruption - e += update_flag_zf(a) - # e += update_flag_nf(a) - e += reset_sr_res() - - e.append(ExprAssign(of, ExprInt(0, 1))) - return e, [] - - -def rra_w(ir, instr, a): - e = [] - c = ExprCompose(a[1:16], a[15:16]) - e.append(ExprAssign(a, c)) - # TODO: error in disasm microcorruption? 
- # e.append(ExprAssign(cf, a[:1])) - - # micrcorruption - e += update_flag_zf(a) - # e += update_flag_nf(a) - e += reset_sr_res() - - e.append(ExprAssign(of, ExprInt(0, 1))) - return e, [] - - -def sxt(ir, instr, a): - e = [] - c = a[:8].signExtend(16) - e.append(ExprAssign(a, c)) - - e += update_flag_zf(a) - e += update_flag_nf(a) - e += reset_sr_res() - e += update_flag_cf_inv_zf(c) - e.append(ExprAssign(of, ExprInt(0, 1))) - - return e, [] - -mnemo_func = { - "mov.b": mov_b, - "mov.w": mov_w, - "and.b": and_b, - "and.w": and_w, - "bic.b": bic_b, - "bic.w": bic_w, - "bis.w": bis_w, - "bit.w": bit_w, - "sub.w": sub_w, - "add.b": add_b, - "add.w": add_w, - "push.w": push_w, - "dadd.w": dadd_w, - "xor.w": xor_w, - "call": call, - "swpb": swpb, - "cmp.w": cmp_w, - "cmp.b": cmp_b, - "jz": jz, - "jnz": jnz, - "jl": jl, - "jc": jc, - "jnc": jnc, - "jmp": jmp, - "jge": jge, - "rrc.w": rrc_w, - "rra.w": rra_w, - "sxt": sxt, -} - - -composed_sr = ExprCompose(cf, zf, nf, gie, cpuoff, osc, scg0, scg1, of, res) - - -def ComposeExprAssign(dst, src): - e = [] - for start, arg in dst.iter_args(): - e.append(ExprAssign(arg, src[start:start+arg.size])) - return e - - -class ir_msp430(IntermediateRepresentation): - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_msp430, None, loc_db) - self.pc = PC - self.sp = SP - self.IRDst = ExprId('IRDst', 16) - self.addrsize = 16 - - def mod_pc(self, instr, instr_ir, extra_ir): - pass - - def get_ir(self, instr): - args = instr.args - instr_ir, extra_ir = mnemo_func[instr.name](self, instr, *args) - self.mod_sr(instr, instr_ir, extra_ir) - - return instr_ir, extra_ir - - def mod_sr(self, instr, instr_ir, extra_ir): - for i, x in enumerate(instr_ir): - x = ExprAssign(x.dst, x.src.replace_expr({SR: composed_sr})) - instr_ir[i] = x - if x.dst != SR: - continue - xx = ComposeExprAssign(composed_sr, x.src) - instr_ir[i:i+1] = xx - for i, x in enumerate(instr_ir): - x = ExprAssign(x.dst, x.src.replace_expr( - 
{self.pc: ExprInt(instr.offset + instr.l, 16)})) - instr_ir[i] = x - - if extra_ir: - raise NotImplementedError('not fully functional') diff --git a/miasm2/arch/ppc/__init__.py b/miasm2/arch/ppc/__init__.py deleted file mode 100644 index bbad893b..00000000 --- a/miasm2/arch/ppc/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py deleted file mode 100644 index e7661371..00000000 --- a/miasm2/arch/ppc/arch.py +++ /dev/null @@ -1,764 +0,0 @@ -from builtins import range - -import logging -from pyparsing import * -from miasm2.expression.expression import * -from miasm2.core.cpu import * -from collections import defaultdict -from miasm2.core.bin_stream import bin_stream -import miasm2.arch.ppc.regs as regs_module -from miasm2.arch.ppc.regs import * -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp - -log = logging.getLogger("ppcdis") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.DEBUG) - -LPARENTHESIS = Suppress(Literal("(")) -RPARENTHESIS = Suppress(Literal(")")) - -def cb_deref_imm_reg(tokens): - if len(tokens) == 1: - return AstMem(tokens[0], 32) - elif len(tokens) == 2: - return AstMem(tokens[1] + tokens[0], 32) - else: - raise NotImplementedError('len(tokens) > 2') - - -deref_reg_disp = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) -deref_reg = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) - -deref = deref_reg | deref_reg_disp - - -class ppc_arg(m_arg): - def asm_ast_to_expr(self, arg, loc_db): - if isinstance(arg, AstId): - if isinstance(arg.name, ExprId): - return arg.name - if arg.name in gpregs.str: - return None - loc_key = loc_db.get_or_create_name_location(arg.name.encode()) - return ExprLoc(loc_key, 32) - if isinstance(arg, AstOp): - 
args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] - if None in args: - return None - return ExprOp(arg.op, *args) - if isinstance(arg, AstInt): - return ExprInt(arg.value, 32) - if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, loc_db) - if ptr is None: - return None - return ExprMem(ptr, arg.size) - return None - - -class additional_info(object): - - def __init__(self): - self.except_on_instr = False - self.bo_bi_are_defined = False - self.bi = 0 - self.bo = 0 - - -class instruction_ppc(instruction): - delayslot = 0 - - def __init__(self, *args, **kargs): - super(instruction_ppc, self).__init__(*args, **kargs) - - @staticmethod - def arg2str(e, pos = None, loc_db=None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - elif isinstance(e, ExprMem): - addr = e.ptr - if isinstance(addr, ExprInt) or isinstance(addr, ExprId): - out = '(%s)'%addr - elif isinstance(addr, ExprOp): - if len(addr.args) == 1: - out = '(%s)'%addr - elif len(addr.args) == 2: - out = '%s(%s)'%(addr.args[1], addr.args[0]) - else: - raise NotImplementedError('More than two args to ExprOp of address') - else: - raise NotImplementedError('Invalid memory expression') - return out - - return str(e) - - @staticmethod - def is_conditional_jump(s): - return (s[0] == 'B' and - s[1:3] in { 'DN', 'DZ', 'LT', 'GT', 'EQ', 'SO', - 'GE', 'LE', 'NE', 'NS' }) - - def dstflow(self): - name = self.name - if name[-1] == '+' or name[-1] == '-': - name = name[:-1] - return (name[0] == 'B' and - name[-2:] != 'LR' and - name[-3:] != 'LRL' and - name[-3:] != 'CTR' and - name[-4:] != 'CTRL') - - def dstflow2label(self, loc_db): - name = self.name - if name[-1] == '+' or name[-1] == '-': - name = name[:-1] - - if name[-1] == 'L': - name = name[:-1] - elif name[-2:] == 'LA': - name = name[:-2] + 'A' - - if name[-2:] != 'LR' and name[-3:] != 'CTR': - if len(self.args) == 2: - address_index = 1 - else: - address_index = 0 - e = self.args[address_index] - if not isinstance(e, 
ExprInt): - return - if name[-1] != 'A': - ad = e.arg + self.offset - else: - ad = e.arg - loc_key = loc_db.get_or_create_offset_location(ad) - s = ExprLoc(loc_key, e.size) - self.args[address_index] = s - - def breakflow(self): - return self.name[0] == 'B' - - def is_subcall(self): - name = self.name - if name[-1] == '+' or name[-1] == '-': - name = name[0:-1] - return name[0] == 'B' and (name[-1] == 'L' or name[-2:-1] == 'LA') - - def getdstflow(self, loc_db): - if 'LR' in self.name: - return [ LR ] - elif 'CTR' in self.name: - return [ CTR ] - elif len(self.args) == 2: - address_index = 1 - else: - address_index = 0 - return [ self.args[address_index] ] - - def splitflow(self): - ret = False - if self.is_conditional_jump(self.name): - if self.additional_info.bo & 0b10100 != 0b10100: - ret = True - ret = ret or self.is_subcall() - return ret - - def get_symbol_size(self, symbol, loc_db): - return 32 - - def fixDstOffset(self): - e = self.args[0] - if not isinstance(e, ExprInt): - log.debug('Dynamic destination offset %r' % e) - return - if self.name[-1] != 'A': - if self.offset is None: - raise ValueError('symbol not resolved %s' % self.l) - off = e.arg - (self.offset + self.l) - if int(off % 4): - raise ValueError('Offset %r must be a multiple of four' % off) - else: - off = e.arg - self.args[0] = ExprInt(off, 32) - - def get_args_expr(self): - args = [a for a in self.args] - return args - - def get_asm_offset(self, x): - return ExprInt_from(x, self.offset) - - -class mn_ppc(cls_mn): - delayslot = 0 - name = "ppc32" - regs = regs_module - bintree = {} - num = 0 - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - instruction = instruction_ppc - max_instruction_len = 4 - - @classmethod - def getpc(cls, attrib = None): - return PC - - @classmethod - def getsp(cls, attrib = None): - return R1 - - def additional_info(self): - info = additional_info() - info.bo_bi_are_defined = False - if hasattr(self, 
"bo"): - info.bo_bi_are_defined = True - info.bi = int(self.bi.strbits, 2) - info.bo = int(self.bo.strbits, 2) - return info - - @classmethod - def getbits(cls, bs, attrib, start, n): - if not n: - return 0 - o = 0 - if n > bs.getlen() * 8: - raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) - while n: - offset = start // 8 - n_offset = cls.endian_offset(attrib, offset) - c = cls.getbytes(bs, n_offset, 1) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def endian_offset(cls, attrib, offset): - if attrib == "b": - return offset - else: - raise NotImplementedError("bad attrib") - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l == 32, "len %r" % l - - @classmethod - def getmn(cls, name): - return name.upper() - - @classmethod - def mod_fields(cls, fields): - l = sum([x.l for x in fields]) - return fields - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def post_dis(self): - return self - - def value(self, mode): - v = super(mn_ppc, self).value(mode) - if mode == 'b': - return [x for x in v] - else: - raise NotImplementedError("bad attrib") - - def get_symbol_size(self, symbol, loc_db, mode): - return 32 - - -class ppc_reg(reg_noarg, ppc_arg): - pass - - -class ppc_gpreg_noarg(reg_noarg): - reg_info = gpregs - parser = reg_info.parser - -class ppc_gpreg_or_0_noarg(reg_noarg): - reg_info = gpregs - parser = reg_info.parser - - def decode(self, v): - ret = super(ppc_gpreg_or_0_noarg, self).decode(v) - if ret == False: - return False - reg = self.expr - if reg == R0: - self.expr = ExprInt(0, 32) - return ret - -class ppc_gpreg(ppc_reg): - reg_info = gpregs - parser = reg_info.parser - -class ppc_gpreg_or_0(ppc_reg): - reg_info = gpregs - parser = reg_info.parser - - def decode(self, v): - 
ret = super(ppc_gpreg_or_0, self).decode(v) - if ret == False: - return False - reg = self.expr - if reg == R0: - self.expr = ExprInt(0, 32) - return ret - -class ppc_crfreg_noarg(reg_noarg): - reg_info = crfregs - parser = reg_info.parser - -class ppc_crfreg(ppc_reg): - reg_info = crfregs - parser = reg_info.parser - -class ppc_imm(imm_noarg, ppc_arg): - parser = base_expr - -class ppc_s14imm_branch(ppc_imm): - - def decode(self, v): - v = sign_ext(v << 2, 16, 32) - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if v & 0x3: - return False - v = v >> 2 - if sign_ext(v & self.lmask, 14, 32) != v: - return False - self.value = v & self.lmask - return True - -class ppc_s24imm_branch(ppc_imm): - - def decode(self, v): - v = sign_ext(v << 2, 26, 32) - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if v & 0x3: - return False - v = v >> 2 - if sign_ext(v & self.lmask, 24, 32) != v: - return False - self.value = v & self.lmask - return True - -class ppc_s16imm(ppc_imm): - - def decode(self, v): - v = sign_ext(v, 16, 32) - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if sign_ext(v & self.lmask, 16, 32) != v: - return False - self.value = v & self.lmask - return True - -class ppc_u16imm(ppc_imm): - - def decode(self, v): - if v & self.lmask != v: - return False - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = self.expr.arg.arg - if v & self.lmask != v: - return False - self.value = v & self.lmask - return True - -def ppc_swap_10(v): - return ((v & 0b11111) << 5) | ((v & 0b1111100000) >> 5) - -class ppc_spr(ppc_imm): - - def decode(self, v): - self.expr = ExprInt(ppc_swap_10(v), 32) - return True - - def 
encode(self, e): - if not isinstance(e, ExprInt): - return False - self.value = ppc_swap_10(e.arg) - return True - -class ppc_tbr(ppc_imm): - - def decode(self, v): - self.expr = ExprInt(ppc_swap_10(v), 32) - return True - - def encode(self, e): - if not isinstance(e, ExprInt): - return False - self.value = ppc_swap_10(e.arg) - return True - -class ppc_u08imm(ppc_u16imm): - pass - -class ppc_u05imm(ppc_u16imm): - pass - -class ppc_u04imm(ppc_u16imm): - pass - -class ppc_u02imm_noarg(imm_noarg): - pass - - -def ppc_bo_bi_to_mnemo(bo, bi, prefer_taken=True, default_taken=True): - bo2mnemo = { 0: 'DNZF', 2: 'DZF', 4: 'F', 8: 'DNZT', - 10: 'DZT', 12: 'T', 16: 'DNZ', 18: 'DZ', - 20: '' } - bi2cond = { 0b00: 'LT', 0b01: 'GT', 0b10: 'EQ', 0b11: 'SO' } - bi2ncond = { 0b00: 'GE', 0b01: 'LE', 0b10: 'NE', 0b11: 'NS' } - n = bo & 0b11110 - if not n in bo2mnemo: - raise NotImplementedError("Unknown BO field") - mnem = 'B' + bo2mnemo[n] - if mnem[-1] == 'T': - mnem = mnem[:-1] + bi2cond[bi & 0b11] - if mnem[-1] == 'F': - mnem = mnem[:-1] + bi2ncond[bi & 0b11] - - if prefer_taken != default_taken: - if prefer_taken: - mnem += '+' - else: - mnem += '-' - - return mnem - -def ppc_all_bo_bi(): - for bo in [0, 2, 4, 8, 10, 12, 16, 18, 20]: - for bi in range(4): - yield bo, bi - -class ppc_divert_conditional_branch(bs_divert): - prio=3 - def divert(self, i, candidates): - out = [] - for cls, _, bases, dct, fields in candidates: - bi_i = getfieldindexby_name(fields, 'bi')[1] - bo_i = getfieldindexby_name(fields, 'bo')[1] - - for bo, bi in ppc_all_bo_bi(): - nfields = fields[:] - nfields[bi_i] = bs(int2bin(bi, 2), fname="bi") - nfields[bo_i] = bs(int2bin(bo, 5), fname="bo") - ndct = dict(dct) - ndct['name'] = ppc_bo_bi_to_mnemo(bo, bi) - out.append((cls, ndct['name'], bases, ndct, nfields)) - - nfields = fields[:] - nfields[bi_i] = bs(int2bin(bi, 2), fname="bi") - nfields[bo_i] = bs(int2bin(bo+1, 5), fname="bo") - ndct = dict(dct) - ndct['name'] = ppc_bo_bi_to_mnemo(bo, bi) - 
out.append((cls, ndct['name'], bases, ndct, nfields)) - - return out - -class ppc_deref32(ppc_arg): - parser = deref - - def decode(self, v): - v = sign_ext(v, 16, 32) - e = self.parent.ra.expr + ExprInt(v, 32) - self.expr = ExprMem(e, size=32) - return True - - def encode(self): - e = self.expr - if not isinstance(e, ExprMem): - return False - addr = e.ptr - if isinstance(addr, ExprId) or isinstance(addr, ExprInt): - addr = addr + ExprInt(0, 32) - elif not isinstance(addr, ExprOp): - return False - if addr.op != '+': - return False - if len(addr.args) != 2: - return False - reg, disp = addr.args[0], addr.args[1] - v = int(disp.arg) - if sign_ext(v & 0xFFFF, 16, 32) != v: - return False - v &= 0xFFFF - self.value = v - self.parent.ra.expr = reg - return True - - -def ppcop(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_ppc,), dct) - -rd = bs(l=5, cls=(ppc_gpreg,)) -ra = bs(l=5, cls=(ppc_gpreg,)) -ra_or_0 = bs(l=5, cls=(ppc_gpreg_or_0,)) -rb = bs(l=5, cls=(ppc_gpreg,)) -rs = bs(l=5, cls=(ppc_gpreg,)) -crfd = bs(l=3, cls=(ppc_crfreg,)) -crfs = bs(l=3, cls=(ppc_crfreg,)) -sh = bs(l=5, cls=(ppc_u05imm,)) -mb = bs(l=5, cls=(ppc_u05imm,)) -me = bs(l=5, cls=(ppc_u05imm,)) -nb = bs(l=5, cls=(ppc_u05imm,)) -crm = bs(l=8, cls=(ppc_u08imm,)) -sr = bs(l=4, cls=(ppc_u04imm,)) -spr = bs(l=10, cls=(ppc_spr,)) -tbr = bs(l=10, cls=(ppc_tbr,)) -u05imm = bs(l=5, cls=(ppc_u05imm,)) - -s24imm_branch = bs(l=24, cls=(ppc_s24imm_branch,), fname="imm") -s14imm_branch = bs(l=14, cls=(ppc_s14imm_branch,), fname="imm") -s16imm = bs(l=16, cls=(ppc_s16imm,), fname="imm") -u16imm = bs(l=16, cls=(ppc_u16imm,), fname="imm") -u08imm = bs(l=5, cls=(ppc_u08imm,), fname="imm") -u02imm_noarg = bs(l=2, cls=(ppc_u02imm_noarg,), fname="imm") - -ra_noarg = bs(l=5, cls=(ppc_gpreg_noarg,), fname="ra") -ra_or_0_noarg = bs(l=5, cls=(ppc_gpreg_or_0_noarg,), fname="ra") -dregimm = bs(l=16, cls=(ppc_deref32,)) - 
-rc_mod = bs_mod_name(l=1, mn_mod=['', '.'], fname='rc') - -arith1_name = {"MULLI": 0b000111, "SUBFIC": 0b001000, "ADDIC": 0b001100, - "ADDIC.": 0b001101 } - -logic2_name = {"ORI": 0b011000, "XORI": 0b011010, "ANDI.": 0b011100 } -slogic2_name = {"ORIS": 0b011001, "XORIS": 0b011011, "ANDIS.": 0b011101 } - -arith3_name = {"SUBFC": 0b0000001000, "ADDC": 0b0000001010, - "MULHWU": 0b0000001011, "SUBF": 0b0000101000, - "MULHW": 0b0001001011, "SUBFE": 0b0010001000, - "ADDE": 0b0010001010, "MULLW": 0b0011101011, - "ADD": 0b0100001010, "DIVWU": 0b0111001011, - "DIVW": 0b0111101011, "SUBFCO": 0b1000001000, - "ADDCO": 0b1000001010, "SUBFO": 0b1000101000, - "SUBFEO": 0b1010001000, "ADDEO": 0b1010001010, - "MULLWO": 0b1011101011, "ADDO": 0b1100001010, - "DIVWUO": 0b1111001011, "DIVWO": 0b1111101011 } - -xor_name = { "EQV": 0b0100011100, "XOR": 0b0100111100 } - -arith4_name = {"NEG": 0b0001101000, "SUBFZE": 0b0011001000, - "ADDZE": 0b0011001010, "SUBFME": 0b0011101000, - "ADDME": 0b0011101010, "NEGO": 0b1001101000, - "SUBFZEO": 0b1011001000, "ADDZEO": 0b1011001010, - "SUBFMEO": 0b1011101000, "ADDMEO": 0b1011101010 } - -arith5_name = {"CNTLZW": 0b00000, "EXTSH": 0b11100, "EXTSB": 0b11101 } - -crlogic_name = {"CRAND": 0b1000, "CRANDC": 0b0100, "CREQV": 0b1001, - "CRNAND": 0b0111, "CRNOR": 0b0001, "CROR": 0b1110, - "CRORC": 0b1101, "CRXOR": 0b0110 } - -rotins_name = {"RLWIMI": 0b010100, "RLWINM": 0b010101 } - -bs_arith1_name = bs_name(l=6, name=arith1_name) - -load1_name = {"LWARX": 0b0000010100, "LWZX": 0b0000010111, - "LBZX": 0b0001010111, "LHZX": 0b0100010111, - "ECIWX": 0b0100110110, "LHAX": 0b0101010111, - "LSWX": 0b1000010101, "LWBRX": 0b1000010110, - "LHBRX": 0b1100010110 } - -load1_name_u = {"LWZUX": 0b0000110111, "LBZUX": 0b0001110111, - "LHZUX": 0b0100110111, "LHAUX": 0b0101110111 } - -load2_name = {"LWZ": 0b0000, "LBZ": 0b0010, "LHZ": 0b1000, "LHA": 0b1010, - "LMW": 0b1110 } - -load2_name_u = {"LWZU": 0b0001, "LBZU": 0b0011, "LHZU": 0b1001, "LHAU": 0b1011} - -store1_name 
= { "STWCX.": 0b00100101101, "STWX": 0b00100101110, - "STBX": 0b00110101110, "STHX": 0b01100101110, - "ECOWX": 0b01101101100, "STSWX": 0b10100101010, - "STWBRX": 0b10100101100, "STHBRX": 0b11100101100 } -store1_name_u = { "STWUX": 0b00101101110, "STBUX": 0b00111101110, - "STHUX": 0b01101101110 } - -store2_name = { "STW": 0b0100, "STB": 0b0110, "STH": 0b1100, "STMW": 0b1111 } -store2_name_u = { "STWU": 0b0101, "STBU": 0b0111, "STHU": 0b1101 } - -logic1_name = {"SLW": 0b0000011000, "AND": 0b0000011100, - "ANDC": 0b0000111100, "NOR": 0b0001111100, - "ORC": 0b0110011100, "OR": 0b0110111100, - "NAND": 0b0111011100, "SRW": 0b1000011000, - "SRAW": 0b1100011000 } - -dcb_name = {"DCBST": 0b00001, "DCBF": 0b00010, - "DCBTST": 0b00111, "DCBT": 0b01000, - "DCBI": 0b01110, "DCBA": 0b10111, - "ICBI": 0b11110, "DCBZ": 0b11111 } - -class bs_mod_name_prio4(bs_mod_name): - prio = 4 - -class bs_mod_name_prio5(bs_mod_name): - prio = 5 - -class bs_mod_name_prio6(bs_mod_name): - prio = 6 - -branch_to_reg = bs_mod_name_prio4(l=1, mn_mod=['LR', 'CTR'], fname='btoreg') -branch_lk = bs_mod_name_prio5(l=1, mn_mod=['', 'L'], fname='lk') -branch_aa = bs_mod_name_prio6(l=1, mn_mod=['', 'A'], fname='aa') - -ppcop("arith1", [bs_arith1_name, rd, ra, s16imm]) -ppcop("ADDIS", [bs('001111'), rd, ra_or_0, u16imm]) -ppcop("ADDI", [bs('001110'), rd, ra_or_0, s16imm]) - -ppcop("logic2", [bs_name(l=6, name=logic2_name), rs, ra, u16imm], - [ra, rs, u16imm]) -ppcop("slogic2", [bs_name(l=6, name=slogic2_name), rs, ra, u16imm], - [ra, rs, u16imm]) - -ppcop("store1", [bs('011111'), rs, ra_or_0, rb, - bs_name(l=11, name=store1_name)]) -ppcop("store1u", [bs('011111'), rs, ra, rb, - bs_name(l=11, name=store1_name_u)]) - -ppcop("store2", [bs('10'), bs_name(l=4, name=store2_name), rs, - ra_noarg, dregimm]) -ppcop("store2u", [bs('10'), bs_name(l=4, name=store2_name_u), rs, - ra_or_0_noarg, dregimm]) - -ppcop("arith3", [bs('011111'), rd, ra, rb, bs_name(l=10, name=arith3_name), - rc_mod]) - -ppcop("xor", 
[bs('011111'), rs, ra, rb, bs_name(l=10, name=xor_name), - rc_mod], [ra, rs, rb]) - -ppcop("arith4", [bs('011111'), rd, ra, bs('00000'), - bs_name(l=10, name=arith4_name), rc_mod]) - -ppcop("arith5", [bs('011111'), rs, ra, bs('00000'), - bs_name(l=5, name=arith5_name), - bs('11010'), rc_mod], [ra, rs]) - -ppcop("load1", [bs('011111'), rd, ra_or_0, rb, - bs_name(l=10, name=load1_name), bs('0')]) -ppcop("load1u", [bs('011111'), rd, ra, rb, - bs_name(l=10, name=load1_name_u), bs('0')]) -ppcop("load2", [bs('10'), bs_name(l=4, name=load2_name), - rd, ra_or_0_noarg, dregimm]) -ppcop("load2u", [bs('10'), bs_name(l=4, name=load2_name_u), - rd, ra_noarg, dregimm]) - -ppcop("logic1", [bs('011111'), rs, ra, rb, bs_name(l=10, name=logic1_name), - rc_mod], - [ra, rs, rb]) - -ppcop("TWI", [bs('000011'), u05imm, ra, s16imm]) -ppcop("TW", [bs('011111'), u05imm, ra, rb, bs('00000001000')]) - -ppcop("CMPW", [bs('011111'), crfd, bs('00'), ra, rb, bs('00000000000')]) -ppcop("CMPLW", [bs('011111'), crfd, bs('00'), ra, rb, bs('00001000000')]) -ppcop("CMPLWI", [bs('001010'), crfd, bs('00'), ra, u16imm]) -ppcop("CMPWI", [bs('001011'), crfd, bs('00'), ra, s16imm]) - -ppcop("BC", [bs('010000'), bs(l=5, cls=(ppc_u05imm,), fname='bo'), - crfs, - ppc_divert_conditional_branch(l=2, fname='bi'), - s14imm_branch, branch_aa, branch_lk]) -ppcop("SC", [bs('01000100000000000000000000000010')]) -ppcop("B", [bs('010010'), s24imm_branch, branch_aa, branch_lk]) -ppcop("MCRF", [bs('010011'), crfd, bs('00'), crfs, bs('000000000000000000')]) - -ppcop("BCXXX", [bs('010011'), bs(l=5, cls=(ppc_u05imm,), fname='bo'), - crfs, - ppc_divert_conditional_branch(l=2, fname='bi'), - bs('00000'), branch_to_reg, - bs('000010000'), branch_lk]) - -ppcop("crlogic", [bs('010011'), - bs(l=5, cls=(ppc_u05imm,), fname='crbd'), - bs(l=5, cls=(ppc_u05imm,), fname='crba'), - bs(l=5, cls=(ppc_u05imm,), fname='crbb'), - bs('0'), - bs_name(l=4, name=crlogic_name), - bs('000010')]) - -ppcop("rotins", [bs_name(l=6, name=rotins_name), 
- rs, ra, sh, mb, me, rc_mod], - [ ra, rs, sh, mb, me ]) -ppcop("RLWNM", [bs('010111'), rs, ra, rb, mb, me, rc_mod], - [ ra, rs, rb, mb, me ]) -ppcop("MFXXX", [bs('011111'), rd, bs('0000000000'), - bs('000'), - bs_name(l=1, name={'MFCR':0, 'MFMSR':1}), - bs('0100110')]) - -ppcop("dcb", [bs('01111100000'), ra, rb, bs_name(l=5, name=dcb_name), - bs('101100')]) - -ppcop("MTCRF", [bs('011111'), rs, bs('0'), crm, bs('000100100000')], [crm, rs]) -ppcop("MTMSR", [bs('011111'), rs, bs('0000000000'), bs('00100100100')]) -ppcop("MTSR", [bs('011111'), rs, bs('0'), sr, bs('0000000110100100')], [sr, rs]) -ppcop("MTSRIN", [bs('011111'), rs, bs('00000'), rb, bs('00111100100')]) - -ppcop("TLBIE", [bs('011111'), bs('0000000000'), rb, bs('01001100100')]) -ppcop("MFSPR", [bs('011111'), rd, spr, bs('01010100110')]) -ppcop("TLBIA", [bs('01111100000000000000001011100100')]) -ppcop("MFTB", [bs('011111'), rd, tbr, bs('01011100110')]) -ppcop("RFI", [bs('01001100000000000000000001100100')]) -ppcop("ISYNC", [bs('01001100000000000000000100101100')]) -ppcop("MTSPR", [bs('011111'), rs, spr, bs('01110100110')], [spr, rs]) -ppcop("MCRXR", [bs('011111'), crfd, bs('000000000000'), - bs('10000000000')]) -ppcop("TLBSYNC", [bs('01111100000000000000010001101100')]) -ppcop("MFSR", [bs('011111'), rd, bs('0'), sr, bs('00000'), bs('10010100110')]) -ppcop("LSWI", [bs('011111'), rd, ra, nb, bs('10010101010')]) -ppcop("STSWI", [bs('011111'), rs, ra, nb, bs('10110101010')]) -ppcop("SYNC", [bs('011111'), bs('000000000000000'), bs('10010101100')]) -ppcop("MFSRIN", [bs('011111'), rd, bs('00000'), rb, bs('10100100110')]) - -ppcop("SRAWI", [bs('011111'), rs, ra, sh, bs('1100111000'), rc_mod], - [ra, rs, sh]) - -ppcop("EIEIO", [bs('011111'), bs('000000000000000'), bs('11010101100')]) diff --git a/miasm2/arch/ppc/disasm.py b/miasm2/arch/ppc/disasm.py deleted file mode 100644 index 9fe3d886..00000000 --- a/miasm2/arch/ppc/disasm.py +++ /dev/null @@ -1,7 +0,0 @@ -from miasm2.arch.ppc.arch import mn_ppc -from 
miasm2.core.asmblock import disasmEngine - -class dis_ppc32b(disasmEngine): - def __init__(self, bs=None, **kwargs): - super(dis_ppc32b, self).__init__(mn_ppc, None, bs, **kwargs) - self.attrib = 'b' diff --git a/miasm2/arch/ppc/ira.py b/miasm2/arch/ppc/ira.py deleted file mode 100644 index cde57dd3..00000000 --- a/miasm2/arch/ppc/ira.py +++ /dev/null @@ -1,87 +0,0 @@ -from miasm2.expression.expression import ExprAssign, ExprOp -from miasm2.ir.ir import AssignBlock -from miasm2.ir.analysis import ira -from miasm2.arch.ppc.sem import ir_ppc32b - - -class ir_a_ppc32b(ir_ppc32b, ira): - - def __init__(self, *args): - super(ir_a_ppc32b, self).__init__(*args) - self.ret_reg = self.arch.regs.R3 - - # for test XXX TODO - def set_dead_regs(self, irblock): - pass - - def get_out_regs(self, _): - return set([self.ret_reg, self.sp]) - - def add_unused_regs(self): - leaves = [self.blocks[label] for label in self.g.leafs()] - for irblock in leaves: - self.set_dead_regs(irblock) - - def call_effects(self, ad, instr): - call_assignblks = AssignBlock( - [ - ExprAssign( - self.ret_reg, - ExprOp( - 'call_func_ret', - ad, - self.sp, - self.arch.regs.R3, - self.arch.regs.R4, - self.arch.regs.R5, - ) - ), - ExprAssign(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ], - instr - ) - return [call_assignblks], [] - - def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): - """ - Add the IR effects of an instruction to the current state. 
- - @instr: native instruction - @block: native block source - @assignments: list of current AssignBlocks - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - """ - if instr.is_subcall(): - call_assignblks, extra_irblocks = self.call_effects( - instr.getdstflow(None)[0], - instr - ) - assignments += call_assignblks - ir_blocks_all += extra_irblocks - return True - - if gen_pc_updt is not False: - self.gen_pc_update(assignments, instr) - - assignblk, ir_blocks_extra = self.instr2ir(instr) - assignments.append(assignblk) - ir_blocks_all += ir_blocks_extra - if ir_blocks_extra: - return True - return False - - def sizeof_char(self): - return 8 - - def sizeof_short(self): - return 16 - - def sizeof_int(self): - return 32 - - def sizeof_long(self): - return 32 - - def sizeof_pointer(self): - return 32 diff --git a/miasm2/arch/ppc/jit.py b/miasm2/arch/ppc/jit.py deleted file mode 100644 index 8dc4aa99..00000000 --- a/miasm2/arch/ppc/jit.py +++ /dev/null @@ -1,71 +0,0 @@ -from builtins import range -from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core.locationdb import LocationDB -from miasm2.arch.ppc.sem import ir_ppc32b -import struct - -import logging - -log = logging.getLogger('jit_ppc') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - -class jitter_ppc32b(Jitter): - max_reg_arg = 8 - - def __init__(self, *args, **kwargs): - super(jitter_ppc32b, self).__init__(ir_ppc32b(LocationDB()), - *args, **kwargs) - self.vm.set_big_endian() - - def push_uint32_t(self, v): - self.cpu.R1 -= 4 - self.vm.set_mem(self.cpu.R1, struct.pack(">I", v)) - - def pop_uint32_t(self): - x = struct.unpack(">I", self.vm.get_mem(self.cpu.R1, 4))[0] - self.cpu.R1 += 4 - return x - - def get_stack_arg(self, n): - x = struct.unpack(">I", self.vm.get_mem(self.cpu.R1 + 8 + 4 * n, 4))[0] - return x - - @named_arguments - 
def func_args_systemv(self, n_args): - args = [self.get_arg_n_systemv(i) for i in range(n_args)] - ret_ad = self.cpu.LR - return ret_ad, args - - def func_ret_systemv(self, ret_addr, ret_value1=None, ret_value2=None): - self.pc = self.cpu.PC = ret_addr - if ret_value1 is not None: - self.cpu.R3 = ret_value1 - if ret_value2 is not None: - self.cpu.R4 = ret_value2 - return True - - def func_prepare_systemv(self, ret_addr, *args): - for index in range(min(len(args), self.max_reg_arg)): - setattr(self.cpu, 'R%d' % (index + 3), args[index]) - for index in range(len(args) - 1, self.max_reg_arg - 1, -1): - self.push_uint32_t(args[index]) - - # reserve room for LR save word and backchain - self.cpu.R1 -= 8 - - self.cpu.LR = ret_addr - - def get_arg_n_systemv(self, index): - if index < self.max_reg_arg: - arg = getattr(self.cpu, 'R%d' % (index + 3)) - else: - arg = self.get_stack_arg(index - self.max_reg_arg) - return arg - - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.PC = self.pc diff --git a/miasm2/arch/ppc/regs.py b/miasm2/arch/ppc/regs.py deleted file mode 100644 index e70afce2..00000000 --- a/miasm2/arch/ppc/regs.py +++ /dev/null @@ -1,60 +0,0 @@ - -from builtins import range -from miasm2.expression.expression import * -from miasm2.core.cpu import gen_reg, gen_regs - -exception_flags = ExprId('exception_flags', 32) -spr_access = ExprId('spr_access', 32) - -reserve = ExprId('reserve', 1) -reserve_address = ExprId('reserve_address', 32) - -SPR_ACCESS_IS_WRITE = 0x80000000 -SPR_ACCESS_SPR_MASK = 0x000003FF -SPR_ACCESS_SPR_OFF = 0 -SPR_ACCESS_GPR_MASK = 0x0001F000 -SPR_ACCESS_GPR_OFF = 12 - -gpregs_str = ["R%d" % i for i in range(32)] -gpregs_expr, gpregs_init, gpregs = gen_regs(gpregs_str, globals(), 32) - -crfregs_str = ["CR%d" % i for i in range(8)] -crfregs_expr, crfregs_init, crfregs = gen_regs(crfregs_str, globals(), 4) - -crfbitregs_str = ["CR%d_%s" % (i, flag) for i in range(8) - for flag in ['LT', 'GT', 'EQ', 'SO'] ] 
-crfbitregs_expr, crfbitregs_init, crfbitregs = gen_regs(crfbitregs_str, - globals(), 1) - -xerbitregs_str = ["XER_%s" % field for field in ['SO', 'OV', 'CA'] ] -xerbitregs_expr, xerbitregs_init, xerbitregs = gen_regs(xerbitregs_str, - globals(), 1) - -xerbcreg_str = ["XER_BC"] -xerbcreg_expr, xerbcreg_init, xerbcreg = gen_regs(xerbcreg_str, - globals(), 7) - - -otherregs_str = ["PC", "CTR", "LR" ] -otherregs_expr, otherregs_init, otherregs = gen_regs(otherregs_str, - globals(), 32) - -superregs_str = (["SPRG%d" % i for i in range(4)] + - ["SRR%d" % i for i in range(2)] + - ["DAR", "DSISR", "MSR", "PIR", "PVR", - "DEC", "TBL", "TBU"]) -superregs_expr, superregs_init, superregs = gen_regs(superregs_str, - globals(), 32) - -regs_flt_expr = [] - -all_regs_ids = (gpregs_expr + crfbitregs_expr + xerbitregs_expr + - xerbcreg_expr + otherregs_expr + superregs_expr + - [ exception_flags, spr_access, reserve, reserve_address ]) -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) -all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] -all_regs_ids_no_alias = all_regs_ids[:] - -regs_init = {} -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] diff --git a/miasm2/arch/ppc/sem.py b/miasm2/arch/ppc/sem.py deleted file mode 100644 index ef44ffe3..00000000 --- a/miasm2/arch/ppc/sem.py +++ /dev/null @@ -1,924 +0,0 @@ -from __future__ import print_function -from builtins import range - -import miasm2.expression.expression as expr -from miasm2.ir.ir import AssignBlock, IntermediateRepresentation, IRBlock -from miasm2.arch.ppc.arch import mn_ppc -from miasm2.arch.ppc.regs import * -from miasm2.core.sembuilder import SemBuilder -from miasm2.jitter.csts import * - -spr_dict = { - 8: LR, 9: CTR, 18: DSISR, 19: DAR, - 22: DEC, 26: SRR0, 27: SRR1, - 272: SPRG0, 273: SPRG0, 274: SPRG1, 275: SPRG2, 276: SPRG3, - 284: TBL, 285: TBU, 287: PVR, 1023: PIR -} - -crf_dict = dict((ExprId("CR%d" % i, 4), - dict( (bit, ExprId("CR%d_%s" % 
(i, bit), 1)) - for bit in ['LT', 'GT', 'EQ', 'SO' ] )) - for i in range(8) ) - -ctx = { - 'crf_dict': crf_dict, - 'spr_dict': spr_dict, - 'expr': expr, -} - -ctx.update(all_regs_ids_byname) -sbuild = SemBuilder(ctx) - -def mn_compute_flags(rvalue, overflow_expr=None): - ret = [] - ret.append(ExprAssign(CR0_LT, rvalue.msb())) - ret.append(ExprAssign(CR0_GT, (ExprCond(rvalue, ExprInt(1, 1), - ExprInt(0, 1)) & ~rvalue.msb()))) - ret.append(ExprAssign(CR0_EQ, ExprCond(rvalue, ExprInt(0, 1), - ExprInt(1, 1)))) - if overflow_expr != None: - ret.append(ExprAssign(CR0_SO, XER_SO | overflow_expr)) - else: - ret.append(ExprAssign(CR0_SO, XER_SO)) - - return ret - -def mn_do_add(ir, instr, arg1, arg2, arg3): - assert instr.name[0:3] == 'ADD' - - flags_update = [] - - has_dot = False - has_c = False - has_e = False - has_o = False - - for l in instr.name[3:]: - if l == '.': - has_dot = True - elif l == 'C': - has_c = True - elif l == 'E': - has_e = True - elif l == 'O': - has_o = True - elif l == 'I' or l == 'M' or l == 'S' or l == 'Z': - pass # Taken care of earlier - else: - assert False - - rvalue = arg2 + arg3 - - if has_e: - rvalue = rvalue + XER_CA.zeroExtend(32) - - over_expr = None - if has_o: - msb1 = arg2.msb() - msb2 = arg3.msb() - msba = rvalue.msb() - over_expr = ~(msb1 ^ msb2) & (msb1 ^ msba) - flags_update.append(ExprAssign(XER_OV, over_expr)) - flags_update.append(ExprAssign(XER_SO, XER_SO | over_expr)) - - if has_dot: - flags_update += mn_compute_flags(rvalue, over_expr) - - if has_c or has_e: - carry_expr = (((arg2 ^ arg3) ^ rvalue) ^ - ((arg2 ^ rvalue) & (~(arg2 ^ arg3)))).msb() - flags_update.append(ExprAssign(XER_CA, carry_expr)) - - return ([ ExprAssign(arg1, rvalue) ] + flags_update), [] - -def mn_do_and(ir, instr, ra, rs, arg2): - if len(instr.name) > 3 and instr.name[3] == 'C': - oarg = ~arg2 - else: - oarg = arg2 - - rvalue = rs & oarg - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, 
[] - -def mn_do_cntlzw(ir, instr, ra, rs): - ret = [ ExprAssign(ra, ExprOp('cntleadzeros', rs)) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def crbit_to_reg(bit): - bit = bit.arg.arg - crid = bit // 4 - bitname = [ 'LT', 'GT', 'EQ', 'SO' ][bit % 4] - return all_regs_ids_byname["CR%d_%s" % (crid, bitname)] - -def mn_do_cr(ir, instr, crd, cra, crb): - a = crbit_to_reg(cra) - b = crbit_to_reg(crb) - d = crbit_to_reg(crd) - - op = instr.name[2:] - - if op == 'AND': - r = a & b - elif op == 'ANDC': - r = a & ~b - elif op == 'EQV': - r = ~(a ^ b) - elif op == 'NAND': - r = ~(a & b) - elif op == 'NOR': - r = ~(a | b) - elif op == 'OR': - r = a | b - elif op == 'ORC': - r = a | ~b - elif op == 'XOR': - r = a ^ b - else: - raise RuntimeError("Unknown operation on CR") - return [ ExprAssign(d, r) ], [] - -def mn_do_div(ir, instr, rd, ra, rb): - assert instr.name[0:4] == 'DIVW' - - flags_update = [] - - has_dot = False - has_c = False - has_o = False - has_u = False - - for l in instr.name[3:]: - if l == '.': - has_dot = True - elif l == 'C': - has_c = True - elif l == 'O': - has_o = True - elif l == 'U': - has_u = True - elif l == 'W': - pass - else: - assert False - - if has_u: - op = 'udiv' - else: - op = 'sdiv' - - rvalue = ExprOp(op, ra, rb) - - over_expr = None - if has_o: - over_expr = ExprCond(rb, ExprInt(0, 1), ExprInt(1, 1)) - if not has_u: - over_expr = over_expr | (ExprCond(ra ^ 0x80000000, ExprInt(0, 1), - ExprInt(1, 1)) & - ExprCond(rb ^ 0xFFFFFFFF, ExprInt(0, 1), - ExprInt(1, 1))) - flags_update.append(ExprAssign(XER_OV, over_expr)) - flags_update.append(ExprAssign(XER_SO, XER_SO | over_expr)) - - if has_dot: - flags_update += mn_compute_flags(rvalue, over_expr) - - return ([ ExprAssign(rd, rvalue) ] + flags_update), [] - - -def mn_do_eqv(ir, instr, ra, rs, rb): - rvalue = ~(rs ^ rb) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def 
mn_do_exts(ir, instr, ra, rs): - if instr.name[4] == 'B': - size = 8 - elif instr.name[4] == 'H': - size = 16 - else: - assert False - - rvalue = rs[0:size].signExtend(32) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def byte_swap(expr): - nbytes = expr.size // 8 - bytes = [ expr[i*8:i*8+8] for i in range(nbytes - 1, -1, -1) ] - return ExprCompose(bytes) - -def mn_do_load(ir, instr, arg1, arg2, arg3=None): - assert instr.name[0] == 'L' - - ret = [] - - if instr.name[1] == 'M': - return mn_do_lmw(ir, instr, arg1, arg2) - elif instr.name[1] == 'S': - raise RuntimeError("LSWI, and LSWX need implementing") - - size = {'B': 8, 'H': 16, 'W': 32}[instr.name[1]] - - has_a = False - has_b = False - has_u = False - is_lwarx = False - - for l in instr.name[2:]: - if l == 'A': - has_a = True - elif l == 'B': - has_b = True - elif l == 'U': - has_u = True - elif l == 'X' or l == 'Z': - pass # Taken care of earlier - elif l == 'R' and not has_b: - is_lwarx = True - else: - assert False - - if arg3 is None: - assert isinstance(arg2, ExprMem) - - address = arg2.ptr - else: - address = arg2 + arg3 - - src = ExprMem(address, size) - - if has_b: - src = byte_swap(src) - - if has_a: - src = src.signExtend(32) - else: - src = src.zeroExtend(32) - - ret.append(ExprAssign(arg1, src)) - if has_u: - if arg3 is None: - ret.append(ExprAssign(arg2.ptr.args[0], address)) - else: - ret.append(ExprAssign(arg2, address)) - - if is_lwarx: - ret.append(ExprAssign(reserve, ExprInt(1, 1))) - ret.append(ExprAssign(reserve_address, address)) # XXX should be the PA - - return ret, [] - -def mn_do_lmw(ir, instr, rd, src): - ret = [] - address = src.arg - ri = int(rd.name[1:],10) - i = 0 - while ri <= 31: - ret.append(ExprAssign(all_regs_ids_byname["R%d" % ri], - ExprMem(address + ExprInt(i, 32), 32))) - ri += 1 - i += 4 - - return ret, [] - -def mn_do_lswi(ir, instr, rd, ra, nb): - if nb == 0: - nb = 32 - i = 32 - raise 
RuntimeError("%r not implemented" % instr) - -def mn_do_lswx(ir, instr, rd, ra, nb): - raise RuntimeError("%r not implemented" % instr) - -def mn_do_mcrf(ir, instr, crfd, crfs): - ret = [] - - for bit in [ 'LT', 'GT', 'EQ', 'SO' ]: - d = all_regs_ids_byname["%s_%s" % (crfd, bit)] - s = all_regs_ids_byname["%s_%s" % (crfs, bit)] - ret.append(ExprAssign(d, s)) - - return ret, [] - -def mn_do_mcrxr(ir, instr, crfd): - ret = [] - - for (bit, val) in [ ('LT', XER_SO), ('GT', XER_OV), ('EQ', XER_CA), - ('SO', ExprInt(0, 1)) ]: - ret.append(ExprAssign(all_regs_ids_byname["%s_%s" % (crfd, bit)], val)) - - return ret, [] - -def mn_do_mfcr(ir, instr, rd): - return ([ ExprAssign(rd, ExprCompose(*[ all_regs_ids_byname["CR%d_%s" % (i, b)] - for i in range(7, -1, -1) - for b in ['SO', 'EQ', 'GT', 'LT']]))], - []) - -@sbuild.parse -def mn_mfmsr(rd): - rd = MSR - -def mn_mfspr(ir, instr, arg1, arg2): - sprid = arg2.arg.arg - gprid = int(arg1.name[1:]) - if sprid in spr_dict: - return [ ExprAssign(arg1, spr_dict[sprid]) ], [] - elif sprid == 1: # XER - return [ ExprAssign(arg1, ExprCompose(XER_BC, ExprInt(0, 22), - XER_CA, XER_OV, XER_SO)) ], [] - else: - return [ ExprAssign(spr_access, - ExprInt(((sprid << SPR_ACCESS_SPR_OFF) | - (gprid << SPR_ACCESS_GPR_OFF)), 32)), - ExprAssign(exception_flags, ExprInt(EXCEPT_SPR_ACCESS, 32)) ], [] - -def mn_mtcrf(ir, instr, crm, rs): - ret = [] - - for i in range(8): - if crm.arg.arg & (1 << (7 - i)): - j = (28 - 4 * i) + 3 - for b in ['LT', 'GT', 'EQ', 'SO']: - ret.append(ExprAssign(all_regs_ids_byname["CR%d_%s" % (i, b)], - rs[j:j+1])) - j -= 1 - - return ret, [] - -def mn_mtmsr(ir, instr, rs): - print("%08x: MSR assigned" % instr.offset) - return [ ExprAssign(MSR, rs) ], [] - -def mn_mtspr(ir, instr, arg1, arg2): - sprid = arg1.arg.arg - gprid = int(arg2.name[1:]) - if sprid in spr_dict: - return [ ExprAssign(spr_dict[sprid], arg2) ], [] - elif sprid == 1: # XER - return [ ExprAssign(XER_SO, arg2[31:32]), - ExprAssign(XER_OV, arg2[30:31]), - 
ExprAssign(XER_CA, arg2[29:30]), - ExprAssign(XER_BC, arg2[0:7]) ], [] - else: - return [ ExprAssign(spr_access, - ExprInt(((sprid << SPR_ACCESS_SPR_OFF) | - (gprid << SPR_ACCESS_GPR_OFF) | - SPR_ACCESS_IS_WRITE), 32)), - ExprAssign(exception_flags, ExprInt(EXCEPT_SPR_ACCESS, 32)) ], [] - -def mn_do_mul(ir, instr, rd, ra, arg2): - variant = instr.name[3:] - if variant[-1] == '.': - variant = variant[:-2] - - if variant == 'HW': - v1 = ra.signExtend(64) - v2 = arg2.signExtend(64) - shift = 32 - elif variant == 'HWU': - v1 = ra.zeroExtend(64) - v2 = arg2.zeroExtend(64) - shift = 32 - else: - v1 = ra - v2 = arg2 - shift = 0 - - rvalue = ExprOp('*', v1, v2) - if shift != 0: - rvalue = rvalue[shift : shift + 32] - - ret = [ ExprAssign(rd, rvalue) ] - - over_expr = None - if variant[-1] == 'O': - over_expr = ExprCond((rvalue.signExtend(64) ^ - ExprOp('*', v1.signExtend(64), - v2.signExtend(64))), - ExprInt(1, 1), ExprInt(0, 1)) - ret.append(ExprAssign(XER_OV, over_expr)) - ret.append(ExprAssign(XER_SO, XER_SO | over_expr)) - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue, over_expr) - - return ret, [] - -def mn_do_nand(ir, instr, ra, rs, rb): - rvalue = ~(rs & rb) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_do_neg(ir, instr, rd, ra): - rvalue = -ra - ret = [ ExprAssign(rd, rvalue) ] - has_o = False - - over_expr = None - if instr.name[-1] == 'O' or instr.name[-2] == 'O': - has_o = True - over_expr = ExprCond(ra ^ ExprInt(0x80000000, 32), - ExprInt(0, 1), ExprInt(1, 1)) - ret.append(ExprAssign(XER_OV, over_expr)) - ret.append(ExprAssign(XER_SO, XER_SO | over_expr)) - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue, over_expr) - - return ret, [] - -def mn_do_nor(ir, instr, ra, rs, rb): - - rvalue = ~(rs | rb) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_do_or(ir, instr, ra, rs, 
arg2): - if len(instr.name) > 2 and instr.name[2] == 'C': - oarg = ~arg2 - else: - oarg = arg2 - - rvalue = rs | oarg - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_do_rfi(ir, instr): - dest = ExprCompose(ExprInt(0, 2), SRR0[2:32]) - ret = [ ExprAssign(MSR, (MSR & - ~ExprInt(0b1111111101110011, 32) | - ExprCompose(SRR1[0:2], ExprInt(0, 2), - SRR1[4:7], ExprInt(0, 1), - SRR1[8:16], ExprInt(0, 16)))), - ExprAssign(PC, dest), - ExprAssign(ir.IRDst, dest) ] - return ret, [] - -def mn_do_rotate(ir, instr, ra, rs, shift, mb, me): - r = ExprOp('<<<', rs, shift) - if mb <= me: - m = ExprInt(((1 << (32 - mb)) - 1) & ~((1 << (32 - me - 1)) - 1), 32) - else: - m = ExprInt(((1 << (32 - mb)) - 1) | ~((1 << (32 - me - 1)) - 1), 32) - rvalue = r & m - if instr.name[0:6] == 'RLWIMI': - rvalue = rvalue | (ra & ~m) - - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_do_slw(ir, instr, ra, rs, rb): - - rvalue = ExprCond(rb[5:6], ExprInt(0, 32), - ExprOp('<<', rs, rb & ExprInt(0b11111, 32))) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_do_sraw(ir, instr, ra, rs, rb): - rvalue = ExprCond(rb[5:6], ExprInt(0xFFFFFFFF, 32), - ExprOp('a>>', rs, rb & ExprInt(0b11111, 32))) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - mask = ExprCond(rb[5:6], ExprInt(0xFFFFFFFF, 32), - (ExprInt(0xFFFFFFFF, 32) >> - (ExprInt(32, 32) - (rb & ExprInt(0b11111, 32))))) - ret.append(ExprAssign(XER_CA, rs.msb() & - ExprCond(rs & mask, ExprInt(1, 1), ExprInt(0, 1)))) - - return ret, [] - -def mn_do_srawi(ir, instr, ra, rs, imm): - rvalue = ExprOp('a>>', rs, imm) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - mask = ExprInt(0xFFFFFFFF >> (32 - imm.arg.arg), 32) - - 
ret.append(ExprAssign(XER_CA, rs.msb() & - ExprCond(rs & mask, ExprInt(1, 1), ExprInt(0, 1)))) - - return ret, [] - -def mn_do_srw(ir, instr, ra, rs, rb): - rvalue = rs >> (rb & ExprInt(0b11111, 32)) - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_do_stmw(ir, instr, rs, dest): - ret = [] - address = dest.arg - ri = int(rs.name[1:],10) - i = 0 - while ri <= 31: - ret.append(ExprAssign(ExprMem(address + ExprInt(i,32), 32), - all_regs_ids_byname["R%d" % ri])) - ri += 1 - i += 4 - - return ret, [] - -def mn_do_store(ir, instr, arg1, arg2, arg3=None): - assert instr.name[0:2] == 'ST' - - ret = [] - additional_ir = [] - - if instr.name[2] == 'S': - raise RuntimeError("STSWI, and STSWX need implementing") - - size = {'B': 8, 'H': 16, 'W': 32}[instr.name[2]] - - has_b = False - has_u = False - is_stwcx = False - - for l in instr.name[3:]: - if l == 'B' or l == 'R': - has_b = True - elif l == 'U': - has_u = True - elif l == 'X' or l == 'Z': - pass # Taken care of earlier - elif l == 'C' or l == '.': - is_stwcx = True - else: - assert False - - if arg3 is None: - assert isinstance(arg2, ExprMem) - - address = arg2.ptr - else: - address = arg2 + arg3 - - dest = ExprMem(address, size) - - src = arg1[0:size] - if has_b: - src = byte_swap(src) - - ret.append(ExprAssign(dest, src)) - if has_u: - if arg3 is None: - ret.append(ExprAssign(arg2.ptr.args[0], address)) - else: - ret.append(ExprAssign(arg2, address)) - - if is_stwcx: - loc_do = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_dont = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - flags = [ ExprAssign(CR0_LT, ExprInt(0,1)), - ExprAssign(CR0_GT, ExprInt(0,1)), - ExprAssign(CR0_SO, XER_SO)] - ret += flags - ret.append(ExprAssign(CR0_EQ, ExprInt(1,1))) - ret.append(ExprAssign(ir.IRDst, loc_next)) - dont = flags + [ ExprAssign(CR0_EQ, ExprInt(0,1)), - ExprAssign(ir.IRDst, 
loc_next) ] - additional_ir = [ IRBlock(loc_do, [ AssignBlock(ret) ]), - IRBlock(loc_dont, [ AssignBlock(dont) ]) ] - ret = [ ExprAssign(reserve, ExprInt(0, 1)), - ExprAssign(ir.IRDst, ExprCond(reserve, loc_do, loc_dont)) ] - - return ret, additional_ir - -def mn_do_sub(ir, instr, arg1, arg2, arg3): - assert instr.name[0:4] == 'SUBF' - - flags_update = [] - - has_dot = False - has_c = False - has_e = False - has_o = False - - for l in instr.name[4:]: - if l == '.': - has_dot = True - elif l == 'C': - has_c = True - elif l == 'E': - has_e = True - elif l == 'O': - has_o = True - elif l == 'I' or l == 'M' or l == 'S' or l == 'Z': - pass # Taken care of earlier - else: - assert False - - if has_e: - arg3 = arg3 + XER_CA.zeroExtend(32) - arg2 = arg2 + ExprInt(1, 32) - - rvalue = arg3 - arg2 - - over_expr = None - if has_o: - msb1 = arg2.msb() - msb2 = arg3.msb() - msba = rvalue.msb() - over_expr = (msb1 ^ msb2) & (msb1 ^ msba) - flags_update.append(ExprAssign(XER_OV, over_expr)) - flags_update.append(ExprAssign(XER_SO, XER_SO | over_expr)) - - if has_dot: - flags_update += mn_compute_flags(rvalue, over_expr) - - if has_c or has_e: - carry_expr = ((((arg3 ^ arg2) ^ rvalue) ^ - ((arg3 ^ rvalue) & (arg3 ^ arg2))).msb()) - flags_update.append(ExprAssign(XER_CA, ~carry_expr)) - - return ([ ExprAssign(arg1, rvalue) ] + flags_update), [] - -def mn_do_xor(ir, instr, ra, rs, rb): - rvalue = rs ^ rb - ret = [ ExprAssign(ra, rvalue) ] - - if instr.name[-1] == '.': - ret += mn_compute_flags(rvalue) - - return ret, [] - -def mn_b(ir, instr, arg1, arg2 = None): - if arg2 is not None: - arg1 = arg2 - return [ ExprAssign(PC, arg1), ExprAssign(ir.IRDst, arg1) ], [] - -def mn_bl(ir, instr, arg1, arg2 = None): - if arg2 is not None: - arg1 = arg2 - dst = ir.get_next_instr(instr) - return [ ExprAssign(LR, ExprLoc(dst, 32)), - ExprAssign(PC, arg1), - ExprAssign(ir.IRDst, arg1) ], [] - -def mn_get_condition(instr): - bit = instr.additional_info.bi & 0b11 - cr = instr.args[0].name - return 
all_regs_ids_byname[cr + '_' + ['LT', 'GT', 'EQ', 'SO'][bit]] - -def mn_do_cond_branch(ir, instr, dest): - bo = instr.additional_info.bo - bi = instr.additional_info.bi - ret = [] - - if bo & 0b00100: - ctr_cond = True - else: - ret.append(ExprAssign(CTR, CTR - ExprInt(1, 32))) - ctr_cond = ExprCond(CTR ^ ExprInt(1, 32), ExprInt(1, 1), ExprInt(0, 1)) - if bo & 0b00010: - ctr_cond = ~ctr_cond - - if (bo & 0b10000): - cond_cond = True - else: - cond_cond = mn_get_condition(instr) - if not (bo & 0b01000): - cond_cond = ~cond_cond - - if ctr_cond != True or cond_cond != True: - if ctr_cond != True: - condition = ctr_cond - if cond_cond != True: - condition = condition & cond_cond - else: - condition = cond_cond - dst = ir.get_next_instr(instr) - dest_expr = ExprCond(condition, dest, - ExprLoc(dst, 32)) - else: - dest_expr = dest - - if instr.name[-1] == 'L' or instr.name[-2:-1] == 'LA': - dst = ir.get_next_instr(instr) - ret.append(ExprAssign(LR, ExprLoc(dst, 32))) - - ret.append(ExprAssign(PC, dest_expr)) - ret.append(ExprAssign(ir.IRDst, dest_expr)) - - return ret, [] - -def mn_do_nop_warn(ir, instr, *args): - print("Warning, instruction %s implemented as NOP" % instr) - return [], [] - -@sbuild.parse -def mn_cmp_signed(arg1, arg2, arg3): - crf_dict[arg1]['LT'] = expr.ExprOp(expr.TOK_INF_SIGNED, arg2, arg3) - crf_dict[arg1]['GT'] = expr.ExprOp(expr.TOK_INF_SIGNED, arg3, arg2) - crf_dict[arg1]['EQ'] = expr.ExprOp(expr.TOK_EQUAL, arg2, arg3) - crf_dict[arg1]['SO'] = XER_SO - -@sbuild.parse -def mn_cmp_unsigned(arg1, arg2, arg3): - crf_dict[arg1]['LT'] = expr.ExprOp(expr.TOK_INF_UNSIGNED, arg2, arg3) - crf_dict[arg1]['GT'] = expr.ExprOp(expr.TOK_INF_UNSIGNED, arg3, arg2) - crf_dict[arg1]['EQ'] = expr.ExprOp(expr.TOK_EQUAL, arg2, arg3) - crf_dict[arg1]['SO'] = XER_SO - -def mn_nop(ir, instr, *args): - return [], [] - -@sbuild.parse -def mn_or(arg1, arg2, arg3): - arg1 = arg2 | arg3 - -@sbuild.parse -def mn_assign(arg1, arg2): - arg2 = arg1 - -def mn_stb(ir, instr, arg1, 
arg2): - dest = ExprMem(arg2.arg, 8) - return [ExprAssign(dest, ExprSlice(arg1, 0, 8))], [] - -@sbuild.parse -def mn_stwu(arg1, arg2): - arg2 = arg1 - arg1 = arg2.arg - -sem_dir = { - 'B': mn_b, - 'BA': mn_b, - 'BL': mn_bl, - 'BLA': mn_bl, - 'CMPLW': mn_cmp_unsigned, - 'CMPLWI': mn_cmp_unsigned, - 'CMPW': mn_cmp_signed, - 'CMPWI': mn_cmp_signed, - 'CNTLZW': mn_do_cntlzw, - 'CNTLZW.': mn_do_cntlzw, - 'ECIWX': mn_do_nop_warn, - 'ECOWX': mn_do_nop_warn, - 'EIEIO': mn_do_nop_warn, - 'EQV': mn_do_eqv, - 'EQV.': mn_do_eqv, - 'ICBI': mn_do_nop_warn, - 'ISYNC': mn_do_nop_warn, - 'MCRF': mn_do_mcrf, - 'MCRXR': mn_do_mcrxr, - 'MFCR': mn_do_mfcr, - 'MFMSR': mn_mfmsr, - 'MFSPR': mn_mfspr, - 'MFSR': mn_do_nop_warn, - 'MFSRIN': mn_do_nop_warn, - 'MFTB': mn_mfmsr, - 'MTCRF': mn_mtcrf, - 'MTMSR': mn_mtmsr, - 'MTSPR': mn_mtspr, - 'MTSR': mn_do_nop_warn, - 'MTSRIN': mn_do_nop_warn, - 'NAND': mn_do_nand, - 'NAND.': mn_do_nand, - 'NOR': mn_do_nor, - 'NOR.': mn_do_nor, - 'RFI': mn_do_rfi, - 'SC': mn_do_nop_warn, - 'SLW': mn_do_slw, - 'SLW.': mn_do_slw, - 'SRAW': mn_do_sraw, - 'SRAW.': mn_do_sraw, - 'SRAWI': mn_do_srawi, - 'SRAWI.': mn_do_srawi, - 'SRW': mn_do_srw, - 'SRW.': mn_do_srw, - 'SYNC': mn_do_nop_warn, - 'TLBIA': mn_do_nop_warn, - 'TLBIE': mn_do_nop_warn, - 'TLBSYNC': mn_do_nop_warn, - 'TW': mn_do_nop_warn, - 'TWI': mn_do_nop_warn, -} - - -class ir_ppc32b(IntermediateRepresentation): - - def __init__(self, loc_db=None): - super(ir_ppc32b, self).__init__(mn_ppc, 'b', loc_db) - self.pc = mn_ppc.getpc() - self.sp = mn_ppc.getsp() - self.IRDst = expr.ExprId('IRDst', 32) - self.addrsize = 32 - - def get_ir(self, instr): - args = instr.args[:] - if instr.name[0:5] in [ 'ADDIS', 'ORIS', 'XORIS', 'ANDIS' ]: - args[2] = ExprInt(args[2].arg << 16, 32) - if instr.name[0:3] == 'ADD': - if instr.name[0:4] == 'ADDZ': - last_arg = ExprInt(0, 32) - elif instr.name[0:4] == 'ADDM': - last_arg = ExprInt(0xFFFFFFFF, 32) - else: - last_arg = args[2] - instr_ir, extra_ir = mn_do_add(self, instr, 
args[0], args[1], - last_arg) - elif instr.name[0:3] == 'AND': - instr_ir, extra_ir = mn_do_and(self, instr, *args) - elif instr.additional_info.bo_bi_are_defined: - name = instr.name - if name[-1] == '+' or name[-1] == '-': - name = name[0:-1] - if name[-3:] == 'CTR' or name[-4:] == 'CTRL': - arg1 = ExprCompose(ExprInt(0, 2), CTR[2:32]) - elif name[-2:] == 'LR' or name[-3:] == 'LRL': - arg1 = ExprCompose(ExprInt(0, 2), LR[2:32]) - else: - arg1 = args[1] - instr_ir, extra_ir = mn_do_cond_branch(self, instr, arg1) - elif instr.name[0:2] == 'CR': - instr_ir, extra_ir = mn_do_cr(self, instr, *args) - elif instr.name[0:3] == 'DCB': - instr_ir, extra_ir = mn_do_nop_warn(self, instr, *args) - elif instr.name[0:3] == 'DIV': - instr_ir, extra_ir = mn_do_div(self, instr, *args) - elif instr.name[0:4] == 'EXTS': - instr_ir, extra_ir = mn_do_exts(self, instr, *args) - elif instr.name[0] == 'L': - instr_ir, extra_ir = mn_do_load(self, instr, *args) - elif instr.name[0:3] == 'MUL': - instr_ir, extra_ir = mn_do_mul(self, instr, *args) - elif instr.name[0:3] == 'NEG': - instr_ir, extra_ir = mn_do_neg(self, instr, *args) - elif instr.name[0:2] == 'OR': - instr_ir, extra_ir = mn_do_or(self, instr, *args) - elif instr.name[0:2] == 'RL': - instr_ir, extra_ir = mn_do_rotate(self, instr, args[0], args[1], - args[2], args[3].arg.arg, - args[4].arg.arg) - elif instr.name == 'STMW': - instr_ir, extra_ir = mn_do_stmw(self, instr, *args) - elif instr.name[0:2] == 'ST': - instr_ir, extra_ir = mn_do_store(self, instr, *args) - elif instr.name[0:4] == 'SUBF': - if instr.name[0:5] == 'SUBFZ': - last_arg = ExprInt(0) - elif instr.name[0:5] == 'SUBFM': - last_arg = ExprInt(0xFFFFFFFF) - else: - last_arg = args[2] - instr_ir, extra_ir = mn_do_sub(self, instr, args[0], args[1], - last_arg) - elif instr.name[0:3] == 'XOR': - instr_ir, extra_ir = mn_do_xor(self, instr, *args) - else: - instr_ir, extra_ir = sem_dir[instr.name](self, instr, *args) - - return instr_ir, extra_ir - - def 
get_next_instr(self, instr): - l = self.loc_db.get_or_create_offset_location(instr.offset + 4) - return l - - def get_next_break_loc_key(self, instr): - l = self.loc_db.get_or_create_offset_location(instr.offset + 4) - return l diff --git a/miasm2/arch/sh4/__init__.py b/miasm2/arch/sh4/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py deleted file mode 100644 index c7ff6ab0..00000000 --- a/miasm2/arch/sh4/arch.py +++ /dev/null @@ -1,999 +0,0 @@ -#-*- coding:utf-8 -*- - -from __future__ import print_function -from builtins import range - -from pyparsing import * -from miasm2.core.cpu import * -from miasm2.expression.expression import * -from collections import defaultdict -import miasm2.arch.sh4.regs as regs_module -from miasm2.arch.sh4.regs import * - - -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp - -jra = ExprId('jra', 32) -jrb = ExprId('jrb', 32) -jrc = ExprId('jrc', 32) - - -# parser helper ########### -PLUS = Suppress("+") -MULT = Suppress("*") -MINUS = Suppress("-") -AND = Suppress("&") -LBRACK = Suppress("[") -RBRACK = Suppress("]") -DEREF = Suppress("@") -COMMA = Suppress(",") -LPARENT = Suppress("(") -RPARENT = Suppress(")") - - -def cb_deref_pcimm(tokens): - return tokens[0] + tokens[1] - - -def cb_pcandimmimm(tokens): - return (tokens[0] & tokens[1]) + tokens[2] - - - -ref_pc = (LPARENT + reg_info_pc.parser + COMMA + base_expr + RPARENT).setParseAction(cb_deref_pcimm) -ref_pcandimm = (LPARENT + reg_info_pc.parser + AND + base_expr + COMMA + base_expr + RPARENT).setParseAction(cb_pcandimmimm) -pcdisp = (reg_info_pc.parser + AND + base_expr + PLUS + base_expr).setParseAction(cb_pcandimmimm) - -PTR = Suppress('PTR') - - -def cb_deref_mem(tokens): - assert len(tokens) == 1 - result = AstMem(tokens[0], 32) - return result - - -def cb_predec(tokens): - assert len(tokens) == 1 - result = AstMem(AstOp('predec', tokens[0]), 32) - return result - - -def cb_postinc(tokens): - 
assert len(tokens) == 1 - result = AstMem(AstOp('postinc', tokens[0]), 32) - return result - - -def cb_regdisp(tokens): - assert len(tokens) == 2 - result = AstMem(tokens[0] + tokens[1], 32) - return result - - -def cb_regreg(tokens): - assert len(tokens) == 2 - result = AstMem(tokens[0] + tokens[1], 32) - return result - - -deref_pc = (DEREF + ref_pc).setParseAction(cb_deref_mem) -deref_pcimm = (DEREF + ref_pcandimm).setParseAction(cb_deref_mem) - -dgpregs_base = (DEREF + gpregs.parser).setParseAction(cb_deref_mem) -dgpregs_predec = (DEREF + MINUS + gpregs.parser).setParseAction(cb_predec) -dgpregs_postinc = (DEREF + gpregs.parser + PLUS).setParseAction(cb_postinc) - -dgpregs = dgpregs_base | dgpregs_predec | dgpregs_postinc - -d_gpreg_gpreg = (DEREF + LPARENT + gpregs.parser + COMMA + gpregs.parser + RPARENT).setParseAction(cb_regdisp) -dgpregs_p = dgpregs_predec | dgpregs_postinc - - -dgpregs_ir = (DEREF + LPARENT + gpregs.parser + COMMA + base_expr + RPARENT).setParseAction(cb_regdisp) -dgpregs_ir |= d_gpreg_gpreg - -dgbr_imm = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + base_expr + RPARENT).setParseAction(cb_regdisp) - -dgbr_reg = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + gpregs.parser + RPARENT).setParseAction(cb_regreg) - - -class sh4_arg(m_arg): - def asm_ast_to_expr(self, arg, loc_db): - if isinstance(arg, AstId): - if isinstance(arg.name, ExprId): - return arg.name - if arg.name in gpregs.str: - return None - loc_key = loc_db.get_or_create_name_location(arg.name.encode()) - return ExprLoc(loc_key, 32) - if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] - if None in args: - return None - return ExprOp(arg.op, *args) - if isinstance(arg, AstInt): - return ExprInt(arg.value, 32) - if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, loc_db) - if ptr is None: - return None - return ExprMem(ptr, arg.size) - return None - - -_, bs_pr = gen_reg_bs('PR', reg_info_pr, (m_reg, sh4_arg,)) -_, bs_r0 = 
gen_reg_bs('R0', reg_info_r0, (m_reg, sh4_arg,)) -_, bs_sr = gen_reg_bs('SR', reg_info_sr, (m_reg, sh4_arg,)) -_, bs_gbr = gen_reg_bs('GBR', reg_info_gbr, (m_reg, sh4_arg,)) -_, bs_vbr = gen_reg_bs('VBR', reg_info_vbr, (m_reg, sh4_arg,)) -_, bs_ssr = gen_reg_bs('SSR', reg_info_ssr, (m_reg, sh4_arg,)) -_, bs_spc = gen_reg_bs('SPC', reg_info_spc, (m_reg, sh4_arg,)) -_, bs_sgr = gen_reg_bs('SGR', reg_info_sgr, (m_reg, sh4_arg,)) -_, bs_dbr = gen_reg_bs('dbr', reg_info_dbr, (m_reg, sh4_arg,)) -_, bs_mach = gen_reg_bs('mach', reg_info_mach, (m_reg, sh4_arg,)) -_, bs_macl = gen_reg_bs('macl', reg_info_macl, (m_reg, sh4_arg,)) -_, bs_fpul = gen_reg_bs('fpul', reg_info_fpul, (m_reg, sh4_arg,)) -_, bs_fr0 = gen_reg_bs('fr0', reg_info_fr0, (m_reg, sh4_arg,)) - -class sh4_reg(reg_noarg, sh4_arg): - pass - - -class sh4_gpreg(sh4_reg): - reg_info = gpregs - parser = reg_info.parser - - -class sh4_dr(sh4_reg): - reg_info = dregs - parser = reg_info.parser - - -class sh4_bgpreg(sh4_reg): - reg_info = bgpregs - parser = reg_info.parser - - -class sh4_gpreg_noarg(reg_noarg, ): - reg_info = gpregs - parser = reg_info.parser - - -class sh4_freg(sh4_reg): - reg_info = fregs - parser = reg_info.parser - - -class sh4_dgpreg(sh4_arg): - parser = dgpregs_base - - def fromstring(self, text, loc_db, parser_result=None): - start, stop = super(sh4_dgpreg, self).fromstring(text, loc_db, parser_result) - if start is None or self.expr == [None]: - return start, stop - self.expr = ExprMem(self.expr.ptr, self.sz) - return start, stop - - def decode(self, v): - r = gpregs.expr[v] - self.expr = ExprMem(r, self.sz) - return True - - def encode(self): - e = self.expr - if not isinstance(e, ExprMem): - return False - if not isinstance(e.ptr, ExprId): - return False - v = gpregs.expr.index(e.ptr) - self.value = v - return True - - -class sh4_dgpregpinc(sh4_arg): - parser = dgpregs_p - - def fromstring(self, text, loc_db, parser_result=None): - start, stop = super(sh4_dgpregpinc, self).fromstring(text, 
loc_db, parser_result) - if self.expr == [None]: - return None, None - if not isinstance(self.expr.ptr, ExprOp): - return None, None - if self.expr.ptr.op != self.op: - return None, None - return start, stop - - def decode(self, v): - r = gpregs.expr[v] - e = ExprOp(self.op, r) - self.expr = ExprMem(e, self.sz) - return True - - def encode(self): - e = self.expr - if not isinstance(e, ExprMem): - return False - e = e.ptr - res = match_expr(e, ExprOp(self.op, jra), [jra]) - if not res: - return False - r = res[jra] - if not r in gpregs.expr: - return False - v = gpregs.expr.index(r) - self.value = v - return True - - -class sh4_dgpregpdec(sh4_arg): - parser = dgpregs_postinc - op = "preinc" - - -class sh4_dgpreg_imm(sh4_dgpreg): - parser = dgpregs_ir - - def decode(self, v): - p = self.parent - r = gpregs.expr[v] - s = self.sz - d = ExprInt((p.disp.value * s) // 8, 32) - e = ExprMem(r + d, s) - self.expr = e - return True - - def encode(self): - e = self.expr - p = self.parent - s = self.sz - if not isinstance(e, ExprMem): - return False - if isinstance(e.ptr, ExprId): - v = gpregs.expr.index(e.ptr) - p.disp.value = 0 - elif isinstance(e.ptr, ExprOp): - res = match_expr(e, ExprMem(jra + jrb, self.sz), [jra, jrb]) - if not res: - return False - if not isinstance(res[jra], ExprId): - return False - if not isinstance(res[jrb], ExprInt): - return False - d = int(res[jrb]) - p.disp.value = d // (s // 8) - if not res[jra] in gpregs.expr: - return False - v = gpregs.expr.index(res[jra]) - else: - return False - self.value = v - return True - - -class sh4_imm(imm_noarg, sh4_arg): - parser = base_expr - pass - - -class sh4_simm(sh4_imm): - parser = base_expr - - def decode(self, v): - v = sign_ext(v, self.l, 32) - v = self.decodeval(v) - self.expr = ExprInt(v, 32) - return True - - def encode(self): - if not isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - if (1 << (self.l - 1)) & v: - v = -((0xffffffff ^ v) + 1) - v = self.encodeval(v) - self.value = (v 
& 0xffffffff) & self.lmask - return True - - -class sh4_dpc16imm(sh4_dgpreg): - parser = deref_pc - - def decode(self, v): - self.expr = ExprMem(PC + ExprInt(v * 2 + 4, 32), 16) - return True - - def calcdisp(self, v): - v = (int(v) - 4) // 2 - if not 0 < v <= 0xff: - return None - return v - - def encode(self): - res = match_expr(self.expr, ExprMem(PC + jra, 16), [jra]) - if not res: - return False - if not isinstance(res[jra], ExprInt): - return False - v = self.calcdisp(res[jra]) - if v is None: - return False - self.value = v - return True - - -class sh4_dgbrimm8(sh4_dgpreg): - parser = dgbr_imm - - def decode(self, v): - s = self.sz - self.expr = ExprMem(GBR + ExprInt((v * s) // 8, 32), s) - return True - - def encode(self): - e = self.expr - s = self.sz - if e == ExprMem(GBR, 32): - self.value = 0 - return True - res = match_expr(self.expr, ExprMem(GBR + jra, s), [jra]) - if not res: - return False - if not isinstance(res[jra], ExprInt): - return False - self.value = int(res[jra]) // (s // 8) - return True - - -class sh4_dpc32imm(sh4_dpc16imm): - parser = deref_pcimm - - def decode(self, v): - self.expr = ExprMem( - (PC & ExprInt(0xfffffffc, 32)) + ExprInt(v * 4 + 4, 32), 32) - return True - - def calcdisp(self, v): - v = (int(v) - 4) // 4 - if not 0 < v <= 0xff: - return None - return v - - def encode(self): - res = match_expr( - self.expr, ExprMem((PC & ExprInt(0xFFFFFFFC, 32)) + jra, 32), [jra]) - if not res: - return False - if not isinstance(res[jra], ExprInt): - return False - v = self.calcdisp(res[jra]) - if v is None: - return False - self.value = v - return True - - -class sh4_pc32imm(sh4_arg): - parser = pcdisp - - def decode(self, v): - self.expr = (PC & ExprInt(0xfffffffc, 32)) + ExprInt(v * 4 + 4, 32) - return True - - def encode(self): - res = match_expr(self.expr, (PC & ExprInt(0xfffffffc, 32)) + jra, [jra]) - if not res: - return False - if not isinstance(res[jra], ExprInt): - return False - v = (int(res[jra]) - 4) // 4 - if v is None: - 
return False - self.value = v - return True - -class additional_info(object): - - def __init__(self): - self.except_on_instr = False - - -class instruction_sh4(instruction): - __slots__ = [] - delayslot = 0 - - def __init__(self, *args, **kargs): - super(instruction_sh4, self).__init__(*args, **kargs) - - def dstflow(self): - return self.name.startswith('J') - - @staticmethod - def arg2str(expr, index=None, loc_db=None): - if isinstance(expr, ExprId) or isinstance(expr, ExprInt): - return str(expr) - elif expr.is_loc(): - if loc_db is not None: - return loc_db.pretty_str(expr.loc_key) - else: - return str(expr) - assert(isinstance(expr, ExprMem)) - ptr = expr.ptr - - if isinstance(ptr, ExprOp): - if ptr.op == "predec": - s = '-%s' % ptr.args[0] - elif ptr.op == "postinc": - s = '%s+' % ptr.args[0] - else: - s = ','.join( - str(x).replace('(', '').replace(')', '') - for x in ptr.args - ) - s = "(%s)"%s - s = "@%s" % s - elif isinstance(ptr, ExprId): - s = "@%s" % ptr - else: - raise NotImplementedError('zarb arg2str') - return s - - - """ - def dstflow2label(self, loc_db): - e = self.args[0] - if not isinstance(e, ExprInt): - return - if self.name == 'BLX': - ad = e.arg+8+self.offset - else: - ad = e.arg+8+self.offset - l = loc_db.get_or_create_offset_location(ad) - s = ExprId(l, e.size) - self.args[0] = s - """ - - def breakflow(self): - if self.name.startswith('J'): - return True - return False - - def is_subcall(self): - return self.name == 'JSR' - - def getdstflow(self, loc_db): - return [self.args[0]] - - def splitflow(self): - return self.name == 'JSR' - - def get_symbol_size(self, symbol, loc_db): - return 32 - - def fixDstOffset(self): - e = self.args[0] - if self.offset is None: - raise ValueError('symbol not resolved %s' % l) - if not isinstance(e, ExprInt): - log.debug('dyn dst %r', e) - return - off = e.arg - (self.offset + 4 + self.l) - print(hex(off)) - if int(off % 4): - raise ValueError('strange offset! 
%r' % off) - self.args[0] = ExprInt(off, 32) - print('final', self.args[0]) - - def get_args_expr(self): - args = [a for a in self.args] - return args - - -class mn_sh4(cls_mn): - bintree = {} - regs = regs_module - num = 0 - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - pc = PC - # delayslot: - # http://resource.renesas.com/lib/eng/e_learnig/sh4/13/index.html - delayslot = 0 # unit is instruction instruction - instruction = instruction_sh4 - - def additional_info(self): - info = additional_info() - return info - - @classmethod - def getbits(cls, bs, attrib, start, n): - if not n: - return 0 - o = 0 - if n > bs.getlen() * 8: - raise ValueError('not enough bits %r %r' % (n, len(bs.bin) * 8)) - while n: - i = start // 8 - c = cls.getbytes(bs, i) - if not c: - raise IOError - c = ord(c) - r = 8 - start % 8 - c &= (1 << r) - 1 - l = min(r, n) - c >>= (r - l) - o <<= l - o |= c - n -= l - start += l - return o - - @classmethod - def getbytes(cls, bs, offset, l=1): - out = b"" - for _ in range(l): - n_offset = (offset & ~1) + 1 - offset % 2 - out += bs.getbytes(n_offset, 1) - offset += 1 - return out - - @classmethod - def check_mnemo(cls, fields): - l = sum([x.l for x in fields]) - assert l == 16, "len %r" % l - - @classmethod - def getmn(cls, name): - return name.upper().replace('_', '.') - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - def value(self, mode): - v = super(mn_sh4, self).value(mode) - return [x[::-1] for x in v] - - -class bs_dr0gbr(sh4_dgpreg): - parser = dgbr_reg - - def decode(self, v): - self.expr = ExprMem(GBR + R0, 8) - return True - - def encode(self): - return self.expr == ExprMem(GBR + R0, 8) - - -class bs_dr0gp(sh4_dgpreg): - parser = d_gpreg_gpreg - - def decode(self, v): - self.expr = ExprMem(gpregs.expr[v] + R0, self.sz) - return True - - def encode(self): - res = 
match_expr(self.expr, ExprMem(R0 + jra, self.sz), [jra]) - if not res: - return False - r = res[jra] - if not r in gpregs.expr: - return False - self.value = gpregs.expr.index(r) - return True - - -class bs_dgpreg(sh4_dgpreg): - parser = dgpregs_base - - -rn = bs(l=4, cls=(sh4_gpreg,), fname="rn") -rm = bs(l=4, cls=(sh4_gpreg,), fname="rm") - - -d08_rn = bs(l=4, cls=(sh4_dgpreg,), fname="rn", sz = 8) -d16_rn = bs(l=4, cls=(sh4_dgpreg,), fname="rn", sz = 16) -d32_rn = bs(l=4, cls=(sh4_dgpreg,), fname="rn", sz = 32) -d08_rm = bs(l=4, cls=(sh4_dgpreg,), fname="rm", sz = 8) -d16_rm = bs(l=4, cls=(sh4_dgpreg,), fname="rm", sz = 16) -d32_rm = bs(l=4, cls=(sh4_dgpreg,), fname="rm", sz = 32) - - -brm = bs(l=3, cls=(sh4_bgpreg,), fname="brm") -brn = bs(l=3, cls=(sh4_bgpreg,), fname="brn") - -d08rnimm = bs(l=4, fname="rn", cls=(sh4_dgpreg_imm,), sz = 8) -d16rnimm = bs(l=4, fname="rn", cls=(sh4_dgpreg_imm,), sz = 16) -d32rnimm = bs(l=4, fname="rn", cls=(sh4_dgpreg_imm,), sz = 32) - -d08rmimm = bs(l=4, fname="rm", cls=(sh4_dgpreg_imm,), sz = 8) -d16rmimm = bs(l=4, fname="rm", cls=(sh4_dgpreg_imm,), sz = 16) -d32rmimm = bs(l=4, fname="rm", cls=(sh4_dgpreg_imm,), sz = 32) - -btype = bs(l=4, fname="btype", order=-1) - -s08imm = bs(l=8, cls=(sh4_simm,), fname="imm") -s12imm = bs(l=12, cls=(sh4_simm,), fname="imm") -dpc16imm = bs(l=8, cls=(sh4_dpc16imm,), fname="pcimm", sz=16) -dpc32imm = bs(l=8, cls=(sh4_dpc32imm,), fname="pcimm", sz=32) -dimm4 = bs(l=4, fname='disp', order=-1) -d08gbrimm8 = bs(l=8, cls=(sh4_dgbrimm8,), fname='disp', sz=8) -d16gbrimm8 = bs(l=8, cls=(sh4_dgbrimm8,), fname='disp', sz=16) -d32gbrimm8 = bs(l=8, cls=(sh4_dgbrimm8,), fname='disp', sz=32) - -pc32imm = bs(l=8, cls=(sh4_pc32imm,), fname="pcimm") - -d08rnpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=8, fname="rn") -d08rmpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=8, fname="rm") - -d16rnpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=16, fname="rn") -d16rmpinc = bs(l=4, 
cls=(sh4_dgpregpinc,), op='postinc', sz=16, fname="rm") - -d32rnpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=32, fname="rn") -d32rmpinc = bs(l=4, cls=(sh4_dgpregpinc,), op='postinc', sz=32, fname="rm") - -d08rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=8, fname="rn") -d08rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=8, fname="rm") - -d16rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=16, fname="rn") -d16rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=16, fname="rm") - -d32rnpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=32, fname="rn") -d32rmpdec = bs(l=4, cls=(sh4_dgpregpinc,), op='predec', sz=32, fname="rm") - - -u08imm = bs(l=8, cls=(sh4_imm,), fname="imm") -dr0gbr = bs(l=0, cls=(bs_dr0gbr,), sz=8) - -d08gpreg = bs(l=4, cls=(bs_dgpreg,), sz=8) -d32gpreg = bs(l=4, cls=(bs_dgpreg,), sz=32) - -frn = bs(l=4, cls=(sh4_freg,), fname="frn") -frm = bs(l=4, cls=(sh4_freg,), fname="frm") - -bd08r0gp = bs(l=4, cls=(bs_dr0gp,), sz=8) -bd16r0gp = bs(l=4, cls=(bs_dr0gp,), sz=16) -bd32r0gp = bs(l=4, cls=(bs_dr0gp,), sz=32) - -drn = bs(l=3, cls=(sh4_dr,), fname="drn") -drm = bs(l=3, cls=(sh4_dr,), fname="drm") - - -def addop(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_sh4,), dct) - -addop("mov", [bs('1110'), rn, s08imm], [s08imm, rn]) -addop("mov_w", [bs('1001'), rn, dpc16imm], [dpc16imm, rn]) -addop("mov_l", [bs('1101'), rn, dpc32imm], [dpc32imm, rn]) -addop("mov", [bs('0110', fname="opc"), rn, rm, bs('0011')], [rm, rn]) -addop("mov_b", [bs('0010', fname="opc"), d08_rn, rm, bs('0000')], [rm, d08_rn]) -addop("mov_w", [bs('0010', fname="opc"), d16_rn, rm, bs('0001')], [rm, d16_rn]) -addop("mov_l", [bs('0010', fname="opc"), d32_rn, rm, bs('0010')], [rm, d32_rn]) -addop("mov_b", [bs('0110', fname="opc"), rn, d08_rm, bs('0000')], [d08_rm, rn]) -addop("mov_w", [bs('0110', fname="opc"), rn, d16_rm, bs('0001')], [d16_rm, rn]) 
-addop("mov_l", [bs('0110', fname="opc"), rn, d32_rm, bs('0010')], [d32_rm, rn]) -addop("mov_b", - [bs('0010', fname="opc"), d08rnpdec, rm, bs('0100')], [rm, d08rnpdec]) -addop("mov_w", - [bs('0010', fname="opc"), d16rnpdec, rm, bs('0101')], [rm, d16rnpdec]) -addop("mov_l", - [bs('0010', fname="opc"), d32rnpdec, rm, bs('0110')], [rm, d32rnpdec]) -addop("mov_b", - [bs('0110', fname="opc"), rn, d08rmpinc, bs('0100')], [rm, d08rnpinc]) -addop("mov_w", - [bs('0110', fname="opc"), rn, d16rmpinc, bs('0101')], [d16rmpinc, rn]) -addop("mov_l", - [bs('0110', fname="opc"), rn, d32rmpinc, bs('0110')], [d32rmpinc, rn]) -addop("mov_b", [bs('10000000', fname='opc'), bs_r0, d08rnimm, dimm4]) -addop("mov_w", [bs('10000001', fname='opc'), bs_r0, d16rnimm, dimm4]) -addop("mov_l", [bs('0001', fname='opc'), d32rnimm, rm, dimm4], [rm, d32rnimm]) -addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bs_r0]) -addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bs_r0]) -addop("mov_l", [bs('0101', fname='opc'), rn, d32rmimm, dimm4], [d32rmimm, rn]) -addop("mov_b", - [bs('0000', fname='opc'), bd08r0gp, rm, bs('0100')], [rm, bd08r0gp]) -addop("mov_w", - [bs('0000', fname='opc'), bd16r0gp, rm, bs('0101')], [rm, bd16r0gp]) -addop("mov_l", - [bs('0000', fname='opc'), bd32r0gp, rm, bs('0110')], [rm, bd32r0gp]) -addop("mov_b", - [bs('0000', fname='opc'), rn, bd08r0gp, bs('1100')], [bd08r0gp, rn]) -addop("mov_w", - [bs('0000', fname='opc'), rn, bd16r0gp, bs('1101')], [bd16r0gp, rn]) -addop("mov_l", - [bs('0000', fname='opc'), rn, bd32r0gp, bs('1110')], [bd32r0gp, rn]) - -addop("mov_b", [bs('11000000'), bs_r0, d08gbrimm8]) -addop("mov_w", [bs('11000001'), bs_r0, d16gbrimm8]) -addop("mov_l", [bs('11000010'), bs_r0, d32gbrimm8]) - -addop("mov_b", [bs('11000100'), d08gbrimm8, bs_r0]) -addop("mov_w", [bs('11000101'), d16gbrimm8, bs_r0]) -addop("mov_l", [bs('11000110'), d32gbrimm8, bs_r0]) - -addop("mov", [bs('11000111'), pc32imm, bs_r0]) - -addop("swapb", [bs('0110'), rn, rm, 
bs('1000')], [rm, rn]) -addop("swapw", [bs('0110'), rn, rm, bs('1001')], [rm, rn]) -addop("xtrct", [bs('0010'), rn, rm, bs('1101')], [rm, rn]) - - -addop("add", [bs('0011'), rn, rm, bs('1100')], [rm, rn]) -addop("add", [bs('0111'), rn, s08imm], [s08imm, rn]) -addop("addc", [bs('0011'), rn, rm, bs('1110')], [rm, rn]) -addop("addv", [bs('0011'), rn, rm, bs('1111')], [rm, rn]) - - -addop("cmpeq", [bs('10001000'), s08imm, bs_r0]) - - -addop("cmpeq", [bs('0011'), rn, rm, bs('0000')], [rm, rn]) -addop("cmphs", [bs('0011'), rn, rm, bs('0010')], [rm, rn]) -addop("cmpge", [bs('0011'), rn, rm, bs('0011')], [rm, rn]) -addop("cmphi", [bs('0011'), rn, rm, bs('0110')], [rm, rn]) -addop("cmpgt", [bs('0011'), rn, rm, bs('0111')], [rm, rn]) - - -addop("cmppz", [bs('0100'), rn, bs('00010001')]) -addop("cmppl", [bs('0100'), rn, bs('00010101')]) -addop("cmpstr", [bs('0010'), rn, rm, bs('1100')], [rm, rn]) - - -addop("div1", [bs('0011'), rn, rm, bs('0100')], [rm, rn]) - -addop("div0s", [bs('0010'), rn, rm, bs('0111')], [rm, rn]) -addop("div0u", [bs('0000000000011001')]) - -addop("dmuls", [bs('0011'), rn, rm, bs('1101')], [rm, rn]) -addop("dmulu", [bs('0011'), rn, rm, bs('0101')], [rm, rn]) - -addop("dt", [bs('0100'), rn, bs('00010000')]) - - -addop("extsb", [bs('0110'), rn, rm, bs('1110')], [rm, rn]) -addop("extsw", [bs('0110'), rn, rm, bs('1111')], [rm, rn]) -addop("extub", [bs('0110'), rn, rm, bs('1100')], [rm, rn]) -addop("extuw", [bs('0110'), rn, rm, bs('1101')], [rm, rn]) - -addop("mac_l", [bs('0000', fname='opc'), d32rnpinc, - d32rmpinc, bs('1111')], [d32rmpinc, d32rnpinc]) -addop("mac_w", [bs('0100', fname='opc'), d16rnpinc, - d16rmpinc, bs('1111')], [d16rmpinc, d16rnpinc]) - -addop("mull", [bs('0000'), rn, rm, bs('0111')], [rm, rn]) -addop("mulsw", [bs('0010'), rn, rm, bs('1111')], [rm, rn]) -addop("muluw", [bs('0010'), rn, rm, bs('1110')], [rm, rn]) - -addop("neg", [bs('0110'), rn, rm, bs('1011')], [rm, rn]) -addop("negc", [bs('0110'), rn, rm, bs('1010')], [rm, rn]) - 
-addop("sub", [bs('0011'), rn, rm, bs('1000')], [rm, rn]) -addop("subc", [bs('0011'), rn, rm, bs('1010')], [rm, rn]) -addop("subv", [bs('0011'), rn, rm, bs('1011')], [rm, rn]) - -addop("and", [bs('0010'), rn, rm, bs('1001')], [rm, rn]) -addop("and", [bs('11001001'), u08imm, bs_r0]) -addop("and_b", [bs('11001101'), u08imm, dr0gbr]) - -addop("not", [bs('0110'), rn, rm, bs('0111')], [rm, rn]) - -addop("or", [bs('0010'), rn, rm, bs('1011')], [rm, rn]) - -addop("or", [bs('11001011'), u08imm, bs_r0]) -addop("or_b", [bs('11001111'), u08imm, dr0gbr]) - -addop("tas_b", [bs('0100'), d08gpreg, bs('00011011')]) -addop("tst", [bs('0010'), rn, rm, bs('1000')], [rm, rn]) -addop("tst", [bs('11001000'), u08imm, bs_r0]) -addop("tst_b", [bs('11001100'), u08imm, dr0gbr]) - - -addop("xor", [bs('0010'), rn, rm, bs('1010')], [rm, rn]) -addop("xor", [bs('11001010'), u08imm, bs_r0]) -addop("xor_b", [bs('11001110'), u08imm, dr0gbr]) - -addop("rotl", [bs('0100'), rn, bs('00000100')]) -addop("rotr", [bs('0100'), rn, bs('00000101')]) -addop("rotcl", [bs('0100'), rn, bs('00100100')]) -addop("rotcr", [bs('0100'), rn, bs('00100101')]) - -addop("shad", [bs('0100'), rn, rm, bs('1100')], [rm, rn]) -addop("shal", [bs('0100'), rn, bs('00100000')]) -addop("shar", [bs('0100'), rn, bs('00100001')]) -addop("shld", [bs('0100'), rn, rm, bs('1101')], [rm, rn]) - -addop("shll", [bs('0100'), rn, bs('00000000')]) -addop("shlr", [bs('0100'), rn, bs('00000001')]) -addop("shll2", [bs('0100'), rn, bs('00001000')]) -addop("shlr2", [bs('0100'), rn, bs('00001001')]) -addop("shll8", [bs('0100'), rn, bs('00011000')]) -addop("shlr8", [bs('0100'), rn, bs('00011001')]) -addop("shll16", [bs('0100'), rn, bs('00101000')]) -addop("shlr16", [bs('0100'), rn, bs('00101001')]) - - -addop("bf", [bs('10001011'), s08imm]) -""" - def splitflow(self): - return True - def breakflow(self): - return True - def dstflow(self): - return True - def dstflow2label(self, loc_db): - e = self.args[0].expr - ad = e.arg*2+4+self.offset - l = 
loc_db.get_or_create_offset_location(ad) - s = ExprId(l, e.size) - self.args[0].expr = s -""" - -addop("bfs", [bs('10001111'), s08imm]) -""" - delayslot = 1 -""" -addop("bt", [bs('10001001'), s08imm]) - -addop("bts", [bs('10001101'), s08imm]) - -addop("bra", [bs('1010'), s12imm]) -""" - delayslot = 1 - def breakflow(self): - return True - def dstflow(self): - return True - def dstflow2label(self, loc_db): - e = self.args[0].expr - ad = e.arg*2+4+self.offset - l = loc_db.get_or_create_offset_location(ad) - s = ExprId(l, e.size) - self.args[0].expr = s -""" - -addop("braf", [bs('0000'), rn, bs('00100011')]) -""" - delayslot = 1 - def breakflow(self): - return True - def dstflow(self): - return True -""" -addop("bsr", [bs('1011'), s12imm]) - -addop("bsrf", [bs('0000'), rn, bs('00000011')]) -""" - delayslot = 1 - def breakflow(self): - return True - def is_subcall(self): - return True - def splitflow(self): - return True -""" - -addop("jmp_l", [bs('0100'), d32gpreg, bs('00101011')]) -""" - delayslot = 1 - def breakflow(self): - return True -""" - -addop("jsr_l", [bs('0100'), d32gpreg, bs('00001011')]) -""" - delayslot = 1 - def breakflow(self): - return True - def is_subcall(self): - return True - def splitflow(self): - return True -""" - -addop("rts", [bs('0000000000001011')]) -""" - delayslot = 1 - def breakflow(self): - return True -""" -addop("clrmac", [bs('0000000000101000')]) -addop("clrs", [bs('0000000001001000')]) -addop("clrt", [bs('0000000000001000')]) - - -addop("ldc", [bs('0100'), rm, bs_sr, bs('00001110')]) -addop("ldc", [bs('0100'), rm, bs_gbr, bs('00011110')]) -addop("ldc", [bs('0100'), rm, bs_vbr, bs('00101110')]) -addop("ldc", [bs('0100'), rm, bs_ssr, bs('00111110')]) -addop("ldc", [bs('0100'), rm, bs_spc, bs('01001110')]) -addop("ldc", [bs('0100'), rm, bs_dbr, bs('11111010')]) -addop("ldc", [bs('0100'), rm, bs('1'), brn, bs('1110')], [rm, brn]) -addop("ldc_l", [bs('0100'), d32rmpinc, bs_sr, bs('00000111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, 
bs_gbr, bs('00010111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bs_vbr, bs('00100111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bs_ssr, bs('00110111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bs_spc, bs('01000111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bs_dbr, bs('11110110')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bs('1'), brn, bs('0111')]) -addop("lds", [bs('0100'), rm, bs_mach, bs('00001010')]) -addop("lds", [bs('0100'), rm, bs_macl, bs('00011010')]) -addop("lds", [bs('0100'), rm, bs_pr, bs('00101010')]) -addop("lds_l", [bs('0100'), d32rmpinc, bs_mach, bs('00000110')]) -addop("lds_l", [bs('0100'), d32rmpinc, bs_macl, bs('00010110')]) -addop("lds_l", [bs('0100'), d32rmpinc, bs_pr, bs('00100110')]) -addop("ldtlb", [bs('0000000000111000')]) - -addop("movca_l", [bs('0000'), bs_r0, d32gpreg, bs('11000011')]) -addop("nop", [bs('0000000000001001')]) -addop("ocbi_l", [bs('0000'), d32gpreg, bs('10010011')]) -addop("ocbp_l", [bs('0000'), d32gpreg, bs('10100011')]) -addop("ocbwb_l", [bs('0000'), d32gpreg, bs('10110011')]) -addop("pref_l", [bs('0000'), d32gpreg, bs('10000011')]) - - -addop("rte", [bs('0000000000101011')]) -addop("sets", [bs('0000000001011000')]) -addop("sett", [bs('0000000000011000')]) -addop("sleep", [bs('0000000000011011')]) -addop("stc", [bs('0000'), bs_sr, rn, bs('00000010')]) -addop("stc", [bs('0000'), bs_gbr, rn, bs('00010010')]) -addop("stc", [bs('0000'), bs_vbr, rn, bs('00100010')]) -addop("stc", [bs('0000'), bs_ssr, rn, bs('00110010')]) -addop("stc", [bs('0000'), bs_spc, rn, bs('01000010')]) -addop("stc", [bs('0000'), bs_sgr, rn, bs('00111010')]) -addop("stc", [bs('0000'), bs_dbr, rn, bs('11111010')]) -addop("stc", [bs('0000'), rn, bs('1'), brm, bs('0010')], [brm, rn]) - -addop("stc_l", [bs('0100'), bs_sr, d32rmpdec, bs('00000011')]) -addop("stc_l", [bs('0100'), bs_gbr, d32rmpdec, bs('00010011')]) -addop("stc_l", [bs('0100'), bs_vbr, d32rmpdec, bs('00100011')]) -addop("stc_l", [bs('0100'), bs_ssr, d32rmpdec, bs('00110011')]) -addop("stc_l", 
[bs('0100'), bs_spc, d32rmpdec, bs('01000011')]) -addop("stc_l", [bs('0100'), bs_sgr, d32rmpdec, bs('00110010')]) -addop("stc_l", [bs('0100'), bs_dbr, d32rmpdec, bs('11110010')]) -addop("stc_l", - [bs('0100'), d32rnpdec, bs('1'), brm, bs('0011')], [brm, d32rnpdec]) - -# float -addop("sts", [bs('0000'), bs_mach, rm, bs('00001010')]) -addop("sts", [bs('0000'), bs_macl, rm, bs('00011010')]) -addop("sts", [bs('0000'), bs_pr, rm, bs('00101010')]) -addop("sts_l", [bs('0100'), bs_mach, d32rmpdec, bs('00000010')]) -addop("sts_l", [bs('0100'), bs_macl, d32rmpdec, bs('00010010')]) -addop("sts_l", - [bs('0100'), d32rnpdec, bs_pr, bs('00100010')], [bs_pr, d32rnpdec]) -addop("trapa", [bs('11000011'), u08imm]) - -addop("fldi0", [bs('1111'), frn, bs('10001101')]) -addop("fldi1", [bs('1111'), frn, bs('10011101')]) -addop("fmov", [bs('1111'), frn, frm, bs('1100')], [frm, frn]) -addop("fmov_s", [bs('1111'), frn, d32gpreg, bs('1000')], [d32gpreg, frn]) -addop("fmov_s", [bs('1111'), frn, bd32r0gp, bs('0110')], [bd32r0gp, frn]) -addop("fmov_s", [bs('1111'), frn, d32rmpinc, bs('1001')], [d32rmpinc, frn]) -addop("fmov_s", [bs('1111'), d32gpreg, frm, bs('1010')], [frm, d32gpreg]) -addop("fmov_s", [bs('1111'), d32rnpdec, frm, bs('1011')], [frm, d32rnpdec]) -addop("fmov_s", [bs('1111'), bd32r0gp, frm, bs('0111')], [frm, bd32r0gp]) - -addop("flds", [bs('1111'), frm, bs_fpul, bs('00011101')]) -addop("fsts", [bs('1111'), bs_fpul, frm, bs('00001101')]) -addop("fabs", [bs('1111'), frn, bs('01011101')]) -addop("fadd", [bs('1111'), frn, frm, bs('0000')], [frm, frn]) -addop("fcmpeq", [bs('1111'), frn, frm, bs('0100')], [frm, frn]) -addop("fcmpgt", [bs('1111'), frn, frm, bs('0101')], [frm, frn]) -addop("fdiv", [bs('1111'), frn, frm, bs('0011')], [frm, frn]) - -addop("float", [bs('1111'), bs_fpul, frn, bs('00101101')]) -addop("fmac", [bs('1111'), bs_fr0, frn, frm, bs('1110')], [bs_fr0, frm, frn]) -addop("fmul", [bs('1111'), frn, frm, bs('0010')], [frm, frn]) -addop("fneg", [bs('1111'), frn, 
bs('01001101')]) -addop("fsqrt", [bs('1111'), frn, bs('01101101')]) -addop("fsub", [bs('1111'), frn, frm, bs('0001')], [frm, frn]) -addop("ftrc", [bs('1111'), frm, bs_fpul, bs('00111101')]) diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py deleted file mode 100644 index c294eb8c..00000000 --- a/miasm2/arch/sh4/regs.py +++ /dev/null @@ -1,84 +0,0 @@ -from builtins import range -from miasm2.expression.expression import * -from miasm2.core.cpu import reg_info, gen_reg - -# GP -gpregs_str = ['R%d' % r for r in range(0x10)] -gpregs_expr = [ExprId(x, 32) for x in gpregs_str] -gpregs = reg_info(gpregs_str, gpregs_expr) - -bgpregs_str = ['R%d_BANK' % r for r in range(0x8)] -bgpregs_expr = [ExprId(x, 32) for x in bgpregs_str] -bgpregs = reg_info(bgpregs_str, bgpregs_expr) - -fregs_str = ['FR%d' % r for r in range(0x10)] -fregs_expr = [ExprId(x, 32) for x in fregs_str] -fregs = reg_info(fregs_str, fregs_expr) - -dregs_str = ['DR%d' % r for r in range(0x8)] -dregs_expr = [ExprId(x, 32) for x in dregs_str] -dregs = reg_info(dregs_str, dregs_expr) - - -PC, reg_info_pc = gen_reg('PC') -PR, reg_info_pr = gen_reg('PR') -R0, reg_info_r0 = gen_reg('R0') -GBR, reg_info_gbr = gen_reg('GBR') -SR, reg_info_sr = gen_reg('SR') -VBR, reg_info_vbr = gen_reg('VBR') -SSR, reg_info_ssr = gen_reg('SSR') -SPC, reg_info_spc = gen_reg('SPC') -SGR, reg_info_sgr = gen_reg('SGR') -DBR, reg_info_dbr = gen_reg('DBR') -MACH, reg_info_mach = gen_reg('MACH') -MACL, reg_info_macl = gen_reg('MACL') -FPUL, reg_info_fpul = gen_reg('FPUL') -FR0, reg_info_fr0 = gen_reg('FR0') - -R0 = gpregs_expr[0] -R1 = gpregs_expr[1] -R2 = gpregs_expr[2] -R3 = gpregs_expr[3] -R4 = gpregs_expr[4] -R5 = gpregs_expr[5] -R6 = gpregs_expr[6] -R7 = gpregs_expr[7] -R8 = gpregs_expr[8] -R9 = gpregs_expr[9] -R10 = gpregs_expr[10] -R11 = gpregs_expr[11] -R12 = gpregs_expr[12] -R13 = gpregs_expr[13] -R14 = gpregs_expr[14] -R15 = gpregs_expr[15] - - -reg_zf = 'zf' -reg_nf = 'nf' -reg_of = 'of' -reg_cf = 'cf' - -zf = 
ExprId(reg_zf, size=1) -nf = ExprId(reg_nf, size=1) -of = ExprId(reg_of, size=1) -cf = ExprId(reg_cf, size=1) - - -all_regs_ids = [ - R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, - zf, nf, of, cf, - - PC, PR, R0, GBR, SR, VBR, SSR, SPC, - SGR, DBR, MACH, MACL, FPUL, FR0] - -all_regs_ids_no_alias = all_regs_ids - -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) - -all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] - -regs_init = {} -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] - -regs_flt_expr = [] diff --git a/miasm2/arch/x86/__init__.py b/miasm2/arch/x86/__init__.py deleted file mode 100644 index bbad893b..00000000 --- a/miasm2/arch/x86/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = ["arch", "disasm", "regs", "sem"] diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py deleted file mode 100644 index 7a2c371c..00000000 --- a/miasm2/arch/x86/arch.py +++ /dev/null @@ -1,4637 +0,0 @@ -#-*- coding:utf-8 -*- - -from __future__ import print_function -from builtins import range -import re - -from future.utils import viewitems - -from miasm2.core.utils import int_to_byte -from miasm2.expression.expression import * -from pyparsing import * -from miasm2.core.cpu import * -from collections import defaultdict -import miasm2.arch.x86.regs as regs_module -from miasm2.arch.x86.regs import * -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp - - -log = logging.getLogger("x86_arch") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - -conditional_branch = ["JO", "JNO", "JB", "JAE", - "JZ", "JNZ", "JBE", "JA", - "JS", "JNS", "JPE", "JNP", - #"L", "NL", "NG", "G"] - "JL", "JGE", "JLE", "JG", - "JCXZ", "JECXZ", "JRCXZ"] - -unconditional_branch = ['JMP', 'JMPF'] - -f_isad = "AD" -f_s08 = "S08" -f_u08 = "U08" -f_s16 = "S16" 
-f_u16 = "U16" -f_s32 = "S32" -f_u32 = "U32" -f_s64 = "S64" -f_u64 = "U64" -f_imm = 'IMM' - -f_imm2size = {f_s08: 8, f_s16: 16, f_s32: 32, f_s64: 64, - f_u08: 8, f_u16: 16, f_u32: 32, f_u64: 64} - - -size2gpregs = {8: gpregs08, 16: gpregs16, - 32: gpregs32, 64: gpregs64} - - -replace_regs64 = { - AL: RAX[:8], CL: RCX[:8], DL: RDX[:8], BL: RBX[:8], - AH: RAX[8:16], CH: RCX[8:16], DH: RDX[8:16], BH: RBX[8:16], - SPL: RSP[0:8], BPL: RBP[0:8], SIL: RSI[0:8], DIL: RDI[0:8], - R8B: R8[0:8], R9B: R9[0:8], R10B: R10[0:8], R11B: R11[0:8], - R12B: R12[0:8], R13B: R13[0:8], R14B: R14[0:8], R15B: R15[0:8], - - AX: RAX[:16], CX: RCX[:16], DX: RDX[:16], BX: RBX[:16], - SP: RSP[:16], BP: RBP[:16], SI: RSI[:16], DI: RDI[:16], - R8W: R8[:16], R9W: R9[:16], R10W: R10[:16], R11W: R11[:16], - R12W: R12[:16], R13W: R13[:16], R14W: R14[:16], R15W: R15[:16], - - EAX: RAX[:32], ECX: RCX[:32], EDX: RDX[:32], EBX: RBX[:32], - ESP: RSP[:32], EBP: RBP[:32], ESI: RSI[:32], EDI: RDI[:32], - R8D: R8[:32], R9D: R9[:32], R10D: R10[:32], R11D: R11[:32], - R12D: R12[:32], R13D: R13[:32], R14D: R14[:32], R15D: R15[:32], - - IP: RIP[:16], EIP: RIP[:32], - - ExprId("ST", 64): float_st0, - ExprId("ST(0)", 64): float_st0, - ExprId("ST(1)", 64): float_st1, - ExprId("ST(2)", 64): float_st2, - ExprId("ST(3)", 64): float_st3, - ExprId("ST(4)", 64): float_st4, - ExprId("ST(5)", 64): float_st5, - ExprId("ST(6)", 64): float_st6, - ExprId("ST(7)", 64): float_st7, - -} - -replace_regs32 = { - AL: EAX[:8], CL: ECX[:8], DL: EDX[:8], BL: EBX[:8], - AH: EAX[8:16], CH: ECX[8:16], DH: EDX[8:16], BH: EBX[8:16], - - AX: EAX[:16], CX: ECX[:16], DX: EDX[:16], BX: EBX[:16], - SP: ESP[:16], BP: EBP[:16], SI: ESI[:16], DI: EDI[:16], - - IP: EIP[:16], - - - ExprId("ST", 64): float_st0, - ExprId("ST(0)", 64): float_st0, - ExprId("ST(1)", 64): float_st1, - ExprId("ST(2)", 64): float_st2, - ExprId("ST(3)", 64): float_st3, - ExprId("ST(4)", 64): float_st4, - ExprId("ST(5)", 64): float_st5, - ExprId("ST(6)", 64): float_st6, - 
ExprId("ST(7)", 64): float_st7, - -} - -replace_regs16 = { - AL: AX[:8], CL: CX[:8], DL: DX[:8], BL: BX[:8], - AH: AX[8:16], CH: CX[8:16], DH: DX[8:16], BH: BX[8:16], - - AX: AX[:16], CX: CX[:16], DX: DX[:16], BX: BX[:16], - SP: SP[:16], BP: BP[:16], SI: SI[:16], DI: DI[:16], - - - ExprId("ST", 64): float_st0, - ExprId("ST(0)", 64): float_st0, - ExprId("ST(1)", 64): float_st1, - ExprId("ST(2)", 64): float_st2, - ExprId("ST(3)", 64): float_st3, - ExprId("ST(4)", 64): float_st4, - ExprId("ST(5)", 64): float_st5, - ExprId("ST(6)", 64): float_st6, - ExprId("ST(7)", 64): float_st7, - -} - -replace_regs = {16: replace_regs16, - 32: replace_regs32, - 64: replace_regs64} - - -segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} -enc2segm = dict((value, key) for key, value in viewitems(segm2enc)) - -segm_info = reg_info_dct(enc2segm) - - - -enc2crx = { - 0: cr0, - 1: cr1, - 2: cr2, - 3: cr3, - 4: cr4, - 5: cr5, - 6: cr6, - 7: cr7, -} - -crx_info = reg_info_dct(enc2crx) - - -enc2drx = { - 0: dr0, - 1: dr1, - 2: dr2, - 3: dr3, - 4: dr4, - 5: dr5, - 6: dr6, - 7: dr7, -} - -drx_info = reg_info_dct(enc2drx) - - - -# parser helper ########### -PLUS = Suppress("+") -MULT = Suppress("*") - -COLON = Suppress(":") - - -LBRACK = Suppress("[") -RBRACK = Suppress("]") - - -gpreg = ( - gpregs08.parser | - gpregs08_64.parser | - gpregs16.parser | - gpregs32.parser | - gpregs64.parser | - gpregs_xmm.parser | - gpregs_mm.parser | - gpregs_bnd.parser -) - - - - -def cb_deref_segmoff(tokens): - assert len(tokens) == 2 - return AstOp('segm', tokens[0], tokens[1]) - - -def cb_deref_base_expr(tokens): - tokens = tokens[0] - assert isinstance(tokens, AstNode) - addr = tokens - return addr - - -deref_mem_ad = (LBRACK + base_expr + RBRACK).setParseAction(cb_deref_base_expr) - -deref_ptr = (base_expr + COLON + base_expr).setParseAction(cb_deref_segmoff) - - -PTR = Suppress('PTR') - -FAR = Suppress('FAR') - - -BYTE = Literal('BYTE') -WORD = Literal('WORD') -DWORD = Literal('DWORD') -QWORD = 
Literal('QWORD') -TBYTE = Literal('TBYTE') -XMMWORD = Literal('XMMWORD') - -MEMPREFIX2SIZE = {'BYTE': 8, 'WORD': 16, 'DWORD': 32, - 'QWORD': 64, 'TBYTE': 80, 'XMMWORD': 128} - -SIZE2MEMPREFIX = dict((value, key) for key, value in viewitems(MEMPREFIX2SIZE)) - -def cb_deref_mem(tokens): - if len(tokens) == 2: - s, ptr = tokens - assert isinstance(ptr, AstNode) - return AstMem(ptr, MEMPREFIX2SIZE[s]) - elif len(tokens) == 3: - s, segm, ptr = tokens - return AstMem(AstOp('segm', segm, ptr), MEMPREFIX2SIZE[s]) - raise ValueError('len(tokens) > 3') - -mem_size = (BYTE | DWORD | QWORD | WORD | TBYTE | XMMWORD) -deref_mem = (mem_size + PTR + Optional((base_expr + COLON))+ deref_mem_ad).setParseAction(cb_deref_mem) - - -rmarg = ( - gpregs08.parser | - gpregs08_64.parser | - gpregs16.parser | - gpregs32.parser | - gpregs64.parser | - gpregs_mm.parser | - gpregs_xmm.parser | - gpregs_bnd.parser -) - -rmarg |= deref_mem - - -mem_far = FAR + deref_mem - - -cl_or_imm = r08_ecx.parser -cl_or_imm |= base_expr - - - -class x86_arg(m_arg): - def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): - if size_hint is None: - size_hint = self.parent.mode - if fixed_size is None: - fixed_size = set() - if isinstance(value, AstId): - if value.name in all_regs_ids_byname: - reg = all_regs_ids_byname[value.name] - fixed_size.add(reg.size) - return reg - if isinstance(value.name, ExprId): - fixed_size.add(value.name.size) - return value.name - if value.name in MEMPREFIX2SIZE: - return None - if value.name in ["FAR"]: - return None - - loc_key = loc_db.get_or_create_name_location(value.name.encode()) - return ExprLoc(loc_key, size_hint) - if isinstance(value, AstOp): - # First pass to retrieve fixed_size - if value.op == "segm": - segm = self.asm_ast_to_expr(value.args[0], loc_db) - ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) - return ExprOp('segm', segm, ptr) - args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] - if 
len(fixed_size) == 0: - # No fixed size - pass - elif len(fixed_size) == 1: - # One fixed size, regen all - size = list(fixed_size)[0] - args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] - else: - raise ValueError("Size conflict") - if None in args: - return None - return ExprOp(value.op, *args) - if isinstance(value, AstInt): - if 1 << size_hint < value.value: - size_hint *= 2 - return ExprInt(value.value, size_hint) - if isinstance(value, AstMem): - fixed_size.add(value.size) - ptr = self.asm_ast_to_expr(value.ptr, loc_db, None, set()) - if ptr is None: - return None - return ExprMem(ptr, value.size) - return None - -class r_al(reg_noarg, x86_arg): - reg_info = r08_eax - parser = reg_info.parser - - -class r_ax(reg_noarg, x86_arg): - reg_info = r16_eax - parser = reg_info.parser - - -class r_dx(reg_noarg, x86_arg): - reg_info = r16_edx - parser = reg_info.parser - - -class r_eax(reg_noarg, x86_arg): - reg_info = r32_eax - parser = reg_info.parser - - -class r_rax(reg_noarg, x86_arg): - reg_info = r64_eax - parser = reg_info.parser - - -class r_cl(reg_noarg, x86_arg): - reg_info = r08_ecx - parser = reg_info.parser - - -invmode = {16: 32, 32: 16} - - -def opmode_prefix(mode): - size, opmode, admode = mode - if size in [16, 32]: - if opmode: - return invmode[size] - else: - return size - elif size == 64: - if opmode: - return 16 - else: - return 32 - raise NotImplementedError('not fully functional') - - -def admode_prefix(mode): - size, opmode, admode = mode - if size in [16, 32]: - if admode: - return invmode[size] - else: - return size - elif size == 64: - return 64 - raise NotImplementedError('not fully functional') - - -def v_opmode_info(size, opmode, rex_w, stk): - if size in [16, 32]: - if opmode: - return invmode[size] - else: - return size - elif size == 64: - # Rex has the maximum priority - # Then opmode - # Then stacker - if rex_w == 1: - return 64 - elif opmode == 1: - return 16 - elif stk: - return 64 - else: - return 
32 - - -def v_opmode(p): - stk = hasattr(p, 'stk') - return v_opmode_info(p.mode, p.opmode, p.rex_w.value, stk) - - -def v_admode_info(size, admode): - if size in [16, 32]: - if admode: - return invmode[size] - else: - return size - elif size == 64: - if admode == 1: - return 32 - return 64 - - -def v_admode(p): - return v_admode_info(p.mode, p.admode) - - -def offsize(p): - if p.opmode: - return 16 - else: - return p.mode - - -def get_prefix(s): - g = re.search('(\S+)(\s+)', s) - if not g: - return None, s - prefix, b = g.groups() - return prefix, s[len(prefix) + len(b):] - - -repeat_mn = ["INS", "OUTS", - "MOVSB", "MOVSW", "MOVSD", "MOVSQ", - "SCASB", "SCASW", "SCASD", "SCASQ", - "LODSB", "LODSW", "LODSD", "LODSQ", - "STOSB", "STOSW", "STOSD", "STOSQ", - "CMPSB", "CMPSW", "CMPSD", "CMPSQ", - ] - - -class group(object): - - def __init__(self): - self.value = None - - -class additional_info(object): - - def __init__(self): - self.except_on_instr = False - self.g1 = group() - self.g2 = group() - self.vopmode = None - self.stk = False - self.v_opmode = None - self.v_admode = None - self.prefixed = b'' - - -class instruction_x86(instruction): - __slots__ = [] - delayslot = 0 - - def __init__(self, *args, **kargs): - super(instruction_x86, self).__init__(*args, **kargs) - - def v_opmode(self): - return self.additional_info.v_opmode - - def v_admode(self): - return self.additional_info.v_admode - - def dstflow(self): - if self.name in conditional_branch + unconditional_branch: - return True - if self.name.startswith('LOOP'): - return True - return self.name in ['CALL'] - - def dstflow2label(self, loc_db): - if self.additional_info.g1.value & 14 and self.name in repeat_mn: - return - expr = self.args[0] - if not expr.is_int(): - return - addr = expr.arg + int(self.offset) - loc_key = loc_db.get_or_create_offset_location(addr) - self.args[0] = ExprLoc(loc_key, expr.size) - - def breakflow(self): - if self.name in conditional_branch + unconditional_branch: - return True - 
if self.name.startswith('LOOP'): - return True - if self.name.startswith('RET'): - return True - if self.name.startswith('INT'): - return True - if self.name.startswith('SYS'): - return True - return self.name in ['CALL', 'HLT', 'IRET', 'IRETD', 'IRETQ', 'ICEBP'] - - def splitflow(self): - if self.name in conditional_branch: - return True - if self.name in unconditional_branch: - return False - if self.name.startswith('LOOP'): - return True - if self.name.startswith('INT'): - return True - if self.name.startswith('SYS'): - return True - return self.name in ['CALL'] - - def setdstflow(self, a): - return - - def is_subcall(self): - return self.name in ['CALL'] - - def getdstflow(self, loc_db): - if self.additional_info.g1.value & 14 and self.name in repeat_mn: - addr = int(self.offset) - loc_key = loc_db.get_or_create_offset_location(addr) - return [ExprLoc(loc_key, self.v_opmode())] - return [self.args[0]] - - def get_symbol_size(self, symbol, loc_db): - return self.mode - - def fixDstOffset(self): - expr = self.args[0] - if self.offset is None: - raise ValueError('symbol not resolved %s' % l) - if not isinstance(expr, ExprInt): - log.warning('dynamic dst %r', expr) - return - self.args[0] = ExprInt(int(expr) - self.offset, self.mode) - - def get_info(self, c): - self.additional_info.g1.value = c.g1.value - self.additional_info.g2.value = c.g2.value - self.additional_info.stk = hasattr(c, 'stk') - self.additional_info.v_opmode = c.v_opmode() - self.additional_info.v_admode = c.v_admode() - self.additional_info.prefix = c.prefix - self.additional_info.prefixed = getattr(c, "prefixed", b"") - - def __str__(self): - return self.to_string() - - def to_string(self, loc_db=None): - o = super(instruction_x86, self).to_string(loc_db) - if self.additional_info.g1.value & 1: - o = "LOCK %s" % o - if self.additional_info.g1.value & 2: - if getattr(self.additional_info.prefixed, 'default', b"") != b"\xF2": - o = "REPNE %s" % o - if self.additional_info.g1.value & 8: - if 
getattr(self.additional_info.prefixed, 'default', b"") != b"\xF3": - o = "REP %s" % o - elif self.additional_info.g1.value & 4: - if getattr(self.additional_info.prefixed, 'default', b"") != b"\xF3": - o = "REPE %s" % o - return o - - def get_args_expr(self): - args = [] - for a in self.args: - a = a.replace_expr(replace_regs[self.mode]) - args.append(a) - return args - - @staticmethod - def arg2str(expr, index=None, loc_db=None): - if expr.is_id() or expr.is_int(): - o = str(expr) - elif expr.is_loc(): - if loc_db is not None: - o = loc_db.pretty_str(expr.loc_key) - else: - o = str(expr) - elif ((isinstance(expr, ExprOp) and expr.op == 'far' and - isinstance(expr.args[0], ExprMem)) or - isinstance(expr, ExprMem)): - if isinstance(expr, ExprOp): - prefix, expr = "FAR ", expr.args[0] - else: - prefix = "" - sz = SIZE2MEMPREFIX[expr.size] - segm = "" - if expr.is_mem_segm(): - segm = "%s:" % expr.ptr.args[0] - expr = expr.ptr.args[1] - else: - expr = expr.ptr - if isinstance(expr, ExprOp): - s = str(expr).replace('(', '').replace(')', '') - else: - s = str(expr) - o = prefix + sz + ' PTR %s[%s]' % (segm, s) - elif isinstance(expr, ExprOp) and expr.op == 'segm': - o = "%s:%s" % (expr.args[0], expr.args[1]) - else: - raise ValueError('check this %r' % expr) - return "%s" % o - - - -class mn_x86(cls_mn): - name = "x86" - prefix_op_size = False - prefix_ad_size = False - regs = regs_module - all_mn = [] - all_mn_mode = defaultdict(list) - all_mn_name = defaultdict(list) - all_mn_inst = defaultdict(list) - bintree = {} - num = 0 - delayslot = 0 - pc = {16: IP, 32: EIP, 64: RIP} - sp = {16: SP, 32: ESP, 64: RSP} - instruction = instruction_x86 - max_instruction_len = 15 - - @classmethod - def getpc(cls, attrib): - return cls.pc[attrib] - - @classmethod - def getsp(cls, attrib): - return cls.sp[attrib] - - def v_opmode(self): - if hasattr(self, 'stk'): - stk = 1 - else: - stk = 0 - return v_opmode_info(self.mode, self.opmode, self.rex_w.value, stk) - - def v_admode(self): - 
size, opmode, admode = self.mode, self.opmode, self.admode - if size in [16, 32]: - if admode: - return invmode[size] - else: - return size - elif size == 64: - if admode == 1: - return 32 - return 64 - - def additional_info(self): - info = additional_info() - info.g1.value = self.g1.value - info.g2.value = self.g2.value - info.stk = hasattr(self, 'stk') - info.v_opmode = self.v_opmode() - info.prefixed = b"" - if hasattr(self, 'prefixed'): - info.prefixed = self.prefixed.default - return info - - @classmethod - def check_mnemo(cls, fields): - pass - - @classmethod - def getmn(cls, name): - return name.upper() - - @classmethod - def mod_fields(cls, fields): - prefix = [d_g1, d_g2, d_rex_p, d_rex_w, d_rex_r, d_rex_x, d_rex_b] - return prefix + fields - - @classmethod - def gen_modes(cls, subcls, name, bases, dct, fields): - dct['mode'] = None - return [(subcls, name, bases, dct, fields)] - - @classmethod - def fromstring(cls, text, loc_db, mode): - pref = 0 - prefix, new_s = get_prefix(text) - if prefix == "LOCK": - pref |= 1 - text = new_s - elif prefix == "REPNE" or prefix == "REPNZ": - pref |= 2 - text = new_s - elif prefix == "REPE" or prefix == "REPZ": - pref |= 4 - text = new_s - elif prefix == "REP": - pref |= 8 - text = new_s - c = super(mn_x86, cls).fromstring(text, loc_db, mode) - c.additional_info.g1.value = pref - return c - - @classmethod - def pre_dis(cls, v, mode, offset): - offset_o = offset - pre_dis_info = {'opmode': 0, - 'admode': 0, - 'g1': 0, - 'g2': 0, - 'rex_p': 0, - 'rex_w': 0, - 'rex_r': 0, - 'rex_x': 0, - 'rex_b': 0, - 'prefix': b"", - 'prefixed': b"", - } - while True: - c = v.getbytes(offset) - if c == b'\x66': - pre_dis_info['opmode'] = 1 - elif c == b'\x67': - pre_dis_info['admode'] = 1 - elif c == b'\xf0': - pre_dis_info['g1'] = 1 - elif c == b'\xf2': - pre_dis_info['g1'] = 2 - elif c == b'\xf3': - pre_dis_info['g1'] = 12 - - elif c == b'\x2e': - pre_dis_info['g2'] = 1 - elif c == b'\x36': - pre_dis_info['g2'] = 2 - elif c == b'\x3e': 
- pre_dis_info['g2'] = 3 - elif c == b'\x26': - pre_dis_info['g2'] = 4 - elif c == b'\x64': - pre_dis_info['g2'] = 5 - elif c == b'\x65': - pre_dis_info['g2'] = 6 - - else: - break - pre_dis_info['prefix'] += c - offset += 1 - if mode == 64 and c in b'@ABCDEFGHIJKLMNO': - x = ord(c) - pre_dis_info['rex_p'] = 1 - pre_dis_info['rex_w'] = (x >> 3) & 1 - pre_dis_info['rex_r'] = (x >> 2) & 1 - pre_dis_info['rex_x'] = (x >> 1) & 1 - pre_dis_info['rex_b'] = (x >> 0) & 1 - offset += 1 - elif pre_dis_info.get('g1', None) == 12 and c in [b'\xa6', b'\xa7', b'\xae', b'\xaf']: - pre_dis_info['g1'] = 4 - return pre_dis_info, v, mode, offset, offset - offset_o - - @classmethod - def get_cls_instance(cls, cc, mode, infos=None): - for opmode in [0, 1]: - for admode in [0, 1]: - c = cc() - c.init_class() - - c.reset_class() - c.add_pre_dis_info() - c.dup_info(infos) - c.mode = mode - c.opmode = opmode - c.admode = admode - - if not hasattr(c, 'stk') and hasattr(c, "fopmode") and c.fopmode.mode == 64: - c.rex_w.value = 1 - yield c - - def post_dis(self): - if self.g2.value: - for a in self.args: - if not isinstance(a.expr, ExprMem): - continue - m = a.expr - a.expr = ExprMem( - ExprOp('segm', enc2segm[self.g2.value], m.ptr), m.size) - return self - - def dup_info(self, infos): - if infos is not None: - self.g1.value = infos.g1.value - self.g2.value = infos.g2.value - - def reset_class(self): - super(mn_x86, self).reset_class() - if hasattr(self, "opmode"): - del(self.opmode) - if hasattr(self, "admode"): - del(self.admode) - - def add_pre_dis_info(self, pre_dis_info=None): - if pre_dis_info is None: - return True - if hasattr(self, "prefixed") and self.prefixed.default == b"\x66": - pre_dis_info['opmode'] = 0 - self.opmode = pre_dis_info['opmode'] - self.admode = pre_dis_info['admode'] - - if hasattr(self, 'no_xmm_pref') and\ - pre_dis_info['prefix'] and\ - pre_dis_info['prefix'][-1] in b'\x66\xf2\xf3': - return False - if (hasattr(self, "prefixed") and - not 
pre_dis_info['prefix'].endswith(self.prefixed.default)): - return False - if (self.rex_w.value is not None and - self.rex_w.value != pre_dis_info['rex_w']): - return False - else: - self.rex_w.value = pre_dis_info['rex_w'] - self.rex_r.value = pre_dis_info['rex_r'] - self.rex_b.value = pre_dis_info['rex_b'] - self.rex_x.value = pre_dis_info['rex_x'] - self.rex_p.value = pre_dis_info['rex_p'] - - if hasattr(self, 'no_rex') and\ - (self.rex_r.value or self.rex_b.value or - self.rex_x.value or self.rex_p.value): - return False - - - self.g1.value = pre_dis_info['g1'] - self.g2.value = pre_dis_info['g2'] - self.prefix = pre_dis_info['prefix'] - return True - - def post_asm(self, v): - return v - - - def gen_prefix(self): - v = b"" - rex = 0x40 - if self.g1.value is None: - self.g1.value = 0 - if self.g2.value is None: - self.g2.value = 0 - - if self.rex_w.value: - rex |= 0x8 - if self.rex_r.value: - rex |= 0x4 - if self.rex_x.value: - rex |= 0x2 - if self.rex_b.value: - rex |= 0x1 - if rex != 0x40 or self.rex_p.value == 1: - v = int_to_byte(rex) + v - if hasattr(self, 'no_rex'): - return None - - if hasattr(self, 'prefixed'): - v = self.prefixed.default + v - - if self.g1.value & 1: - v = b"\xf0" + v - if self.g1.value & 2: - if hasattr(self, 'no_xmm_pref'): - return None - v = b"\xf2" + v - if self.g1.value & 12: - if hasattr(self, 'no_xmm_pref'): - return None - v = b"\xf3" + v - if self.g2.value: - v = { - 1: b'\x2e', - 2: b'\x36', - 3: b'\x3e', - 4: b'\x26', - 5: b'\x64', - 6: b'\x65' - }[self.g2.value] + v - # mode prefix - if hasattr(self, "admode") and self.admode: - v = b"\x67" + v - - if hasattr(self, "opmode") and self.opmode: - if hasattr(self, 'no_xmm_pref'): - return None - v = b"\x66" + v - return v - - def encodefields(self, decoded): - v = super(mn_x86, self).encodefields(decoded) - prefix = self.gen_prefix() - if prefix is None: - return None - return prefix + v - - def getnextflow(self, loc_db): - raise NotImplementedError('not fully functional') - - 
def ir_pre_instruction(self): - return [ExprAssign(mRIP[self.mode], - ExprInt(self.offset + self.l, mRIP[self.mode].size))] - - @classmethod - def filter_asm_candidates(cls, instr, candidates): - - cand_same_mode = [] - cand_diff_mode = [] - out = [] - for c, v in candidates: - if (hasattr(c, 'no_xmm_pref') and - (c.g1.value & 2 or c.g1.value & 4 or c.g1.value & 8 or c.opmode)): - continue - if hasattr(c, "fopmode") and v_opmode(c) != c.fopmode.mode: - continue - if hasattr(c, "fadmode") and v_admode(c) != c.fadmode.mode: - continue - # relative dstflow must not have opmode set - # (assign IP instead of EIP for instance) - if (instr.dstflow() and - instr.name not in ["JCXZ", "JECXZ", "JRCXZ"] and - len(instr.args) == 1 and - isinstance(instr.args[0], ExprInt) and c.opmode): - continue - - out.append((c, v)) - candidates = out - for c, v in candidates: - if v_opmode(c) == instr.mode: - cand_same_mode += v - for c, v in candidates: - if v_opmode(c) != instr.mode: - cand_diff_mode += v - cand_same_mode.sort(key=len) - cand_diff_mode.sort(key=len) - return cand_same_mode + cand_diff_mode - - -class bs_modname_size(bs_divert): - prio = 1 - - def divert(self, i, candidates): - out = [] - for candidate in candidates: - cls, name, bases, dct, fields = candidate - fopmode = opmode_prefix( - (dct['mode'], dct['opmode'], dct['admode'])) - mode = dct['mode'] - size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] - # no mode64 exinstance in name means no 64bit version of mnemo - if mode == 64: - if mode in self.args['name']: - nfields = fields[:] - f, i = getfieldindexby_name(nfields, 'rex_w') - f = bs("1", l=0, cls=(bs_fbit,), fname="rex_w") - osize = v_opmode_info(size, opmode, 1, 0) - nfields[i] = f - nfields = nfields[:-1] - ndct = dict(dct) - if osize in self.args['name']: - ndct['name'] = self.args['name'][osize] - out.append((cls, ndct['name'], bases, ndct, nfields)) - - nfields = fields[:] - nfields = nfields[:-1] - f, i = getfieldindexby_name(nfields, 
'rex_w') - f = bs("0", l=0, cls=(bs_fbit,), fname="rex_w") - osize = v_opmode_info(size, opmode, 0, 0) - nfields[i] = f - ndct = dict(dct) - if osize in self.args['name']: - ndct['name'] = self.args['name'][osize] - out.append((cls, ndct['name'], bases, ndct, nfields)) - else: - l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) - osize = v_opmode_info(size, opmode, None, 0) - nfields = fields[:-1] - ndct = dict(dct) - if osize in self.args['name']: - ndct['name'] = self.args['name'][osize] - out.append((cls, ndct['name'], bases, ndct, nfields)) - return out - - -class bs_modname_jecx(bs_divert): - prio = 1 - - def divert(self, i, candidates): - out = [] - for candidate in candidates: - cls, name, bases, dct, fields = candidate - fopmode = opmode_prefix( - (dct['mode'], dct['opmode'], dct['admode'])) - mode = dct['mode'] - size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] - - nfields = fields[:] - nfields = nfields[:-1] - args = dict(self.args) - ndct = dict(dct) - if mode == 64: - if admode: - ndct['name'] = "JECXZ" - else: - ndct['name'] = "JRCXZ" - elif mode == 32: - if admode: - ndct['name'] = "JCXZ" - else: - ndct['name'] = "JECXZ" - elif mode == 16: - if admode: - ndct['name'] = "JECXZ" - else: - ndct['name'] = "JCXZ" - else: - raise ValueError('unhandled mode') - out.append((cls, ndct['name'], bases, ndct, nfields)) - return out - - -class bs_modname_mode(bs_divert): - prio = 1 - - def divert(self, i, candidates): - out = [] - for candidate in candidates: - cls, name, bases, dct, fields = candidate - fopmode = opmode_prefix( - (dct['mode'], dct['opmode'], dct['admode'])) - size, opmode, admode = dct['mode'], dct['opmode'], dct['admode'] - - mode = dct['mode'] - l = opmode_prefix((dct['mode'], dct['opmode'], dct['admode'])) - osize = v_opmode_info(size, opmode, None, 0) - nfields = fields[:-1] - args = dict(self.args) - ndct = dict(dct) - if mode == 64 or osize == 32: - ndct['name'] = self.args['name'][mode] - else: - ndct['name'] = 
self.args['name'][16] - out.append((cls, ndct['name'], bases, ndct, nfields)) - return out - - -class x86_imm(imm_noarg): - parser = base_expr - - def decodeval(self, v): - return swap_uint(self.l, v) - - def encodeval(self, v): - return swap_uint(self.l, v) - - -class x86_imm_fix_08(imm_noarg): - parser = base_expr - intsize = 8 - intmask = (1 << intsize) - 1 - - def decodeval(self, v): - return self.ival - - def encode(self): - v = self.expr2int(self.expr) - if v != self.ival: - return False - self.value = 0 - return True - - -class x86_08(x86_imm): - intsize = 8 - intmask = (1 << intsize) - 1 - - -class x86_16(x86_imm): - intsize = 16 - intmask = (1 << intsize) - 1 - - -class x86_32(x86_imm): - intsize = 32 - intmask = (1 << intsize) - 1 - - -class x86_64(x86_imm): - intsize = 64 - intmask = (1 << intsize) - 1 - - -class x86_08_ne(x86_imm): - intsize = 8 - intmask = (1 << intsize) - 1 - - def encode(self): - return True - - def decode(self, v): - v = swap_uint(self.l, v) - p = self.parent - admode = p.v_admode() - value = sign_ext(v, self.intsize, admode) - self.expr = ExprInt(value, admode) - return True - - -class x86_16_ne(x86_08_ne): - intsize = 16 - intmask = (1 << intsize) - 1 - - -class x86_32_ne(x86_08_ne): - intsize = 32 - intmask = (1 << intsize) - 1 - - -class x86_64_ne(x86_08_ne): - intsize = 64 - intmask = (1 << intsize) - 1 - - -class x86_s08to16(x86_imm): - in_size = 8 - out_size = 16 - - def myexpr(self, x): - return ExprInt(x, 16) - - def int2expr(self, v): - return self.myexpr(v) - - def expr2int(self, e): - if not isinstance(e, ExprInt): - return None - v = int(e) - if v & ~((1 << self.l) - 1) != 0: - return None - return v - - def decode(self, v): - v = v & self.lmask - v = self.decodeval(v) - if self.parent.v_opmode() == 64: - self.expr = ExprInt(sign_ext(v, self.in_size, 64), 64) - else: - if (1 << (self.l - 1)) & v: - v = sign_ext(v, self.l, self.out_size) - self.expr = self.myexpr(v) - return True - - def encode(self): - if not 
isinstance(self.expr, ExprInt): - return False - v = int(self.expr) - opmode = self.parent.v_opmode() - - out_size = self.out_size - if opmode != self.out_size: - if opmode == 32 and self.out_size == 64: - out_size = opmode - if v == sign_ext( - int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): - pass - else: - # test with rex_w - self.parent.rex_w.value = 1 - opmode = self.parent.v_opmode() - out_size = opmode - if (v != sign_ext( - int(v & ((1 << self.in_size) - 1)), - self.in_size, out_size)): - return False - if v != sign_ext( - int(v & ((1 << self.in_size) - 1)), self.in_size, out_size): - return False - v = self.encodeval(v) - self.value = (v & 0xffffffff) & self.lmask - return True - - def decodeval(self, v): - return swap_uint(self.l, v) - - def encodeval(self, v): - return swap_sint(self.l, v) - - -class x86_s08to32(x86_s08to16): - in_size = 8 - out_size = 32 - - def myexpr(self, x): - return ExprInt(x, 32) - - def decode(self, v): - v = v & self.lmask - v = self.decodeval(v) - if self.parent.rex_w.value == 1: - v = ExprInt(sign_ext(v, self.in_size, 64), 64) - else: - v = ExprInt(sign_ext(v, self.in_size, 32), 32) - - self.expr = v - return True - - -class x86_s08to64(x86_s08to32): - in_size = 8 - out_size = 64 - - def myexpr(self, x): - return ExprInt(x, 64) - - -class x86_s32to64(x86_s08to32): - in_size = 32 - out_size = 64 - - def myexpr(self, x): - return ExprInt(x, 64) - - -class bs_eax(x86_arg): - reg_info = r_eax_all - rindex = 0 - parser = reg_info.parser - - def decode(self, v): - p = self.parent - expr = None - if hasattr(p, 'w8') and p.w8.value == 0: - expr = regs08_expr[self.rindex] - else: - expr = size2gpregs[p.v_opmode()].expr[self.rindex] - self.expr = expr - return True - - def encode(self): - self.value = 0 - p = self.parent - expr = self.expr - osize = p.v_opmode() - if hasattr(p, 'w8'): - if p.w8.value is None: - # XXX TODO: priority in w8 erase? 
- if expr.size == 8: - p.w8.value = 0 - else: - p.w8.value = 1 - if hasattr(p, 'w8') and p.w8.value == 0: - return expr == regs08_expr[self.rindex] - elif p.mode in [16, 32]: - return expr == size2gpregs[osize].expr[self.rindex] - elif p.mode == 64: - if expr == size2gpregs[64].expr[self.rindex]: - p.rex_w.value = 1 - return True - elif expr == size2gpregs[osize].expr[self.rindex]: - return True - return False - return False - -class bs_seg(x86_arg): - reg_info = r_eax_all - rindex = 0 - parser = reg_info.parser - - def decode(self, v): - self.expr = self.reg_info.expr[0] - return True - - def encode(self): - self.value = 0 - return self.expr == self.reg_info.expr[0] - - -class bs_edx(bs_eax): - reg_info = r_edx_all - rindex = 2 - parser = reg_info.parser - - -class bs_st(bs_eax): - reg_info = r_st_all - rindex = 0 - parser = reg_info.parser - - -class bs_cs(bs_seg): - reg_info = r_cs_all - rindex = 0 - parser = reg_info.parser - - -class bs_ds(bs_seg): - reg_info = r_ds_all - rindex = 0 - parser = reg_info.parser - - -class bs_es(bs_seg): - reg_info = r_es_all - rindex = 0 - parser = reg_info.parser - - -class bs_ss(bs_seg): - reg_info = r_ss_all - rindex = 0 - parser = reg_info.parser - - -class bs_fs(bs_seg): - reg_info = r_fs_all - rindex = 0 - parser = reg_info.parser - - -class bs_gs(bs_seg): - reg_info = r_gs_all - rindex = 0 - parser = reg_info.parser - - -class x86_reg_st(reg_noarg, x86_arg): - reg_info = r_st_all - parser = reg_info.parser - - -class bs_sib_scale(bs_divert): - bsname = "sib_scale" - - def divert(self, i, candidates): - out = [] - done = False - for cls, name, bases, dct, fields in candidates: - if (not (admode_prefix( - (dct['mode'], dct['opmode'], dct['admode'])) != 16 and - 'rm' in dct and dct['rm'] == 0b100 and - 'mod' in dct and dct['mod'] != 0b11)): - ndct = dict(dct) - nfields = fields[:] - nfields[i] = None - ndct[self.args['fname']] = None - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - - nfields = fields[:] - 
args = dict(self.args) - ndct = dict(dct) - f = bs(**args) - nfields[i] = f - ndct[self.args['fname']] = None - out.append((cls, ndct['name'], bases, ndct, nfields)) - return out - - -class bs_sib_index(bs_sib_scale): - pass - - -class bs_sib_base(bs_sib_scale): - pass - - -class bs_disp(bs_divert): - - def divert(self, i, candidates): - out = [] - done = False - for cls, name, bases, dct, fields in candidates: - ndct = dict(dct) - nfields = fields[:] - if (admode_prefix( - (dct['mode'], dct['opmode'], dct['admode'])) == 16): - if 'mod' in dct and dct['mod'] == 0b00 and \ - 'rm' in dct and dct['rm'] == 0b110: - nfields[i] = bs( - l=16, cls=(x86_16_ne,), fname=self.args['fname']) - ndct[self.args['fname']] = True - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - elif 'mod' in dct and dct['mod'] == 0b01: - nfields[i] = bs( - l=8, cls=(x86_08_ne,), fname=self.args['fname']) - ndct[self.args['fname']] = True - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - elif 'mod' in dct and dct['mod'] == 0b10: - nfields[i] = bs( - l=16, cls=(x86_16_ne,), fname=self.args['fname']) - ndct[self.args['fname']] = True - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - else: - if 'mod' in dct and dct['mod'] == 0b00 and \ - 'rm' in dct and dct['rm'] == 0b101: - nfields[i] = bs( - l=32, cls=(x86_32_ne,), fname=self.args['fname']) - ndct[self.args['fname']] = True - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - elif 'mod' in dct and dct['mod'] == 0b01: - nfields[i] = bs( - l=8, cls=(x86_08_ne,), fname=self.args['fname']) - ndct[self.args['fname']] = True - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - elif 'mod' in dct and dct['mod'] == 0b10: - nfields[i] = bs( - l=32, cls=(x86_32_ne,), fname=self.args['fname']) - ndct[self.args['fname']] = True - out.append((cls, ndct['name'], bases, ndct, nfields)) - continue - - nfields[i] = None - ndct[self.args['fname']] = None - out.append((cls, 
ndct['name'], bases, ndct, nfields)) - return out - - -def getmodrm(c): - return (c >> 6) & 3, (c >> 3) & 7, c & 7 - - -def setmodrm(mod, re, rm): - return ((mod & 3) << 6) | ((re & 7) << 3) | (rm & 7) - - -def sib(c): - return modrm(c) - -db_afs_64 = [] -sib_64_s08_ebp = [] - - -def gen_modrm_form(): - global db_afs_64, sib_64_s08_ebp - ebp = 5 - - sib_s08_ebp = [{f_isad: True} for i in range(0x100)] - sib_u32_ebp = [{f_isad: True} for i in range(0x100)] - sib_u32 = [{f_isad: True} for i in range(0x100)] - - sib_u64 = [] - for rex_x in range(2): - o = [] - for rex_b in range(2): - x = [{f_isad: True} for i in range(0x100)] - o.append(x) - sib_u64.append(o) - - sib_u64_ebp = [] - for rex_x in range(2): - o = [] - for rex_b in range(2): - x = [{f_isad: True} for i in range(0x100)] - o.append(x) - sib_u64_ebp.append(o) - - sib_64_s08_ebp = [] - for rex_x in range(2): - o = [] - for rex_b in range(2): - x = [{f_isad: True} for i in range(0x100)] - o.append(x) - sib_64_s08_ebp.append(o) - - for sib_rez in [sib_s08_ebp, - sib_u32_ebp, - sib_u32, - sib_64_s08_ebp, - sib_u64_ebp, - sib_u64, - ]: - for index in range(0x100): - ss, i, b = getmodrm(index) - - if b == 0b101: - if sib_rez == sib_s08_ebp: - sib_rez[index][f_imm] = f_s08 - sib_rez[index][ebp] = 1 - elif sib_rez == sib_u32_ebp: - sib_rez[index][f_imm] = f_u32 - sib_rez[index][ebp] = 1 - elif sib_rez == sib_u32: - sib_rez[index][f_imm] = f_u32 - elif sib_rez == sib_u64_ebp: - for rex_b in range(2): - for rex_x in range(2): - sib_rez[rex_x][rex_b][index][f_imm] = f_u32 - sib_rez[rex_x][rex_b][index][ebp + 8 * rex_b] = 1 - elif sib_rez == sib_u64: - for rex_b in range(2): - for rex_x in range(2): - sib_rez[rex_x][rex_b][index][f_imm] = f_u32 - elif sib_rez == sib_64_s08_ebp: - for rex_b in range(2): - for rex_x in range(2): - sib_rez[rex_x][rex_b][index][f_imm] = f_s08 - sib_rez[rex_x][rex_b][index][ebp + 8 * rex_b] = 1 - - else: - if sib_rez == sib_s08_ebp: - sib_rez[index][b] = 1 - sib_rez[index][f_imm] = f_s08 - 
elif sib_rez == sib_u32_ebp: - sib_rez[index][b] = 1 - sib_rez[index][f_imm] = f_u32 - elif sib_rez == sib_u32: - sib_rez[index][b] = 1 - elif sib_rez == sib_u64_ebp: - for rex_b in range(2): - for rex_x in range(2): - sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 - sib_rez[rex_x][rex_b][index][f_imm] = f_u32 - elif sib_rez == sib_u64: - for rex_b in range(2): - for rex_x in range(2): - sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 - elif sib_rez == sib_64_s08_ebp: - for rex_b in range(2): - for rex_x in range(2): - sib_rez[rex_x][rex_b][index][f_imm] = f_s08 - sib_rez[rex_x][rex_b][index][b + 8 * rex_b] = 1 - - if i == 0b100 and sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: - continue - - if sib_rez in [sib_s08_ebp, sib_u32_ebp, sib_u32]: - tmp = i - if not tmp in sib_rez[index]: - sib_rez[index][tmp] = 0 # 1 << ss - sib_rez[index][tmp] += 1 << ss - else: - for rex_b in range(2): - for rex_x in range(2): - tmp = i + 8 * rex_x - if i == 0b100 and rex_x == 0: - continue - if not tmp in sib_rez[rex_x][rex_b][index]: - sib_rez[rex_x][rex_b][index][tmp] = 0 # 1 << ss - sib_rez[rex_x][rex_b][index][tmp] += 1 << ss - - # 32bit - db_afs_32 = [None for i in range(0x100)] - for i in range(0x100): - index = i - mod, re, rm = getmodrm(i) - - if mod == 0b00: - if rm == 0b100: - db_afs_32[index] = sib_u32 - elif rm == 0b101: - db_afs_32[index] = {f_isad: True, f_imm: f_u32} - else: - db_afs_32[index] = {f_isad: True, rm: 1} - elif mod == 0b01: - if rm == 0b100: - db_afs_32[index] = sib_s08_ebp - continue - tmp = {f_isad: True, rm: 1, f_imm: f_s08} - db_afs_32[index] = tmp - - elif mod == 0b10: - if rm == 0b100: - db_afs_32[index] = sib_u32_ebp - else: - db_afs_32[index] = {f_isad: True, rm: 1, f_imm: f_u32} - elif mod == 0b11: - db_afs_32[index] = {f_isad: False, rm: 1} - - # 64bit - db_afs_64 = [None for i in range(0x400)] - for i in range(0x400): - index = i - rex_x = (index >> 9) & 1 - rex_b = (index >> 8) & 1 - mod, re, rm = getmodrm(i & 0xff) - - if mod == 0b00: - if 
rm == 0b100: - db_afs_64[i] = sib_u64[rex_x][rex_b] - elif rm == 0b101: - db_afs_64[i] = {f_isad: True, f_imm: f_u32, 16: 1} - else: - db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1} - elif mod == 0b01: - if rm == 0b100: - db_afs_64[i] = sib_64_s08_ebp[rex_x][rex_b] - continue - tmp = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_s08} - db_afs_64[i] = tmp - - elif mod == 0b10: - if rm == 0b100: - db_afs_64[i] = sib_u64_ebp[rex_x][rex_b] - else: - db_afs_64[i] = {f_isad: True, rm + 8 * rex_b: 1, f_imm: f_u32} - elif mod == 0b11: - db_afs_64[i] = {f_isad: False, rm + 8 * rex_b: 1} - - # 16bit - db_afs_16 = [None for i in range(0x100)] - _si = 6 - _di = 7 - _bx = 3 - _bp = 5 - for i in range(0x100): - index = i - mod, re, rm = getmodrm(i) - - if mod == 0b00: - if rm == 0b100: - db_afs_16[index] = {f_isad: True, _si: 1} - elif rm == 0b101: - db_afs_16[index] = {f_isad: True, _di: 1} - elif rm == 0b110: - db_afs_16[index] = { - f_isad: True, f_imm: f_u16} # {f_isad:True,_bp:1} - elif rm == 0b111: - db_afs_16[index] = {f_isad: True, _bx: 1} - else: - db_afs_16[index] = {f_isad: True, - [_si, _di][rm % 2]: 1, - [_bx, _bp][(rm >> 1) % 2]: 1} - elif mod in [0b01, 0b10]: - if mod == 0b01: - my_imm = f_s08 - else: - my_imm = f_u16 - - if rm == 0b100: - db_afs_16[index] = {f_isad: True, _si: 1, f_imm: my_imm} - elif rm == 0b101: - db_afs_16[index] = {f_isad: True, _di: 1, f_imm: my_imm} - elif rm == 0b110: - db_afs_16[index] = {f_isad: True, _bp: 1, f_imm: my_imm} - elif rm == 0b111: - db_afs_16[index] = {f_isad: True, _bx: 1, f_imm: my_imm} - else: - db_afs_16[index] = {f_isad: True, - [_si, _di][rm % 2]: 1, - [_bx, _bp][(rm >> 1) % 2]: 1, - f_imm: my_imm} - - elif mod == 0b11: - db_afs_16[index] = {f_isad: False, rm: 1} - - byte2modrm = {} - byte2modrm[16] = db_afs_16 - byte2modrm[32] = db_afs_32 - byte2modrm[64] = db_afs_64 - - modrm2byte = {16: defaultdict(list), - 32: defaultdict(list), - 64: defaultdict(list), - } - for size, db_afs in viewitems(byte2modrm): - for i, modrm 
in enumerate(db_afs): - if not isinstance(modrm, list): - # We only need sort for determinism - modrm = tuple(sorted(viewitems(modrm), key=str)) - modrm2byte[size][modrm].append(i) - continue - for j, modrm_f in enumerate(modrm): - # We only need sort for determinism - modrm_f = tuple(sorted(viewitems(modrm_f), key=str)) - modrm2byte[size][modrm_f].append((i, j)) - - return byte2modrm, modrm2byte - -byte2modrm, modrm2byte = gen_modrm_form() - - -# ret is modr; ret is displacement -def exprfindmod(e, o=None): - if o is None: - o = {} - if isinstance(e, ExprInt): - return e - if isinstance(e, ExprId): - i = size2gpregs[e.size].expr.index(e) - o[i] = 1 - return None - elif isinstance(e, ExprOp): - out = None - if e.op == '+': - for a in e.args: - r = exprfindmod(a, o) - if out and r1: - raise ValueError('multiple displacement!') - out = r - return out - elif e.op == "*": - mul = int(e.args[1]) - a = e.args[0] - i = size2gpregs[a.size].expr.index(a) - o[i] = mul - else: - raise ValueError('bad op') - return None - -def test_addr_size(ptr, size): - if isinstance(ptr, ExprInt): - return ptr.arg < (1 << size) - else: - return ptr.size == size - -SIZE2XMMREG = {64:gpregs_mm, - 128:gpregs_xmm} -SIZE2BNDREG = {64:gpregs_mm, - 128:gpregs_bnd} - -def parse_mem(expr, parent, w8, sx=0, xmm=0, mm=0, bnd=0): - dct_expr = {} - opmode = parent.v_opmode() - if expr.is_mem_segm() and expr.ptr.args[0].is_int(): - return None, None, False - - if expr.is_mem_segm(): - segm = expr.ptr.args[0] - ptr = expr.ptr.args[1] - else: - segm = None - ptr = expr.ptr - - dct_expr[f_isad] = True - ad_size = ptr.size - admode = parent.v_admode() - if not test_addr_size(ptr, admode): - return None, None, False - - if (w8 == 1 and expr.size != opmode and not sx and - not (hasattr(parent, 'sd') or hasattr(parent, 'wd'))): - return None, None, False - - if hasattr(parent, 'wd'): - if expr.size == 16: - parent.wd.value = 1 - elif expr.size == 32: - pass - else: - return None, None, False - - if (not 
isinstance(ptr, ExprInt) and - parent.mode == 64 and - ptr.size == 32 and - parent.admode != 1): - return None, None, False - dct_expr = {f_isad: True} - disp = exprfindmod(ptr, dct_expr) - out = [] - if disp is None: - # add 0 disp - disp = ExprInt(0, 32) - if disp is not None: - for signed, encoding, cast_size in [(True, f_s08, 8), - (True, f_s16, 16), - (True, f_s32, 32), - (False, f_u08, 8), - (False, f_u16, 16), - (False, f_u32, 32)]: - value = ExprInt(int(disp), cast_size) - if admode < value.size: - if signed: - if int(disp.arg) != sign_ext(int(value), admode, disp.size): - continue - else: - if int(disp.arg) != int(value): - continue - else: - if int(disp.arg) != sign_ext(int(value), value.size, admode): - continue - x1 = dict(dct_expr) - x1[f_imm] = (encoding, value) - out.append(x1) - else: - out = [dct_expr] - return out, segm, True - -def expr2modrm(expr, parent, w8, sx=0, xmm=0, mm=0, bnd=0): - dct_expr = {f_isad : False} - - if mm or xmm or bnd: - if mm and expr.size != 64: - return None, None, False - elif xmm and expr.size != 128: - return None, None, False - elif bnd and expr.size != 128: - return None, None, False - - if isinstance(expr, ExprId): - if bnd: - size2reg = SIZE2BNDREG - else: - size2reg = SIZE2XMMREG - selreg = size2reg[expr.size] - if not expr in selreg.expr: - return None, None, False - i = selreg.expr.index(expr) - dct_expr[i] = 1 - return [dct_expr], None, True - else: - return parse_mem(expr, parent, w8, sx, xmm, mm) - - elif expr.size == 64 and expr not in gpregs_mm.expr: - if hasattr(parent, 'sd'): - parent.sd.value = 1 - elif hasattr(parent, 'wd'): - pass - elif hasattr(parent, 'stk'): - pass - else: - parent.rex_w.value = 1 - opmode = parent.v_opmode() - if sx == 1: - opmode = 16 - if sx == 2: - opmode = 32 - if expr.size == 8 and w8 != 0: - return None, None, False - - if w8 == 0 and expr.size != 8: - return None, None, False - - if not isinstance(expr, ExprMem): - dct_expr[f_isad] = False - if xmm: - if expr in 
gpregs_xmm.expr: - i = gpregs_xmm.expr.index(expr) - dct_expr[i] = 1 - return [dct_expr], None, True - else: - return None, None, False - if bnd: - if expr in gpregs_bnd.expr: - i = gpregs_bnd.expr.index(expr) - dct_expr[i] = 1 - return [dct_expr], None, True - else: - return None, None, False - if mm: - if expr in gpregs_mm.expr: - i = gpregs_mm.expr.index(expr) - dct_expr[i] = 1 - return [dct_expr], None, True - else: - return None, None, False - if w8 == 0: - if parent.mode == 64 and expr in gpregs08_64.expr: - r = gpregs08_64 - parent.rex_p.value = 1 - else: - parent.rex_p.value = 0 - parent.rex_x.value = 0 - r = size2gpregs[8] - if not expr in r.expr: - return None, None, False - i = r.expr.index(expr) - dct_expr[i] = 1 - return [dct_expr], None, True - if opmode != expr.size: - return None, None, False - if not expr in size2gpregs[opmode].expr: - return None, None, False - i = size2gpregs[opmode].expr.index(expr) - if i > 7: - if parent.mode != 64: - return None, None, False - dct_expr[i] = 1 - return [dct_expr], None, True - return parse_mem(expr, parent, w8, sx, xmm, mm, bnd) - -def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): - o = [] - if not modrm[f_isad]: - modrm_k = [key for key, value in viewitems(modrm) if value == 1] - if len(modrm_k) != 1: - raise ValueError('strange reg encoding %r' % modrm) - modrm_k = modrm_k[0] - if w8 == 0: - opmode = 8 - elif sx == 1: - opmode = 16 - elif sx == 2: - opmode = 32 - else: - opmode = parent.v_opmode() - if xmm: - expr = gpregs_xmm.expr[modrm_k] - elif mm: - expr = gpregs_mm.expr[modrm_k] - elif bnd: - expr = gpregs_bnd.expr[modrm_k] - elif opmode == 8 and (parent.v_opmode() == 64 or parent.rex_p.value == 1): - expr = gpregs08_64.expr[modrm_k] - else: - expr = size2gpregs[opmode].expr[modrm_k] - return expr - admode = parent.v_admode() - opmode = parent.v_opmode() - for modrm_k, scale in viewitems(modrm): - if isinstance(modrm_k, int): - expr = size2gpregs[admode].expr[modrm_k] - if scale != 1: - expr 
= ExprInt(scale, admode) * expr - o.append(expr) - if f_imm in modrm: - if parent.disp.value is None: - return None - o.append(ExprInt(int(parent.disp.expr), admode)) - expr = ExprOp('+', *o) - if w8 == 0: - opmode = 8 - elif sx == 1: - opmode = 16 - elif sx == 2: - opmode = 32 - if xmm: - opmode = 128 - elif mm: - opmode = 64 - elif bnd: - opmode = 128 - - expr = ExprMem(expr, size=opmode) - return expr - - -class x86_rm_arg(x86_arg): - parser = rmarg - - def fromstring(self, text, loc_db, parser_result=None): - start, stop = super(x86_rm_arg, self).fromstring(text, loc_db, parser_result) - p = self.parent - if start is None: - return None, None - return start, stop - - def get_modrm(self): - p = self.parent - admode = p.v_admode() - - if not admode in [16, 32, 64]: - raise ValueError('strange admode %r', admode) - v = setmodrm(p.mod.value, 0, p.rm.value) - v |= p.rex_b.value << 8 - v |= p.rex_x.value << 9 - if p.mode == 64: - # XXXx to check - admode = 64 - - xx = byte2modrm[admode][v] - if isinstance(xx, list): - if not p.sib_scale: - return False - v = setmodrm(p.sib_scale.value, - p.sib_index.value, - p.sib_base.value) - xx = xx[v] - return xx - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - self.expr = modrm2expr(xx, p, 1) - return self.expr is not None - - def gen_cand(self, v_cand, admode): - if not admode in modrm2byte: - # XXX TODO: 64bit - return - if not v_cand: - return - - p = self.parent - o_rex_x = p.rex_x.value - o_rex_b = p.rex_b.value - # add candidate without 0 imm - new_v_cand = [] - moddd = False - for v in v_cand: - new_v_cand.append(v) - if f_imm in v and int(v[f_imm][1]) == 0: - v = dict(v) - del(v[f_imm]) - new_v_cand.append(v) - moddd = True - - v_cand = new_v_cand - - out_c = [] - for v in v_cand: - disp = None - # patch value in modrm - if f_imm in v: - size, disp = v[f_imm] - disp = int(disp) - - v[f_imm] = size - vo = v - # We only need sort for determinism - v = tuple(sorted(viewitems(v), key=str)) - admode = 64 
if p.mode == 64 else admode - if not v in modrm2byte[admode]: - continue - xx = modrm2byte[admode][v] - - # default case - for x in xx: - if type(x) == tuple: - modrm, sib = x - else: - modrm = x - sib = None - - # 16 bit cannot have sib - if sib is not None and admode == 16: - continue - rex = modrm >> 8 # 0# XXX HACK REM temporary REX modrm>>8 - if rex and admode != 64: - continue - - p.rex_x.value = (rex >> 1) & 1 - p.rex_b.value = rex & 1 - - if o_rex_x is not None and p.rex_x.value != o_rex_x: - continue - if o_rex_b is not None and p.rex_b.value != o_rex_b: - continue - - mod, re, rm = getmodrm(modrm) - # check re on parent - if re != p.reg.value: - continue - - if sib is not None: - s_scale, s_index, s_base = getmodrm(sib) - else: - s_scale, s_index, s_base = None, None, None - - p.mod.value = mod - p.rm.value = rm - p.sib_scale.value = s_scale - p.sib_index.value = s_index - p.sib_base.value = s_base - p.disp.value = disp - if disp is not None: - p.disp.l = f_imm2size[vo[f_imm]] - - yield True - - return - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - admode = p.v_admode() - mode = self.expr.size - v_cand, segm, ok = expr2modrm(self.expr, p, 1) - if segm: - p.g2.value = segm2enc[segm] - for x in self.gen_cand(v_cand, admode): - yield x - -class x86_rm_mem(x86_rm_arg): - def fromstring(self, text, loc_db, parser_result=None): - self.expr = None - start, stop = super(x86_rm_mem, self).fromstring(text, loc_db, parser_result) - if not isinstance(self.expr, ExprMem): - return None, None - return start, stop - - -class x86_rm_mem_far(x86_rm_arg): - parser = mem_far - def fromstring(self, text, loc_db, parser_result=None): - self.expr = None - start, stop = super(x86_rm_mem_far, self).fromstring(text, loc_db, parser_result) - if not isinstance(self.expr, ExprMem): - return None, None - self.expr = ExprOp('far', self.expr) - return start, stop - - def decode(self, v): - ret = super(x86_rm_mem_far, self).decode(v) - if not 
ret: - return ret - if isinstance(self.expr, m2_expr.ExprMem): - self.expr = ExprOp('far', self.expr) - return True - - def encode(self): - if not (isinstance(self.expr, m2_expr.ExprOp) and - self.expr.op == 'far'): - return - - expr = self.expr.args[0] - if isinstance(expr, ExprInt): - return - p = self.parent - admode = p.v_admode() - mode = expr.size - v_cand, segm, ok = expr2modrm(expr, p, 1) - if segm: - p.g2.value = segm2enc[segm] - for x in self.gen_cand(v_cand, admode): - yield x - -class x86_rm_w8(x86_rm_arg): - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - self.expr = modrm2expr(xx, p, p.w8.value) - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - if p.w8.value is None: - if self.expr.size == 8: - p.w8.value = 0 - else: - p.w8.value = 1 - - v_cand, segm, ok = expr2modrm(self.expr, p, p.w8.value) - if segm: - p.g2.value = segm2enc[segm] - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_sx(x86_rm_arg): - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - self.expr = modrm2expr(xx, p, p.w8.value, 1) - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - if p.w8.value is None: - if self.expr.size == 8: - p.w8.value = 0 - else: - p.w8.value = 1 - v_cand, segm, ok = expr2modrm(self.expr, p, p.w8.value, 1) - if segm: - p.g2.value = segm2enc[segm] - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_sxd(x86_rm_arg): - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - self.expr = modrm2expr(xx, p, 1, 2) - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - v_cand, segm, ok = expr2modrm(self.expr, p, 1, 2) - if segm: - p.g2.value = segm2enc[segm] - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_sd(x86_rm_arg): - out_size = 64 - def 
get_s_value(self): - return self.parent.sd.value - def set_s_value(self, value): - self.parent.sd.value = value - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - expr = modrm2expr(xx, p, 1) - if not isinstance(expr, ExprMem): - return False - if self.get_s_value() == 0: - expr = ExprMem(expr.ptr, 32) - else: - expr = ExprMem(expr.ptr, self.out_size) - self.expr = expr - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - if not self.expr.size in [32, 64]: - return - self.set_s_value(0) - v_cand, segm, ok = expr2modrm(self.expr, p, 1) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_wd(x86_rm_sd): - out_size = 16 - def get_s_value(self): - return self.parent.wd.value - def set_s_value(self, value): - self.parent.wd.value = value - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - p.wd.value = 0 - v_cand, segm, ok = expr2modrm(self.expr, p, 1) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_08(x86_rm_arg): - msize = 8 - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - expr = modrm2expr(xx, p, 0) - if not isinstance(expr, ExprMem): - self.expr = expr - return True - self.expr = ExprMem(expr.ptr, self.msize) - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - v_cand, segm, ok = expr2modrm(self.expr, p, 0, 0, 0, 0) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - -class x86_rm_reg_m08(x86_rm_arg): - msize = 8 - - def decode(self, v): - ret = x86_rm_arg.decode(self, v) - if not ret: - return ret - if not isinstance(self.expr, ExprMem): - return True - self.expr = ExprMem(self.expr.ptr, self.msize) - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - if isinstance(self.expr, ExprMem): - expr = ExprMem(self.expr.ptr, 32) - else: - expr = 
self.expr - v_cand, segm, ok = expr2modrm(expr, p, 1, 0, 0, 0) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - -class x86_rm_reg_m16(x86_rm_reg_m08): - msize = 16 - -class x86_rm_m64(x86_rm_arg): - msize = 64 - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - expr = modrm2expr(xx, p, 1) - if not isinstance(expr, ExprMem): - return False - self.expr = ExprMem(expr.ptr, self.msize) - return self.expr is not None - - def encode(self): - if isinstance(self.expr, ExprInt): - return - p = self.parent - v_cand, segm, ok = expr2modrm(self.expr, p, 0, 0, 0, 1) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_m80(x86_rm_m64): - msize = 80 - - def encode(self): - if isinstance(self.expr, ExprInt): - return - if not isinstance(self.expr, ExprMem) or self.expr.size != self.msize: - return - p = self.parent - mode = p.mode - if mode == 64: - mode = 32 - self.expr = ExprMem(self.expr.ptr, mode) - v_cand, segm, ok = expr2modrm(self.expr, p, 1) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_m08(x86_rm_arg): - msize = 8 - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - self.expr = modrm2expr(xx, p, 0) - return self.expr is not None - - def encode(self): - if self.expr.size != 8: - return - p = self.parent - mode = p.mode - v_cand, segm, ok = expr2modrm(self.expr, p, 0) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_m16(x86_rm_m80): - msize = 16 - - -class x86_rm_mm(x86_rm_m80): - msize = 64 - is_mm = True - is_xmm = False - is_bnd = False - - def decode(self, v): - p = self.parent - xx = self.get_modrm() - expr = modrm2expr(xx, p, 0, 0, self.is_xmm, self.is_mm, self.is_bnd) - if isinstance(expr, ExprMem): - if self.msize is None: - return False - if expr.size != self.msize: - expr = ExprMem(expr.ptr, self.msize) - self.expr = expr - return True - - - def encode(self): - expr = self.expr - if isinstance(expr, ExprInt): - return - if isinstance(expr, 
ExprMem) and expr.size != self.msize: - return - p = self.parent - mode = p.mode - if mode == 64: - mode = 32 - if isinstance(expr, ExprMem): - if self.is_xmm: - expr = ExprMem(expr.ptr, 128) - elif self.is_mm: - expr = ExprMem(expr.ptr, 64) - - v_cand, segm, ok = expr2modrm(expr, p, 0, 0, self.is_xmm, self.is_mm, - self.is_bnd) - for x in self.gen_cand(v_cand, p.v_admode()): - yield x - - -class x86_rm_mm_m64(x86_rm_mm): - msize = 64 - is_mm = True - is_xmm = False - -class x86_rm_xmm(x86_rm_mm): - msize = 128 - is_mm = False - is_xmm = True - - -class x86_rm_xmm_m32(x86_rm_mm): - msize = 32 - is_mm = False - is_xmm = True - -class x86_rm_xmm_m64(x86_rm_mm): - msize = 64 - is_mm = False - is_xmm = True - -class x86_rm_xmm_m128(x86_rm_mm): - msize = 128 - is_mm = False - is_xmm = True - - -class x86_rm_xmm_reg(x86_rm_mm): - msize = None - is_mm = False - is_xmm = True - -class x86_rm_mm_reg(x86_rm_mm): - msize = None - is_mm = True - is_xmm = False - - -class x86_rm_bnd(x86_rm_mm): - msize = 128 - is_mm = False - is_xmm = False - is_bnd = True - - -class x86_rm_bnd_reg(x86_rm_mm): - msize = None - is_mm = False - is_xmm = False - is_bnd = True - - -class x86_rm_bnd_m64(x86_rm_mm): - msize = 64 - is_mm = False - is_xmm = False - is_bnd = True - - -class x86_rm_bnd_m128(x86_rm_mm): - msize = 128 - is_mm = False - is_xmm = False - is_bnd = True - - -class x86_rm_reg_noarg(object): - prio = default_prio + 1 - - parser = gpreg - - def fromstring(self, text, loc_db, parser_result=None): - if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"): - self.parent.w8.value = 1 - if parser_result: - result, start, stop = parser_result[self.parser] - if result == [None]: - return None, None - self.expr = result - if self.expr.size == 8: - if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): - return None, None - self.parent.w8.value = 0 - return start, stop - try: - result, start, stop = next(self.parser.scanString(text)) - except StopIteration: - return 
None, None - expr = self.asm_ast_to_expr(result[0], loc_db) - if expr is None: - return None, None - - self.expr = expr - if self.expr.size == 0: - if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): - return None, None - self.parent.w8.value = 0 - - return start, stop - - def getrexsize(self): - return self.parent.rex_r.value - - def setrexsize(self, v): - self.parent.rex_r.value = v - - def decode(self, v): - v = v & self.lmask - p = self.parent - opmode = p.v_opmode() - if not hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0): - opmode = 8 - r = size2gpregs[opmode] - if p.mode == 64 and self.getrexsize(): - v |= 0x8 - if p.v_opmode() == 64 or p.rex_p.value == 1: - if not hasattr(p, 'sx') and (hasattr(p, 'w8') and p.w8.value == 0): - r = gpregs08_64 - elif p.rex_r.value == 1: - v |= 8 - self.expr = r.expr[v] - return True - - def encode(self): - if not isinstance(self.expr, ExprId): - return False - if self.expr in gpregs64.expr and not hasattr(self.parent, 'stk'): - self.parent.rex_w.value = 1 - opmode = self.parent.v_opmode() - if not hasattr(self.parent, 'sx') and hasattr(self.parent, 'w8'): - self.parent.w8.value = 1 - if self.expr.size == 8: - if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): - return False - self.parent.w8.value = 0 - opmode = 8 - r = size2gpregs[opmode] - if self.expr in r.expr: - i = r.expr.index(self.expr) - elif (opmode == 8 and self.parent.mode == 64 and - self.expr in gpregs08_64.expr): - i = gpregs08_64.expr.index(self.expr) - self.parent.rex_p.value = 1 - else: - log.debug("cannot encode reg %r", self.expr) - return False - if self.parent.v_opmode() == 64: - if i > 7: - self.setrexsize(1) - i -= 8 - elif self.parent.mode == 64 and i > 7: - i -= 8 - self.setrexsize(1) - self.value = i - if self.value > self.lmask: - log.debug("cannot encode field value %x %x", - self.value, self.lmask) - return False - return True - - -class x86_rm_reg_mm(x86_rm_reg_noarg, x86_arg): - selreg = gpregs_mm - def 
decode(self, v): - if self.parent.mode == 64 and self.getrexsize(): - v |= 0x8 - self.expr = self.selreg.expr[v] - return True - - def encode(self): - if not isinstance(self.expr, ExprId): - return False - if self.expr not in self.selreg.expr: - return False - i = self.selreg.expr.index(self.expr) - if self.parent.mode == 64 and i > 7: - i -= 8 - self.setrexsize(1) - self.value = i - if self.value > self.lmask: - log.debug("cannot encode field value %x %x", - self.value, self.lmask) - return False - return True - -class x86_rm_reg_xmm(x86_rm_reg_mm): - selreg = gpregs_xmm - -class x86_rm_reg_bnd(x86_rm_reg_mm): - selreg = gpregs_bnd - -class x86_rm_reg(x86_rm_reg_noarg, x86_arg): - pass - - -class x86_reg(x86_rm_reg): - - def getrexsize(self): - return self.parent.rex_b.value - - def setrexsize(self, v): - self.parent.rex_b.value = v - - -class x86_reg_modrm(x86_rm_reg): - - def getrexsize(self): - return self.parent.rex_r.value - - def setrexsize(self, v): - self.parent.rex_r.value = v - - - -class x86_reg_noarg(x86_rm_reg_noarg): - - def getrexsize(self): - return self.parent.rex_b.value - - def setrexsize(self, v): - self.parent.rex_b.value = v - - -class x86_rm_segm(reg_noarg, x86_arg): - prio = default_prio + 1 - reg_info = segmreg - parser = reg_info.parser - - -class x86_rm_cr(reg_noarg, x86_arg): - prio = default_prio + 1 - reg_info = crregs - parser = reg_info.parser - - -class x86_rm_dr(reg_noarg, x86_arg): - prio = default_prio + 1 - reg_info = drregs - parser = reg_info.parser - - -class x86_rm_flt(reg_noarg, x86_arg): - prio = default_prio + 1 - reg_info = fltregs - parser = reg_info.parser - - -class bs_fbit(bsi): - - def decode(self, v): - # value already decoded in pre_dis_info - return True - - -class bs_cl1(bsi, x86_arg): - parser = cl_or_imm - - def decode(self, v): - if v == 1: - self.expr = regs08_expr[1] - else: - self.expr = ExprInt(1, 8) - return True - - def encode(self): - if self.expr == regs08_expr[1]: - self.value = 1 - elif 
isinstance(self.expr, ExprInt) and int(self.expr) == 1: - self.value = 0 - else: - return False - return True - - -def sib_cond(cls, mode, v): - if admode_prefix((mode, v["opmode"], v["admode"])) == 16: - return None - if v['mod'] == 0b11: - return None - elif v['rm'] == 0b100: - return cls.ll - else: - return None - return v['rm'] == 0b100 - - -class bs_cond_scale(bs_cond): - # cond must return field len - ll = 2 - - @classmethod - def flen(cls, mode, v): - return sib_cond(cls, mode, v) - - def encode(self): - if self.value is None: - self.value = 0 - self.l = 0 - return True - return super(bs_cond_scale, self).encode() - - def decode(self, v): - self.value = v - return True - - -class bs_cond_index(bs_cond_scale): - ll = 3 - - @classmethod - def flen(cls, mode, v): - return sib_cond(cls, mode, v) - - -class bs_cond_disp(bs_cond): - # cond must return field len - - @classmethod - def flen(cls, mode, v): - if admode_prefix((mode, v['opmode'], v['admode'])) == 16: - if v['mod'] == 0b00: - if v['rm'] == 0b110: - return 16 - else: - return None - elif v['mod'] == 0b01: - return 8 - elif v['mod'] == 0b10: - return 16 - return None - # 32, 64 - if 'sib_base' in v and v['sib_base'] == 0b101: - if v['mod'] == 0b00: - return 32 - elif v['mod'] == 0b01: - return 8 - elif v['mod'] == 0b10: - return 32 - else: - return None - - if v['mod'] == 0b00: - if v['rm'] == 0b101: - return 32 - else: - return None - elif v['mod'] == 0b01: - return 8 - elif v['mod'] == 0b10: - return 32 - else: - return None - - def encode(self): - if self.value is None: - self.value = 0 - self.l = 0 - return True - self.value = swap_uint(self.l, self.value) - return True - - def decode(self, v): - admode = self.parent.v_admode() - v = swap_uint(self.l, v) - self.value = v - v = sign_ext(v, self.l, admode) - v = ExprInt(v, admode) - self.expr = v - return True - - -class bs_cond_imm(bs_cond_scale, x86_arg): - parser = base_expr - max_size = 32 - - def fromstring(self, text, loc_db, parser_result=None): 
- if parser_result: - expr, start, stop = parser_result[self.parser] - else: - try: - expr, start, stop = next(self.parser.scanString(text)) - except StopIteration: - expr = None - self.expr = expr - - if len(self.parent.args) > 1: - l = self.parent.args[0].expr.size - else: - l = self.parent.v_opmode() - if isinstance(self.expr, ExprInt): - v = int(self.expr) - mask = ((1 << l) - 1) - self.expr = ExprInt(v & mask, l) - - if self.expr is None: - log.debug('cannot fromstring int %r', text) - return None, None - return start, stop - - @classmethod - def flen(cls, mode, v): - if 'w8' not in v or v['w8'] == 1: - if 'se' in v and v['se'] == 1: - return 8 - else: - osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) - osize = min(osize, cls.max_size) - return osize - return 8 - - def getmaxlen(self): - return 32 - - def encode(self): - if not isinstance(self.expr, ExprInt): - return - arg0_expr = self.parent.args[0].expr - self.parent.rex_w.value = 0 - # special case for push - if len(self.parent.args) == 1: - v = int(self.expr) - l = self.parent.v_opmode() - l = min(l, self.max_size) - - self.l = l - mask = ((1 << self.l) - 1) - if v != sign_ext(v & mask, self.l, l): - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - return - - # assume 2 args; use first arg to guess op size - if arg0_expr.size == 64: - self.parent.rex_w.value = 1 - - l = self.parent.v_opmode() - v = int(self.expr) - if arg0_expr.size == 8: - if not hasattr(self.parent, 'w8'): - return - self.parent.w8.value = 0 - l = 8 - if hasattr(self.parent, 'se'): - self.parent.se.value = 0 - elif hasattr(self.parent, 'se'): - if hasattr(self.parent, 'w8'): - self.parent.w8.value = 1 - # try to generate signed extended version - if v == sign_ext(v & 0xFF, 8, arg0_expr.size): - self.parent.se.value = 1 - self.l = 8 - self.value = v & 0xFF - yield True - self.parent.se.value = 0 - else: - if hasattr(self.parent, 'w8'): - self.parent.w8.value = 1 - if l == 64: - self.l = 
self.getmaxlen() - else: - self.l = l - - mask = ((1 << self.l) - 1) - if v != sign_ext(v & mask, self.l, l): - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - - def decode(self, v): - opmode = self.parent.v_opmode() - v = swap_uint(self.l, v) - self.value = v - l_out = opmode - if hasattr(self.parent, 'w8') and self.parent.w8.value == 0: - l_out = 8 - v = sign_ext(v, self.l, l_out) - self.expr = ExprInt(v, l_out) - return True - - -class bs_cond_imm64(bs_cond_imm): - max_size = 64 - - def getmaxlen(self): - return 64 - - @classmethod - def flen(cls, mode, v): - if 'w8' not in v or v['w8'] == 1: - if 'se' in v and v['se'] == 1: - return 8 - else: - osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) - return osize - else: - return 8 - - -class bs_rel_off(bs_cond_imm): - parser = base_expr - - def fromstring(self, text, loc_db, parser_result=None): - if parser_result: - expr, start, stop = parser_result[self.parser] - else: - try: - expr, start, stop = next(self.parser.scanString(text)) - except StopIteration: - expr = None - self.expr = expr - l = self.parent.mode - if isinstance(self.expr, ExprInt): - v = int(self.expr) - mask = ((1 << l) - 1) - self.expr = ExprInt(v & mask, l) - return start, stop - - @classmethod - def flen(cls, mode, v): - osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) - if osize == 16: - return 16 - else: - return 32 - - def encode(self): - if not isinstance(self.expr, ExprInt): - return - arg0_expr = self.parent.args[0].expr - if self.l == 0: - l = self.parent.v_opmode() - self.l = l - l = offsize(self.parent) - prefix = self.parent.gen_prefix() - parent_len = len(prefix) * 8 + self.parent.l + self.l - assert(parent_len % 8 == 0) - - v = int(self.expr.arg) - parent_len // 8 - if prefix is None: - return - mask = ((1 << self.l) - 1) - if self.l > l: - return - if v != sign_ext(v & mask, self.l, l): - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - - def decode(self, 
v): - v = swap_uint(self.l, v) - size = offsize(self.parent) - v = sign_ext(v, self.l, size) - v += self.parent.l - self.expr = ExprInt(v, size) - return True - -class bs_s08(bs_rel_off): - parser = base_expr - - @classmethod - def flen(cls, mode, v): - return 8 - - def encode(self): - if not isinstance(self.expr, ExprInt): - return - arg0_expr = self.parent.args[0].expr - if self.l != 0: - l = self.l - else: - l = self.parent.v_opmode() - self.l = l - l = offsize(self.parent) - v = int(self.expr) - mask = ((1 << self.l) - 1) - if self.l > l: - return - if v != sign_ext(v & mask, self.l, l): - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - - def decode(self, v): - v = swap_uint(self.l, v) - size = offsize(self.parent) - v = sign_ext(v, self.l, size) - self.expr = ExprInt(v, size) - return True - - -class bs_rel_off08(bs_rel_off): - - @classmethod - def flen(cls, mode, v): - return 8 - - -class bs_moff(bsi): - - @classmethod - def flen(cls, mode, v): - osize = v_opmode_info(mode, v['opmode'], v['rex_w'], 0) - if osize == 16: - return 16 - else: - return 32 - - def encode(self): - if not hasattr(self.parent, "mseg"): - return - m = self.parent.mseg.expr - if not (isinstance(m, ExprOp) and m.op == 'segm'): - return - if not isinstance(m.args[1], ExprInt): - return - l = self.parent.v_opmode() - if l == 16: - self.l = 16 - else: - self.l = 32 - v = int(m.args[1]) - mask = ((1 << self.l) - 1) - if v != sign_ext(v & mask, self.l, l): - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - - def decode(self, v): - opmode = self.parent.v_opmode() - if opmode == 64: - return False - v = swap_uint(self.l, v) - self.value = v - v = sign_ext(v, self.l, opmode) - self.expr = ExprInt(v, opmode) - return True - - -class bs_movoff(x86_arg): - parser = deref_mem - - def fromstring(self, text, loc_db, parser_result=None): - if parser_result: - e, start, stop = parser_result[self.parser] - if e is None: - return None, None - 
if not isinstance(e, ExprMem): - return None, None - self.expr = e - if self.expr is None: - return None, None - return start, stop - try: - v, start, stop = next(self.parser.scanString(text)) - except StopIteration: - return None, None - if not isinstance(e, ExprMem): - return None, None - self.expr = v[0] - if self.expr is None: - log.debug('cannot fromstring int %r', text) - return None, None - return start, stop - - @classmethod - def flen(cls, mode, v): - if mode == 64: - if v['admode']: - return 32 - else: - return 64 - asize = v_admode_info(mode, v['admode']) - return asize - - def encode(self): - p = self.parent - if not isinstance(self.expr, ExprMem) or not isinstance(self.expr.ptr, ExprInt): - return - self.l = p.v_admode() - v = int(self.expr.ptr) - mask = ((1 << self.l) - 1) - if v != mask & v: - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - - def decode(self, v): - if self.parent.mode == 64: - if self.parent.admode == 1: - l = 32 - else: - l = 64 - else: - l = self.parent.v_admode() - v = swap_uint(self.l, v) - self.value = v - v = sign_ext(v, self.l, l) - v = ExprInt(v, l) - size = self.parent.v_opmode() - if self.parent.w8.value == 0: - size = 8 - self.expr = ExprMem(v, size) - return True - - -class bs_msegoff(x86_arg): - parser = deref_ptr - - def fromstring(self, text, loc_db, parser_result=None): - if parser_result: - e, start, stop = parser_result[self.parser] - if e is None: - return None, None - self.expr = e - if self.expr is None: - return None, None - return start, stop - try: - v, start, stop = next(self.parser.scanString(text)) - except StopIteration: - return None, None - self.expr = v[0] - if self.expr is None: - log.debug('cannot fromstring int %r', text) - return None, None - return start, stop - - def encode(self): - if not (isinstance(self.expr, ExprOp) and self.expr.op == 'segm'): - return - if not isinstance(self.expr.args[0], ExprInt): - return - if not isinstance(self.expr.args[1], ExprInt): - 
return - l = self.parent.v_opmode() - v = int(self.expr.args[0]) - mask = ((1 << self.l) - 1) - if v != sign_ext(v & mask, self.l, l): - return - self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - yield True - - def decode(self, v): - opmode = self.parent.v_opmode() - v = swap_uint(self.l, v) - self.value = v - v = ExprInt(v, 16) - self.expr = ExprOp('segm', v, self.parent.off.expr) - return True - - -d_rex_p = bs(l=0, cls=(bs_fbit,), fname="rex_p") -d_rex_w = bs(l=0, cls=(bs_fbit,), fname="rex_w") -d_rex_r = bs(l=0, cls=(bs_fbit,), fname="rex_r") -d_rex_x = bs(l=0, cls=(bs_fbit,), fname="rex_x") -d_rex_b = bs(l=0, cls=(bs_fbit,), fname="rex_b") - -d_g1 = bs(l=0, cls=(bs_fbit,), fname="g1") -d_g2 = bs(l=0, cls=(bs_fbit,), fname="g2") - - -d_cl1 = bs(l=1, cls=(bs_cl1,), fname="cl1") - - -w8 = bs(l=1, fname="w8") -se = bs(l=1, fname="se") - -sx = bs(l=0, fname="sx") -sxd = bs(l=0, fname="sx") - - -xmmreg = bs(l=0, fname="xmmreg") -mmreg = bs(l=0, fname="mmreg") - -pref_f2 = bs(l=0, fname="prefixed", default=b"\xf2") -pref_f3 = bs(l=0, fname="prefixed", default=b"\xf3") -pref_66 = bs(l=0, fname="prefixed", default=b"\x66") -no_xmm_pref = bs(l=0, fname="no_xmm_pref") - -no_rex = bs(l=0, fname="no_rex") - -sib_scale = bs(l=2, cls=(bs_cond_scale,), fname = "sib_scale") -sib_index = bs(l=3, cls=(bs_cond_index,), fname = "sib_index") -sib_base = bs(l=3, cls=(bs_cond_index,), fname = "sib_base") - -disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") - -s08 = bs(l=8, cls=(bs_s08, )) - -u08 = bs(l=8, cls=(x86_08, x86_arg)) -u07 = bs(l=7, cls=(x86_08, x86_arg)) -u16 = bs(l=16, cls=(x86_16, x86_arg)) -u32 = bs(l=32, cls=(x86_32, x86_arg)) -s3264 = bs(l=32, cls=(x86_s32to64, x86_arg)) - -u08_3 = bs(l=0, cls=(x86_imm_fix_08, x86_arg), ival = 3) - -d0 = bs("000", fname='reg') -d1 = bs("001", fname='reg') -d2 = bs("010", fname='reg') -d3 = bs("011", fname='reg') -d4 = bs("100", fname='reg') -d5 = bs("101", fname='reg') -d6 = bs("110", fname='reg') -d7 = bs("111", 
fname='reg') - -sd = bs(l=1, fname="sd") -wd = bs(l=1, fname="wd") - -stk = bs(l=0, fname="stk") - - -class field_size(object): - prio = default_prio - - def __init__(self, d=None): - if d is None: - d = {} - self.d = d - - def get(self, opm, adm=None): - return self.d[opm] - -class bs_mem(object): - def encode(self): - return self.value != 0b11 - - def decode(self, v): - self.value = v - return v != 0b11 - -d_imm64 = bs(l=0, fname="imm64") - -d_eax = bs(l=0, cls=(bs_eax, ), fname='eax') -d_edx = bs(l=0, cls=(bs_edx, ), fname='edx') -d_st = bs(l=0, cls=(x86_reg_st, ), fname='st') -d_imm = bs(l=0, cls=(bs_cond_imm,), fname="imm") -d_imm64 = bs(l=0, cls=(bs_cond_imm64,), fname="imm") -d_ax = bs(l=0, cls=(r_ax, ), fname='ax') -d_dx = bs(l=0, cls=(r_dx, ), fname='dx') -d_cl = bs(l=0, cls=(r_cl, ), fname='cl') - -d_cs = bs(l=0, cls=(bs_cs, ), fname='cs') -d_ds = bs(l=0, cls=(bs_ds, ), fname='ds') -d_es = bs(l=0, cls=(bs_es, ), fname='es') -d_ss = bs(l=0, cls=(bs_ss, ), fname='ss') -d_fs = bs(l=0, cls=(bs_fs, ), fname='fs') -d_gs = bs(l=0, cls=(bs_gs, ), fname='gs') - -# Offset must be decoded in last position to have final instruction len -rel_off = bs(l=0, cls=(bs_rel_off,), fname="off", order=-1) -# Offset must be decoded in last position to have final instruction len -rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off", order=-1) -moff = bs(l=0, cls=(bs_moff,), fname="off") -msegoff = bs(l=16, cls=(bs_msegoff,), fname="mseg") -movoff = bs(l=0, cls=(bs_movoff,), fname="off") -mod = bs(l=2, fname="mod") -mod_mem = bs(l=2, cls=(bs_mem,), fname="mod") - -rmreg = bs(l=3, cls=(x86_rm_reg, ), order =1, fname = "reg") -reg = bs(l=3, cls=(x86_reg, ), order =1, fname = "reg") - -reg_modrm = bs(l=3, cls=(x86_reg_modrm, ), order =1, fname = "reg") - - -regnoarg = bs(l=3, default_val="000", order=1, fname="reg") -segm = bs(l=3, cls=(x86_rm_segm, ), order =1, fname = "reg") -crreg = bs(l=3, cls=(x86_rm_cr, ), order =1, fname = "reg") -drreg = bs(l=3, cls=(x86_rm_dr, ), order =1, 
fname = "reg") - - -mm_reg = bs(l=3, cls=(x86_rm_reg_mm, ), order =1, fname = "reg") -xmm_reg = bs(l=3, cls=(x86_rm_reg_xmm, ), order =1, fname = "reg") -bnd_reg = bs(l=3, cls=(x86_rm_reg_bnd, ), order =1, fname = "reg") - - -fltreg = bs(l=3, cls=(x86_rm_flt, ), order =1, fname = "reg") - -rm = bs(l=3, fname="rm") - -rm_arg = bs(l=0, cls=(x86_rm_arg,), fname='rmarg') -rm_arg_w8 = bs(l=0, cls=(x86_rm_w8,), fname='rmarg') -rm_arg_sx = bs(l=0, cls=(x86_rm_sx,), fname='rmarg') -rm_arg_sxd = bs(l=0, cls=(x86_rm_sxd,), fname='rmarg') -rm_arg_sd = bs(l=0, cls=(x86_rm_sd,), fname='rmarg') -rm_arg_wd = bs(l=0, cls=(x86_rm_wd,), fname='rmarg') -rm_arg_08 = bs(l=0, cls=(x86_rm_08,), fname='rmarg') -rm_arg_reg_m08 = bs(l=0, cls=(x86_rm_reg_m08,), fname='rmarg') -rm_arg_reg_m16 = bs(l=0, cls=(x86_rm_reg_m16,), fname='rmarg') -rm_arg_m08 = bs(l=0, cls=(x86_rm_m08,), fname='rmarg') -rm_arg_m64 = bs(l=0, cls=(x86_rm_m64,), fname='rmarg') -rm_arg_m80 = bs(l=0, cls=(x86_rm_m80,), fname='rmarg') -rm_arg_m16 = bs(l=0, cls=(x86_rm_m16,), fname='rmarg') - -rm_mem = bs(l=0, cls=(x86_rm_mem,), fname='rmarg') -rm_mem_far = bs(l=0, cls=(x86_rm_mem_far,), fname='rmarg') - -rm_arg_mm = bs(l=0, cls=(x86_rm_mm,), fname='rmarg') -rm_arg_mm_m64 = bs(l=0, cls=(x86_rm_mm_m64,), fname='rmarg') -rm_arg_mm_reg = bs(l=0, cls=(x86_rm_mm_reg,), fname='rmarg') - -rm_arg_xmm = bs(l=0, cls=(x86_rm_xmm,), fname='rmarg') -rm_arg_xmm_m32 = bs(l=0, cls=(x86_rm_xmm_m32,), fname='rmarg') -rm_arg_xmm_m64 = bs(l=0, cls=(x86_rm_xmm_m64,), fname='rmarg') -rm_arg_xmm_m128 = bs(l=0, cls=(x86_rm_xmm_m128,), fname='rmarg') -rm_arg_xmm_reg = bs(l=0, cls=(x86_rm_xmm_reg,), fname='rmarg') - -rm_arg_bnd = bs(l=0, cls=(x86_rm_bnd,), fname='rmarg') -rm_arg_bnd_m64 = bs(l=0, cls=(x86_rm_bnd_m64,), fname='rmarg') -rm_arg_bnd_m128 = bs(l=0, cls=(x86_rm_bnd_m128,), fname='rmarg') -rm_arg_bnd_reg = bs(l=0, cls=(x86_rm_bnd_reg,), fname='rmarg') - - -swapargs = bs_swapargs(l=1, fname="swap", mn_mod=list(range(1 << 1))) - - -class 
bs_op_mode(bsi): - - def decode(self, v): - opmode = self.parent.v_opmode() - return opmode == self.mode - - -class bs_ad_mode(bsi): - - def decode(self, v): - admode = self.parent.v_admode() - return admode == self.mode - - -class bs_op_mode_no64(bsi): - - def encode(self): - if self.parent.mode == 64: - return False - return super(bs_op_mode_no64, self).encode() - - def decode(self, v): - if self.parent.mode == 64: - return False - opmode = self.parent.v_opmode() - return opmode == self.mode - - -class bs_op_mode64(bsi): - def encode(self): - if self.parent.mode != 64: - return False - return super(bs_op_mode64, self).encode() - - def decode(self, v): - if self.parent.mode != 64: - return False - return True - -class bs_op_modeno64(bsi): - def encode(self): - if self.parent.mode == 64: - return False - return super(bs_op_modeno64, self).encode() - - def decode(self, v): - if self.parent.mode == 64: - return False - return True - - - -bs_opmode16 = bs(l=0, cls=(bs_op_mode,), mode = 16, fname="fopmode") -bs_opmode32 = bs(l=0, cls=(bs_op_mode,), mode = 32, fname="fopmode") -bs_opmode64 = bs(l=0, cls=(bs_op_mode,), mode = 64, fname="fopmode") - - -bs_admode16 = bs(l=0, cls=(bs_ad_mode,), mode = 16, fname="fadmode") -bs_admode32 = bs(l=0, cls=(bs_ad_mode,), mode = 32, fname="fadmode") -bs_admode64 = bs(l=0, cls=(bs_ad_mode,), mode = 64, fname="fadmode") - -bs_opmode16_no64 = bs(l=0, cls=(bs_op_mode_no64,), mode = 16, fname="fopmode") -bs_opmode32_no64 = bs(l=0, cls=(bs_op_mode_no64,), mode = 32, fname="fopmode") - -bs_mode64 = bs(l=0, cls=(bs_op_mode64,)) -bs_modeno64 = bs(l=0, cls=(bs_op_modeno64,)) - - -cond_list = ["O", "NO", "B", "AE", - "Z", "NZ", "BE", "A", - "S", "NS", "PE", "NP", - #"L", "NL", "NG", "G"] - "L", "GE", "LE", "G"] -cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) - - -def rmmod(r, rm_arg_x=rm_arg, modrm=mod): - return [modrm, r, rm, sib_scale, sib_index, sib_base, disp, rm_arg_x] - -# -# mode | reg | rm # -# - -# -# scale | index | base # 
-# - -# -# Prefix | REX prefix | Opcode | mod/rm | sib | displacement | immediate # -# - - -def addop(name, fields, args=None, alias=False): - dct = {"fields": fields} - dct["alias"] = alias - if args is not None: - dct['args'] = args - type(name, (mn_x86,), dct) -""" -class ia32_aaa(mn_x86): - fields = [bs8(0x37)] -""" -addop("aaa", [bs8(0x37)]) -addop("aas", [bs8(0x3F)]) -addop("aad", [bs8(0xd5), u08]) -addop("aam", [bs8(0xd4), u08]) - -addop("adc", [bs("0001010"), w8, d_eax, d_imm]) -addop("adc", [bs("100000"), se, w8] + rmmod(d2, rm_arg_w8) + [d_imm]) -addop("adc", [bs("000100"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - -addop("add", [bs("0000010"), w8, d_eax, d_imm]) -addop("add", [bs("100000"), se, w8] + rmmod(d0, rm_arg_w8) + [d_imm]) -addop("add", [bs("000000"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - -addop("and", [bs("0010010"), w8, d_eax, d_imm]) -addop("and", [bs("100000"), se, w8] + rmmod(d4, rm_arg_w8) + [d_imm]) -addop("and", [bs("001000"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - -addop("bndmov", [bs8(0x0f), bs8(0x1a), pref_66, bs_modeno64] + - rmmod(bnd_reg, rm_arg_bnd_m64), [bnd_reg, rm_arg_bnd_m64]) -addop("bndmov", [bs8(0x0f), bs8(0x1a), pref_66, bs_mode64] + - rmmod(bnd_reg, rm_arg_bnd_m128), [bnd_reg, rm_arg_bnd_m128]) -addop("bndmov", [bs8(0x0f), bs8(0x1b), pref_66, bs_modeno64] + - rmmod(bnd_reg, rm_arg_bnd_m64), [rm_arg_bnd_m64, bnd_reg]) -addop("bndmov", [bs8(0x0f), bs8(0x1b), pref_66, bs_mode64] + - rmmod(bnd_reg, rm_arg_bnd_m128), [rm_arg_bnd_m128, bnd_reg]) - - - -addop("bsf", [bs8(0x0f), bs8(0xbc)] + rmmod(rmreg)) -addop("bsr", [bs8(0x0f), bs8(0xbd), mod, - rmreg, rm, sib_scale, sib_index, sib_base, disp, rm_arg]) - -addop("bswap", [bs8(0x0f), bs('11001'), reg]) - -addop("bt", [bs8(0x0f), bs8(0xa3)] + rmmod(rmreg), [rm_arg, rmreg]) -addop("bt", [bs8(0x0f), bs8(0xba)] + rmmod(d4) + [u08]) -addop("btc", [bs8(0x0f), bs8(0xbb)] + rmmod(rmreg), [rm_arg, rmreg]) 
-addop("btc", [bs8(0x0f), bs8(0xba)] + rmmod(d7) + [u08]) - - -addop("btr", [bs8(0x0f), bs8(0xb3)] + rmmod(rmreg), [rm_arg, rmreg]) -addop("btr", [bs8(0x0f), bs8(0xba)] + rmmod(d6) + [u08]) -addop("bts", [bs8(0x0f), bs8(0xab)] + rmmod(rmreg), [rm_arg, rmreg]) -addop("bts", [bs8(0x0f), bs8(0xba)] + rmmod(d5) + [u08]) - -addop("call", [bs8(0xe8), rel_off]) -addop("call", [bs8(0xff), stk] + rmmod(d2)) -addop("call", [bs8(0xff), stk] + rmmod(d3, rm_arg_x=rm_mem_far, modrm=mod_mem)) -addop("call", [bs8(0x9a), bs_modeno64, moff, msegoff]) - - -addop("cbw", [bs8(0x98), bs_opmode16]) -addop("cwde", [bs8(0x98), bs_opmode32]) -addop("cdqe", [bs8(0x98), bs_opmode64]) - -addop("clc", [bs8(0xf8)]) -addop("cld", [bs8(0xfc)]) -addop("cli", [bs8(0xfa)]) -addop("clts", [bs8(0x0f), bs8(0x06)]) -addop("cmc", [bs8(0xf5)]) - -addop("cmov", [bs8(0x0f), bs('0100'), cond] + rmmod(rmreg)) - -addop("cmp", [bs("0011110"), w8, d_eax, d_imm]) -addop("cmp", [bs("100000"), se, w8] + rmmod(d7, rm_arg_w8) + [d_imm]) -addop("cmp", [bs("001110"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - - -addop("cmpsb", [bs8(0xa6)]) -addop("cmpsw", [bs8(0xa7), bs_opmode16]) -addop("cmpsd", [bs8(0xa7), bs_opmode32]) -addop("cmpsq", [bs8(0xa7), bs_opmode64]) - -addop("cmpxchg", [bs8(0x0f), bs('1011000'), w8] - + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) -addop("cmpxchg8b", [bs8(0x0f), bs8(0xc7), bs_opmode16] + rmmod(d1, rm_arg_m64)) -addop("cmpxchg8b", [bs8(0x0f), bs8(0xc7), bs_opmode32] + rmmod(d1, rm_arg_m64)) -addop("cmpxchg16b", [bs8(0x0f), bs8(0xc7), bs_opmode64] + rmmod(d1, rm_arg_xmm_m128)) - -# XXX TODO CMPXCHG8/16 - -addop("comiss", [bs8(0x0f), bs8(0x2f), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm_m32), [xmm_reg, rm_arg_xmm_m32]) -addop("comisd", [bs8(0x0f), bs8(0x2f), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64]) - -addop("cpuid", [bs8(0x0f), bs8(0xa2)]) - -addop("cwd", [bs8(0x99), bs_opmode16]) -addop("cdq", [bs8(0x99), bs_opmode32]) -addop("cqo", 
[bs8(0x99), bs_opmode64]) - - -addop("daa", [bs8(0x27)]) -addop("das", [bs8(0x2f)]) -addop("dec", [bs('1111111'), w8] + rmmod(d1, rm_arg_w8)) -addop("dec", [bs('01001'), reg, bs_modeno64]) -addop("div", [bs('1111011'), w8] + rmmod(d6, rm_arg_w8)) -addop("enter", [bs8(0xc8), u16, u08]) - -# float ##### -addop("fwait", [bs8(0x9b)]) - -addop("f2xm1", [bs8(0xd9), bs8(0xf0)]) -addop("fabs", [bs8(0xd9), bs8(0xe1)]) - -addop("fadd", [bs("11011"), sd, bs("00")] + rmmod(d0, rm_arg_sd)) -addop("fadd", [bs("11011"), swapargs, bs("00"), - bs("11000"), d_st, fltreg], [d_st, fltreg]) -addop("faddp", [bs8(0xde), bs("11000"), fltreg, d_st]) -addop("fiadd", [bs("11011"), wd, bs("10")] + rmmod(d0, rm_arg_wd)) - -addop("fbld", [bs8(0xdf)] + rmmod(d4, rm_arg_m80)) -addop("fbldp", [bs8(0xdf)] + rmmod(d6, rm_arg_m80)) -addop("fchs", [bs8(0xd9), bs8(0xe0)]) -# addop("fclex", [bs8(0x9b), bs8(0xdb), bs8(0xe2)]) -addop("fnclex", [bs8(0xdb), bs8(0xe2)]) - -addop("fcmovb", [bs8(0xda), bs("11000"), d_st, fltreg]) -addop("fcmove", [bs8(0xda), bs("11001"), d_st, fltreg]) -addop("fcmovbe", [bs8(0xda), bs("11010"), d_st, fltreg]) -addop("fcmovu", [bs8(0xda), bs("11011"), d_st, fltreg]) -addop("fcmovnb", [bs8(0xdb), bs("11000"), d_st, fltreg]) -addop("fcmovne", [bs8(0xdb), bs("11001"), d_st, fltreg]) -addop("fcmovnbe", [bs8(0xdb), bs("11010"), d_st, fltreg]) -addop("fcmovnu", [bs8(0xdb), bs("11011"), d_st, fltreg]) - -addop("fcom", [bs("11011"), sd, bs("00")] + rmmod(d2, rm_arg_sd)) -addop("fcom", [bs("11011"), swapargs, bs("00"), - bs("11010"), d_st, fltreg], [d_st, fltreg]) -addop("fcomp", [bs("11011"), sd, bs("00")] + rmmod(d3, rm_arg_sd)) -addop("fcomp", - [bs("11011"), swapargs, bs("00"), bs("11011"), - d_st, fltreg], [d_st, fltreg]) -addop("fcompp", [bs8(0xde), bs8(0xd9)]) - -addop("fcomi", [bs8(0xdb), bs("11110"), d_st, fltreg]) -addop("fcomip", [bs8(0xdf), bs("11110"), d_st, fltreg]) -addop("fucomi", [bs8(0xdb), bs("11101"), d_st, fltreg]) -addop("fucomip", [bs8(0xdf), bs("11101"), d_st, 
fltreg]) - -addop("fcos", [bs8(0xd9), bs8(0xff)]) -addop("fdecstp", [bs8(0xd9), bs8(0xf6)]) - - -addop("fdiv", [bs("11011"), sd, bs("00")] + rmmod(d6, rm_arg_sd)) -addop("fdiv", [bs8(0xd8), bs("11110"), d_st, fltreg]) -addop("fdiv", [bs8(0xdc), bs("11111"), fltreg, d_st]) -addop("fdivp", [bs8(0xde), bs("11111"), fltreg, d_st]) -addop("fidiv", [bs("11011"), wd, bs("10")] + rmmod(d6, rm_arg_wd)) - -addop("fdivr", [bs("11011"), sd, bs("00")] + rmmod(d7, rm_arg_sd)) -addop("fdivr", [bs8(0xd8), bs("11111"), d_st, fltreg]) -addop("fdivr", [bs8(0xdc), bs("11110"), fltreg, d_st]) -addop("fdivrp", [bs8(0xde), bs("11110"), fltreg, d_st]) -addop("fidivr", [bs("11011"), wd, bs("10")] + rmmod(d7, rm_arg_wd)) - -addop("ffree", [bs8(0xdd), bs("11000"), fltreg]) -addop("ficom", [bs("11011"), wd, bs("10")] + rmmod(d2, rm_arg_wd)) -addop("ficomp", [bs("11011"), wd, bs("10")] + rmmod(d3, rm_arg_wd)) -addop("fild", [bs("11011"), wd, bs("11")] + rmmod(d0, rm_arg_wd)) -addop("fild", [bs8(0xdf)] + rmmod(d5, rm_arg_m64)) - -addop("fincstp", [bs8(0xd9), bs8(0xf7)]) - -# addop("finit", [bs8(0x9b), bs8(0xdb), bs8(0xe3)]) -addop("fninit", [bs8(0xdb), bs8(0xe3)]) - -addop("fist", [bs("11011"), wd, bs("11")] + rmmod(d2, rm_arg_wd)) -addop("fistp", [bs("11011"), wd, bs("11")] + rmmod(d3, rm_arg_wd)) -addop("fistp", [bs8(0xdf)] + rmmod(d7, rm_arg_m64)) - -addop("fisttp", [bs("11011"), wd, bs("11")] + rmmod(d1, rm_arg_wd)) -addop("fisttp", [bs8(0xdd)] + rmmod(d1, rm_arg_m64)) - -addop("fld", [bs("11011"), sd, bs("01")] + rmmod(d0, rm_arg_sd)) -addop("fld", [bs8(0xdb)] + rmmod(d5, rm_arg_m80)) -addop("fld", [bs8(0xd9), bs("11000"), fltreg]) - -addop("fld1", [bs8(0xd9), bs8(0xe8)]) -addop("fldl2t", [bs8(0xd9), bs8(0xe9)]) -addop("fldl2e", [bs8(0xd9), bs8(0xea)]) -addop("fldpi", [bs8(0xd9), bs8(0xeb)]) -addop("fldlg2", [bs8(0xd9), bs8(0xec)]) -addop("fldln2", [bs8(0xd9), bs8(0xed)]) -addop("fldz", [bs8(0xd9), bs8(0xee)]) - -addop("fldcw", [bs8(0xd9)] + rmmod(d5, rm_arg_m16)) -addop("fldenv", 
[bs8(0xd9)] + rmmod(d4, rm_arg_m80)) # XXX TODO: m14? - -addop("fmul", [bs("11011"), sd, bs("00")] + rmmod(d1, rm_arg_sd)) -addop("fmul", [bs("11011"), swapargs, bs("00"), - bs("11001"), d_st, fltreg], [d_st, fltreg]) -addop("fmulp", [bs8(0xde), bs("11001"), fltreg, d_st]) -addop("fimul", [bs("11011"), wd, bs("10")] + rmmod(d1, rm_arg_wd)) - -addop("fnop", [bs8(0xd9), bs8(0xd0)]) -addop("fpatan", [bs8(0xd9), bs8(0xf3)]) -addop("fprem", [bs8(0xd9), bs8(0xf8)]) -addop("fprem1", [bs8(0xd9), bs8(0xf5)]) -addop("fptan", [bs8(0xd9), bs8(0xf2)]) -addop("frndint", [bs8(0xd9), bs8(0xfc)]) -addop("frstor", [bs8(0xdd)] + rmmod(d4, rm_arg_m80)) # XXX TODO: m94 ? -# addop("fsave", [bs8(0x9b), bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX -# TODO: m94 ? -addop("fnsave", [bs8(0xdd)] + rmmod(d6, rm_arg_m80)) # XXX TODO: m94 ? - -addop("fscale", [bs8(0xd9), bs8(0xfd)]) -addop("fsin", [bs8(0xd9), bs8(0xfe)]) -addop("fsincos", [bs8(0xd9), bs8(0xfb)]) -addop("fsqrt", [bs8(0xd9), bs8(0xfa)]) - -addop("fst", [bs("11011"), sd, bs("01")] + rmmod(d2, rm_arg_sd)) -addop("fst", [bs8(0xdd), bs("11010"), fltreg]) -addop("fstp", [bs("11011"), sd, bs("01")] + rmmod(d3, rm_arg_sd)) -addop("fstp", [bs8(0xdb)] + rmmod(d7, rm_arg_m80)) -addop("fstp", [bs8(0xdd), bs("11011"), fltreg]) - -# addop("fstcw", [bs8(0x9b), bs8(0xd9)] + rmmod(d7, rm_arg_m16)) -addop("fnstcw", [bs8(0xd9)] + rmmod(d7, rm_arg_m16)) -# addop("fstenv", [bs8(0x9b), bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX -# TODO: m14? -addop("fnstenv", [bs8(0xd9)] + rmmod(d6, rm_arg_m80)) # XXX TODO: m14? 
-# addop("fstsw", [bs8(0x9b), bs8(0xdd)] + rmmod(d7, rm_arg_m16)) -addop("fnstsw", [bs8(0xdd)] + rmmod(d7, rm_arg_m16)) -# addop("fstsw", [bs8(0x9b), bs8(0xdf), bs8(0xe0), d_ax]) -addop("fnstsw", [bs8(0xdf), bs8(0xe0), d_ax]) - -addop("fsub", [bs("11011"), sd, bs("00")] + rmmod(d4, rm_arg_sd)) -addop("fsub", [bs8(0xd8), bs("11100"), d_st, fltreg]) -addop("fsub", [bs8(0xdc), bs("11101"), fltreg, d_st]) -addop("fsubp", [bs8(0xde), bs("11101"), fltreg, d_st]) -addop("fisub", [bs("11011"), wd, bs("10")] + rmmod(d4, rm_arg_wd)) - -addop("fsubr", [bs("11011"), sd, bs("00")] + rmmod(d5, rm_arg_sd)) -addop("fsubr", [bs8(0xd8), bs("11101"), d_st, fltreg]) -addop("fsubr", [bs8(0xdc), bs("11100"), fltreg, d_st]) -addop("fsubrp", [bs8(0xde), bs("11100"), fltreg, d_st]) -addop("fisubr", [bs("11011"), wd, bs("10")] + rmmod(d5, rm_arg_wd)) -addop("ftst", [bs8(0xd9), bs8(0xe4)]) - - -addop("fucom", [bs8(0xdd), bs("11100"), fltreg]) -addop("fucomp", [bs8(0xdd), bs("11101"), fltreg]) -addop("fucompp", [bs8(0xda), bs8(0xe9)]) - -addop("fxam", [bs8(0xd9), bs8(0xe5)]) -addop("fxch", [bs8(0xd9), bs("11001"), fltreg]) -addop("fxrstor", [bs8(0x0f), bs8(0xae)] - + rmmod(d1, rm_arg_m80)) # XXX TODO m512 -addop("fxsave", [bs8(0x0f), bs8(0xae)] - + rmmod(d0, rm_arg_m80)) # XXX TODO m512 -addop("stmxcsr", [bs8(0x0f), bs8(0xae)] + rmmod(d3)) -addop("ldmxcsr", [bs8(0x0f), bs8(0xae)] + rmmod(d2)) - -addop("fxtract", [bs8(0xd9), bs8(0xf4)]) -addop("fyl2x", [bs8(0xd9), bs8(0xf1)]) -addop("fyl2xp1", [bs8(0xd9), bs8(0xf9)]) - -addop("hlt", [bs8(0xf4)]) -addop("icebp", [bs8(0xf1)]) - -addop("idiv", [bs('1111011'), w8] + rmmod(d7, rm_arg_w8)) - -addop("imul", [bs('1111011'), w8] + rmmod(d5, rm_arg_w8)) -addop("imul", [bs8(0x0f), bs8(0xaf)] + rmmod(rmreg)) - -addop("imul", [bs("011010"), se, bs('1')] + rmmod(rmreg) + [d_imm]) - -addop("in", [bs("1110010"), w8, d_eax, u08]) -addop("in", [bs("1110110"), w8, d_eax, d_edx]) - -addop("inc", [bs('1111111'), w8] + rmmod(d0, rm_arg_w8)) -addop("inc", 
[bs('01000'), reg, bs_modeno64]) - -addop("insb", [bs8(0x6c)]) -addop("insw", [bs8(0x6d), bs_opmode16]) -addop("insd", [bs8(0x6d), bs_opmode32]) -addop("insd", [bs8(0x6d), bs_opmode64]) - -addop("int", [bs8(0xcc), u08_3]) -addop("int", [bs8(0xcd), u08]) -addop("into", [bs8(0xce)]) -addop("invd", [bs8(0x0f), bs8(0x08)]) -addop("invlpg", [bs8(0x0f), bs8(0x01)] + rmmod(d7)) - -addop("iret", [bs8(0xcf), bs_opmode16]) -addop("iretd", [bs8(0xcf), bs_opmode32]) -addop("iretq", [bs8(0xcf), bs_opmode64]) - -addop("j", [bs('0111'), cond, rel_off08]) - -addop("jcxz", [bs8(0xe3), rel_off08, bs_admode16]) -addop("jecxz", [bs8(0xe3), rel_off08, bs_admode32]) -addop("jrcxz", [bs8(0xe3), rel_off08, bs_admode64]) - -addop("j", [bs8(0x0f), bs('1000'), cond, rel_off]) -addop("jmp", [bs8(0xeb), rel_off08]) -addop("jmp", [bs8(0xe9), rel_off]) -# TODO XXX replace stk force64? -addop("jmp", [bs8(0xff), stk] + rmmod(d4)) -addop("jmp", [bs8(0xea), bs_modeno64, moff, msegoff]) - -addop("jmp", [bs8(0xff)] + rmmod(d5, rm_arg_x=rm_mem_far, modrm=mod_mem)) - -addop("lahf", [bs8(0x9f)]) -addop("lar", [bs8(0x0f), bs8(0x02)] + rmmod(rmreg)) - -addop("lea", [bs8(0x8d)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) -addop("les", [bs8(0xc4)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) -addop("lds", [bs8(0xc5)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) -addop("lss", [bs8(0x0f), bs8(0xb2)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) -addop("lfs", [bs8(0x0f), bs8(0xb4)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) -addop("lgs", [bs8(0x0f), bs8(0xb5)] + rmmod(rmreg, rm_arg_x=rm_mem, modrm=mod_mem)) - -addop("lgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d2, modrm=mod_mem)) -addop("lidt", [bs8(0x0f), bs8(0x01)] + rmmod(d3, modrm=mod_mem)) - -addop("lfence", [bs8(0x0f), bs8(0xae), bs8(0xe8)]) -addop("mfence", [bs8(0x0f), bs8(0xae), bs8(0xf0)]) -addop("sfence", [bs8(0x0f), bs8(0xae), bs8(0xf8)]) - -addop("leave", [bs8(0xc9), stk]) - -addop("lodsb", [bs8(0xac)]) -addop("lodsw", [bs8(0xad), 
bs_opmode16]) -addop("lodsd", [bs8(0xad), bs_opmode32]) -addop("lodsq", [bs8(0xad), bs_opmode64]) - -addop("loop", [bs8(0xe2), rel_off08]) -addop("loope", [bs8(0xe1), rel_off08]) -addop("loopne", [bs8(0xe0), rel_off08]) -addop("lsl", [bs8(0x0f), bs8(0x03)] + rmmod(rmreg)) -addop("monitor", [bs8(0x0f), bs8(0x01), bs8(0xc8)]) - -addop("mov", [bs("100010"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) -addop("mov", [bs("100011"), swapargs, bs('0')] + rmmod(segm), [rm_arg, segm]) -addop("mov", [bs("101000"), swapargs, w8, d_eax, movoff], [d_eax, movoff]) -addop("mov", [bs("1011"), w8, reg, d_imm64]) -addop("mov", [bs("1100011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) -addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('0')] - + rmmod(crreg), [rm_arg, crreg]) -addop("mov", [bs8(0x0f), bs("001000"), swapargs, bs('1')] - + rmmod(drreg), [rm_arg, drreg]) -addop("movsb", [bs8(0xa4)]) -addop("movsw", [bs8(0xa5), bs_opmode16]) -addop("movsd", [bs8(0xa5), bs_opmode32]) -addop("movsq", [bs8(0xa5), bs_opmode64]) - -addop("movsx", [bs8(0x0f), bs("1011111"), w8, sx] + rmmod(rmreg, rm_arg_sx)) -addop("movsxd", [bs8(0x63), sxd, bs_mode64] + rmmod(rmreg, rm_arg_sxd)) - -addop("movups", [bs8(0x0f), bs("0001000"), swapargs, no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) -addop("movsd", [bs8(0x0f), bs("0001000"), swapargs, pref_f2] - + rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64]) -addop("movss", [bs8(0x0f), bs("0001000"), swapargs, pref_f3] + - rmmod(xmm_reg, rm_arg_xmm_m32), [xmm_reg, rm_arg_xmm_m32]) -addop("movupd", [bs8(0x0f), bs8(0x10), pref_66] + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) -addop("movupd", [bs8(0x0f), bs8(0x11), pref_66] + rmmod(xmm_reg, rm_arg_xmm), [rm_arg_xmm, xmm_reg]) - - -addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), no_xmm_pref] + - rmmod(mm_reg, rm_arg), [mm_reg, rm_arg]) -addop("movd", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66, bs_opmode32] + - rmmod(xmm_reg, rm_arg), 
[xmm_reg, rm_arg]) -addop("movq", [bs8(0x0f), bs('011'), swapargs, bs('1110'), pref_66, bs_opmode64] + - rmmod(xmm_reg, rm_arg), [xmm_reg, rm_arg]) - -addop("movq", [bs8(0x0f), bs('011'), swapargs, bs('1111'), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) - -addop("movq", [bs8(0x0f), bs8(0x7e), pref_f3] + - rmmod(xmm_reg, rm_arg_xmm_m64), [xmm_reg, rm_arg_xmm_m64]) -addop("movq", [bs8(0x0f), bs8(0xd6), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m64), [rm_arg_xmm_m64, xmm_reg]) - -addop("movmskps", [bs8(0x0f), bs8(0x50), no_xmm_pref] + - rmmod(reg_modrm, rm_arg_xmm_reg)) -addop("movmskpd", [bs8(0x0f), bs8(0x50), pref_66] + - rmmod(reg_modrm, rm_arg_xmm_reg)) - -addop("addss", [bs8(0x0f), bs8(0x58), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) -addop("addsd", [bs8(0x0f), bs8(0x58), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) - -addop("subss", [bs8(0x0f), bs8(0x5c), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) -addop("subsd", [bs8(0x0f), bs8(0x5c), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) - -addop("mulss", [bs8(0x0f), bs8(0x59), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) -addop("mulsd", [bs8(0x0f), bs8(0x59), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) - -addop("divss", [bs8(0x0f), bs8(0x5e), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) -addop("divsd", [bs8(0x0f), bs8(0x5e), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) - - -addop("pminsw", [bs8(0x0f), bs8(0xea), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) -addop("pminsw", [bs8(0x0f), bs8(0xea), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("ucomiss", [bs8(0x0f), bs8(0x2e), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm_m32)) -addop("ucomisd", [bs8(0x0f), bs8(0x2e), pref_66] + rmmod(xmm_reg, rm_arg_xmm_m64)) - - -addop("movzx", [bs8(0x0f), bs("1011011"), w8, sx] + rmmod(rmreg, rm_arg_sx)) -addop("mul", [bs('1111011'), w8] + rmmod(d4, rm_arg_w8)) - -addop("neg", [bs('1111011'), w8] + rmmod(d3, rm_arg_w8)) -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d0, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), 
bs8(0x1f)] + rmmod(d1, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d2, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d3, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d4, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d5, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d6, rm_arg)) # XXX TODO m512 -addop("nop", [bs8(0x0f), bs8(0x1f)] + rmmod(d7, rm_arg)) # XXX TODO m512 -addop("not", [bs('1111011'), w8] + rmmod(d2, rm_arg_w8)) -addop("or", [bs("0000110"), w8, d_eax, d_imm]) -addop("or", [bs("100000"), se, w8] + rmmod(d1, rm_arg_w8) + [d_imm]) -addop("or", [bs("000010"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) -addop("out", [bs("1110011"), w8, u08, d_eax]) -addop("out", [bs("1110111"), w8, d_edx, d_eax]) - -addop("outsb", [bs8(0x6e)]) -addop("outsw", [bs8(0x6f), bs_opmode16]) -addop("outsd", [bs8(0x6f), bs_opmode32]) -addop("outsd", [bs8(0x6f), bs_opmode64]) - -addop("setalc", [bs8(0xD6)]) - -# addop("pause", [bs8(0xf3), bs8(0x90)]) - -addop("popw", [bs8(0x8f), stk, bs_opmode16] + rmmod(d0)) -addop("popw", [bs("01011"), stk, reg, bs_opmode16]) -addop("popw", [bs8(0x1f), stk, d_ds, bs_opmode16]) -addop("popw", [bs8(0x07), stk, d_es, bs_opmode16]) -addop("popw", [bs8(0x17), stk, d_ss, bs_opmode16]) -addop("popw", [bs8(0x0f), stk, bs8(0xa1), d_fs, bs_opmode16]) -addop("popw", [bs8(0x0f), stk, bs8(0xa9), d_gs, bs_opmode16]) - -addop("pop", [bs8(0x8f), stk, bs_opmode32] + rmmod(d0)) -addop("pop", [bs("01011"), stk, reg, bs_opmode32]) -addop("pop", [bs8(0x1f), stk, d_ds, bs_opmode32]) -addop("pop", [bs8(0x07), stk, d_es, bs_opmode32]) -addop("pop", [bs8(0x17), stk, d_ss, bs_opmode32]) -addop("pop", [bs8(0x0f), stk, bs8(0xa1), d_fs, bs_opmode32]) -addop("pop", [bs8(0x0f), stk, bs8(0xa9), d_gs, bs_opmode32]) - -addop("pop", [bs8(0x8f), stk, bs_opmode64] + rmmod(d0)) -addop("pop", [bs("01011"), stk, reg, bs_opmode64]) -addop("pop", 
[bs8(0x1f), stk, d_ds, bs_opmode64]) -addop("pop", [bs8(0x07), stk, d_es, bs_opmode64]) -addop("pop", [bs8(0x17), stk, d_ss, bs_opmode64]) -addop("pop", [bs8(0x0f), stk, bs8(0xa1), d_fs, bs_opmode64]) -addop("pop", [bs8(0x0f), stk, bs8(0xa9), d_gs, bs_opmode64]) - - -addop("popa", [bs8(0x61), stk, bs_opmode16]) -addop("popad", [bs8(0x61), stk, bs_opmode32]) - -addop("popfw", [bs8(0x9d), stk, bs_opmode16]) -addop("popfd", [bs8(0x9d), stk, bs_opmode32]) -addop("popfq", [bs8(0x9d), stk, bs_opmode64]) - -addop("prefetch0", [bs8(0x0f), bs8(0x18)] + rmmod(d1, rm_arg_m08)) -addop("prefetch1", [bs8(0x0f), bs8(0x18)] + rmmod(d2, rm_arg_m08)) -addop("prefetch2", [bs8(0x0f), bs8(0x18)] + rmmod(d3, rm_arg_m08)) -addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08)) -addop("prefetchw", [bs8(0x0f), bs8(0x0d)] + rmmod(d1, rm_arg_m08)) - -addop("pushw", [bs8(0xff), stk, bs_opmode16] + rmmod(d6)) -addop("pushw", [bs("01010"), stk, reg, bs_opmode16]) -addop("pushw", [bs8(0x6a), s08, stk, bs_opmode16]) -addop("pushw", [bs8(0x68), d_imm, stk, bs_opmode16]) -addop("pushw", [bs8(0x0e), stk, d_cs, bs_opmode16]) -addop("pushw", [bs8(0x16), stk, d_ss, bs_opmode16]) -addop("pushw", [bs8(0x1e), stk, d_ds, bs_opmode16]) -addop("pushw", [bs8(0x06), stk, d_es, bs_opmode16]) -addop("pushw", [bs8(0x0f), stk, bs8(0xa0), d_fs, bs_opmode16]) -addop("pushw", [bs8(0x0f), stk, bs8(0xa8), d_gs, bs_opmode16]) - -addop("push", [bs8(0xff), stk, bs_opmode32] + rmmod(d6)) -addop("push", [bs("01010"), stk, reg, bs_opmode32]) -addop("push", [bs8(0x6a), s08, stk, bs_opmode32]) -addop("push", [bs8(0x68), d_imm, stk, bs_opmode32]) -addop("push", [bs8(0x0e), stk, d_cs, bs_opmode32]) -addop("push", [bs8(0x16), stk, d_ss, bs_opmode32]) -addop("push", [bs8(0x1e), stk, d_ds, bs_opmode32]) -addop("push", [bs8(0x06), stk, d_es, bs_opmode32]) -addop("push", [bs8(0x0f), stk, bs8(0xa0), d_fs, bs_opmode32]) -addop("push", [bs8(0x0f), stk, bs8(0xa8), d_gs, bs_opmode32]) - -addop("push", [bs8(0xff), stk, 
bs_opmode64] + rmmod(d6)) -addop("push", [bs("01010"), stk, reg, bs_opmode64]) -addop("push", [bs8(0x6a), s08, stk, bs_opmode64]) -addop("push", [bs8(0x68), d_imm, stk, bs_opmode64]) -addop("push", [bs8(0x0e), stk, d_cs, bs_opmode64]) -addop("push", [bs8(0x16), stk, d_ss, bs_opmode64]) -addop("push", [bs8(0x1e), stk, d_ds, bs_opmode64]) -addop("push", [bs8(0x06), stk, d_es, bs_opmode64]) -addop("push", [bs8(0x0f), stk, bs8(0xa0), d_fs, bs_opmode64]) -addop("push", [bs8(0x0f), stk, bs8(0xa8), d_gs, bs_opmode64]) - -addop("pusha", [bs8(0x60), stk, bs_opmode16_no64]) -addop("pushad", [bs8(0x60), stk, bs_opmode32_no64]) - - -addop("pushfw", [bs8(0x9c), stk, bs_opmode16]) -addop("pushfd", [bs8(0x9c), stk, bs_opmode32]) -addop("pushfq", [bs8(0x9c), stk, bs_opmode64]) - -addop("rcl", [bs('110100'), d_cl1, w8] + - rmmod(d2, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("rcl", [bs('1100000'), w8] + rmmod(d2, rm_arg_w8) + [u08]) -addop("rcr", [bs('110100'), d_cl1, w8] + - rmmod(d3, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("rcr", [bs('1100000'), w8] + rmmod(d3, rm_arg_w8) + [u08]) -addop("rol", [bs('110100'), d_cl1, w8] - + rmmod(d0, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("rol", [bs('1100000'), w8] + rmmod(d0, rm_arg_w8) + [u08]) -addop("ror", [bs('110100'), d_cl1, w8] - + rmmod(d1, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("ror", [bs('1100000'), w8] + rmmod(d1, rm_arg_w8) + [u08]) - -addop("rdmsr", [bs8(0x0f), bs8(0x32)]) -addop("rdpmc", [bs8(0x0f), bs8(0x33)]) -addop("rdtsc", [bs8(0x0f), bs8(0x31)]) -addop("ret", [bs8(0xc3), stk]) -addop("ret", [bs8(0xc2), stk, u16]) -addop("retf", [bs8(0xcb), stk]) -addop("retf", [bs8(0xca), stk, u16]) - -addop("rsm", [bs8(0x0f), bs8(0xaa)]) -addop("sahf", [bs8(0x9e)]) - -# XXX tipo in doc: /4 instead of /6 -addop("sal", [bs('110100'), d_cl1, w8] + - rmmod(d6, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("sal", [bs('1100000'), w8] + rmmod(d6, rm_arg_w8) + [u08]) -addop("sar", [bs('110100'), d_cl1, w8] + - rmmod(d7, rm_arg_w8), [rm_arg_w8, d_cl1]) 
-addop("sar", [bs('1100000'), w8] + rmmod(d7, rm_arg_w8) + [u08]) - -addop("scasb", [bs8(0xae)]) -addop("scasw", [bs8(0xaf), bs_opmode16]) -addop("scasd", [bs8(0xaf), bs_opmode32]) -addop("scasq", [bs8(0xaf), bs_opmode64]) - -addop("shl", [bs('110100'), d_cl1, w8] - + rmmod(d4, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("shl", [bs('1100000'), w8] + rmmod(d4, rm_arg_w8) + [u08]) -addop("shr", [bs('110100'), d_cl1, w8] - + rmmod(d5, rm_arg_w8), [rm_arg_w8, d_cl1]) -addop("shr", [bs('1100000'), w8] + rmmod(d5, rm_arg_w8) + [u08]) - -addop("sbb", [bs("0001110"), w8, d_eax, d_imm]) -addop("sbb", [bs("100000"), se, w8] + rmmod(d3, rm_arg_w8) + [d_imm]) -addop("sbb", [bs("000110"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - -addop("set", [bs8(0x0f), bs('1001'), cond] + rmmod(regnoarg, rm_arg_08)) -addop("sgdt", [bs8(0x0f), bs8(0x01)] + rmmod(d0, modrm=mod_mem)) -addop("shld", [bs8(0x0f), bs8(0xa4)] + - rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) -addop("shld", [bs8(0x0f), bs8(0xa5)] + - rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) -addop("shrd", [bs8(0x0f), bs8(0xac)] + - rmmod(rmreg) + [u08], [rm_arg, rmreg, u08]) -addop("shrd", [bs8(0x0f), bs8(0xad)] + - rmmod(rmreg) + [d_cl], [rm_arg, rmreg, d_cl]) -addop("sidt", [bs8(0x0f), bs8(0x01)] + rmmod(d1, modrm=mod_mem)) -addop("sldt", [bs8(0x0f), bs8(0x00)] + rmmod(d0, rm_arg_x=rm_arg_reg_m16)) -addop("smsw", [bs8(0x0f), bs8(0x01)] + rmmod(d4)) -addop("stc", [bs8(0xf9)]) -addop("std", [bs8(0xfd)]) -addop("sti", [bs8(0xfb)]) -addop("stosb", [bs8(0xaa)]) -addop("stosw", [bs8(0xab), bs_opmode16]) -addop("stosd", [bs8(0xab), bs_opmode32]) -addop("stosq", [bs8(0xab), bs_opmode64]) - -addop("str", [bs8(0x0f), bs8(0x00)] + rmmod(d1)) - -addop("sub", [bs("0010110"), w8, d_eax, d_imm]) -addop("sub", [bs("100000"), se, w8] + rmmod(d5, rm_arg_w8) + [d_imm]) -addop("sub", [bs("001010"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - -addop("syscall", [bs8(0x0f), bs8(0x05)]) -addop("sysenter", 
[bs8(0x0f), bs8(0x34)]) -addop("sysexit", [bs8(0x0f), bs8(0x35)]) -addop("sysret", [bs8(0x0f), bs8(0x07)]) -addop("test", [bs("1010100"), w8, d_eax, d_imm]) -addop("test", [bs("1111011"), w8] + rmmod(d0, rm_arg_w8) + [d_imm]) -addop("test", [bs("1000010"), w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) -addop("ud2", [bs8(0x0f), bs8(0x0b)]) -addop("verr", [bs8(0x0f), bs8(0x00)] + rmmod(d4)) -addop("verw", [bs8(0x0f), bs8(0x00)] + rmmod(d5)) -addop("wbinvd", [bs8(0x0f), bs8(0x09)]) -addop("wrmsr", [bs8(0x0f), bs8(0x30)]) -addop("xadd", [bs8(0x0f), bs("1100000"), w8] - + rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - -addop("nop", [bs8(0x90), no_rex], alias=True) - -addop("xchg", [bs('10010'), d_eax, reg]) -addop("xchg", [bs('1000011'), w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) -addop("xlat", [bs8(0xd7)]) - - -addop("xor", [bs("0011010"), w8, d_eax, d_imm]) -addop("xor", [bs("100000"), se, w8] + rmmod(d6, rm_arg_w8) + [d_imm]) -addop("xor", [bs("001100"), swapargs, w8] + - rmmod(rmreg, rm_arg_w8), [rm_arg_w8, rmreg]) - - -addop("xgetbv", [bs8(0x0f), bs8(0x01), bs8(0xd0)]) - - - -#### MMX/SSE/AVX operations -#### Categories are the same than here: https://software.intel.com/sites/landingpage/IntrinsicsGuide/ -#### - -### Arithmetic (integers) -### - -## Move -# SSE -addop("movapd", [bs8(0x0f), bs("0010100"), swapargs] - + rmmod(xmm_reg, rm_arg_xmm) + [bs_opmode16], [xmm_reg, rm_arg_xmm]) -addop("movaps", [bs8(0x0f), bs("0010100"), swapargs] - + rmmod(xmm_reg, rm_arg_xmm_m128) + [bs_opmode32], [xmm_reg, rm_arg_xmm_m128]) -addop("movaps", [bs8(0x0f), bs("0010100"), swapargs] - + rmmod(xmm_reg, rm_arg_xmm_m128) + [bs_opmode64], [xmm_reg, rm_arg_xmm_m128]) -addop("movdqu", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_f3] - + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) -addop("movdqa", [bs8(0x0f), bs("011"), swapargs, bs("1111"), pref_66] - + rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - -addop("movhpd", [bs8(0x0f), bs("0001011"), 
swapargs, pref_66] + - rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) -addop("movhps", [bs8(0x0f), bs("0001011"), swapargs, no_xmm_pref] + - rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) -addop("movlpd", [bs8(0x0f), bs("0001001"), swapargs, pref_66] + - rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) -addop("movlps", [bs8(0x0f), bs("0001001"), swapargs, no_xmm_pref] + - rmmod(xmm_reg, rm_arg_m64), [xmm_reg, rm_arg_m64]) - -addop("movhlps", [bs8(0x0f), bs8(0x12), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm_reg), [xmm_reg, rm_arg_xmm_reg]) -addop("movlhps", [bs8(0x0f), bs8(0x16), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm_reg), [xmm_reg, rm_arg_xmm_reg]) - -addop("movdq2q", [bs8(0x0f), bs8(0xd6), pref_f2] + - rmmod(mm_reg, rm_arg_xmm_reg), [mm_reg, rm_arg_xmm_reg]) -addop("movq2dq", [bs8(0x0f), bs8(0xd6), pref_f3] + - rmmod(xmm_reg, rm_arg_mm)) - -## Additions -# SSE -addop("paddb", [bs8(0x0f), bs8(0xfc), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("paddw", [bs8(0x0f), bs8(0xfd), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("paddd", [bs8(0x0f), bs8(0xfe), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("paddq", [bs8(0x0f), bs8(0xd4), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("paddb", [bs8(0x0f), bs8(0xfc), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) -addop("paddw", [bs8(0x0f), bs8(0xfd), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) -addop("paddd", [bs8(0x0f), bs8(0xfe), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) -addop("paddq", [bs8(0x0f), bs8(0xd4), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) - -## Substractions -# SSE -addop("psubb", [bs8(0x0f), bs8(0xf8), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("psubw", [bs8(0x0f), bs8(0xf9), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("psubd", [bs8(0x0f), bs8(0xfa), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("psubq", [bs8(0x0f), bs8(0xfb), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("psubb", [bs8(0x0f), bs8(0xf8), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) -addop("psubw", [bs8(0x0f), bs8(0xf9), no_xmm_pref] + 
rmmod(mm_reg, rm_arg_mm)) -addop("psubd", [bs8(0x0f), bs8(0xfa), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) -addop("psubq", [bs8(0x0f), bs8(0xfb), no_xmm_pref] + rmmod(mm_reg, rm_arg_mm)) - -### Arithmetic (floating-point) -### - -## Additions -# SSE -addop("addps", [bs8(0x0f), bs8(0x58), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("addpd", [bs8(0x0f), bs8(0x58), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -## Substractions -# SSE -addop("subps", [bs8(0x0f), bs8(0x5c), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("subpd", [bs8(0x0f), bs8(0x5c), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -## Multiplications -# SSE -addop("mulps", [bs8(0x0f), bs8(0x59), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("mulpd", [bs8(0x0f), bs8(0x59), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -## Divisions -# SSE -addop("divps", [bs8(0x0f), bs8(0x5e), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("divpd", [bs8(0x0f), bs8(0x5e), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -### Logical (floating-point) -### - -## XOR -addop("xorps", [bs8(0x0f), bs8(0x57), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("xorpd", [bs8(0x0f), bs8(0x57), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -## AND -addop("andps", [bs8(0x0f), bs8(0x54), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("andpd", [bs8(0x0f), bs8(0x54), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("andnps", [bs8(0x0f), bs8(0x55), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("andnpd", [bs8(0x0f), bs8(0x55), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -## OR -addop("orps", [bs8(0x0f), bs8(0x56), no_xmm_pref] + rmmod(xmm_reg, rm_arg_xmm)) -addop("orpd", [bs8(0x0f), bs8(0x56), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -## AND -# MMX -addop("pand", [bs8(0x0f), bs8(0xdb), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -# SSE -addop("pand", [bs8(0x0f), bs8(0xdb), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - -## ANDN -# MMX -addop("pandn", [bs8(0x0f), bs8(0xdf), no_xmm_pref] + - rmmod(mm_reg, 
rm_arg_mm), [mm_reg, rm_arg_mm]) -# SSE -addop("pandn", [bs8(0x0f), bs8(0xdf), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - -## OR -# MMX -addop("por", [bs8(0x0f), bs8(0xeb), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -# SSE -addop("por", [bs8(0x0f), bs8(0xeb), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - -## XOR -# MMX -addop("pxor", [bs8(0x0f), bs8(0xef), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -# MMX -addop("pxor", [bs8(0x0f), bs8(0xef), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -### Comparisons (floating-point) -### -addop("minps", [bs8(0x0f), bs8(0x5d), no_xmm_pref] + rmmod(xmm_reg, - rm_arg_xmm_m128)) -addop("minss", [bs8(0x0f), bs8(0x5d), pref_f3] + rmmod(xmm_reg, - rm_arg_xmm_m32)) -addop("minpd", [bs8(0x0f), bs8(0x5d), pref_66] + rmmod(xmm_reg, - rm_arg_xmm_m128)) -addop("minsd", [bs8(0x0f), bs8(0x5d), pref_f2] + rmmod(xmm_reg, - rm_arg_xmm_m64)) -addop("maxps", [bs8(0x0f), bs8(0x5f), no_xmm_pref] + rmmod(xmm_reg, - rm_arg_xmm_m128)) -addop("maxpd", [bs8(0x0f), bs8(0x5f), pref_66] + rmmod(xmm_reg, - rm_arg_xmm_m128)) -addop("maxsd", [bs8(0x0f), bs8(0x5f), pref_f2] + rmmod(xmm_reg, rm_arg_xmm_m64)) -addop("maxss", [bs8(0x0f), bs8(0x5f), pref_f3] + rmmod(xmm_reg, rm_arg_xmm_m32)) - -for cond_name, value in [ - ("eq", 0x00), - ("lt", 0x01), - ("le", 0x02), - ("unord", 0x03), - ("neq", 0x04), - ("nlt", 0x05), - ("nle", 0x06), - ("ord", 0x07), -]: - addop("cmp%sps" % cond_name, [bs8(0x0f), bs8(0xc2), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm_m64) + [bs8(value)]) - addop("cmp%spd" % cond_name, [bs8(0x0f), bs8(0xc2), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m64) + [bs8(value)]) - addop("cmp%sss" % cond_name, [bs8(0x0f), bs8(0xc2), pref_f3] + - rmmod(xmm_reg, rm_arg_xmm_m32) + [bs8(value)]) - addop("cmp%ssd" % cond_name, [bs8(0x0f), bs8(0xc2), pref_f2] + - rmmod(xmm_reg, rm_arg_xmm_m32) + [bs8(value)]) - - - -addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), no_xmm_pref] + - rmmod(mm_reg, 
rm_arg_mm_m64)) -addop("pshufb", [bs8(0x0f), bs8(0x38), bs8(0x00), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("pshufd", [bs8(0x0f), bs8(0x70), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128) + [u08]) -addop("pshuflw", [bs8(0x0f), bs8(0x70), pref_f2] + - rmmod(xmm_reg, rm_arg_xmm_m128) + [u08]) -addop("pshufhw", [bs8(0x0f), bs8(0x70), pref_f3] + - rmmod(xmm_reg, rm_arg_xmm_m128) + [u08]) - - -### Convert -### SS = single precision -### SD = double precision -### - -## SS -> SD -## - -addop("cvtdq2pd", [bs8(0x0f), bs8(0xe6), pref_f3] - + rmmod(xmm_reg, rm_arg_xmm_m64)) -addop("cvtdq2ps", [bs8(0x0f), bs8(0x5b), no_xmm_pref] - + rmmod(xmm_reg, rm_arg_xmm)) -addop("cvtpd2dq", [bs8(0x0f), bs8(0xe6), pref_f2] - + rmmod(xmm_reg, rm_arg_xmm)) -addop("cvtpd2pi", [bs8(0x0f), bs8(0x2d), pref_66] - + rmmod(mm_reg, rm_arg_xmm)) -addop("cvtpd2ps", [bs8(0x0f), bs8(0x5a), pref_66] - + rmmod(xmm_reg, rm_arg_xmm)) -addop("cvtpi2pd", [bs8(0x0f), bs8(0x2a), pref_66] - + rmmod(xmm_reg, rm_arg_mm_m64)) -addop("cvtpi2ps", [bs8(0x0f), bs8(0x2a), no_xmm_pref] - + rmmod(xmm_reg, rm_arg_mm_m64)) -addop("cvtps2dq", [bs8(0x0f), bs8(0x5b), pref_66] - + rmmod(xmm_reg, rm_arg_xmm)) -addop("cvtps2pd", [bs8(0x0f), bs8(0x5a), no_xmm_pref] - + rmmod(xmm_reg, rm_arg_xmm_m64)) -addop("cvtps2pi", [bs8(0x0f), bs8(0x2d), no_xmm_pref] - + rmmod(mm_reg, rm_arg_xmm_m64)) -addop("cvtsd2si", [bs8(0x0f), bs8(0x2d), pref_f2] - + rmmod(reg, rm_arg_xmm_m64)) -addop("cvtsd2ss", [bs8(0x0f), bs8(0x5a), pref_f2] - + rmmod(xmm_reg, rm_arg_xmm_m64)) -addop("cvtsi2sd", [bs8(0x0f), bs8(0x2a), pref_f2] - + rmmod(xmm_reg, rm_arg)) -addop("cvtsi2ss", [bs8(0x0f), bs8(0x2a), xmmreg, pref_f3] - + rmmod(xmm_reg, rm_arg)) -addop("cvtss2sd", [bs8(0x0f), bs8(0x5a), pref_f3] - + rmmod(xmm_reg, rm_arg_xmm_m32)) -addop("cvtss2si", [bs8(0x0f), bs8(0x2d), pref_f3] - + rmmod(rmreg, rm_arg_xmm_m32)) -addop("cvttpd2pi",[bs8(0x0f), bs8(0x2c), pref_66] - + rmmod(mm_reg, rm_arg_xmm)) -addop("cvttpd2dq",[bs8(0x0f), bs8(0xe6), pref_66] - 
+ rmmod(xmm_reg, rm_arg_xmm)) -addop("cvttps2dq",[bs8(0x0f), bs8(0x5b), pref_f3] - + rmmod(xmm_reg, rm_arg_xmm)) -addop("cvttps2pi",[bs8(0x0f), bs8(0x2c), no_xmm_pref] - + rmmod(mm_reg, rm_arg_xmm_m64)) -addop("cvttsd2si",[bs8(0x0f), bs8(0x2c), pref_f2] - + rmmod(reg, rm_arg_xmm_m64)) -addop("cvttss2si",[bs8(0x0f), bs8(0x2c), pref_f3] - + rmmod(reg, rm_arg_xmm_m32)) - -addop("palignr", [bs8(0x0f), bs8(0x73), bs8(0x0f), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64) + [u08], [mm_reg, rm_arg_mm_m64, u08]) -addop("palignr", [bs8(0x0f), bs8(0x3a), bs8(0x0f), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128) + [u08], [xmm_reg, rm_arg_xmm_m128, u08]) - -addop("psrlq", [bs8(0x0f), bs8(0x73), no_xmm_pref] + - rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) -addop("psrlq", [bs8(0x0f), bs8(0x73), pref_66] + - rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("psrlq", [bs8(0x0f), bs8(0xd3), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -addop("psrlq", [bs8(0x0f), bs8(0xd3), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - - -addop("psrld", [bs8(0x0f), bs8(0x72), no_xmm_pref] + - rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) -addop("psrld", [bs8(0x0f), bs8(0x72), pref_66] + - rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("psrld", [bs8(0x0f), bs8(0xd2), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -addop("psrld", [bs8(0x0f), bs8(0xd2), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - -addop("psrldq", [bs8(0x0f), bs8(0x73), pref_66] + - rmmod(d3, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("psrlw", [bs8(0x0f), bs8(0x71), no_xmm_pref] + - rmmod(d2, rm_arg_mm) + [u08], [rm_arg_mm, u08]) -addop("psrlw", [bs8(0x0f), bs8(0x71), pref_66] + - rmmod(d2, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("psrlw", [bs8(0x0f), bs8(0xd1), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) -addop("psrlw", [bs8(0x0f), bs8(0xd1), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128), 
[xmm_reg, rm_arg_xmm_m128]) - -addop("psraw", [bs8(0x0f), bs8(0xe1), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) -addop("psraw", [bs8(0x0f), bs8(0xe1), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128]) - -addop("psraw", [bs8(0x0f), bs8(0x71), no_xmm_pref] + - rmmod(d4, rm_arg_mm_m64) + [u08], [rm_arg_mm_m64, u08]) -addop("psraw", [bs8(0x0f), bs8(0x71), pref_66] + - rmmod(d4, rm_arg_xmm_m128) + [u08], [rm_arg_xmm_m128, u08]) - -addop("psrad", [bs8(0x0f), bs8(0xe2), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64), [mm_reg, rm_arg_mm_m64]) -addop("psrad", [bs8(0x0f), bs8(0xe2), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128), [xmm_reg, rm_arg_xmm_m128]) - -addop("psrad", [bs8(0x0f), bs8(0x72), no_xmm_pref] + - rmmod(d4, rm_arg_mm_m64) + [u08], [rm_arg_mm_m64, u08]) -addop("psrad", [bs8(0x0f), bs8(0x72), pref_66] + - rmmod(d4, rm_arg_xmm_m128) + [u08], [rm_arg_xmm_m128, u08]) - - -addop("psllq", [bs8(0x0f), bs8(0x73), no_xmm_pref] + - rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08]) -addop("psllq", [bs8(0x0f), bs8(0x73), pref_66] + - rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("psllq", [bs8(0x0f), bs8(0xf3), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -addop("psllq", [bs8(0x0f), bs8(0xf3), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - - -addop("pslld", [bs8(0x0f), bs8(0x72), no_xmm_pref] + - rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08]) -addop("pslld", [bs8(0x0f), bs8(0x72), pref_66] + - rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("pslld", [bs8(0x0f), bs8(0xf2), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -addop("pslld", [bs8(0x0f), bs8(0xf2), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - - -addop("psllw", [bs8(0x0f), bs8(0x71), no_xmm_pref] + - rmmod(d6, rm_arg_mm) + [u08], [rm_arg_mm, u08]) -addop("psllw", [bs8(0x0f), bs8(0x71), pref_66] + - rmmod(d6, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - -addop("psllw", 
[bs8(0x0f), bs8(0xf1), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm), [mm_reg, rm_arg_mm]) -addop("psllw", [bs8(0x0f), bs8(0xf1), pref_66] + - rmmod(xmm_reg, rm_arg_xmm), [xmm_reg, rm_arg_xmm]) - -addop("pslldq", [bs8(0x0f), bs8(0x73), pref_66] + - rmmod(d7, rm_arg_xmm) + [u08], [rm_arg_xmm, u08]) - - -addop("pmaxub", [bs8(0x0f), bs8(0xde), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pmaxub", [bs8(0x0f), bs8(0xde), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pmaxuw", [bs8(0x0f), bs8(0x38), bs8(0x3e), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pmaxud", [bs8(0x0f), bs8(0x38), bs8(0x3f), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pmaxsw", [bs8(0x0f), bs8(0xee), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pmaxsw", [bs8(0x0f), bs8(0xee), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("pminub", [bs8(0x0f), bs8(0xda), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pminub", [bs8(0x0f), bs8(0xda), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pminuw", [bs8(0x0f), bs8(0x38), bs8(0x3a), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pminud", [bs8(0x0f), bs8(0x38), bs8(0x3b), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - - -addop("pcmpeqb", [bs8(0x0f), bs8(0x74), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pcmpeqb", [bs8(0x0f), bs8(0x74), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pcmpeqw", [bs8(0x0f), bs8(0x75), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pcmpeqw", [bs8(0x0f), bs8(0x75), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pcmpeqd", [bs8(0x0f), bs8(0x76), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pcmpeqd", [bs8(0x0f), bs8(0x76), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pcmpgtb", [bs8(0x0f), bs8(0x64), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pcmpgtb", [bs8(0x0f), bs8(0x64), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pcmpgtw", [bs8(0x0f), bs8(0x65), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pcmpgtw", [bs8(0x0f), bs8(0x65), pref_66] + - 
rmmod(xmm_reg, rm_arg_xmm)) - -addop("pcmpgtd", [bs8(0x0f), bs8(0x66), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("pcmpgtd", [bs8(0x0f), bs8(0x66), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("pcmpeqq", [bs8(0x0f), bs8(0x38), bs8(0x29), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("pcmpgtq", [bs8(0x0f), bs8(0x38), bs8(0x37), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpckhbw", [bs8(0x0f), bs8(0x68), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("punpckhbw", [bs8(0x0f), bs8(0x68), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpckhwd", [bs8(0x0f), bs8(0x69), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("punpckhwd", [bs8(0x0f), bs8(0x69), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpckhdq", [bs8(0x0f), bs8(0x6a), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("punpckhdq", [bs8(0x0f), bs8(0x6a), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpckhqdq", [bs8(0x0f), bs8(0x6d), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - - - -addop("punpcklbw", [bs8(0x0f), bs8(0x60), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("punpcklbw", [bs8(0x0f), bs8(0x60), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpcklwd", [bs8(0x0f), bs8(0x61), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("punpcklwd", [bs8(0x0f), bs8(0x61), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpckldq", [bs8(0x0f), bs8(0x62), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm)) -addop("punpckldq", [bs8(0x0f), bs8(0x62), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - -addop("punpcklqdq", [bs8(0x0f), bs8(0x6c), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - - -addop("unpckhps", [bs8(0x0f), bs8(0x15), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm)) -addop("unpckhpd", [bs8(0x0f), bs8(0x15), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - - -addop("unpcklps", [bs8(0x0f), bs8(0x14), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm)) -addop("unpcklpd", [bs8(0x0f), bs8(0x14), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) - - - -addop("pinsrb", [bs8(0x0f), bs8(0x3a), bs8(0x20), 
pref_66] + - rmmod(xmm_reg, rm_arg_reg_m08) + [u08]) -addop("pinsrd", [bs8(0x0f), bs8(0x3a), bs8(0x22), pref_66, bs_opmode32] + - rmmod(xmm_reg, rm_arg) + [u08]) -addop("pinsrq", [bs8(0x0f), bs8(0x3a), bs8(0x22), pref_66] + - rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08]) - -addop("pinsrw", [bs8(0x0f), bs8(0xc4), no_xmm_pref] + - rmmod(mm_reg, rm_arg_reg_m16) + [u08]) -addop("pinsrw", [bs8(0x0f), bs8(0xc4), pref_66] + - rmmod(xmm_reg, rm_arg_reg_m16) + [u08]) - - -addop("pextrb", [bs8(0x0f), bs8(0x3a), bs8(0x14), pref_66] + - rmmod(xmm_reg, rm_arg_reg_m08) + [u08], [rm_arg_reg_m08, xmm_reg, u08]) -addop("pextrd", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66, bs_opmode32] + - rmmod(xmm_reg, rm_arg) + [u08], [rm_arg, xmm_reg, u08]) -addop("pextrq", [bs8(0x0f), bs8(0x3a), bs8(0x16), pref_66] + - rmmod(xmm_reg, rm_arg_m64) + [bs_opmode64] + [u08], [rm_arg_m64, xmm_reg, u08]) - - -addop("pextrw", [bs8(0x0f), bs8(0x3a), bs8(0x15), pref_66] + - rmmod(xmm_reg, rm_arg_reg_m16) + [u08], [rm_arg_reg_m16, xmm_reg, u08]) -addop("pextrw", [bs8(0x0f), bs8(0xc5), no_xmm_pref] + - rmmod(rmreg, rm_arg_mm) + [u08], [rmreg, rm_arg_mm, u08]) -addop("pextrw", [bs8(0x0f), bs8(0xc5), pref_66] + - rmmod(rmreg, rm_arg_xmm) + [u08], [rmreg, rm_arg_xmm, u08]) - - -addop("sqrtpd", [bs8(0x0f), bs8(0x51), pref_66] + - rmmod(xmm_reg, rm_arg_xmm)) -addop("sqrtps", [bs8(0x0f), bs8(0x51), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm)) -addop("sqrtsd", [bs8(0x0f), bs8(0x51), pref_f2] + - rmmod(xmm_reg, rm_arg_xmm_m64)) -addop("sqrtss", [bs8(0x0f), bs8(0x51), pref_f3] + - rmmod(xmm_reg, rm_arg_xmm_m32)) - -addop("pmovmskb", [bs8(0x0f), bs8(0xd7), no_xmm_pref] + - rmmod(reg_modrm, rm_arg_mm_reg)) -addop("pmovmskb", [bs8(0x0f), bs8(0xd7), pref_66] + - rmmod(reg_modrm, rm_arg_xmm_reg)) - -addop("shufps", [bs8(0x0f), bs8(0xc6), no_xmm_pref] + - rmmod(xmm_reg, rm_arg_xmm) + [u08]) -addop("shufpd", [bs8(0x0f), bs8(0xc6), pref_66] + - rmmod(xmm_reg, rm_arg_xmm) + [u08]) - -addop("aesenc", [bs8(0x0f), 
bs8(0x38), bs8(0xdc), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("aesdec", [bs8(0x0f), bs8(0x38), bs8(0xde), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("aesenclast", [bs8(0x0f), bs8(0x38), bs8(0xdd), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) -addop("aesdeclast", [bs8(0x0f), bs8(0x38), bs8(0xdf), pref_66] + rmmod(xmm_reg, rm_arg_xmm)) - -addop("packsswb", [bs8(0x0f), bs8(0x63), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("packsswb", [bs8(0x0f), bs8(0x63), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("packssdw", [bs8(0x0f), bs8(0x6b), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("packssdw", [bs8(0x0f), bs8(0x6b), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("packuswb", [bs8(0x0f), bs8(0x67), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("packuswb", [bs8(0x0f), bs8(0x67), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("pmullw", [bs8(0x0f), bs8(0xd5), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pmullw", [bs8(0x0f), bs8(0xd5), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("pmulhuw", [bs8(0x0f), bs8(0xe4), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pmulhuw", [bs8(0x0f), bs8(0xe4), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("pmulhw", [bs8(0x0f), bs8(0xe5), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pmulhw", [bs8(0x0f), bs8(0xe5), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("pmuludq", [bs8(0x0f), bs8(0xf4), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pmuludq", [bs8(0x0f), bs8(0xf4), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - - -addop("psubusb", [bs8(0x0f), bs8(0xd8), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("psubusb", [bs8(0x0f), bs8(0xd8), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("psubusw", [bs8(0x0f), bs8(0xd9), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("psubusw", [bs8(0x0f), bs8(0xd9), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("psubsb", [bs8(0x0f), bs8(0xe8), no_xmm_pref] + - rmmod(mm_reg, 
rm_arg_mm_m64)) -addop("psubsb", [bs8(0x0f), bs8(0xe8), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("psubsw", [bs8(0x0f), bs8(0xe9), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("psubsw", [bs8(0x0f), bs8(0xe9), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - - -addop("paddusb", [bs8(0x0f), bs8(0xdc), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("paddusb", [bs8(0x0f), bs8(0xdc), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("paddusw", [bs8(0x0f), bs8(0xdd), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("paddusw", [bs8(0x0f), bs8(0xdd), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("paddsb", [bs8(0x0f), bs8(0xec), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("paddsb", [bs8(0x0f), bs8(0xec), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("paddsw", [bs8(0x0f), bs8(0xed), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("paddsw", [bs8(0x0f), bs8(0xed), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("pmaddwd", [bs8(0x0f), bs8(0xf5), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pmaddwd", [bs8(0x0f), bs8(0xf5), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("psadbw", [bs8(0x0f), bs8(0xf6), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("psadbw", [bs8(0x0f), bs8(0xf6), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("pavgb", [bs8(0x0f), bs8(0xe0), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pavgb", [bs8(0x0f), bs8(0xe0), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) -addop("pavgw", [bs8(0x0f), bs8(0xe3), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_m64)) -addop("pavgw", [bs8(0x0f), bs8(0xe3), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_m128)) - -addop("maskmovq", [bs8(0x0f), bs8(0xf7), no_xmm_pref] + - rmmod(mm_reg, rm_arg_mm_reg)) -addop("maskmovdqu", [bs8(0x0f), bs8(0xf7), pref_66] + - rmmod(xmm_reg, rm_arg_xmm_reg)) - -addop("emms", [bs8(0x0f), bs8(0x77)]) - -addop("endbr64", [pref_f3, bs8(0x0f), bs8(0x1e), bs8(0xfa)]) -addop("endbr32", [pref_f3, bs8(0x0f), 
bs8(0x1e), bs8(0xfb)]) - -mn_x86.bintree = factor_one_bit(mn_x86.bintree) -# mn_x86.bintree = factor_fields_all(mn_x86.bintree) -""" -mod reg r/m - XX XXX XXX - -""" - - -def print_size(e): - print(e, e.size) - return e diff --git a/miasm2/arch/x86/ctype.py b/miasm2/arch/x86/ctype.py deleted file mode 100644 index 5d1be0de..00000000 --- a/miasm2/arch/x86/ctype.py +++ /dev/null @@ -1,137 +0,0 @@ -from miasm2.core.objc import CLeafTypes, ObjCDecl, PADDING_TYPE_NAME -from miasm2.core.ctypesmngr import CTypeId, CTypePtr - - -class CTypeAMD64_unk(CLeafTypes): - """Define C types sizes/alignment for x86_64 architecture""" - - obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 1/align 1 - - obj_char = ObjCDecl("char", 1, 1) - obj_short = ObjCDecl("short", 2, 2) - obj_int = ObjCDecl("int", 4, 4) - obj_long = ObjCDecl("long", 8, 8) - - obj_uchar = ObjCDecl("uchar", 1, 1) - obj_ushort = ObjCDecl("ushort", 2, 2) - obj_uint = ObjCDecl("uint", 4, 4) - obj_ulong = ObjCDecl("ulong", 8, 8) - - obj_void = ObjCDecl("void", 1, 1) - - obj_enum = ObjCDecl("enum", 4, 4) - - obj_float = ObjCDecl("float", 4, 4) - obj_double = ObjCDecl("double", 8, 8) - obj_ldouble = ObjCDecl("ldouble", 16, 16) - - def __init__(self): - self.types = { - CTypeId(PADDING_TYPE_NAME): self.obj_pad, - - CTypeId('char'): self.obj_char, - CTypeId('short'): self.obj_short, - CTypeId('int'): self.obj_int, - CTypeId('void'): self.obj_void, - CTypeId('long',): self.obj_long, - CTypeId('float'): self.obj_float, - CTypeId('double'): self.obj_double, - - CTypeId('signed', 'char'): self.obj_char, - CTypeId('unsigned', 'char'): self.obj_uchar, - - CTypeId('short', 'int'): self.obj_short, - CTypeId('signed', 'short'): self.obj_short, - CTypeId('signed', 'short', 'int'): self.obj_short, - CTypeId('unsigned', 'short'): self.obj_ushort, - CTypeId('unsigned', 'short', 'int'): self.obj_ushort, - - CTypeId('unsigned', ): self.obj_uint, - CTypeId('unsigned', 'int'): self.obj_uint, - CTypeId('signed', 'int'): 
self.obj_int, - - CTypeId('long', 'int'): self.obj_long, - CTypeId('long', 'long'): self.obj_long, - CTypeId('long', 'long', 'int'): self.obj_long, - CTypeId('signed', 'long', 'long'): self.obj_long, - CTypeId('unsigned', 'long', 'long'): self.obj_ulong, - CTypeId('signed', 'long', 'long', 'int'): self.obj_long, - CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, - - CTypeId('signed', 'long'): self.obj_long, - CTypeId('unsigned', 'long'): self.obj_ulong, - CTypeId('signed', 'long', 'int'): self.obj_long, - CTypeId('unsigned', 'long', 'int'): self.obj_ulong, - - CTypeId('long', 'double'): self.obj_ldouble, - CTypePtr(CTypeId('void')): self.obj_ulong, - } - - - - - -class CTypeX86_unk(CLeafTypes): - """Define C types sizes/alignment for x86_32 architecture""" - - obj_pad = ObjCDecl(PADDING_TYPE_NAME, 1, 1) # __padding__ is size 1/align 1 - - obj_char = ObjCDecl("char", 1, 1) - obj_short = ObjCDecl("short", 2, 2) - obj_int = ObjCDecl("int", 4, 4) - obj_long = ObjCDecl("long", 4, 4) - - obj_uchar = ObjCDecl("uchar", 1, 1) - obj_ushort = ObjCDecl("ushort", 2, 2) - obj_uint = ObjCDecl("uint", 4, 4) - obj_ulong = ObjCDecl("ulong", 4, 4) - - obj_void = ObjCDecl("void", 1, 1) - - obj_enum = ObjCDecl("enum", 4, 4) - - obj_float = ObjCDecl("float", 4, 4) - obj_double = ObjCDecl("double", 8, 8) - obj_ldouble = ObjCDecl("ldouble", 16, 16) - - def __init__(self): - self.types = { - CTypeId(PADDING_TYPE_NAME): self.obj_pad, - - CTypeId('char'): self.obj_char, - CTypeId('short'): self.obj_short, - CTypeId('int'): self.obj_int, - CTypeId('void'): self.obj_void, - CTypeId('long',): self.obj_long, - CTypeId('float'): self.obj_float, - CTypeId('double'): self.obj_double, - - CTypeId('signed', 'char'): self.obj_char, - CTypeId('unsigned', 'char'): self.obj_uchar, - - CTypeId('short', 'int'): self.obj_short, - CTypeId('signed', 'short'): self.obj_short, - CTypeId('signed', 'short', 'int'): self.obj_short, - CTypeId('unsigned', 'short'): self.obj_ushort, - CTypeId('unsigned', 
'short', 'int'): self.obj_ushort, - - CTypeId('unsigned', ): self.obj_uint, - CTypeId('unsigned', 'int'): self.obj_uint, - CTypeId('signed', 'int'): self.obj_int, - - CTypeId('long', 'int'): self.obj_long, - CTypeId('long', 'long'): self.obj_long, - CTypeId('long', 'long', 'int'): self.obj_long, - CTypeId('signed', 'long', 'long'): self.obj_long, - CTypeId('unsigned', 'long', 'long'): self.obj_ulong, - CTypeId('signed', 'long', 'long', 'int'): self.obj_long, - CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, - - CTypeId('signed', 'long'): self.obj_long, - CTypeId('unsigned', 'long'): self.obj_ulong, - CTypeId('signed', 'long', 'int'): self.obj_long, - CTypeId('unsigned', 'long', 'int'): self.obj_ulong, - - CTypeId('long', 'double'): self.obj_ldouble, - CTypePtr(CTypeId('void')): self.obj_uint, - } diff --git a/miasm2/arch/x86/disasm.py b/miasm2/arch/x86/disasm.py deleted file mode 100644 index ecb1b8da..00000000 --- a/miasm2/arch/x86/disasm.py +++ /dev/null @@ -1,30 +0,0 @@ -from miasm2.core.asmblock import disasmEngine -from miasm2.arch.x86.arch import mn_x86 - - -cb_x86_funcs = [] - - -def cb_x86_disasm(*args, **kwargs): - for func in cb_x86_funcs: - func(*args, **kwargs) - - -class dis_x86(disasmEngine): - attrib = None - - def __init__(self, bs=None, **kwargs): - super(dis_x86, self).__init__(mn_x86, self.attrib, bs, **kwargs) - self.dis_block_callback = cb_x86_disasm - - -class dis_x86_16(dis_x86): - attrib = 16 - - -class dis_x86_32(dis_x86): - attrib = 32 - - -class dis_x86_64(dis_x86): - attrib = 64 diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py deleted file mode 100644 index 749069f6..00000000 --- a/miasm2/arch/x86/ira.py +++ /dev/null @@ -1,80 +0,0 @@ -#-*- coding:utf-8 -*- - -from miasm2.expression.expression import ExprAssign, ExprOp -from miasm2.ir.ir import AssignBlock -from miasm2.ir.analysis import ira -from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 - - -class ir_a_x86_16(ir_x86_16, ira): - - def 
__init__(self, loc_db=None): - ir_x86_16.__init__(self, loc_db) - self.ret_reg = self.arch.regs.AX - - def get_out_regs(self, _): - return set([self.ret_reg, self.sp]) - -class ir_a_x86_32(ir_x86_32, ir_a_x86_16): - - def __init__(self, loc_db=None): - ir_x86_32.__init__(self, loc_db) - self.ret_reg = self.arch.regs.EAX - - def sizeof_char(self): - return 8 - - def sizeof_short(self): - return 16 - - def sizeof_int(self): - return 32 - - def sizeof_long(self): - return 32 - - def sizeof_pointer(self): - return 32 - - -class ir_a_x86_64(ir_x86_64, ir_a_x86_16): - - def __init__(self, loc_db=None): - ir_x86_64.__init__(self, loc_db) - self.ret_reg = self.arch.regs.RAX - - def call_effects(self, ad, instr): - call_assignblk = AssignBlock( - [ - ExprAssign( - self.ret_reg, - ExprOp( - 'call_func_ret', - ad, - self.sp, - self.arch.regs.RCX, - self.arch.regs.RDX, - self.arch.regs.R8, - self.arch.regs.R9, - ) - ), - ExprAssign(self.sp, ExprOp('call_func_stack', ad, self.sp)), - ], - instr - ) - return [call_assignblk], [] - - def sizeof_char(self): - return 8 - - def sizeof_short(self): - return 16 - - def sizeof_int(self): - return 32 - - def sizeof_long(self): - return 64 - - def sizeof_pointer(self): - return 64 diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py deleted file mode 100644 index 14418902..00000000 --- a/miasm2/arch/x86/jit.py +++ /dev/null @@ -1,286 +0,0 @@ -from builtins import range -import logging - -from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 -from miasm2.jitter.codegen import CGen -from miasm2.core.locationdb import LocationDB -from miasm2.ir.translators.C import TranslatorC - -log = logging.getLogger('jit_x86') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - - -class x86_32_CGen(CGen): - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = 
self.ir_arch.arch.regs.RIP - self.translator = TranslatorC(self.ir_arch.loc_db) - self.init_arch_C() - - def gen_post_code(self, attrib, pc_value): - out = [] - if attrib.log_regs: - # Update PC for dump_gpregs - out.append("%s = %s;" % (self.C_PC, pc_value)) - out.append('dump_gpregs_32(jitcpu->cpu);') - return out - -class x86_64_CGen(x86_32_CGen): - def gen_post_code(self, attrib, pc_value): - out = [] - if attrib.log_regs: - # Update PC for dump_gpregs - out.append("%s = %s;" % (self.C_PC, pc_value)) - out.append('dump_gpregs_64(jitcpu->cpu);') - return out - -class jitter_x86_16(Jitter): - - C_Gen = x86_32_CGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) - self.vm.set_little_endian() - self.ir_arch.do_stk_segm = False - self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode - self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode - - def ir_archbloc_fix_regs_for_mode(self, irblock, attrib=64): - return self.orig_irbloc_fix_regs_for_mode(irblock, 64) - - def push_uint16_t(self, value): - self.cpu.SP -= self.ir_arch.sp.size // 8 - self.vm.set_u16(self.cpu.SP, value) - - def pop_uint16_t(self): - value = self.vm.get_u16(self.cpu.SP) - self.cpu.SP += self.ir_arch.sp.size // 8 - return value - - def get_stack_arg(self, index): - return self.vm.get_u16(self.cpu.SP + 4 * index) - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.IP = self.pc - - -class jitter_x86_32(Jitter): - - C_Gen = x86_32_CGen - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) - self.vm.set_little_endian() - self.ir_arch.do_stk_segm = False - - self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode - self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode - - def ir_archbloc_fix_regs_for_mode(self, irblock, attrib=64): - return 
self.orig_irbloc_fix_regs_for_mode(irblock, 64) - - def push_uint16_t(self, value): - self.cpu.ESP -= self.ir_arch.sp.size // 8 - self.vm.set_u16(self.cpu.ESP, value) - - def pop_uint16_t(self): - value = self.vm.get_u16(self.cpu.ESP) - self.cpu.ESP += self.ir_arch.sp.size // 8 - return value - - def push_uint32_t(self, value): - self.cpu.ESP -= self.ir_arch.sp.size // 8 - self.vm.set_u32(self.cpu.ESP, value) - - def pop_uint32_t(self): - value = self.vm.get_u32(self.cpu.ESP) - self.cpu.ESP += self.ir_arch.sp.size // 8 - return value - - def get_stack_arg(self, index): - return self.vm.get_u32(self.cpu.ESP + 4 * index) - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.EIP = self.pc - - # calling conventions - - # stdcall - @named_arguments - def func_args_stdcall(self, n_args): - ret_ad = self.pop_uint32_t() - args = [self.pop_uint32_t() for _ in range(n_args)] - return ret_ad, args - - def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None): - self.pc = self.cpu.EIP = ret_addr - if ret_value1 is not None: - self.cpu.EAX = ret_value1 - if ret_value2 is not None: - self.cpu.EDX = ret_value2 - - def func_prepare_stdcall(self, ret_addr, *args): - for arg in reversed(args): - self.push_uint32_t(arg) - self.push_uint32_t(ret_addr) - - get_arg_n_stdcall = get_stack_arg - - # cdecl - @named_arguments - def func_args_cdecl(self, n_args): - ret_ad = self.pop_uint32_t() - args = [self.get_stack_arg(i) for i in range(n_args)] - return ret_ad, args - - def func_ret_cdecl(self, ret_addr, ret_value1=None, ret_value2=None): - self.pc = self.cpu.EIP = ret_addr - if ret_value1 is not None: - self.cpu.EAX = ret_value1 - if ret_value2 is not None: - self.cpu.EDX = ret_value2 - - get_arg_n_cdecl = get_stack_arg - - # System V - func_args_systemv = func_args_cdecl - func_ret_systemv = func_ret_cdecl - func_prepare_systemv = func_prepare_stdcall - get_arg_n_systemv = get_stack_arg - - - # fastcall - @named_arguments - def 
func_args_fastcall(self, n_args): - args_regs = ['ECX', 'EDX'] - ret_ad = self.pop_uint32_t() - args = [] - for i in range(n_args): - args.append(self.get_arg_n_fastcall(i)) - return ret_ad, args - - def func_prepare_fastcall(self, ret_addr, *args): - args_regs = ['ECX', 'EDX'] - for i in range(min(len(args), len(args_regs))): - setattr(self.cpu, args_regs[i], args[i]) - remaining_args = args[len(args_regs):] - for arg in reversed(remaining_args): - self.push_uint32_t(arg) - self.push_uint32_t(ret_addr) - - def get_arg_n_fastcall(self, index): - args_regs = ['ECX', 'EDX'] - if index < len(args_regs): - return getattr(self.cpu, args_regs[index]) - return self.get_stack_arg(index - len(args_regs)) - - - -class jitter_x86_64(Jitter): - - C_Gen = x86_64_CGen - args_regs_systemv = ['RDI', 'RSI', 'RDX', 'RCX', 'R8', 'R9'] - args_regs_stdcall = ['RCX', 'RDX', 'R8', 'R9'] - - def __init__(self, *args, **kwargs): - sp = LocationDB() - Jitter.__init__(self, ir_x86_64(sp), *args, **kwargs) - self.vm.set_little_endian() - self.ir_arch.do_stk_segm = False - - self.orig_irbloc_fix_regs_for_mode = self.ir_arch.irbloc_fix_regs_for_mode - self.ir_arch.irbloc_fix_regs_for_mode = self.ir_archbloc_fix_regs_for_mode - - def ir_archbloc_fix_regs_for_mode(self, irblock, attrib=64): - return self.orig_irbloc_fix_regs_for_mode(irblock, 64) - - def push_uint64_t(self, value): - self.cpu.RSP -= self.ir_arch.sp.size // 8 - self.vm.set_u64(self.cpu.RSP, value) - - def pop_uint64_t(self): - value = self.vm.get_u64(self.cpu.RSP) - self.cpu.RSP += self.ir_arch.sp.size // 8 - return value - - def get_stack_arg(self, index): - return self.vm.get_u64(self.cpu.RSP + 8 * index) - - def init_run(self, *args, **kwargs): - Jitter.init_run(self, *args, **kwargs) - self.cpu.RIP = self.pc - - # calling conventions - - # stdcall - @named_arguments - def func_args_stdcall(self, n_args): - args_regs = self.args_regs_stdcall - ret_ad = self.pop_uint64_t() - args = [] - for i in range(min(n_args, 4)): - 
args.append(self.cpu.get_gpreg()[args_regs[i]]) - for i in range(max(0, n_args - 4)): - args.append(self.get_stack_arg(i)) - return ret_ad, args - - def func_prepare_stdcall(self, ret_addr, *args): - args_regs = self.args_regs_stdcall - for i in range(min(len(args), len(args_regs))): - setattr(self.cpu, args_regs[i], args[i]) - remaining_args = args[len(args_regs):] - for arg in reversed(remaining_args): - self.push_uint64_t(arg) - self.push_uint64_t(ret_addr) - - def func_ret_stdcall(self, ret_addr, ret_value=None): - self.pc = self.cpu.RIP = ret_addr - if ret_value is not None: - self.cpu.RAX = ret_value - return True - - # cdecl - func_args_cdecl = func_args_stdcall - func_ret_cdecl = func_ret_stdcall - func_prepare_cdecl = func_prepare_stdcall - - # System V - - def get_arg_n_systemv(self, index): - args_regs = self.args_regs_systemv - if index < len(args_regs): - return getattr(self.cpu, args_regs[index]) - return self.get_stack_arg(index - len(args_regs)) - - @named_arguments - def func_args_systemv(self, n_args): - ret_ad = self.pop_uint64_t() - args = [self.get_arg_n_systemv(index) for index in range(n_args)] - return ret_ad, args - - func_ret_systemv = func_ret_cdecl - - def func_prepare_systemv(self, ret_addr, *args): - args_regs = self.args_regs_systemv - self.push_uint64_t(ret_addr) - for i in range(min(len(args), len(args_regs))): - setattr(self.cpu, args_regs[i], args[i]) - remaining_args = args[len(args_regs):] - for arg in reversed(remaining_args): - self.push_uint64_t(arg) - - def syscall_args_systemv(self, n_args): - args = [self.cpu.RDI, self.cpu.RSI, self.cpu.RDX, self.cpu.R10, - self.cpu.R8, self.cpu.R9][:n_args] - return args - - def syscall_ret_systemv(self, value): - self.cpu.RAX = value diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py deleted file mode 100644 index b3f6534b..00000000 --- a/miasm2/arch/x86/regs.py +++ /dev/null @@ -1,454 +0,0 @@ -from builtins import range -from miasm2.expression.expression import ExprId -from 
miasm2.core.cpu import reg_info - - -IP = ExprId('IP', 16) -EIP = ExprId('EIP', 32) -RIP = ExprId('RIP', 64) -exception_flags = ExprId('exception_flags', 32) -interrupt_num = ExprId('interrupt_num', 8) - -# GP - - -regs08_str = ["AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"] + \ - ["R%dB" % (i + 8) for i in range(8)] -regs08_expr = [ExprId(x, 8) for x in regs08_str] - -regs08_64_str = ["AL", "CL", "DL", "BL", "SPL", "BPL", "SIL", "DIL"] + \ - ["R%dB" % (i + 8) for i in range(8)] -regs08_64_expr = [ExprId(x, 8) for x in regs08_64_str] - - -regs16_str = ["AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI"] + \ - ["R%dW" % (i + 8) for i in range(8)] -regs16_expr = [ExprId(x, 16) for x in regs16_str] - -regs32_str = ["EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"] + \ - ["R%dD" % (i + 8) for i in range(8)] -regs32_expr = [ExprId(x, 32) for x in regs32_str] - -regs64_str = ["RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI", - "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", - "RIP"] -regs64_expr = [ExprId(x, 64) for x in regs64_str] - - -regs_xmm_str = ["XMM%d" % i for i in range(16)] -regs_xmm_expr = [ExprId(x, 128) for x in regs_xmm_str] - -regs_mm_str = ["MM%d" % i for i in range(16)] -regs_mm_expr = [ExprId(x, 64) for x in regs_mm_str] - -regs_bnd_str = ["BND%d" % i for i in range(4)] -regs_bnd_expr = [ExprId(x, 128) for x in regs_bnd_str] - -gpregs08 = reg_info(regs08_str, regs08_expr) -gpregs08_64 = reg_info(regs08_64_str, regs08_64_expr) -gpregs16 = reg_info(regs16_str, regs16_expr) -gpregs32 = reg_info(regs32_str, regs32_expr) -gpregs64 = reg_info(regs64_str, regs64_expr) - -gpregs_xmm = reg_info(regs_xmm_str, regs_xmm_expr) -gpregs_mm = reg_info(regs_mm_str, regs_mm_expr) -gpregs_bnd = reg_info(regs_bnd_str, regs_bnd_expr) - -r08_eax = reg_info([regs08_str[0]], [regs08_expr[0]]) -r16_eax = reg_info([regs16_str[0]], [regs16_expr[0]]) -r32_eax = reg_info([regs32_str[0]], [regs32_expr[0]]) -r64_eax = reg_info([regs64_str[0]], [regs64_expr[0]]) - 
-r08_ecx = reg_info([regs08_str[1]], [regs08_expr[1]]) - -r_eax_all = reg_info( - [regs08_str[0], regs16_str[0], regs32_str[0], regs64_str[0]], - [regs08_expr[0], regs16_expr[0], regs32_expr[0], regs64_expr[0]]) -r_edx_all = reg_info( - [regs08_str[2], regs16_str[2], regs32_str[2], regs64_str[2]], - [regs08_expr[2], regs16_expr[2], regs32_expr[2], regs64_expr[2]]) - -r16_edx = reg_info([regs16_str[2]], [regs16_expr[2]]) - - -selectr_str = ["ES", "CS", "SS", "DS", "FS", "GS"] -selectr_expr = [ExprId(x, 16) for x in selectr_str] -segmreg = reg_info(selectr_str, selectr_expr) - -crregs32_str = ["CR%d" % i for i in range(8)] -crregs32_expr = [ExprId(x, 32) for x in crregs32_str] -crregs = reg_info(crregs32_str, crregs32_expr) - - -drregs32_str = ["DR%d" % i for i in range(8)] -drregs32_expr = [ExprId(x, 32) for x in drregs32_str] -drregs = reg_info(drregs32_str, drregs32_expr) - - -fltregs32_str = ["ST(%d)" % i for i in range(8)] -fltregs32_expr = [ExprId(x, 64) for x in fltregs32_str] -fltregs = reg_info(fltregs32_str, fltregs32_expr) - -r_st_all = reg_info(['ST'], - [ExprId('ST', 64)]) - -r_cs_all = reg_info(['CS'], - [ExprId('CS', 16)]) -r_ds_all = reg_info(['DS'], - [ExprId('DS', 16)]) -r_es_all = reg_info(['ES'], - [ExprId('ES', 16)]) -r_ss_all = reg_info(['SS'], - [ExprId('SS', 16)]) -r_fs_all = reg_info(['FS'], - [ExprId('FS', 16)]) -r_gs_all = reg_info(['GS'], - [ExprId('GS', 16)]) - - -AL = regs08_expr[0] -CL = regs08_expr[1] -DL = regs08_expr[2] -BL = regs08_expr[3] -AH = regs08_expr[4] -CH = regs08_expr[5] -DH = regs08_expr[6] -BH = regs08_expr[7] -R8B = regs08_expr[8] -R9B = regs08_expr[9] -R10B = regs08_expr[10] -R11B = regs08_expr[11] -R12B = regs08_expr[12] -R13B = regs08_expr[13] -R14B = regs08_expr[14] -R15B = regs08_expr[15] - -SPL = regs08_64_expr[4] -BPL = regs08_64_expr[5] -SIL = regs08_64_expr[6] -DIL = regs08_64_expr[7] - - -AX = regs16_expr[0] -CX = regs16_expr[1] -DX = regs16_expr[2] -BX = regs16_expr[3] -SP = regs16_expr[4] -BP = 
regs16_expr[5] -SI = regs16_expr[6] -DI = regs16_expr[7] -R8W = regs16_expr[8] -R9W = regs16_expr[9] -R10W = regs16_expr[10] -R11W = regs16_expr[11] -R12W = regs16_expr[12] -R13W = regs16_expr[13] -R14W = regs16_expr[14] -R15W = regs16_expr[15] - - -EAX = regs32_expr[0] -ECX = regs32_expr[1] -EDX = regs32_expr[2] -EBX = regs32_expr[3] -ESP = regs32_expr[4] -EBP = regs32_expr[5] -ESI = regs32_expr[6] -EDI = regs32_expr[7] -R8D = regs32_expr[8] -R9D = regs32_expr[9] -R10D = regs32_expr[10] -R11D = regs32_expr[11] -R12D = regs32_expr[12] -R13D = regs32_expr[13] -R14D = regs32_expr[14] -R15D = regs32_expr[15] - - -RAX = regs64_expr[0] -RCX = regs64_expr[1] -RDX = regs64_expr[2] -RBX = regs64_expr[3] -RSP = regs64_expr[4] -RBP = regs64_expr[5] -RSI = regs64_expr[6] -RDI = regs64_expr[7] -R8 = regs64_expr[8] -R9 = regs64_expr[9] -R10 = regs64_expr[10] -R11 = regs64_expr[11] -R12 = regs64_expr[12] -R13 = regs64_expr[13] -R14 = regs64_expr[14] -R15 = regs64_expr[15] - - -reg_zf = 'zf' -reg_nf = 'nf' -reg_pf = 'pf' -reg_of = 'of' -reg_cf = 'cf' -reg_tf = 'tf' -reg_if = 'i_f' -reg_df = 'df' -reg_af = 'af' -reg_iopl = 'iopl_f' -reg_nt = 'nt' -reg_rf = 'rf' -reg_vm = 'vm' -reg_ac = 'ac' -reg_vif = 'vif' -reg_vip = 'vip' -reg_id = 'i_d' - - -reg_es = "ES" -reg_cs = "CS" -reg_ss = "SS" -reg_ds = "DS" -reg_fs = "FS" -reg_gs = "GS" - -reg_dr0 = 'DR0' -reg_dr1 = 'DR1' -reg_dr2 = 'DR2' -reg_dr3 = 'DR3' -reg_dr4 = 'DR4' -reg_dr5 = 'DR5' -reg_dr6 = 'DR6' -reg_dr7 = 'DR7' - -reg_cr0 = 'CR0' -reg_cr1 = 'CR1' -reg_cr2 = 'CR2' -reg_cr3 = 'CR3' -reg_cr4 = 'CR4' -reg_cr5 = 'CR5' -reg_cr6 = 'CR6' -reg_cr7 = 'CR7' - -reg_mm0 = 'MM0' -reg_mm1 = 'MM1' -reg_mm2 = 'MM2' -reg_mm3 = 'MM3' -reg_mm4 = 'MM4' -reg_mm5 = 'MM5' -reg_mm6 = 'MM6' -reg_mm7 = 'MM7' - -reg_tsc = "tsc" - -reg_float_c0 = 'float_c0' -reg_float_c1 = 'float_c1' -reg_float_c2 = 'float_c2' -reg_float_c3 = 'float_c3' -reg_float_stack_ptr = "float_stack_ptr" -reg_float_control = 'reg_float_control' -reg_float_eip = 'reg_float_eip' 
-reg_float_cs = 'reg_float_cs' -reg_float_address = 'reg_float_address' -reg_float_ds = 'reg_float_ds' - - -dr0 = ExprId(reg_dr0, 32) -dr1 = ExprId(reg_dr1, 32) -dr2 = ExprId(reg_dr2, 32) -dr3 = ExprId(reg_dr3, 32) -dr4 = ExprId(reg_dr4, 32) -dr5 = ExprId(reg_dr5, 32) -dr6 = ExprId(reg_dr6, 32) -dr7 = ExprId(reg_dr7, 32) - -cr0 = ExprId(reg_cr0, 32) -cr1 = ExprId(reg_cr1, 32) -cr2 = ExprId(reg_cr2, 32) -cr3 = ExprId(reg_cr3, 32) -cr4 = ExprId(reg_cr4, 32) -cr5 = ExprId(reg_cr5, 32) -cr6 = ExprId(reg_cr6, 32) -cr7 = ExprId(reg_cr7, 32) - -mm0 = ExprId(reg_mm0, 64) -mm1 = ExprId(reg_mm1, 64) -mm2 = ExprId(reg_mm2, 64) -mm3 = ExprId(reg_mm3, 64) -mm4 = ExprId(reg_mm4, 64) -mm5 = ExprId(reg_mm5, 64) -mm6 = ExprId(reg_mm6, 64) -mm7 = ExprId(reg_mm7, 64) - -XMM0 = regs_xmm_expr[0] -XMM1 = regs_xmm_expr[1] -XMM2 = regs_xmm_expr[2] -XMM3 = regs_xmm_expr[3] -XMM4 = regs_xmm_expr[4] -XMM5 = regs_xmm_expr[5] -XMM6 = regs_xmm_expr[6] -XMM7 = regs_xmm_expr[7] -XMM8 = regs_xmm_expr[8] -XMM9 = regs_xmm_expr[9] -XMM10 = regs_xmm_expr[10] -XMM11 = regs_xmm_expr[11] -XMM12 = regs_xmm_expr[12] -XMM13 = regs_xmm_expr[13] -XMM14 = regs_xmm_expr[14] -XMM15 = regs_xmm_expr[15] - -# tmp1= ExprId(reg_tmp1) -zf = ExprId(reg_zf, size=1) -nf = ExprId(reg_nf, size=1) -pf = ExprId(reg_pf, size=1) -of = ExprId(reg_of, size=1) -cf = ExprId(reg_cf, size=1) -tf = ExprId(reg_tf, size=1) -i_f = ExprId(reg_if, size=1) -df = ExprId(reg_df, size=1) -af = ExprId(reg_af, size=1) -iopl = ExprId(reg_iopl, size=2) -nt = ExprId(reg_nt, size=1) -rf = ExprId(reg_rf, size=1) -vm = ExprId(reg_vm, size=1) -ac = ExprId(reg_ac, size=1) -vif = ExprId(reg_vif, size=1) -vip = ExprId(reg_vip, size=1) -i_d = ExprId(reg_id, size=1) - -ES = ExprId(reg_es, size=16) -CS = ExprId(reg_cs, size=16) -SS = ExprId(reg_ss, size=16) -DS = ExprId(reg_ds, size=16) -FS = ExprId(reg_fs, size=16) -GS = ExprId(reg_gs, size=16) - -tsc = ExprId(reg_tsc, size=64) - -float_c0 = ExprId(reg_float_c0, size=1) -float_c1 = ExprId(reg_float_c1, 
size=1) -float_c2 = ExprId(reg_float_c2, size=1) -float_c3 = ExprId(reg_float_c3, size=1) -float_stack_ptr = ExprId(reg_float_stack_ptr, size=3) -float_control = ExprId(reg_float_control, 16) -float_eip = ExprId(reg_float_eip, 32) -float_cs = ExprId(reg_float_cs, size=16) -float_address = ExprId(reg_float_address, 32) -float_ds = ExprId(reg_float_ds, size=16) - -float_st0 = ExprId("float_st0", 64) -float_st1 = ExprId("float_st1", 64) -float_st2 = ExprId("float_st2", 64) -float_st3 = ExprId("float_st3", 64) -float_st4 = ExprId("float_st4", 64) -float_st5 = ExprId("float_st5", 64) -float_st6 = ExprId("float_st6", 64) -float_st7 = ExprId("float_st7", 64) - - -float_list = [float_st0, float_st1, float_st2, float_st3, - float_st4, float_st5, float_st6, float_st7] - -float_replace = {fltregs32_expr[i]: float_list[i] for i in range(8)} -float_replace[r_st_all.expr[0]] = float_st0 - - -EAX_init = ExprId('EAX_init', 32) -EBX_init = ExprId('EBX_init', 32) -ECX_init = ExprId('ECX_init', 32) -EDX_init = ExprId('EDX_init', 32) -ESI_init = ExprId('ESI_init', 32) -EDI_init = ExprId('EDI_init', 32) -ESP_init = ExprId('ESP_init', 32) -EBP_init = ExprId('EBP_init', 32) - - -RAX_init = ExprId('RAX_init', 64) -RBX_init = ExprId('RBX_init', 64) -RCX_init = ExprId('RCX_init', 64) -RDX_init = ExprId('RDX_init', 64) -RSI_init = ExprId('RSI_init', 64) -RDI_init = ExprId('RDI_init', 64) -RSP_init = ExprId('RSP_init', 64) -RBP_init = ExprId('RBP_init', 64) - - -all_regs_ids = [ - AL, CL, DL, BL, AH, CH, DH, BH, - R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B, - SPL, BPL, SIL, DIL, - AX, CX, DX, BX, SP, BP, SI, DI, - R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W, - IP, - EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D, - EIP, - - RAX, RBX, RCX, RDX, RSP, RBP, RIP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15, - zf, nf, pf, of, cf, af, df, - tf, i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, - float_control, float_eip, float_cs, float_address, float_ds, - 
tsc, - ES, CS, SS, DS, FS, GS, - float_st0, float_st1, float_st2, float_st3, - float_st4, float_st5, float_st6, float_st7, - float_c0, float_c1, float_c2, float_c3, - cr0, cr3, - dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, - float_stack_ptr, - mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, - - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, - - - exception_flags, interrupt_num, -] + fltregs32_expr - -all_regs_ids_no_alias = [ - RAX, RBX, RCX, RDX, RSP, RBP, RIP, RSI, RDI, - R8, R9, R10, R11, R12, R13, R14, R15, - zf, nf, pf, of, cf, af, df, - tf, i_f, iopl, nt, rf, vm, ac, vif, vip, i_d, - float_control, float_eip, float_cs, float_address, float_ds, - tsc, - ES, CS, SS, DS, FS, GS, - float_st0, float_st1, float_st2, float_st3, - float_st4, float_st5, float_st6, float_st7, - float_c0, float_c1, float_c2, float_c3, - cr0, cr3, - dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, - float_stack_ptr, - mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, - XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, - - - exception_flags, interrupt_num, -] + fltregs32_expr - -attrib_to_regs = { - 16: regs16_expr + all_regs_ids_no_alias[all_regs_ids_no_alias.index(zf):] + [IP], - 32: regs32_expr + all_regs_ids_no_alias[all_regs_ids_no_alias.index(zf):] + [EIP], - 64: all_regs_ids_no_alias, -} - -all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) - -all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] - -regs_init = {} -for i, r in enumerate(all_regs_ids): - regs_init[r] = all_regs_ids_init[i] - -regs_flt_expr = [float_st0, float_st1, float_st2, float_st3, - float_st4, float_st5, float_st6, float_st7, - ] - -mRAX = {16: AX, 32: EAX, 64: RAX} -mRBX = {16: BX, 32: EBX, 64: RBX} -mRCX = {16: CX, 32: ECX, 64: RCX} -mRDX = {16: DX, 32: EDX, 64: RDX} -mRSI = {16: SI, 32: ESI, 64: RSI} -mRDI = {16: DI, 32: EDI, 64: RDI} -mRBP = {16: BP, 32: EBP, 64: RBP} -mRSP = {16: SP, 32: 
ESP, 64: RSP} -mRIP = {16: IP, 32: EIP, 64: RIP} diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py deleted file mode 100644 index bec09249..00000000 --- a/miasm2/arch/x86/sem.py +++ /dev/null @@ -1,5822 +0,0 @@ -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# - -from builtins import range - -from future.utils import viewitems - -import logging -import miasm2.expression.expression as m2_expr -from miasm2.expression.simplifications import expr_simp -from miasm2.arch.x86.regs import * -from miasm2.arch.x86.arch import mn_x86, repeat_mn, replace_regs -from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock -from miasm2.core.sembuilder import SemBuilder -from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO, EXCEPT_ILLEGAL_INSN, \ - EXCEPT_PRIV_INSN, EXCEPT_SOFT_BP, EXCEPT_INT_XX -import math -import struct - - -LOG_X86_SEM = logging.getLogger("x86_sem") -CONSOLE_HANDLER = logging.StreamHandler() -CONSOLE_HANDLER.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -LOG_X86_SEM.addHandler(CONSOLE_HANDLER) -LOG_X86_SEM.setLevel(logging.WARNING) - - -# SemBuilder context -ctx = {'mRAX': mRAX, - 'mRBX': mRBX, - 'mRCX': mRCX, - 'mRDX': mRDX, - 'zf': zf, - } -sbuild = SemBuilder(ctx) - - - -""" 
-http://www.emulators.com/docs/nx11_flags.htm - -CF(A+B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND NOT (A XOR B)) < 0) -CF(A-B) = (((A XOR B) XOR D) < 0) XOR (((A XOR D) AND (A XOR B)) < 0) - -OF(A+B) = ((A XOR D) AND NOT (A XOR B)) < 0 -OF(A-B) = ((A XOR D) AND (A XOR B)) < 0 -""" - - -# XXX TODO make default check against 0 or not 0 (same eq as in C) -def update_flag_zf_eq(a, b): - return [m2_expr.ExprAssign(zf, m2_expr.ExprOp("FLAG_EQ_CMP", a, b))] - - -def update_flag_zf(a): - return [ - m2_expr.ExprAssign( - zf, - m2_expr.ExprCond( - a, - m2_expr.ExprInt(0, zf.size), - m2_expr.ExprInt(1, zf.size) - ) - ) - ] - - -def update_flag_nf(arg): - return [ - m2_expr.ExprAssign( - nf, - m2_expr.ExprOp("FLAG_SIGN_SUB", arg, m2_expr.ExprInt(0, arg.size)) - ) - ] - - -def update_flag_pf(a): - return [m2_expr.ExprAssign(pf, - m2_expr.ExprOp('parity', - a & m2_expr.ExprInt(0xFF, a.size)))] - - -def update_flag_af(op1, op2, res): - return [m2_expr.ExprAssign(af, (op1 ^ op2 ^ res)[4:5])] - - -def update_flag_znp(a): - e = [] - e += update_flag_zf(a) - e += update_flag_nf(a) - e += update_flag_pf(a) - return e - - -def update_flag_np(result): - e = [] - e += update_flag_nf(result) - e += update_flag_pf(result) - return e - - -def null_flag_co(): - e = [] - e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, of.size))) - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprInt(0, cf.size))) - return e - - -def update_flag_arith(a): - e = [] - e += update_flag_znp(a) - return e - - -def update_flag_zfaddwc_eq(arg1, arg2, arg3): - return [m2_expr.ExprAssign(zf, m2_expr.ExprOp("FLAG_EQ_ADDWC", arg1, arg2, arg3))] - -def update_flag_zfsubwc_eq(arg1, arg2, arg3): - return [m2_expr.ExprAssign(zf, m2_expr.ExprOp("FLAG_EQ_SUBWC", arg1, arg2, arg3))] - - -def update_flag_arith_add_znp(arg1, arg2): - """ - Compute znp flags for (arg1 + arg2) - """ - e = [] - e += update_flag_zf_eq(arg1, -arg2) - e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUB", arg1, -arg2))] - e += 
update_flag_pf(arg1+arg2) - return e - - -def update_flag_arith_addwc_znp(arg1, arg2, arg3): - """ - Compute znp flags for (arg1 + arg2 + cf) - """ - e = [] - e += update_flag_zfaddwc_eq(arg1, arg2, arg3) - e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_ADDWC", arg1, arg2, arg3))] - e += update_flag_pf(arg1+arg2+arg3.zeroExtend(arg2.size)) - return e - - - - -def update_flag_arith_sub_znp(arg1, arg2): - """ - Compute znp flags for (arg1 - arg2) - """ - e = [] - e += update_flag_zf_eq(arg1, arg2) - e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUB", arg1, arg2))] - e += update_flag_pf(arg1 - arg2) - return e - - -def update_flag_arith_subwc_znp(arg1, arg2, arg3): - """ - Compute znp flags for (arg1 - (arg2 + cf)) - """ - e = [] - e += update_flag_zfsubwc_eq(arg1, arg2, arg3) - e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUBWC", arg1, arg2, arg3))] - e += update_flag_pf(arg1 - (arg2+arg3.zeroExtend(arg2.size))) - return e - - -def check_ops_msb(a, b, c): - if not a or not b or not c or a != b or a != c: - raise ValueError('bad ops size %s %s %s' % (a, b, c)) - - -def arith_flag(a, b, c): - a_s, b_s, c_s = a.size, b.size, c.size - check_ops_msb(a_s, b_s, c_s) - a_s, b_s, c_s = a.msb(), b.msb(), c.msb() - return a_s, b_s, c_s - -# checked: ok for adc add because b & c before +cf - - -def update_flag_add_cf(op1, op2, res): - "Compute cf in @res = @op1 + @op2" - #return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_SUB_CF", op1, -op2))] - return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_ADD_CF", op1, op2))] - - -def update_flag_add_of(op1, op2, res): - "Compute of in @res = @op1 + @op2" - return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_ADD_OF", op1, op2))] - - -# checked: ok for sbb add because b & c before +cf -def update_flag_sub_cf(op1, op2, res): - "Compote CF in @res = @op1 - @op2" - return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_SUB_CF", op1, op2))] - - -def update_flag_sub_of(op1, op2, res): - "Compote OF in @res = @op1 - 
@op2" - return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_SUB_OF", op1, op2))] - - -def update_flag_addwc_cf(op1, op2, op3): - "Compute cf in @res = @op1 + @op2 + @op3" - return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_ADDWC_CF", op1, op2, op3))] - - -def update_flag_addwc_of(op1, op2, op3): - "Compute of in @res = @op1 + @op2 + @op3" - return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_ADDWC_OF", op1, op2, op3))] - - - -def update_flag_subwc_cf(op1, op2, op3): - "Compute cf in @res = @op1 + @op2 + @op3" - return [m2_expr.ExprAssign(cf, m2_expr.ExprOp("FLAG_SUBWC_CF", op1, op2, op3))] - - -def update_flag_subwc_of(op1, op2, op3): - "Compute of in @res = @op1 + @op2 + @op3" - return [m2_expr.ExprAssign(of, m2_expr.ExprOp("FLAG_SUBWC_OF", op1, op2, op3))] - - - - -def update_flag_arith_add_co(x, y, z): - e = [] - e += update_flag_add_cf(x, y, z) - e += update_flag_add_of(x, y, z) - return e - - -def update_flag_arith_sub_co(x, y, z): - e = [] - e += update_flag_sub_cf(x, y, z) - e += update_flag_sub_of(x, y, z) - return e - - - - -def update_flag_arith_addwc_co(arg1, arg2, arg3): - e = [] - e += update_flag_addwc_cf(arg1, arg2, arg3) - e += update_flag_addwc_of(arg1, arg2, arg3) - return e - - -def update_flag_arith_subwc_co(arg1, arg2, arg3): - e = [] - e += update_flag_subwc_cf(arg1, arg2, arg3) - e += update_flag_subwc_of(arg1, arg2, arg3) - return e - - - -def set_float_cs_eip(instr): - e = [] - # XXX TODO check float updt - e.append(m2_expr.ExprAssign(float_eip, - m2_expr.ExprInt(instr.offset, float_eip.size))) - e.append(m2_expr.ExprAssign(float_cs, CS)) - return e - - -def mode2addrsize(mode): - """Returns the address size for a given @mode""" - - mode2size = {16:32, 32:32, 64:64} - if mode not in mode2size: - raise RuntimeError("Unknown size %s", mode) - return mode2size[mode] - - -def instr2addrsize(instr): - """Returns the address size for a given @instr""" - - return mode2addrsize(instr.mode) - - -def expraddr(mode, ptr): - """Returns memory address 
pointer with size according to current @mode""" - return ptr.zeroExtend(mode2addrsize(mode)) - - -def fix_mem_args_size(instr, *args): - out = [] - for arg in args: - if not arg.is_mem(): - out.append(arg) - continue - ptr = arg.ptr - size = arg.size - if ptr.is_op('segm'): - ptr = m2_expr.ExprOp( - 'segm', ptr.args[0], expraddr(instr.mode, ptr.args[1])) - else: - ptr = expraddr(instr.mode, ptr) - out.append(m2_expr.ExprMem(ptr, size)) - return out - - -def mem2double(instr, arg): - """ - Add float conversion if argument is an ExprMem - @arg: argument to transform - """ - if isinstance(arg, m2_expr.ExprMem): - if arg.size > 64: - # TODO: move to 80 bits - arg = m2_expr.ExprMem(expraddr(instr.mode, arg.ptr), size=64) - return m2_expr.ExprOp('sint_to_fp', arg.signExtend(64)) - else: - return arg - - -def float_implicit_st0(arg1, arg2): - """ - Generate full float operators if one argument is implicit (float_st0) - """ - if arg2 is None: - arg2 = arg1 - arg1 = float_st0 - return arg1, arg2 - - -def gen_jcc(ir, instr, cond, dst, jmp_if): - """ - Macro to generate jcc semantic - @ir: ir instance - @instr: instruction - @cond: condition of the jcc - @dst: the destination if jcc is taken - @jmp_if: jump if/notif cond - """ - - e = [] - meip = mRIP[ir.IRDst.size] - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, dst.size) - - if jmp_if: - dstA, dstB = dst, loc_next_expr - else: - dstA, dstB = loc_next_expr, dst - mn_dst = m2_expr.ExprCond(cond, - dstA.zeroExtend(ir.IRDst.size), - dstB.zeroExtend(ir.IRDst.size)) - e.append(m2_expr.ExprAssign(meip, mn_dst)) - e.append(m2_expr.ExprAssign(ir.IRDst, mn_dst)) - return e, [] - - -def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if): - """Generate fcmov - @ir: ir instance - @instr: instruction instance - @cond: condition - @mov_if: invert condition if False""" - - loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_skip = ir.get_next_loc_key(instr) - loc_skip_expr = 
m2_expr.ExprLoc(loc_skip, ir.IRDst.size) - if mov_if: - dstA, dstB = loc_do_expr, loc_skip_expr - else: - dstA, dstB = loc_skip_expr, loc_do_expr - e = [] - e_do, extra_irs = [m2_expr.ExprAssign(arg1, arg2)], [] - e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) - e.append(m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] - - -def gen_cmov(ir, instr, cond, dst, src, mov_if): - """Generate cmov - @ir: ir instance - @instr: instruction instance - @cond: condition - @mov_if: invert condition if False""" - - loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_skip = ir.get_next_loc_key(instr) - loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) - if mov_if: - dstA, dstB = loc_do_expr, loc_skip_expr - else: - dstA, dstB = loc_skip_expr, loc_do_expr - e = [m2_expr.ExprAssign(dst, dst)] - e_do, extra_irs = mov(ir, instr, dst, src) - e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) - e.append(m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] - - -def mov(_, instr, dst, src): - if dst in [ES, CS, SS, DS, FS, GS]: - src = src[:dst.size] - if src in [ES, CS, SS, DS, FS, GS]: - src = src.zeroExtend(dst.size) - e = [m2_expr.ExprAssign(dst, src)] - return e, [] - - -def movq(_, instr, dst, src): - src_final = (src.zeroExtend(dst.size) - if dst.size >= src.size else - src[:dst.size]) - return [m2_expr.ExprAssign(dst, src_final)], [] - - -@sbuild.parse -def xchg(arg1, arg2): - arg1 = arg2 - arg2 = arg1 - - - -def movzx(_, instr, dst, src): - e = [m2_expr.ExprAssign(dst, src.zeroExtend(dst.size))] - return e, [] - - -def movsx(_, instr, dst, src): - e = [m2_expr.ExprAssign(dst, src.signExtend(dst.size))] - return e, [] - - -def lea(_, instr, dst, src): - ptr = src.ptr - if src.is_mem_segm(): - # Do not use segmentation here - ptr = ptr.args[1] - - if ptr.size > dst.size: - ptr = ptr[:dst.size] - e = 
[m2_expr.ExprAssign(dst, ptr.zeroExtend(dst.size))] - return e, [] - - -def add(_, instr, dst, src): - e = [] - - result = dst + src - - e += update_flag_arith_add_znp(dst, src) - e += update_flag_arith_add_co(dst, src, result) - e += update_flag_af(dst, src, result) - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def xadd(_, instr, dst, src): - e = [] - - result = dst + src - e += update_flag_arith_add_znp(dst, src) - e += update_flag_arith_add_co(src, dst, result) - e += update_flag_af(dst, src, result) - if dst != src: - e.append(m2_expr.ExprAssign(src, dst)) - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def adc(_, instr, dst, src): - e = [] - - arg1 = dst - arg2 = src - result = arg1 + (arg2 + cf.zeroExtend(src.size)) - - e += update_flag_arith_addwc_znp(arg1, arg2, cf) - e += update_flag_arith_addwc_co(arg1, arg2, cf) - e += update_flag_af(arg1, arg2, result) - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def sub(_, instr, dst, src): - e = [] - arg1, arg2 = dst, src - result = dst - src - - e += update_flag_arith_sub_znp(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2, result) - e += update_flag_af(dst, src, result) - - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - -# a-(b+cf) - - -def sbb(_, instr, dst, src): - e = [] - arg1 = dst - arg2 = src - result = arg1 - (arg2 + cf.zeroExtend(src.size)) - - e += update_flag_arith_subwc_znp(arg1, arg2, cf) - e += update_flag_af(arg1, arg2, result) - e += update_flag_arith_subwc_co(arg1, arg2, cf) - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def neg(_, instr, src): - e = [] - dst = m2_expr.ExprInt(0, src.size) - arg1, arg2 = dst, src - result = arg1 - arg2 - - e += update_flag_arith_sub_znp(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2, result) - e += update_flag_af(arg1, arg2, result) - e.append(m2_expr.ExprAssign(src, result)) - return (e, []) - - -def l_not(_, instr, dst): - e = [] - result = (~dst) - 
e.append(m2_expr.ExprAssign(dst, result)) - return (e, []) - - -def l_cmp(_, instr, dst, src): - e = [] - arg1, arg2 = dst, src - result = dst - src - - e += update_flag_arith_sub_znp(arg1, arg2) - e += update_flag_arith_sub_co(arg1, arg2, result) - e += update_flag_af(dst, src, result) - return (e, []) - - -def xor(_, instr, dst, src): - e = [] - result = dst ^ src - e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ_CMP', dst, src))] - e += update_flag_np(result) - e += null_flag_co() - e.append(m2_expr.ExprAssign(dst, result)) - return (e, []) - - -def pxor(_, instr, dst, src): - e = [] - result = dst ^ src - e.append(m2_expr.ExprAssign(dst, result)) - return (e, []) - - -def l_or(_, instr, dst, src): - e = [] - result = dst | src - e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ', dst | src))] - e += update_flag_np(result) - e += null_flag_co() - e.append(m2_expr.ExprAssign(dst, result)) - return (e, []) - - -def l_and(_, instr, dst, src): - e = [] - result = dst & src - e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ_AND', dst, src))] - e += update_flag_np(result) - e += null_flag_co() - - e.append(m2_expr.ExprAssign(dst, result)) - return (e, []) - - -def l_test(_, instr, dst, src): - e = [] - result = dst & src - - e += [m2_expr.ExprAssign(zf, m2_expr.ExprOp('FLAG_EQ_CMP', result, m2_expr.ExprInt(0, result.size)))] - e += [m2_expr.ExprAssign(nf, m2_expr.ExprOp("FLAG_SIGN_SUB", result, m2_expr.ExprInt(0, result.size)))] - e += update_flag_pf(result) - e += null_flag_co() - - return (e, []) - - -def get_shift(dst, src): - if isinstance(src, m2_expr.ExprInt): - src = m2_expr.ExprInt(int(src), dst.size) - else: - src = src.zeroExtend(dst.size) - if dst.size == 64: - shift = src & m2_expr.ExprInt(63, src.size) - else: - shift = src & m2_expr.ExprInt(31, src.size) - shift = expr_simp(shift) - return shift - - -def _rotate_tpl(ir, instr, dst, src, op, left=False): - '''Template to generate a rotater with operation @op - A temporary basic block is 
generated to handle 0-rotate - @op: operation to execute - @left (optional): indicates a left rotate if set, default is False - ''' - # Compute results - shifter = get_shift(dst, src) - res = m2_expr.ExprOp(op, dst, shifter) - - # CF is computed with 1-less round than `res` - new_cf = m2_expr.ExprOp( - op, dst, shifter - m2_expr.ExprInt(1, size=shifter.size)) - new_cf = new_cf.msb() if left else new_cf[:1] - - # OF is defined only for @b == 1 - new_of = m2_expr.ExprCond(src - m2_expr.ExprInt(1, size=src.size), - m2_expr.ExprInt(0, size=of.size), - res.msb() ^ new_cf if left else (dst ^ res).msb()) - - # Build basic blocks - e_do = [m2_expr.ExprAssign(cf, new_cf), - m2_expr.ExprAssign(of, new_of), - m2_expr.ExprAssign(dst, res) - ] - e = [m2_expr.ExprAssign(dst, dst)] - # Don't generate conditional shifter on constant - if isinstance(shifter, m2_expr.ExprInt): - if int(shifter) != 0: - return (e_do, []) - else: - return (e, []) - loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_skip = ir.get_next_loc_key(instr) - loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) - e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) - e.append(m2_expr.ExprAssign( - ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) - return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) - - -def l_rol(ir, instr, dst, src): - return _rotate_tpl(ir, instr, dst, src, '<<<', left=True) - - -def l_ror(ir, instr, dst, src): - return _rotate_tpl(ir, instr, dst, src, '>>>') - - -def rotate_with_carry_tpl(ir, instr, op, dst, src): - # Compute results - shifter = get_shift(dst, src).zeroExtend(dst.size + 1) - result = m2_expr.ExprOp(op, m2_expr.ExprCompose(dst, cf), shifter) - - new_cf = result[dst.size:dst.size +1] - new_dst = result[:dst.size] - - result_trunc = result[:dst.size] - if op == '<<<': - of_value = result_trunc.msb() ^ new_cf - else: - of_value = (dst ^ result_trunc).msb() - # OF is defined only for @b == 1 - new_of = m2_expr.ExprCond(src - 
m2_expr.ExprInt(1, size=src.size), - m2_expr.ExprInt(0, size=of.size), - of_value) - - - # Build basic blocks - e_do = [m2_expr.ExprAssign(cf, new_cf), - m2_expr.ExprAssign(of, new_of), - m2_expr.ExprAssign(dst, new_dst) - ] - e = [m2_expr.ExprAssign(dst, dst)] - # Don't generate conditional shifter on constant - if isinstance(shifter, m2_expr.ExprInt): - if int(shifter) != 0: - return (e_do, []) - else: - return (e, []) - loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_skip = ir.get_next_loc_key(instr) - loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) - e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) - e.append(m2_expr.ExprAssign( - ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) - return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) - -def rcl(ir, instr, dst, src): - return rotate_with_carry_tpl(ir, instr, '<<<', dst, src) - -def rcr(ir, instr, dst, src): - return rotate_with_carry_tpl(ir, instr, '>>>', dst, src) - - -def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False, - custom_of=None): - """Template to generate a shifter with operation @op - A temporary basic block is generated to handle 0-shift - @op: operation to execute - @c (optional): if set, instruction has a bit provider - @op_inv (optional): opposite operation of @op. 
Must be provided if @c - @left (optional): indicates a left shift if set, default is False - @custom_of (optional): if set, override the computed value of OF - """ - if c is not None: - shifter = get_shift(a, c) - else: - shifter = get_shift(a, b) - - res = m2_expr.ExprOp(op, a, shifter) - cf_from_dst = m2_expr.ExprOp(op, a, - (shifter - m2_expr.ExprInt(1, a.size))) - cf_from_dst = cf_from_dst.msb() if left else cf_from_dst[:1] - - new_cf = cf_from_dst - i1 = m2_expr.ExprInt(1, size=a.size) - if c is not None: - # There is a source for new bits - isize = m2_expr.ExprInt(a.size, size=a.size) - mask = m2_expr.ExprOp(op_inv, i1, (isize - shifter)) - i1 - - # An overflow can occurred, emulate the 'undefined behavior' - # Overflow behavior if (shift / size % 2) - base_cond_overflow = shifter if left else ( - shifter - m2_expr.ExprInt(1, size=shifter.size)) - cond_overflow = base_cond_overflow & m2_expr.ExprInt(a.size, shifter.size) - if left: - # Overflow occurs one round before right - mask = m2_expr.ExprCond(cond_overflow, mask, ~mask) - else: - mask = m2_expr.ExprCond(cond_overflow, ~mask, mask) - - # Build res with dst and src - res = ((m2_expr.ExprOp(op, a, shifter) & mask) | - (m2_expr.ExprOp(op_inv, b, (isize - shifter)) & ~mask)) - - # Overflow case: cf come from src (bit number shifter % size) - cf_from_src = m2_expr.ExprOp(op, b, - (shifter.zeroExtend(b.size) & - m2_expr.ExprInt(a.size - 1, b.size)) - i1) - cf_from_src = cf_from_src.msb() if left else cf_from_src[:1] - new_cf = m2_expr.ExprCond(cond_overflow, cf_from_src, cf_from_dst) - - # Overflow flag, only occurred when shifter is equal to 1 - if custom_of is None: - value_of = a.msb() ^ a[-2:-1] if left else b[:1] ^ a.msb() - else: - value_of = custom_of - - # Build basic blocks - e_do = [ - m2_expr.ExprAssign(cf, new_cf), - m2_expr.ExprAssign(of, m2_expr.ExprCond(shifter - i1, - m2_expr.ExprInt(0, of.size), - value_of)), - m2_expr.ExprAssign(a, res), - ] - e_do += update_flag_znp(res) - e = 
[m2_expr.ExprAssign(a, a)] - # Don't generate conditional shifter on constant - if isinstance(shifter, m2_expr.ExprInt): - if int(shifter) != 0: - return (e_do, []) - else: - return (e, []) - loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_skip = ir.get_next_loc_key(instr) - loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) - e_do.append(m2_expr.ExprAssign(ir.IRDst, loc_skip_expr)) - e.append(m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, - loc_skip_expr))) - return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] - - -def sar(ir, instr, dst, src): - # Fixup OF, always cleared if src != 0 - i0 = m2_expr.ExprInt(0, size=of.size) - return _shift_tpl("a>>", ir, instr, dst, src, custom_of=i0) - - -def shr(ir, instr, dst, src): - return _shift_tpl(">>", ir, instr, dst, src, custom_of=dst.msb()) - - -def shrd(ir, instr, dst, src1, src2): - return _shift_tpl(">>>", ir, instr, dst, src1, src2, "<<<") - - -def shl(ir, instr, dst, src): - return _shift_tpl("<<", ir, instr, dst, src, left=True) - - -def shld(ir, instr, dst, src1, src2): - return _shift_tpl("<<<", ir, instr, dst, src1, src2, ">>>", left=True) - - -# XXX todo ### -def cmc(_, instr): - e = [m2_expr.ExprAssign(cf, m2_expr.ExprCond(cf, m2_expr.ExprInt(0, cf.size), - m2_expr.ExprInt(1, cf.size)))] - return e, [] - - -def clc(_, instr): - e = [m2_expr.ExprAssign(cf, m2_expr.ExprInt(0, cf.size))] - return e, [] - - -def stc(_, instr): - e = [m2_expr.ExprAssign(cf, m2_expr.ExprInt(1, cf.size))] - return e, [] - - -def cld(_, instr): - e = [m2_expr.ExprAssign(df, m2_expr.ExprInt(0, df.size))] - return e, [] - - -def std(_, instr): - e = [m2_expr.ExprAssign(df, m2_expr.ExprInt(1, df.size))] - return e, [] - - -def cli(_, instr): - e = [m2_expr.ExprAssign(i_f, m2_expr.ExprInt(0, i_f.size))] - return e, [] - - -def sti(_, instr): - e = [m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))] - return e, [] - - -def inc(_, instr, dst): - e = [] - src = 
m2_expr.ExprInt(1, dst.size) - arg1, arg2 = dst, src - result = dst + src - - e += update_flag_arith_add_znp(arg1, arg2) - e += update_flag_af(arg1, arg2, result) - e += update_flag_add_of(arg1, arg2, result) - - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def dec(_, instr, dst): - e = [] - src = m2_expr.ExprInt(1, dst.size) - arg1, arg2 = dst, src - result = dst - src - - e += update_flag_arith_sub_znp(arg1, arg2) - e += update_flag_af(arg1, arg2, result) - e += update_flag_sub_of(arg1, arg2, result) - - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def push_gen(ir, instr, src, size): - e = [] - if not size in [16, 32, 64]: - raise ValueError('bad size stacker!') - if src.size < size: - src = src.zeroExtend(size) - off_size = src.size - - sp = mRSP[instr.mode] - new_sp = sp - m2_expr.ExprInt(off_size // 8, sp.size) - e.append(m2_expr.ExprAssign(sp, new_sp)) - if ir.do_stk_segm: - new_sp = ir.gen_segm_expr(SS, new_sp) - e.append(m2_expr.ExprAssign(ir.ExprMem(new_sp, off_size), - src)) - return e, [] - - -def push(ir, instr, src): - return push_gen(ir, instr, src, instr.mode) - - -def pushw(ir, instr, src): - return push_gen(ir, instr, src, 16) - - -def pop_gen(ir, instr, src, size): - e = [] - if not size in [16, 32, 64]: - raise ValueError('bad size stacker!') - - sp = mRSP[instr.mode] - new_sp = sp + m2_expr.ExprInt(src.size // 8, sp.size) - # don't generate ESP incrementation on POP ESP - if src != ir.sp: - e.append(m2_expr.ExprAssign(sp, new_sp)) - # XXX FIX XXX for pop [esp] - if isinstance(src, m2_expr.ExprMem): - src = expr_simp(src.replace_expr({sp: new_sp})) - result = sp - if ir.do_stk_segm: - result = ir.gen_segm_expr(SS, result) - - e.append(m2_expr.ExprAssign(src, ir.ExprMem(result, src.size))) - return e, [] - - -def pop(ir, instr, src): - return pop_gen(ir, instr, src, instr.mode) - - -def popw(ir, instr, src): - return pop_gen(ir, instr, src, 16) - - -def sete(_, instr, dst): - e = [] - e.append( - 
m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_EQ", zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setnz(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_EQ", ~zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setl(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_S<", nf, of).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setg(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_S>", nf, of, zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setge(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_S>=", nf, of).zeroExtend(dst.size), - ) - ) - return e, [] - - -def seta(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U>", cf, zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setae(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U>=", cf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setb(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U<", cf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setbe(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U<=", cf, zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setns(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_NEG", ~nf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def sets(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_NEG", nf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def seto(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - of.zeroExtend(dst.size) - ) - ) - return e, [] - - -def setp(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - pf.zeroExtend(dst.size) - ) - ) - return e, [] - - -def setnp(_, instr, dst): - e = [] - e.append( - 
m2_expr.ExprAssign( - dst, - m2_expr.ExprCond( - pf, - m2_expr.ExprInt(0, dst.size), - m2_expr.ExprInt(1, dst.size) - ) - ) - ) - return e, [] - - -def setle(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_S<=", nf, of, zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setna(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U<=", cf, zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setnbe(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U>", cf, zf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setno(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprCond( - of, - m2_expr.ExprInt(0, dst.size), - m2_expr.ExprInt(1, dst.size) - ) - ) - ) - return e, [] - - -def setnb(_, instr, dst): - e = [] - e.append( - m2_expr.ExprAssign( - dst, - m2_expr.ExprOp("CC_U>=", cf).zeroExtend(dst.size), - ) - ) - return e, [] - - -def setalc(_, instr): - dst = mRAX[instr.mode][0:8] - e = [] - e.append( - m2_expr.ExprAssign(dst, m2_expr.ExprCond(cf, m2_expr.ExprInt(0xff, dst.size), - m2_expr.ExprInt(0, dst.size)))) - return e, [] - - -def bswap(_, instr, dst): - e = [] - if dst.size == 16: - result = m2_expr.ExprCompose(dst[8:16], dst[:8]) - elif dst.size == 32: - result = m2_expr.ExprCompose( - dst[24:32], dst[16:24], dst[8:16], dst[:8]) - elif dst.size == 64: - result = m2_expr.ExprCompose(dst[56:64], dst[48:56], dst[40:48], dst[32:40], - dst[24:32], dst[16:24], dst[8:16], dst[:8]) - else: - raise ValueError('the size DOES matter') - e.append(m2_expr.ExprAssign(dst, result)) - return e, [] - - -def cmps(ir, instr, size): - loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - src1 = mRSI[instr.mode][:instr.v_admode()] - src2 = mRDI[instr.mode][:instr.v_admode()] - 
- if ir.do_str_segm: - if instr.additional_info.g2.value: - raise NotImplementedError("add segm support") - src1_sgm = ir.gen_segm_expr(DS, src1) - src2_sgm = ir.gen_segm_expr(ES, src2) - else: - src1_sgm = src1 - src2_sgm = src2 - - offset = m2_expr.ExprInt(size // 8, src1.size) - - e, _ = l_cmp(ir, instr, - ir.ExprMem(src1_sgm, size), - ir.ExprMem(src2_sgm, size)) - - - e0 = [] - e0.append(m2_expr.ExprAssign(src1, src1 + offset)) - e0.append(m2_expr.ExprAssign(src2, src2 + offset)) - e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) - - e1 = [] - e1.append(m2_expr.ExprAssign(src1, src1 - offset)) - e1.append(m2_expr.ExprAssign(src2, src2 - offset)) - e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) - - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) - return e, [e0, e1] - - -def scas(ir, instr, size): - loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - src = mRDI[instr.mode][:instr.v_admode()] - - if ir.do_str_segm: - if instr.additional_info.g2.value: - raise NotImplementedError("add segm support") - src_sgm = ir.gen_segm_expr(ES, src) - - else: - src_sgm = src - - offset = m2_expr.ExprInt(size // 8, src.size) - e, extra = l_cmp(ir, instr, - mRAX[instr.mode][:size], - ir.ExprMem(src_sgm, size)) - - e0 = [] - e0.append(m2_expr.ExprAssign(src, src + offset)) - - e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) - - e1 = [] - e1.append(m2_expr.ExprAssign(src, src - offset)) - e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) - - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) - - return e, [e0, 
e1] - - -def compose_eflag(s=32): - args = [] - - args = [cf, m2_expr.ExprInt(1, 1), pf, m2_expr.ExprInt(0, 1), af, - m2_expr.ExprInt(0, 1), zf, nf, tf, i_f, df, of, iopl] - - if s == 32: - args += [nt, m2_expr.ExprInt(0, 1), rf, vm, ac, vif, vip, i_d] - elif s == 16: - args += [nt, m2_expr.ExprInt(0, 1)] - else: - raise ValueError('unk size') - if s == 32: - args.append(m2_expr.ExprInt(0, 10)) - return m2_expr.ExprCompose(*args) - - -def pushfd(ir, instr): - return push(ir, instr, compose_eflag()) - - -def pushfq(ir, instr): - return push(ir, instr, compose_eflag().zeroExtend(64)) - - -def pushfw(ir, instr): - return pushw(ir, instr, compose_eflag(16)) - - -def popfd(ir, instr): - tmp = ir.ExprMem(mRSP[instr.mode], 32) - e = [] - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprSlice(tmp, 0, 1))) - e.append(m2_expr.ExprAssign(pf, m2_expr.ExprSlice(tmp, 2, 3))) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprSlice(tmp, 4, 5))) - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprSlice(tmp, 6, 7))) - e.append(m2_expr.ExprAssign(nf, m2_expr.ExprSlice(tmp, 7, 8))) - e.append(m2_expr.ExprAssign(tf, m2_expr.ExprSlice(tmp, 8, 9))) - e.append(m2_expr.ExprAssign(i_f, m2_expr.ExprSlice(tmp, 9, 10))) - e.append(m2_expr.ExprAssign(df, m2_expr.ExprSlice(tmp, 10, 11))) - e.append(m2_expr.ExprAssign(of, m2_expr.ExprSlice(tmp, 11, 12))) - e.append(m2_expr.ExprAssign(iopl, m2_expr.ExprSlice(tmp, 12, 14))) - e.append(m2_expr.ExprAssign(nt, m2_expr.ExprSlice(tmp, 14, 15))) - e.append(m2_expr.ExprAssign(rf, m2_expr.ExprSlice(tmp, 16, 17))) - e.append(m2_expr.ExprAssign(vm, m2_expr.ExprSlice(tmp, 17, 18))) - e.append(m2_expr.ExprAssign(ac, m2_expr.ExprSlice(tmp, 18, 19))) - e.append(m2_expr.ExprAssign(vif, m2_expr.ExprSlice(tmp, 19, 20))) - e.append(m2_expr.ExprAssign(vip, m2_expr.ExprSlice(tmp, 20, 21))) - e.append(m2_expr.ExprAssign(i_d, m2_expr.ExprSlice(tmp, 21, 22))) - e.append(m2_expr.ExprAssign(mRSP[instr.mode], - mRSP[instr.mode] + m2_expr.ExprInt(instr.mode // 8, mRSP[instr.mode].size))) - 
e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprCond(m2_expr.ExprSlice(tmp, 8, 9), - m2_expr.ExprInt( - EXCEPT_SOFT_BP, 32), - exception_flags - ) - ) - ) - return e, [] - - -def _tpl_eflags(tmp): - """Extract eflags from @tmp - @tmp: Expr instance with a size >= 16 - """ - return [m2_expr.ExprAssign(dest, tmp[base:base + dest.size]) - for base, dest in ((0, cf), (2, pf), (4, af), (6, zf), (7, nf), - (8, tf), (9, i_f), (10, df), (11, of), - (12, iopl), (14, nt))] - - -def popfw(ir, instr): - tmp = ir.ExprMem(mRSP[instr.mode], 16) - e = _tpl_eflags(tmp) - e.append( - m2_expr.ExprAssign(mRSP[instr.mode], mRSP[instr.mode] + m2_expr.ExprInt(2, mRSP[instr.mode].size))) - return e, [] - -pa_regs = [ - mRAX, mRCX, - mRDX, mRBX, - mRSP, mRBP, - mRSI, mRDI -] - - -def pusha_gen(ir, instr, size): - e = [] - cur_sp = mRSP[instr.mode] - for i, reg in enumerate(pa_regs): - stk_ptr = cur_sp + m2_expr.ExprInt(-(size // 8) * (i + 1), instr.mode) - e.append(m2_expr.ExprAssign(ir.ExprMem(stk_ptr, size), reg[size])) - e.append(m2_expr.ExprAssign(cur_sp, stk_ptr)) - return e, [] - - -def pusha(ir, instr): - return pusha_gen(ir, instr, 16) - - -def pushad(ir, instr): - return pusha_gen(ir, instr, 32) - - -def popa_gen(ir, instr, size): - e = [] - cur_sp = mRSP[instr.mode] - for i, reg in enumerate(reversed(pa_regs)): - if reg == mRSP: - continue - stk_ptr = cur_sp + m2_expr.ExprInt((size // 8) * i, instr.mode) - e.append(m2_expr.ExprAssign(reg[size], ir.ExprMem(stk_ptr, size))) - - stk_ptr = cur_sp + m2_expr.ExprInt((size // 8) * (i + 1), instr.mode) - e.append(m2_expr.ExprAssign(cur_sp, stk_ptr)) - - return e, [] - - -def popa(ir, instr): - return popa_gen(ir, instr, 16) - - -def popad(ir, instr): - return popa_gen(ir, instr, 32) - - -def call(ir, instr, dst): - e = [] - # opmode, admode = instr.opmode, instr.admode - s = dst.size - meip = mRIP[ir.IRDst.size] - opmode, admode = s, instr.v_admode() - myesp = mRSP[instr.mode][:opmode] - n = 
m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - if isinstance(dst, m2_expr.ExprOp): - if dst.op == "segm": - # Far call segm:addr - if instr.mode not in [16, 32]: - raise RuntimeError('not supported') - segm = dst.args[0] - base = dst.args[1] - m1 = segm.zeroExtend(CS.size) - m2 = base.zeroExtend(meip.size) - elif dst.op == "far": - # Far call far [eax] - addr = dst.args[0].arg - m1 = ir.ExprMem(addr, CS.size) - m2 = ir.ExprMem(addr + m2_expr.ExprInt(2, addr.size), meip.size) - else: - raise RuntimeError("bad call operator") - - e.append(m2_expr.ExprAssign(CS, m1)) - e.append(m2_expr.ExprAssign(meip, m2)) - - e.append(m2_expr.ExprAssign(ir.IRDst, m2)) - - c = myesp + m2_expr.ExprInt(-s // 8, s) - e.append(m2_expr.ExprAssign(ir.ExprMem(c, size=s).zeroExtend(s), - CS.zeroExtend(s))) - - c = myesp + m2_expr.ExprInt((-2 * s) // 8, s) - e.append(m2_expr.ExprAssign(ir.ExprMem(c, size=s).zeroExtend(s), - meip.zeroExtend(s))) - - c = myesp + m2_expr.ExprInt((-2 * s) // 8, s) - e.append(m2_expr.ExprAssign(myesp, c)) - return e, [] - - c = myesp + m2_expr.ExprInt(-s // 8, s) - e.append(m2_expr.ExprAssign(myesp, c)) - if ir.do_stk_segm: - c = ir.gen_segm_expr(SS, c) - - e.append(m2_expr.ExprAssign(ir.ExprMem(c, size=s), n)) - e.append(m2_expr.ExprAssign(meip, dst.zeroExtend(ir.IRDst.size))) - e.append(m2_expr.ExprAssign(ir.IRDst, dst.zeroExtend(ir.IRDst.size))) - return e, [] - - -def ret(ir, instr, src=None): - e = [] - meip = mRIP[ir.IRDst.size] - size, admode = instr.v_opmode(), instr.v_admode() - myesp = mRSP[instr.mode][:size] - - if src is None: - value = (myesp + (m2_expr.ExprInt(size // 8, size))) - else: - src = m2_expr.ExprInt(int(src), size) - value = (myesp + (m2_expr.ExprInt(size // 8, size) + src)) - - e.append(m2_expr.ExprAssign(myesp, value)) - result = myesp - if ir.do_stk_segm: - result = ir.gen_segm_expr(SS, result) - - e.append(m2_expr.ExprAssign(meip, ir.ExprMem( - result, size=size).zeroExtend(size))) - 
e.append(m2_expr.ExprAssign(ir.IRDst, - ir.ExprMem(result, size=size).zeroExtend(size))) - return e, [] - - -def retf(ir, instr, src=None): - e = [] - meip = mRIP[ir.IRDst.size] - size, admode = instr.v_opmode(), instr.v_admode() - if src is None: - src = m2_expr.ExprInt(0, instr.mode) - myesp = mRSP[instr.mode][:size] - - src = src.zeroExtend(size) - - result = myesp - if ir.do_stk_segm: - result = ir.gen_segm_expr(SS, result) - - e.append(m2_expr.ExprAssign(meip, ir.ExprMem( - result, size=size).zeroExtend(size))) - e.append(m2_expr.ExprAssign(ir.IRDst, - ir.ExprMem(result, size=size).zeroExtend(size))) - # e.append(m2_expr.ExprAssign(meip, ir.ExprMem(c, size = s))) - result = myesp + m2_expr.ExprInt(size // 8, size) - if ir.do_stk_segm: - result = ir.gen_segm_expr(SS, result) - - e.append(m2_expr.ExprAssign(CS, ir.ExprMem(result, size=16))) - - value = myesp + (m2_expr.ExprInt((2 * size) // 8, size) + src) - e.append(m2_expr.ExprAssign(myesp, value)) - return e, [] - - -def leave(ir, instr): - size = instr.mode - myesp = mRSP[size] - e = [] - e.append(m2_expr.ExprAssign(mRBP[size], ir.ExprMem(mRBP[size], size=size))) - e.append(m2_expr.ExprAssign(myesp, - m2_expr.ExprInt(size // 8, size) + mRBP[size])) - return e, [] - - -def enter(ir, instr, src1, src2): - size, admode = instr.v_opmode(), instr.v_admode() - myesp = mRSP[instr.mode][:size] - myebp = mRBP[instr.mode][:size] - - src1 = src1.zeroExtend(size) - - e = [] - esp_tmp = myesp - m2_expr.ExprInt(size // 8, size) - e.append(m2_expr.ExprAssign(ir.ExprMem(esp_tmp, size=size), - myebp)) - e.append(m2_expr.ExprAssign(myebp, esp_tmp)) - e.append(m2_expr.ExprAssign(myesp, - myesp - (src1 + m2_expr.ExprInt(size // 8, size)))) - return e, [] - - -def jmp(ir, instr, dst): - e = [] - meip = mRIP[ir.IRDst.size] - - if isinstance(dst, m2_expr.ExprOp): - if dst.op == "segm": - # Far jmp segm:addr - segm = dst.args[0] - base = dst.args[1] - m1 = segm.zeroExtend(CS.size) - m2 = base.zeroExtend(meip.size) - elif dst.op == 
"far": - # Far jmp far [eax] - addr = dst.args[0].arg - m1 = ir.ExprMem(addr, CS.size) - m2 = ir.ExprMem(addr + m2_expr.ExprInt(2, addr.size), meip.size) - else: - raise RuntimeError("bad jmp operator") - - e.append(m2_expr.ExprAssign(CS, m1)) - e.append(m2_expr.ExprAssign(meip, m2)) - e.append(m2_expr.ExprAssign(ir.IRDst, m2)) - - else: - # Classic jmp - e.append(m2_expr.ExprAssign(meip, dst)) - e.append(m2_expr.ExprAssign(ir.IRDst, dst)) - - if isinstance(dst, m2_expr.ExprMem): - dst = meip - return e, [] - - -def jz(ir, instr, dst): - #return gen_jcc(ir, instr, zf, dst, True) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, True) - - -def jcxz(ir, instr, dst): - return gen_jcc(ir, instr, mRCX[instr.mode][:16], dst, False) - - -def jecxz(ir, instr, dst): - return gen_jcc(ir, instr, mRCX[instr.mode][:32], dst, False) - - -def jrcxz(ir, instr, dst): - return gen_jcc(ir, instr, mRCX[instr.mode], dst, False) - - -def jnz(ir, instr, dst): - #return gen_jcc(ir, instr, zf, dst, False) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, False) - - - -def jp(ir, instr, dst): - return gen_jcc(ir, instr, pf, dst, True) - - -def jnp(ir, instr, dst): - return gen_jcc(ir, instr, pf, dst, False) - - -def ja(ir, instr, dst): - #return gen_jcc(ir, instr, cf | zf, dst, False) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U>", cf, zf), dst, True) - - -def jae(ir, instr, dst): - #return gen_jcc(ir, instr, cf, dst, False) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U>=", cf), dst, True) - - -def jb(ir, instr, dst): - #return gen_jcc(ir, instr, cf, dst, True) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U<", cf), dst, True) - - -def jbe(ir, instr, dst): - #return gen_jcc(ir, instr, cf | zf, dst, True) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_U<=", cf, zf), dst, True) - - -def jge(ir, instr, dst): - #return gen_jcc(ir, instr, nf - of, dst, False) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S>=", nf, of), dst, True) - - -def jg(ir, instr, dst): - 
#return gen_jcc(ir, instr, zf | (nf - of), dst, False) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S>", nf, of, zf), dst, True) - - -def jl(ir, instr, dst): - #return gen_jcc(ir, instr, nf - of, dst, True) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S<", nf, of), dst, True) - - -def jle(ir, instr, dst): - #return gen_jcc(ir, instr, zf | (nf - of), dst, True) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_S<=", nf, of, zf), dst, True) - - - -def js(ir, instr, dst): - #return gen_jcc(ir, instr, nf, dst, True) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, True) - - - -def jns(ir, instr, dst): - #return gen_jcc(ir, instr, nf, dst, False) - return gen_jcc(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, False) - - -def jo(ir, instr, dst): - return gen_jcc(ir, instr, of, dst, True) - - -def jno(ir, instr, dst): - return gen_jcc(ir, instr, of, dst, False) - - -def loop(ir, instr, dst): - e = [] - meip = mRIP[ir.IRDst.size] - admode = instr.v_admode() - myecx = mRCX[instr.mode][:admode] - - n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - c = myecx - m2_expr.ExprInt(1, myecx.size) - dst_o = m2_expr.ExprCond(c, - dst.zeroExtend(ir.IRDst.size), - n.zeroExtend(ir.IRDst.size)) - e.append(m2_expr.ExprAssign(myecx, c)) - e.append(m2_expr.ExprAssign(meip, dst_o)) - e.append(m2_expr.ExprAssign(ir.IRDst, dst_o)) - return e, [] - - -def loopne(ir, instr, dst): - e = [] - meip = mRIP[ir.IRDst.size] - admode = instr.v_admode() - myecx = mRCX[instr.mode][:admode] - - n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - c &= zf ^ m2_expr.ExprInt(1, 1) - - e.append(m2_expr.ExprAssign(myecx, myecx - m2_expr.ExprInt(1, myecx.size))) - dst_o = m2_expr.ExprCond(c, - dst.zeroExtend(ir.IRDst.size), - n.zeroExtend(ir.IRDst.size)) - e.append(m2_expr.ExprAssign(meip, dst_o)) - e.append(m2_expr.ExprAssign(ir.IRDst, dst_o)) - return 
e, [] - - -def loope(ir, instr, dst): - e = [] - meip = mRIP[ir.IRDst.size] - admode = instr.v_admode() - myecx = mRCX[instr.mode][:admode] - - n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - c &= zf - e.append(m2_expr.ExprAssign(myecx, myecx - m2_expr.ExprInt(1, myecx.size))) - dst_o = m2_expr.ExprCond(c, - dst.zeroExtend(ir.IRDst.size), - n.zeroExtend(ir.IRDst.size)) - e.append(m2_expr.ExprAssign(meip, dst_o)) - e.append(m2_expr.ExprAssign(ir.IRDst, dst_o)) - return e, [] - -# XXX size to do; eflag - - -def div(ir, instr, src1): - e = [] - size = src1.size - if size == 8: - src2 = mRAX[instr.mode][:16] - elif size in [16, 32, 64]: - s1, s2 = mRDX[size], mRAX[size] - src2 = m2_expr.ExprCompose(s2, s1) - else: - raise ValueError('div arg not impl', src1) - - c_d = m2_expr.ExprOp('udiv', src2, src1.zeroExtend(src2.size)) - c_r = m2_expr.ExprOp('umod', src2, src1.zeroExtend(src2.size)) - - # if 8 bit div, only ax is assigned - if size == 8: - e.append(m2_expr.ExprAssign(src2, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) - else: - e.append(m2_expr.ExprAssign(s1, c_r[:size])) - e.append(m2_expr.ExprAssign(s2, c_d[:size])) - - loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - - do_div = [] - do_div += e - do_div.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) - - do_except = [] - do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( - EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) - - e = [] - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(src1, 
loc_div_expr, loc_except_expr))) - - return e, [blk_div, blk_except] - - -# XXX size to do; eflag - -def idiv(ir, instr, src1): - e = [] - size = src1.size - - if size == 8: - src2 = mRAX[instr.mode][:16] - elif size in [16, 32, 64]: - s1, s2 = mRDX[size], mRAX[size] - src2 = m2_expr.ExprCompose(s2, s1) - else: - raise ValueError('div arg not impl', src1) - - c_d = m2_expr.ExprOp('sdiv', src2, src1.signExtend(src2.size)) - c_r = m2_expr.ExprOp('smod', src2, src1.signExtend(src2.size)) - - # if 8 bit div, only ax is assigned - if size == 8: - e.append(m2_expr.ExprAssign(src2, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) - else: - e.append(m2_expr.ExprAssign(s1, c_r[:size])) - e.append(m2_expr.ExprAssign(s2, c_d[:size])) - - loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - - do_div = [] - do_div += e - do_div.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) - - do_except = [] - do_except.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( - EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) - - e = [] - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(src1, loc_div_expr, loc_except_expr))) - - return e, [blk_div, blk_except] - - -# XXX size to do; eflag - - -def mul(_, instr, src1): - e = [] - size = src1.size - if src1.size in [16, 32, 64]: - result = m2_expr.ExprOp('*', - mRAX[size].zeroExtend(size * 2), - src1.zeroExtend(size * 2)) - e.append(m2_expr.ExprAssign(mRAX[size], result[:size])) - e.append(m2_expr.ExprAssign(mRDX[size], result[size:size * 2])) - - elif src1.size == 8: - result = m2_expr.ExprOp('*', - mRAX[instr.mode][:8].zeroExtend(16), - src1.zeroExtend(16)) - 
e.append(m2_expr.ExprAssign(mRAX[instr.mode][:16], result)) - else: - raise ValueError('unknow size') - - e.append(m2_expr.ExprAssign(of, m2_expr.ExprCond(result[size:size * 2], - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)))) - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprCond(result[size:size * 2], - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)))) - - return e, [] - - -def imul(_, instr, src1, src2=None, src3=None): - e = [] - size = src1.size - if src2 is None: - if size in [16, 32, 64]: - result = m2_expr.ExprOp('*', - mRAX[size].signExtend(size * 2), - src1.signExtend(size * 2)) - e.append(m2_expr.ExprAssign(mRAX[size], result[:size])) - e.append(m2_expr.ExprAssign(mRDX[size], result[size:size * 2])) - elif size == 8: - dst = mRAX[instr.mode][:16] - result = m2_expr.ExprOp('*', - mRAX[instr.mode][:8].signExtend(16), - src1.signExtend(16)) - - e.append(m2_expr.ExprAssign(dst, result)) - value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - e.append(m2_expr.ExprAssign(cf, value)) - value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - e.append(m2_expr.ExprAssign(of, value)) - - else: - if src3 is None: - src3 = src2 - src2 = src1 - result = m2_expr.ExprOp('*', - src2.signExtend(size * 2), - src3.signExtend(size * 2)) - e.append(m2_expr.ExprAssign(src1, result[:size])) - - value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - e.append(m2_expr.ExprAssign(cf, value)) - value = m2_expr.ExprCond(result - result[:size].signExtend(size * 2), - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - e.append(m2_expr.ExprAssign(of, value)) - return e, [] - - -def cbw(_, instr): - # Only in 16 bit - e = [] - tempAL = mRAX[instr.v_opmode()][:8] - tempAX = mRAX[instr.v_opmode()][:16] - e.append(m2_expr.ExprAssign(tempAX, tempAL.signExtend(16))) - return e, [] - - -def cwde(_, 
instr): - # Only in 32/64 bit - e = [] - tempAX = mRAX[instr.v_opmode()][:16] - tempEAX = mRAX[instr.v_opmode()][:32] - e.append(m2_expr.ExprAssign(tempEAX, tempAX.signExtend(32))) - return e, [] - - -def cdqe(_, instr): - # Only in 64 bit - e = [] - tempEAX = mRAX[instr.mode][:32] - tempRAX = mRAX[instr.mode][:64] - e.append(m2_expr.ExprAssign(tempRAX, tempEAX.signExtend(64))) - return e, [] - - -def cwd(_, instr): - # Only in 16 bit - e = [] - tempAX = mRAX[instr.mode][:16] - tempDX = mRDX[instr.mode][:16] - result = tempAX.signExtend(32) - e.append(m2_expr.ExprAssign(tempAX, result[:16])) - e.append(m2_expr.ExprAssign(tempDX, result[16:32])) - return e, [] - - -def cdq(_, instr): - # Only in 32/64 bit - e = [] - tempEAX = mRAX[instr.v_opmode()] - tempEDX = mRDX[instr.v_opmode()] - result = tempEAX.signExtend(64) - e.append(m2_expr.ExprAssign(tempEDX, result[32:64])) - return e, [] - - -def cqo(_, instr): - # Only in 64 bit - e = [] - tempRAX = mRAX[instr.mode][:64] - tempRDX = mRDX[instr.mode][:64] - result = tempRAX.signExtend(128) - e.append(m2_expr.ExprAssign(tempRAX, result[:64])) - e.append(m2_expr.ExprAssign(tempRDX, result[64:128])) - return e, [] - - -def stos(ir, instr, size): - loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - addr_o = mRDI[instr.mode][:instr.v_admode()] - addr = addr_o - addr_p = addr + m2_expr.ExprInt(size // 8, addr.size) - addr_m = addr - m2_expr.ExprInt(size // 8, addr.size) - if ir.do_str_segm: - mss = ES - if instr.additional_info.g2.value: - raise NotImplementedError("add segm support") - addr = ir.gen_segm_expr(mss, addr) - - - b = mRAX[instr.mode][:size] - - e0 = [] - e0.append(m2_expr.ExprAssign(addr_o, addr_p)) - e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) - - e1 = [] - 
e1.append(m2_expr.ExprAssign(addr_o, addr_m)) - e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) - - e = [] - e.append(m2_expr.ExprAssign(ir.ExprMem(addr, size), b)) - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) - return e, [e0, e1] - - -def lods(ir, instr, size): - loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - e = [] - - addr_o = mRSI[instr.mode][:instr.v_admode()] - addr = addr_o - addr_p = addr + m2_expr.ExprInt(size // 8, addr.size) - addr_m = addr - m2_expr.ExprInt(size // 8, addr.size) - if ir.do_str_segm: - mss = DS - if instr.additional_info.g2.value: - raise NotImplementedError("add segm support") - addr = ir.gen_segm_expr(mss, addr) - - - b = mRAX[instr.mode][:size] - - e0 = [] - e0.append(m2_expr.ExprAssign(addr_o, addr_p)) - e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) - - e1 = [] - e1.append(m2_expr.ExprAssign(addr_o, addr_m)) - e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) - - e = [] - if instr.mode == 64 and b.size == 32: - e.append(m2_expr.ExprAssign(mRAX[instr.mode], - ir.ExprMem(addr, size).zeroExtend(64))) - else: - e.append(m2_expr.ExprAssign(b, ir.ExprMem(addr, size))) - - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) - return e, [e0, e1] - - -def movs(ir, instr, size): - loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - - dst = mRDI[instr.mode][:instr.v_admode()] - src = mRSI[instr.mode][:instr.v_admode()] - - e = [] - if ir.do_str_segm: - if 
instr.additional_info.g2.value: - raise NotImplementedError("add segm support") - src_sgm = ir.gen_segm_expr(DS, src) - dst_sgm = ir.gen_segm_expr(ES, dst) - - else: - src_sgm = src - dst_sgm = dst - - offset = m2_expr.ExprInt(size // 8, src.size) - - e.append(m2_expr.ExprAssign(ir.ExprMem(dst_sgm, size), - ir.ExprMem(src_sgm, size))) - - e0 = [] - e0.append(m2_expr.ExprAssign(src, src + offset)) - e0.append(m2_expr.ExprAssign(dst, dst + offset)) - e0.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) - - e1 = [] - e1.append(m2_expr.ExprAssign(src, src - offset)) - e1.append(m2_expr.ExprAssign(dst, dst - offset)) - e1.append(m2_expr.ExprAssign(ir.IRDst, loc_next_expr)) - e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) - - e.append(m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) - return e, [e0, e1] - - -def movsd(_, instr, dst, src): - # 64 bits access - if dst.is_id() and src.is_id(): - src = src[:64] - dst = dst[:64] - elif dst.is_mem() and src.is_id(): - dst = m2_expr.ExprMem(dst.ptr, 64) - src = src[:64] - else: - src = m2_expr.ExprMem(src.ptr, 64) - # Erase dst high bits - src = src.zeroExtend(dst.size) - return [m2_expr.ExprAssign(dst, src)], [] - - -def movsd_dispatch(ir, instr, dst=None, src=None): - if dst is None and src is None: - return movs(ir, instr, 32) - else: - return movsd(ir, instr, dst, src) - - -def float_prev(flt, popcount=1): - if not flt in float_list: - return None - i = float_list.index(flt) - if i < popcount: - # Drop value (ex: FSTP ST(0)) - return None - flt = float_list[i - popcount] - return flt - - -def float_pop(avoid_flt=None, popcount=1): - """ - Generate floatpop semantic (@popcount times), avoiding the avoid_flt@ float - @avoid_flt: float avoided in the generated semantic - @popcount: pop count - """ - avoid_flt = float_prev(avoid_flt, popcount) - e = [] - for i in range(8 - popcount): - if avoid_flt != float_list[i]: - 
e.append(m2_expr.ExprAssign(float_list[i], - float_list[i + popcount])) - fill_value = m2_expr.ExprOp("sint_to_fp", m2_expr.ExprInt(0, 64)) - for i in range(8 - popcount, 8): - e.append(m2_expr.ExprAssign(float_list[i], - fill_value)) - e.append( - m2_expr.ExprAssign(float_stack_ptr, - float_stack_ptr - m2_expr.ExprInt(popcount, 3))) - return e - -# XXX TODO - - -def fcom(_, instr, dst=None, src=None): - - if dst is None and src is None: - dst, src = float_st0, float_st1 - elif src is None: - src = mem2double(instr, dst) - dst = float_st0 - - e = [] - - e.append(m2_expr.ExprAssign(float_c0, m2_expr.ExprOp('fcom_c0', dst, src))) - e.append(m2_expr.ExprAssign(float_c1, m2_expr.ExprOp('fcom_c1', dst, src))) - e.append(m2_expr.ExprAssign(float_c2, m2_expr.ExprOp('fcom_c2', dst, src))) - e.append(m2_expr.ExprAssign(float_c3, m2_expr.ExprOp('fcom_c3', dst, src))) - - e += set_float_cs_eip(instr) - return e, [] - - -def ftst(_, instr): - dst = float_st0 - - e = [] - src = m2_expr.ExprOp('sint_to_fp', m2_expr.ExprInt(0, 64)) - e.append(m2_expr.ExprAssign(float_c0, m2_expr.ExprOp('fcom_c0', dst, src))) - e.append(m2_expr.ExprAssign(float_c1, m2_expr.ExprOp('fcom_c1', dst, src))) - e.append(m2_expr.ExprAssign(float_c2, m2_expr.ExprOp('fcom_c2', dst, src))) - e.append(m2_expr.ExprAssign(float_c3, m2_expr.ExprOp('fcom_c3', dst, src))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fxam(ir, instr): - """ - NaN: - C3, C2, C0 = 001; - Normal: - C3, C2, C0 = 010; - Infinity: - C3, C2, C0 = 011; - Zero: - C3, C2, C0 = 100; - Empty: - C3, C2, C0 = 101; - Denormal: - C3, C2, C0 = 110; - - C1 = sign bit of ST; (* 0 for positive, 1 for negative *) - """ - dst = float_st0 - - # Empty not handled - locs = {} - for name in ["NaN", "Normal", "Infinity", "Zero", "Denormal"]: - locs[name] = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - - # if Denormal: - # if zero: - # do_zero - # else: - 
# do_denormal - # else: - # if Nan: - # do_nan - # else: - # if infinity: - # do_infinity - # else: - # do_normal - - irdst = m2_expr.ExprCond( - m2_expr.expr_is_IEEE754_denormal(dst), - m2_expr.ExprCond(m2_expr.expr_is_IEEE754_zero(dst), - locs["Zero"][1], - locs["Denormal"][1], - ), - m2_expr.ExprCond(m2_expr.expr_is_NaN(dst), - locs["NaN"][1], - m2_expr.ExprCond(m2_expr.expr_is_infinite(dst), - locs["Infinity"][1], - locs["Normal"][1], - ) - ) - ) - base = [m2_expr.ExprAssign(ir.IRDst, irdst), - m2_expr.ExprAssign(float_c1, dst.msb()) - ] - base += set_float_cs_eip(instr) - - out = [ - IRBlock(locs["Zero"][0], [AssignBlock({ - float_c0: m2_expr.ExprInt(0, float_c0.size), - float_c2: m2_expr.ExprInt(0, float_c2.size), - float_c3: m2_expr.ExprInt(1, float_c3.size), - ir.IRDst: loc_next_expr, - }, instr)]), - IRBlock(locs["Denormal"][0], [AssignBlock({ - float_c0: m2_expr.ExprInt(0, float_c0.size), - float_c2: m2_expr.ExprInt(1, float_c2.size), - float_c3: m2_expr.ExprInt(1, float_c3.size), - ir.IRDst: loc_next_expr, - }, instr)]), - IRBlock(locs["NaN"][0], [AssignBlock({ - float_c0: m2_expr.ExprInt(1, float_c0.size), - float_c2: m2_expr.ExprInt(0, float_c2.size), - float_c3: m2_expr.ExprInt(0, float_c3.size), - ir.IRDst: loc_next_expr, - }, instr)]), - IRBlock(locs["Infinity"][0], [AssignBlock({ - float_c0: m2_expr.ExprInt(1, float_c0.size), - float_c2: m2_expr.ExprInt(1, float_c2.size), - float_c3: m2_expr.ExprInt(0, float_c3.size), - ir.IRDst: loc_next_expr, - }, instr)]), - IRBlock(locs["Normal"][0], [AssignBlock({ - float_c0: m2_expr.ExprInt(0, float_c0.size), - float_c2: m2_expr.ExprInt(1, float_c2.size), - float_c3: m2_expr.ExprInt(0, float_c3.size), - ir.IRDst: loc_next_expr, - }, instr)]), - ] - return base, out - - -def ficom(_, instr, dst, src=None): - - dst, src = float_implicit_st0(dst, src) - - e = [] - - e.append(m2_expr.ExprAssign(float_c0, - m2_expr.ExprOp('fcom_c0', dst, - src.zeroExtend(dst.size)))) - e.append(m2_expr.ExprAssign(float_c1, - 
m2_expr.ExprOp('fcom_c1', dst, - src.zeroExtend(dst.size)))) - e.append(m2_expr.ExprAssign(float_c2, - m2_expr.ExprOp('fcom_c2', dst, - src.zeroExtend(dst.size)))) - e.append(m2_expr.ExprAssign(float_c3, - m2_expr.ExprOp('fcom_c3', dst, - src.zeroExtend(dst.size)))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fcomi(_, instr, dst=None, src=None): - # TODO unordered float - if dst is None and src is None: - dst, src = float_st0, float_st1 - elif src is None: - src = dst - dst = float_st0 - - e = [] - - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp('fcom_c0', dst, src))) - e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp('fcom_c2', dst, src))) - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('fcom_c3', dst, src))) - - e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fcomip(ir, instr, dst=None, src=None): - e, extra = fcomi(ir, instr, dst, src) - e += float_pop() - e += set_float_cs_eip(instr) - return e, extra - - -def fucomi(ir, instr, dst=None, src=None): - # TODO unordered float - return fcomi(ir, instr, dst, src) - - -def fucomip(ir, instr, dst=None, src=None): - # TODO unordered float - return fcomip(ir, instr, dst, src) - - -def fcomp(ir, instr, dst=None, src=None): - e, extra = fcom(ir, instr, dst, src) - e += float_pop() - e += set_float_cs_eip(instr) - return e, extra - - -def fcompp(ir, instr, dst=None, src=None): - e, extra = fcom(ir, instr, dst, src) - e += float_pop(popcount=2) - e += set_float_cs_eip(instr) - return e, extra - - -def ficomp(ir, instr, dst, src=None): - e, extra = ficom(ir, instr, dst, src) - e += float_pop() - e += set_float_cs_eip(instr) - return e, extra - - -def fucom(ir, instr, dst=None, src=None): - # TODO unordered float - return fcom(ir, instr, dst, src) - - -def fucomp(ir, instr, dst=None, src=None): - # TODO unordered float - return 
fcomp(ir, instr, dst, src) - - -def fucompp(ir, instr, dst=None, src=None): - # TODO unordered float - return fcompp(ir, instr, dst, src) - - -def comiss(_, instr, dst, src): - # TODO unordered float - - e = [] - - dst = m2_expr.ExprOp('sint_to_fp', dst[:32]) - src = m2_expr.ExprOp('sint_to_fp', src[:32]) - - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp('fcom_c0', dst, src))) - e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp('fcom_c2', dst, src))) - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('fcom_c3', dst, src))) - - e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - - e += set_float_cs_eip(instr) - return e, [] - - -def comisd(_, instr, dst, src): - # TODO unordered float - - e = [] - - dst = m2_expr.ExprOp('sint_to_fp', dst[:64]) - src = m2_expr.ExprOp('sint_to_fp', src[:64]) - - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp('fcom_c0', dst, src))) - e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp('fcom_c2', dst, src))) - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('fcom_c3', dst, src))) - - e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fld(_, instr, src): - - if src.size == 32: - src = m2_expr.ExprOp("fpconvert_fp64", src) - if isinstance(src, m2_expr.ExprMem) and src.size > 64: - raise NotImplementedError('convert from 80bits') - - e = [] - e.append(m2_expr.ExprAssign(float_st7, float_st6)) - e.append(m2_expr.ExprAssign(float_st6, float_st5)) - e.append(m2_expr.ExprAssign(float_st5, float_st4)) - e.append(m2_expr.ExprAssign(float_st4, float_st3)) - e.append(m2_expr.ExprAssign(float_st3, float_st2)) - e.append(m2_expr.ExprAssign(float_st2, float_st1)) - e.append(m2_expr.ExprAssign(float_st1, float_st0)) - e.append(m2_expr.ExprAssign(float_st0, src)) - 
e.append( - m2_expr.ExprAssign(float_stack_ptr, - float_stack_ptr + m2_expr.ExprInt(1, 3))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fst(_, instr, dst): - e = [] - - if isinstance(dst, m2_expr.ExprMem) and dst.size > 64: - raise NotImplementedError('convert to 80bits') - src = float_st0 - - if dst.size == 32: - src = m2_expr.ExprOp("fpconvert_fp32", src) - e.append(m2_expr.ExprAssign(dst, src)) - e += set_float_cs_eip(instr) - return e, [] - - -def fstp(ir, instr, dst): - e = [] - - if isinstance(dst, m2_expr.ExprMem) and dst.size > 64: - raise NotImplementedError('convert to 80bits') - - if isinstance(dst, m2_expr.ExprMem): - src = float_st0 - if dst.size == 32: - src = m2_expr.ExprOp("fpconvert_fp32", src) - e.append(m2_expr.ExprAssign(dst, src)) - else: - src = float_st0 - if float_list.index(dst) > 1: - # a = st0 -> st0 is dropped - # a = st1 -> st0 = st0, useless - e.append(m2_expr.ExprAssign(float_prev(dst), src)) - - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fist(_, instr, dst): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fp_to_sint%d' % dst.size, - float_st0))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fistp(ir, instr, dst): - e, extra = fist(ir, instr, dst) - e += float_pop(dst) - return e, extra - - -def fisttp(_, instr, dst): - e = [] - e.append(m2_expr.ExprAssign( - dst, - m2_expr.ExprOp('fp_to_sint%d' % dst.size, - m2_expr.ExprOp('fpround_towardszero', float_st0) - ))) - - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fild(ir, instr, src): - # XXXXX - src = m2_expr.ExprOp('sint_to_fp', src.signExtend(64)) - e = [] - e += set_float_cs_eip(instr) - e_fld, extra = fld(ir, instr, src) - e += e_fld - return e, extra - - -def fldz(ir, instr): - return fld(ir, instr, m2_expr.ExprOp('sint_to_fp', m2_expr.ExprInt(0, 64))) - - -def fld1(ir, instr): - return fld(ir, instr, m2_expr.ExprOp('sint_to_fp', m2_expr.ExprInt(1, 64))) - - -def fldl2t(ir, instr): - 
value_f = math.log(10) / math.log(2) - value = struct.unpack('Q', struct.pack('d', value_f))[0] - return fld(ir, instr, m2_expr.ExprOp( - 'sint_to_fp', - m2_expr.ExprInt(value, 64) - )) - - -def fldpi(ir, instr): - value_f = math.pi - value = struct.unpack('Q', struct.pack('d', value_f))[0] - return fld(ir, instr, m2_expr.ExprOp( - 'sint_to_fp', - m2_expr.ExprInt(value, 64) - )) - - -def fldln2(ir, instr): - value_f = math.log(2) - value = struct.unpack('Q', struct.pack('d', value_f))[0] - return fld(ir, instr, m2_expr.ExprOp('mem_64_to_double', - m2_expr.ExprInt(value, 64))) - - -def fldl2e(ir, instr): - x = struct.pack('d', 1 / math.log(2)) - x = struct.unpack('Q', x)[0] - return fld(ir, instr, m2_expr.ExprOp('mem_64_to_double', - m2_expr.ExprInt(x, 64))) - - -def fldlg2(ir, instr): - x = struct.pack('d', math.log10(2)) - x = struct.unpack('Q', x)[0] - return fld(ir, instr, m2_expr.ExprOp('mem_64_to_double', - m2_expr.ExprInt(x, 64))) - - -def fadd(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fadd', dst, src))) - - e += set_float_cs_eip(instr) - return e, [] - - -def fiadd(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fiadd', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fisub(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fisub', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fisubr(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fisub', src, dst))) - e += set_float_cs_eip(instr) - return e, [] - - -def fpatan(_, instr): - e = [] - a = float_st1 - 
e.append(m2_expr.ExprAssign(float_prev(a), - m2_expr.ExprOp('fpatan', float_st0, float_st1))) - e += set_float_cs_eip(instr) - e += float_pop(a) - return e, [] - - -def fprem(_, instr): - e = [] - e.append( - m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fprem', float_st0, float_st1))) - # Remaining bits (ex: used in argument reduction in tan) - quotient = m2_expr.ExprOp('fp_to_sint32', m2_expr.ExprOp('fpround_towardszero', m2_expr.ExprOp('fdiv', float_st0, float_st1))) - e += [m2_expr.ExprAssign(float_c0, quotient[2:3]), - m2_expr.ExprAssign(float_c3, quotient[1:2]), - m2_expr.ExprAssign(float_c1, quotient[0:1]), - # Consider the reduction is always completed - m2_expr.ExprAssign(float_c2, m2_expr.ExprInt(0, 1)), - ] - e += set_float_cs_eip(instr) - return e, [] - - -def fprem1(_, instr): - e = [] - e.append( - m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fprem1', float_st0, float_st1))) - e += set_float_cs_eip(instr) - return e, [] - - -def faddp(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fadd', dst, src))) - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fninit(_, instr): - e = [] - e += set_float_cs_eip(instr) - return e, [] - - -def fyl2x(_, instr): - e = [] - a = float_st1 - e.append( - m2_expr.ExprAssign(float_prev(a), m2_expr.ExprOp('fyl2x', float_st0, float_st1))) - e += set_float_cs_eip(instr) - e += float_pop(a) - return e, [] - - -def fnstenv(ir, instr, dst): - e = [] - # XXX TODO tag word, ... 
- status_word = m2_expr.ExprCompose(m2_expr.ExprInt(0, 8), - float_c0, float_c1, float_c2, - float_stack_ptr, float_c3, - m2_expr.ExprInt(0, 1)) - - s = instr.mode - # The behaviour in 64bit is identical to 32 bit - # This will truncate addresses - size = min(32, s) - ad = ir.ExprMem(dst.ptr, size=16) - e.append(m2_expr.ExprAssign(ad, float_control)) - ad = ir.ExprMem( - dst.ptr + m2_expr.ExprInt( - size // (8 * 1), - dst.ptr.size - ), - size=16 - ) - e.append(m2_expr.ExprAssign(ad, status_word)) - ad = ir.ExprMem( - dst.ptr + m2_expr.ExprInt( - size // (8 * 3), - dst.ptr.size - ), - size=size - ) - e.append(m2_expr.ExprAssign(ad, float_eip[:size])) - ad = ir.ExprMem( - dst.ptr + m2_expr.ExprInt( - size // (8 * 4), - dst.ptr.size - ), - size=16 - ) - e.append(m2_expr.ExprAssign(ad, float_cs)) - ad = ir.ExprMem( - dst.ptr + m2_expr.ExprInt( - size // (8 * 5), - dst.ptr.size - ), - size=size - ) - e.append(m2_expr.ExprAssign(ad, float_address[:size])) - ad = ir.ExprMem( - dst.ptr + m2_expr.ExprInt( - size // (8 * 6), - dst.ptr.size - ), - size=16 - ) - e.append(m2_expr.ExprAssign(ad, float_ds)) - return e, [] - - -def fldenv(ir, instr, src): - e = [] - # Inspired from fnstenv (same TODOs / issues) - - s = instr.mode - # The behaviour in 64bit is identical to 32 bit - # This will truncate addresses - size = min(32, s) - - # Float control - ad = ir.ExprMem(src.ptr, size=16) - e.append(m2_expr.ExprAssign(float_control, ad)) - - # Status word - ad = ir.ExprMem( - src.ptr + m2_expr.ExprInt( - size // (8 * 1), - size=src.ptr.size - ), - size=16 - ) - e += [ - m2_expr.ExprAssign(x, y) for x, y in ((float_c0, ad[8:9]), - (float_c1, ad[9:10]), - (float_c2, ad[10:11]), - (float_stack_ptr, ad[11:14]), - (float_c3, ad[14:15])) - ] - - # EIP, CS, Address, DS - for offset, target in ( - (3, float_eip[:size]), - (4, float_cs), - (5, float_address[:size]), - (6, float_ds) - ): - ad = ir.ExprMem( - src.ptr + m2_expr.ExprInt( - size // ( 8 * offset), - size=src.ptr.size - ), - 
size=target.size - ) - e.append(m2_expr.ExprAssign(target, ad)) - - return e, [] - - -def fsub(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fsub', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fsubp(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fsub', dst, src))) - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fsubr(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fsub', src, dst))) - e += set_float_cs_eip(instr) - return e, [] - - -def fsubrp(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fsub', src, dst))) - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fmul(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fmul', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fimul(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fimul', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fdiv(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fdiv', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fdivr(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, 
m2_expr.ExprOp('fdiv', src, dst))) - e += set_float_cs_eip(instr) - return e, [] - - -def fdivrp(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fdiv', src, dst))) - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fidiv(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fidiv', dst, src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fidivr(_, instr, dst, src=None): - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('fidiv', src, dst))) - e += set_float_cs_eip(instr) - return e, [] - - -def fdivp(_, instr, dst, src=None): - # Invalid emulation - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fdiv', dst, src))) - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def fmulp(_, instr, dst, src=None): - # Invalid emulation - dst, src = float_implicit_st0(dst, src) - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_prev(dst), m2_expr.ExprOp('fmul', dst, src))) - e += set_float_cs_eip(instr) - e += float_pop(dst) - return e, [] - - -def ftan(_, instr, src): - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('ftan', src))) - e += set_float_cs_eip(instr) - return e, [] - - -def fxch(_, instr, src): - e = [] - src = mem2double(instr, src) - e.append(m2_expr.ExprAssign(float_st0, src)) - e.append(m2_expr.ExprAssign(src, float_st0)) - e += set_float_cs_eip(instr) - return e, [] - - -def fptan(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st7, float_st6)) - e.append(m2_expr.ExprAssign(float_st6, float_st5)) - 
e.append(m2_expr.ExprAssign(float_st5, float_st4)) - e.append(m2_expr.ExprAssign(float_st4, float_st3)) - e.append(m2_expr.ExprAssign(float_st3, float_st2)) - e.append(m2_expr.ExprAssign(float_st2, float_st1)) - e.append(m2_expr.ExprAssign(float_st1, m2_expr.ExprOp('ftan', float_st0))) - e.append( - m2_expr.ExprAssign( - float_st0, - m2_expr.ExprOp( - 'sint_to_fp', - m2_expr.ExprInt(1, 64) - ) - ) - ) - e.append( - m2_expr.ExprAssign(float_stack_ptr, - float_stack_ptr + m2_expr.ExprInt(1, 3))) - return e, [] - - -def frndint(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('frndint', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fsin(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fsin', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fcos(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fcos', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fsincos(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st7, float_st6)) - e.append(m2_expr.ExprAssign(float_st6, float_st5)) - e.append(m2_expr.ExprAssign(float_st5, float_st4)) - e.append(m2_expr.ExprAssign(float_st4, float_st3)) - e.append(m2_expr.ExprAssign(float_st3, float_st2)) - e.append(m2_expr.ExprAssign(float_st2, float_st1)) - e.append(m2_expr.ExprAssign(float_st1, m2_expr.ExprOp('fsin', float_st0))) - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fcos', float_st0))) - e.append( - m2_expr.ExprAssign(float_stack_ptr, - float_stack_ptr + m2_expr.ExprInt(1, 3))) - return e, [] - - -def fscale(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fscale', float_st0, - float_st1))) - e += set_float_cs_eip(instr) - return e, [] - - -def f2xm1(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('f2xm1', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fchs(_, instr): - e = [] - 
e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fchs', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fsqrt(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fsqrt', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fabs(_, instr): - e = [] - e.append(m2_expr.ExprAssign(float_st0, m2_expr.ExprOp('fabs', float_st0))) - e += set_float_cs_eip(instr) - return e, [] - - -def fnstsw(_, instr, dst): - args = [ - # Exceptions -> 0 - m2_expr.ExprInt(0, 8), - float_c0, - float_c1, - float_c2, - float_stack_ptr, - float_c3, - # B: FPU is not busy -> 0 - m2_expr.ExprInt(0, 1)] - e = [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*args))] - return e, [] - - -def fnstcw(_, instr, dst): - e = [] - e.append(m2_expr.ExprAssign(dst, float_control)) - return e, [] - - -def fldcw(_, instr, src): - e = [] - e.append(m2_expr.ExprAssign(float_control, src)) - return e, [] - - -def fwait(_, instr): - return [], [] - - -def fcmovb(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, cf, arg1, arg2, True) - - -def fcmove(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, zf, arg1, arg2, True) - - -def fcmovbe(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, cf | zf, arg1, arg2, True) - - -def fcmovu(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, pf, arg1, arg2, True) - - -def fcmovnb(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, cf, arg1, arg2, False) - - -def fcmovne(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, zf, arg1, arg2, False) - - -def fcmovnbe(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, cf | zf, arg1, arg2, False) - - -def fcmovnu(ir, instr, arg1, arg2): - return gen_fcmov(ir, instr, pf, arg1, arg2, False) - - -def nop(_, instr, a=None): - return [], [] - - -def prefetch0(_, instr, src=None): - # see 4-198 on this documentation - # 
https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def prefetch1(_, instr, src=None): - # see 4-198 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def prefetch2(_, instr, src=None): - # see 4-198 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def prefetchw(_, instr, src=None): - # see 4-201 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - -def prefetchnta(_, instr, src=None): - # see 4-201 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def lfence(_, instr, src=None): - # see 3-485 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def mfence(_, instr, src=None): - # see 3-516 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def sfence(_, instr, src=None): - # see 3-356 on this documentation - # https://www-ssl.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf - return [], [] - - -def ud2(_, instr, src=None): - e = 
[m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt( - EXCEPT_ILLEGAL_INSN, exception_flags.size))] - return e, [] - - -def hlt(_, instr): - e = [] - except_int = EXCEPT_PRIV_INSN - e.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(except_int, 32))) - return e, [] - - -def rdtsc(_, instr): - e = [] - e.append(m2_expr.ExprAssign(tsc, tsc + m2_expr.ExprInt(1, 64))) - e.append(m2_expr.ExprAssign(mRAX[32], tsc[:32])) - e.append(m2_expr.ExprAssign(mRDX[32], tsc[32:])) - return e, [] - - -def daa(_, instr): - e = [] - r_al = mRAX[instr.mode][:8] - - cond1 = m2_expr.expr_is_unsigned_greater(r_al[:4], m2_expr.ExprInt(0x9, 4)) | af - e.append(m2_expr.ExprAssign(af, cond1)) - - cond2 = m2_expr.expr_is_unsigned_greater(m2_expr.ExprInt(6, 8), r_al) - cond3 = m2_expr.expr_is_unsigned_greater(r_al, m2_expr.ExprInt(0x99, 8)) | cf - - cf_c1 = m2_expr.ExprCond(cond1, - cf | (cond2), - m2_expr.ExprInt(0, 1)) - new_cf = m2_expr.ExprCond(cond3, - m2_expr.ExprInt(1, 1), - m2_expr.ExprInt(0, 1)) - e.append(m2_expr.ExprAssign(cf, new_cf)) - - al_c1 = m2_expr.ExprCond(cond1, - r_al + m2_expr.ExprInt(6, 8), - r_al) - - new_al = m2_expr.ExprCond(cond3, - al_c1 + m2_expr.ExprInt(0x60, 8), - al_c1) - e.append(m2_expr.ExprAssign(r_al, new_al)) - e += update_flag_znp(new_al) - return e, [] - - -def das(_, instr): - e = [] - r_al = mRAX[instr.mode][:8] - - cond1 = m2_expr.expr_is_unsigned_greater(r_al[:4], m2_expr.ExprInt(0x9, 4)) | af - e.append(m2_expr.ExprAssign(af, cond1)) - - cond2 = m2_expr.expr_is_unsigned_greater(m2_expr.ExprInt(6, 8), r_al) - cond3 = m2_expr.expr_is_unsigned_greater(r_al, m2_expr.ExprInt(0x99, 8)) | cf - - cf_c1 = m2_expr.ExprCond(cond1, - cf | (cond2), - m2_expr.ExprInt(0, 1)) - new_cf = m2_expr.ExprCond(cond3, - m2_expr.ExprInt(1, 1), - cf_c1) - e.append(m2_expr.ExprAssign(cf, new_cf)) - - al_c1 = m2_expr.ExprCond(cond1, - r_al - m2_expr.ExprInt(6, 8), - r_al) - - new_al = m2_expr.ExprCond(cond3, - al_c1 - m2_expr.ExprInt(0x60, 8), - al_c1) - 
e.append(m2_expr.ExprAssign(r_al, new_al)) - e += update_flag_znp(new_al) - return e, [] - - -def aam(ir, instr, src): - e = [] - assert src.is_int() - - value = int(src) - if value: - tempAL = mRAX[instr.mode][0:8] - newEAX = m2_expr.ExprCompose( - m2_expr.ExprOp("umod", tempAL, src), - m2_expr.ExprOp("udiv", tempAL, src), - mRAX[instr.mode][16:] - ) - e += [m2_expr.ExprAssign(mRAX[instr.mode], newEAX)] - e += update_flag_arith(newEAX) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - else: - e.append( - m2_expr.ExprAssign( - exception_flags, - m2_expr.ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size) - ) - ) - return e, [] - - -def aad(_, instr, src): - e = [] - tempAL = mRAX[instr.mode][0:8] - tempAH = mRAX[instr.mode][8:16] - newEAX = m2_expr.ExprCompose((tempAL + (tempAH * src)) & m2_expr.ExprInt(0xFF, 8), - m2_expr.ExprInt(0, 8), - mRAX[instr.mode][16:]) - e += [m2_expr.ExprAssign(mRAX[instr.mode], newEAX)] - e += update_flag_arith(newEAX) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - return e, [] - - -def _tpl_aaa(_, instr, op): - """Templating for aaa, aas with operation @op - @op: operation to apply - """ - e = [] - r_al = mRAX[instr.mode][:8] - r_ah = mRAX[instr.mode][8:16] - r_ax = mRAX[instr.mode][:16] - i0 = m2_expr.ExprInt(0, 1) - i1 = m2_expr.ExprInt(1, 1) - # cond: if (al & 0xf) > 9 OR af == 1 - cond = (r_al & m2_expr.ExprInt(0xf, 8)) - m2_expr.ExprInt(9, 8) - cond = ~cond.msb() & m2_expr.ExprCond(cond, i1, i0) - cond |= af & i1 - - to_add = m2_expr.ExprInt(0x106, size=r_ax.size) - if op == "-": - # Avoid ExprOp("-", A, B), should be ExprOp("+", A, ExprOp("-", B)) - first_part = r_ax - to_add - else: - first_part = m2_expr.ExprOp(op, r_ax, to_add) - new_ax = first_part & m2_expr.ExprInt(0xff0f, - size=r_ax.size) - # set AL - e.append(m2_expr.ExprAssign(r_ax, m2_expr.ExprCond(cond, new_ax, r_ax))) - e.append(m2_expr.ExprAssign(af, cond)) - e.append(m2_expr.ExprAssign(cf, cond)) - return e, [] - - -def aaa(ir, instr): - return 
_tpl_aaa(ir, instr, "+") - - -def aas(ir, instr): - return _tpl_aaa(ir, instr, "-") - - -def bsr_bsf(ir, instr, dst, src, op_func): - """ - IF SRC == 0 - ZF = 1 - DEST is left unchanged - ELSE - ZF = 0 - DEST = @op_func(SRC) - """ - loc_src_null, loc_src_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_src_not_null, loc_src_not_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - - aff_dst = m2_expr.ExprAssign(ir.IRDst, loc_next_expr) - e = [m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(src, - loc_src_not_null_expr, - loc_src_null_expr))] - e_src_null = [] - e_src_null.append(m2_expr.ExprAssign(zf, m2_expr.ExprInt(1, zf.size))) - # XXX destination is undefined - e_src_null.append(aff_dst) - - e_src_not_null = [] - e_src_not_null.append(m2_expr.ExprAssign(zf, m2_expr.ExprInt(0, zf.size))) - e_src_not_null.append(m2_expr.ExprAssign(dst, op_func(src))) - e_src_not_null.append(aff_dst) - - return e, [IRBlock(loc_src_null, [AssignBlock(e_src_null, instr)]), - IRBlock(loc_src_not_null, [AssignBlock(e_src_not_null, instr)])] - - -def bsf(ir, instr, dst, src): - return bsr_bsf(ir, instr, dst, src, - lambda src: m2_expr.ExprOp("cnttrailzeros", src)) - - -def bsr(ir, instr, dst, src): - return bsr_bsf( - ir, instr, dst, src, - lambda src: m2_expr.ExprInt(src.size - 1, src.size) - m2_expr.ExprOp("cntleadzeros", src) - ) - - -def arpl(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(1 << 7, 32))) - return e, [] - - -def ins(_, instr, size): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, m2_expr.ExprInt(1 << 7, 32))) - return e, [] - - -def sidt(ir, instr, dst): - e = [] - if not isinstance(dst, m2_expr.ExprMem) or dst.size != 32: - raise ValueError('not exprmem 32bit instance!!') - ptr = dst.ptr - LOG_X86_SEM.warning("DEFAULT SIDT ADDRESS %s!!", dst) - e.append(m2_expr.ExprAssign(ir.ExprMem(ptr, 32), - 
m2_expr.ExprInt(0xe40007ff, 32))) - e.append( - m2_expr.ExprAssign(ir.ExprMem(ptr + m2_expr.ExprInt(4, ptr.size), 16), - m2_expr.ExprInt(0x8245, 16))) - return e, [] - - -def sldt(_, instr, dst): - LOG_X86_SEM.warning("DEFAULT SLDT ADDRESS %s!!", dst) - e = [m2_expr.ExprAssign(dst, m2_expr.ExprInt(0, dst.size))] - return e, [] - - -def cmovz(ir, instr, dst, src): - #return gen_cmov(ir, instr, zf, dst, src, True) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, src, True) - - -def cmovnz(ir, instr, dst, src): - #return gen_cmov(ir, instr, zf, dst, src, False) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_EQ", zf), dst, src, False) - - -def cmovpe(ir, instr, dst, src): - return gen_cmov(ir, instr, pf, dst, src, True) - - -def cmovnp(ir, instr, dst, src): - return gen_cmov(ir, instr, pf, dst, src, False) - - -def cmovge(ir, instr, dst, src): - #return gen_cmov(ir, instr, nf ^ of, dst, src, False) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S>=", nf, of), dst, src, True) - - -def cmovg(ir, instr, dst, src): - #return gen_cmov(ir, instr, zf | (nf ^ of), dst, src, False) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S>", nf, of, zf), dst, src, True) - - -def cmovl(ir, instr, dst, src): - #return gen_cmov(ir, instr, nf ^ of, dst, src, True) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S<", nf, of), dst, src, True) - - -def cmovle(ir, instr, dst, src): - #return gen_cmov(ir, instr, zf | (nf ^ of), dst, src, True) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_S<=", nf, of, zf), dst, src, True) - - -def cmova(ir, instr, dst, src): - #return gen_cmov(ir, instr, cf | zf, dst, src, False) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_U>", cf, zf), dst, src, True) - - -def cmovae(ir, instr, dst, src): - #return gen_cmov(ir, instr, cf, dst, src, False) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_U>=", cf), dst, src, True) - - -def cmovbe(ir, instr, dst, src): - #return gen_cmov(ir, instr, cf | zf, dst, src, True) - return gen_cmov(ir, instr, 
m2_expr.ExprOp("CC_U<=", cf, zf), dst, src, True) - - -def cmovb(ir, instr, dst, src): - #return gen_cmov(ir, instr, cf, dst, src, True) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_U<", cf), dst, src, True) - - -def cmovo(ir, instr, dst, src): - return gen_cmov(ir, instr, of, dst, src, True) - - -def cmovno(ir, instr, dst, src): - return gen_cmov(ir, instr, of, dst, src, False) - - -def cmovs(ir, instr, dst, src): - #return gen_cmov(ir, instr, nf, dst, src, True) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, src, True) - - -def cmovns(ir, instr, dst, src): - #return gen_cmov(ir, instr, nf, dst, src, False) - return gen_cmov(ir, instr, m2_expr.ExprOp("CC_NEG", nf), dst, src, False) - - -def icebp(_, instr): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(EXCEPT_SOFT_BP, 32))) - return e, [] -# XXX - - -def l_int(_, instr, src): - e = [] - # XXX - if src.arg in [1, 3]: - except_int = EXCEPT_SOFT_BP - else: - except_int = EXCEPT_INT_XX - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(except_int, 32))) - e.append(m2_expr.ExprAssign(interrupt_num, src)) - return e, [] - - -def l_sysenter(_, instr): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) - return e, [] - - -def l_syscall(_, instr): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) - return e, [] - -# XXX - - -def l_out(_, instr, src1, src2): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) - return e, [] - -# XXX - - -def l_outs(_, instr, size): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) - return e, [] - -# XXX actually, xlat performs al = (ds:[e]bx + ZeroExtend(al)) - - -def xlat(ir, instr): - e = [] - ptr = mRAX[instr.mode][0:8].zeroExtend(mRBX[instr.mode].size) - src = ir.ExprMem(mRBX[instr.mode] + ptr, 8) - 
e.append(m2_expr.ExprAssign(mRAX[instr.mode][0:8], src)) - return e, [] - - -def cpuid(_, instr): - e = [] - e.append( - m2_expr.ExprAssign(mRAX[instr.mode], - m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(0, instr.mode)))) - e.append( - m2_expr.ExprAssign(mRBX[instr.mode], - m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(1, instr.mode)))) - e.append( - m2_expr.ExprAssign(mRCX[instr.mode], - m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(2, instr.mode)))) - e.append( - m2_expr.ExprAssign(mRDX[instr.mode], - m2_expr.ExprOp('x86_cpuid', mRAX[instr.mode], m2_expr.ExprInt(3, instr.mode)))) - return e, [] - - -def bittest_get(ir, instr, src, index): - index = index.zeroExtend(src.size) - if isinstance(src, m2_expr.ExprMem): - b_mask = {16: 4, 32: 5, 64: 6} - b_decal = {16: 1, 32: 3, 64: 7} - ptr = src.ptr - segm = src.is_mem_segm() - if segm: - ptr = ptr.args[1] - - off_bit = index.zeroExtend( - src.size) & m2_expr.ExprInt((1 << b_mask[src.size]) - 1, - src.size) - off_byte = ((index.zeroExtend(ptr.size) >> m2_expr.ExprInt(3, ptr.size)) & - m2_expr.ExprInt(((1 << src.size) - 1) ^ b_decal[src.size], ptr.size)) - - addr = ptr + off_byte - if segm: - addr = ir.gen_segm_expr(src.ptr.args[0], addr) - - d = ir.ExprMem(addr, src.size) - else: - off_bit = m2_expr.ExprOp( - '&', index, m2_expr.ExprInt(src.size - 1, src.size)) - d = src - return d, off_bit - - -def bt(ir, instr, src, index): - e = [] - index = index.zeroExtend(src.size) - d, off_bit = bittest_get(ir, instr, src, index) - d = d >> off_bit - e.append(m2_expr.ExprAssign(cf, d[:1])) - return e, [] - - -def btc(ir, instr, src, index): - e = [] - d, off_bit = bittest_get(ir, instr, src, index) - e.append(m2_expr.ExprAssign(cf, (d >> off_bit)[:1])) - - m = m2_expr.ExprInt(1, src.size) << off_bit - e.append(m2_expr.ExprAssign(d, d ^ m)) - - return e, [] - - -def bts(ir, instr, src, index): - e = [] - d, off_bit = bittest_get(ir, instr, src, index) - 
e.append(m2_expr.ExprAssign(cf, (d >> off_bit)[:1])) - m = m2_expr.ExprInt(1, src.size) << off_bit - e.append(m2_expr.ExprAssign(d, d | m)) - - return e, [] - - -def btr(ir, instr, src, index): - e = [] - d, off_bit = bittest_get(ir, instr, src, index) - e.append(m2_expr.ExprAssign(cf, (d >> off_bit)[:1])) - m = ~(m2_expr.ExprInt(1, src.size) << off_bit) - e.append(m2_expr.ExprAssign(d, d & m)) - - return e, [] - - -def into(_, instr): - return [], [] - - -def l_in(_, instr, src1, src2): - e = [] - e.append(m2_expr.ExprAssign(exception_flags, - m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))) - return e, [] - - -@sbuild.parse -def cmpxchg(arg1, arg2): - accumulator = mRAX[instr.v_opmode()][:arg1.size] - if (accumulator - arg1): - zf = i1(0) - accumulator = arg1 - else: - zf = i1(1) - arg1 = arg2 - - -@sbuild.parse -def cmpxchg8b(arg1): - accumulator = {mRAX[32], mRDX[32]} - if accumulator - arg1: - zf = i1(0) - mRAX[32] = arg1[:32] - mRDX[32] = arg1[32:] - else: - zf = i1(1) - arg1 = {mRBX[32], mRCX[32]} - - -@sbuild.parse -def cmpxchg16b(arg1): - accumulator = {mRAX[64], mRDX[64]} - if accumulator - arg1: - zf = i1(0) - mRAX[64] = arg1[:64] - mRDX[64] = arg1[64:] - else: - zf = i1(1) - arg1 = {mRBX[64], mRCX[64]} - - -def lds(ir, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) - DS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), - size=16) - e.append(m2_expr.ExprAssign(DS, DS_value)) - return e, [] - - -def les(ir, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) - ES_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), - size=16) - e.append(m2_expr.ExprAssign(ES, ES_value)) - return e, [] - - -def lss(ir, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) - SS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), - size=16) - e.append(m2_expr.ExprAssign(SS, 
SS_value)) - return e, [] - - -def lfs(ir, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) - FS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), - size=16) - e.append(m2_expr.ExprAssign(FS, FS_value)) - return e, [] - - -def lgs(ir, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, ir.ExprMem(src.ptr, size=dst.size))) - GS_value = ir.ExprMem(src.ptr + m2_expr.ExprInt(dst.size // 8, src.ptr.size), - size=16) - e.append(m2_expr.ExprAssign(GS, GS_value)) - return e, [] - - -def lahf(_, instr): - e = [] - args = [cf, m2_expr.ExprInt(1, 1), pf, m2_expr.ExprInt(0, 1), af, - m2_expr.ExprInt(0, 1), zf, nf] - e.append( - m2_expr.ExprAssign(mRAX[instr.mode][8:16], m2_expr.ExprCompose(*args))) - return e, [] - - -def sahf(_, instr): - tmp = mRAX[instr.mode][8:16] - e = [] - e.append(m2_expr.ExprAssign(cf, tmp[0:1])) - e.append(m2_expr.ExprAssign(pf, tmp[2:3])) - e.append(m2_expr.ExprAssign(af, tmp[4:5])) - e.append(m2_expr.ExprAssign(zf, tmp[6:7])) - e.append(m2_expr.ExprAssign(nf, tmp[7:8])) - return e, [] - - -def lar(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('access_segment', src))) - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('access_segment_ok', src))) - return e, [] - - -def lsl(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('load_segment_limit', src))) - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp('load_segment_limit_ok', src))) - return e, [] - - -def fclex(_, instr): - # XXX TODO - return [], [] - - -def fnclex(_, instr): - # XXX TODO - return [], [] - - -def l_str(_, instr, dst): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('load_tr_segment_selector', - m2_expr.ExprInt(0, 32)))) - return e, [] - - -def movd(_, instr, dst, src): - e = [] - if dst in regs_mm_expr: - e.append(m2_expr.ExprAssign( - dst, m2_expr.ExprCompose(src, m2_expr.ExprInt(0, 32)))) - elif dst in regs_xmm_expr: - 
e.append(m2_expr.ExprAssign( - dst, m2_expr.ExprCompose(src, m2_expr.ExprInt(0, 96)))) - else: - e.append(m2_expr.ExprAssign(dst, src[:32])) - return e, [] - - -def movdqu(_, instr, dst, src): - # XXX TODO alignment check - return [m2_expr.ExprAssign(dst, src)], [] - - -def movapd(_, instr, dst, src): - # XXX TODO alignment check - return [m2_expr.ExprAssign(dst, src)], [] - - -def andps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('&', dst, src))) - return e, [] - - -def andnps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('&', dst ^ dst.mask, src))) - return e, [] - - -def orps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('|', dst, src))) - return e, [] - - -def xorps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprOp('^', dst, src))) - return e, [] - - -def rdmsr(ir, instr): - e = [m2_expr.ExprAssign(exception_flags,m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))] - return e, [] - - -def wrmsr(ir, instr): - e = [m2_expr.ExprAssign(exception_flags,m2_expr.ExprInt(EXCEPT_PRIV_INSN, 32))] - return e, [] - -# MMX/SSE/AVX operations -# - -def vec_op_clip(op, size, callback=None): - """ - Generate simd operations - @op: the operator - @size: size of an element - """ - def vec_op_clip_instr(ir, instr, dst, src): - if op == '-': - result = dst[:size] - src[:size] - else: - result = m2_expr.ExprOp(op, dst[:size], src[:size]) - if callback is not None: - result = callback(result) - return [m2_expr.ExprAssign(dst[:size], result)], [] - return vec_op_clip_instr - -# Generic vertical operation - - -def vec_vertical_sem(op, elt_size, reg_size, dst, src, apply_on_output): - assert reg_size % elt_size == 0 - n = reg_size // elt_size - if op == '-': - ops = [ - apply_on_output((dst[i * elt_size:(i + 1) * elt_size] - - src[i * elt_size:(i + 1) * elt_size])) - for i in range(0, n) - ] - else: - ops = [ - apply_on_output(m2_expr.ExprOp(op, dst[i * elt_size:(i 
+ 1) * elt_size], - src[i * elt_size:(i + 1) * elt_size])) - for i in range(0, n) - ] - - return m2_expr.ExprCompose(*ops) - - -def __vec_vertical_instr_gen(op, elt_size, sem, apply_on_output): - def vec_instr(ir, instr, dst, src): - e = [] - if isinstance(src, m2_expr.ExprMem): - src = ir.ExprMem(src.ptr, dst.size) - reg_size = dst.size - e.append(m2_expr.ExprAssign(dst, sem(op, elt_size, reg_size, dst, src, - apply_on_output))) - return e, [] - return vec_instr - - -def vec_vertical_instr(op, elt_size, apply_on_output=lambda x: x): - return __vec_vertical_instr_gen(op, elt_size, vec_vertical_sem, - apply_on_output) - - -def _keep_mul_high(expr, signed=False): - assert expr.is_op("*") and len(expr.args) == 2 - - if signed: - arg1 = expr.args[0].signExtend(expr.size * 2) - arg2 = expr.args[1].signExtend(expr.size * 2) - else: - arg1 = expr.args[0].zeroExtend(expr.size * 2) - arg2 = expr.args[1].zeroExtend(expr.size * 2) - return m2_expr.ExprOp("*", arg1, arg2)[expr.size:] - -# Op, signed => associated comparison -_min_max_func = { - ("min", False): m2_expr.expr_is_unsigned_lower, - ("min", True): m2_expr.expr_is_signed_lower, - ("max", False): m2_expr.expr_is_unsigned_greater, - ("max", True): m2_expr.expr_is_signed_greater, -} -def _min_max(expr, signed): - assert (expr.is_op("min") or expr.is_op("max")) and len(expr.args) == 2 - return m2_expr.ExprCond( - _min_max_func[(expr.op, signed)](expr.args[1], expr.args[0]), - expr.args[1], - expr.args[0], - ) - -def _float_min_max(expr): - assert (expr.is_op("fmin") or expr.is_op("fmax")) and len(expr.args) == 2 - src1 = expr.args[0] - src2 = expr.args[1] - if expr.is_op("fmin"): - comp = m2_expr.expr_is_float_lower(src1, src2) - elif expr.is_op("fmax"): - comp = m2_expr.expr_is_float_lower(src2, src1) - - # x86 documentation (for MIN): - # IF ((SRC1 = 0.0) and (SRC2 = 0.0)) THEN DEST <-SRC2; - # ELSE IF (SRC1 = SNaN) THEN DEST <-SRC2; FI; - # ELSE IF (SRC2 = SNaN) THEN DEST <-SRC2; FI; - # ELSE IF (SRC1 < SRC2) THEN 
DEST <-SRC1; - # ELSE DEST<-SRC2; - # - # But this includes the NaN output of "SRC1 < SRC2" - # Associated text is more detailed, and this is the version impl here - return m2_expr.ExprCond( - m2_expr.expr_is_sNaN(src2), src2, - m2_expr.ExprCond( - m2_expr.expr_is_NaN(src2) | m2_expr.expr_is_NaN(src1), src2, - m2_expr.ExprCond(comp, src1, src2) - ) - ) - - -# Integer arithmetic -# - -# Additions -# - -# SSE -paddb = vec_vertical_instr('+', 8) -paddw = vec_vertical_instr('+', 16) -paddd = vec_vertical_instr('+', 32) -paddq = vec_vertical_instr('+', 64) - -# Substractions -# - -# SSE -psubb = vec_vertical_instr('-', 8) -psubw = vec_vertical_instr('-', 16) -psubd = vec_vertical_instr('-', 32) -psubq = vec_vertical_instr('-', 64) - -# Multiplications -# - -# SSE -pmullb = vec_vertical_instr('*', 8) -pmullw = vec_vertical_instr('*', 16) -pmulld = vec_vertical_instr('*', 32) -pmullq = vec_vertical_instr('*', 64) -pmulhub = vec_vertical_instr('*', 8, _keep_mul_high) -pmulhuw = vec_vertical_instr('*', 16, _keep_mul_high) -pmulhud = vec_vertical_instr('*', 32, _keep_mul_high) -pmulhuq = vec_vertical_instr('*', 64, _keep_mul_high) -pmulhb = vec_vertical_instr('*', 8, lambda x: _keep_mul_high(x, signed=True)) -pmulhw = vec_vertical_instr('*', 16, lambda x: _keep_mul_high(x, signed=True)) -pmulhd = vec_vertical_instr('*', 32, lambda x: _keep_mul_high(x, signed=True)) -pmulhq = vec_vertical_instr('*', 64, lambda x: _keep_mul_high(x, signed=True)) - -def pmuludq(ir, instr, dst, src): - e = [] - if dst.size == 64: - e.append(m2_expr.ExprAssign( - dst, - src[:32].zeroExtend(64) * dst[:32].zeroExtend(64) - )) - elif dst.size == 128: - e.append(m2_expr.ExprAssign( - dst[:64], - src[:32].zeroExtend(64) * dst[:32].zeroExtend(64) - )) - e.append(m2_expr.ExprAssign( - dst[64:], - src[64:96].zeroExtend(64) * dst[64:96].zeroExtend(64) - )) - else: - raise RuntimeError("Unsupported size %d" % dst.size) - return e, [] - -# Mix -# - -# SSE -def pmaddwd(ir, instr, dst, src): - sizedst = 32 - 
sizesrc = 16 - out = [] - for start in range(0, dst.size, sizedst): - base = start - mul1 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst) - base += sizesrc - mul2 = src[base: base + sizesrc].signExtend(sizedst) * dst[base: base + sizesrc].signExtend(sizedst) - out.append(mul1 + mul2) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def _absolute(expr): - """Return abs(@expr)""" - signed = expr.msb() - value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size) - return m2_expr.ExprCond(signed, value_unsigned, expr) - - -def psadbw(ir, instr, dst, src): - sizedst = 16 - sizesrc = 8 - out_dst = [] - for start in range(0, dst.size, 64): - out = [] - for src_start in range(0, 64, sizesrc): - beg = start + src_start - end = beg + sizesrc - # Not clear in the doc equations, but in the text, src and dst are: - # "8 unsigned byte integers" - out.append(_absolute(dst[beg: end].zeroExtend(sizedst) - src[beg: end].zeroExtend(sizedst))) - out_dst.append(m2_expr.ExprOp("+", *out)) - out_dst.append(m2_expr.ExprInt(0, 64 - sizedst)) - - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out_dst))], [] - -def _average(expr): - assert expr.is_op("avg") and len(expr.args) == 2 - - arg1 = expr.args[0].zeroExtend(expr.size * 2) - arg2 = expr.args[1].zeroExtend(expr.size * 2) - one = m2_expr.ExprInt(1, arg1.size) - # avg(unsigned) = (a + b + 1) >> 1, addition being at least on one more bit - return ((arg1 + arg2 + one) >> one)[:expr.size] - -pavgb = vec_vertical_instr('avg', 8, _average) -pavgw = vec_vertical_instr('avg', 16, _average) - -# Comparisons -# - -# SSE -pminsw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=True)) -pminub = vec_vertical_instr('min', 8, lambda x: _min_max(x, signed=False)) -pminuw = vec_vertical_instr('min', 16, lambda x: _min_max(x, signed=False)) -pminud = vec_vertical_instr('min', 32, lambda x: _min_max(x, signed=False)) -pmaxub = vec_vertical_instr('max', 8, 
lambda x: _min_max(x, signed=False)) -pmaxuw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=False)) -pmaxud = vec_vertical_instr('max', 32, lambda x: _min_max(x, signed=False)) -pmaxsw = vec_vertical_instr('max', 16, lambda x: _min_max(x, signed=True)) - -# Floating-point arithmetic -# - -# SSE -addss = vec_op_clip('fadd', 32) -addsd = vec_op_clip('fadd', 64) -addps = vec_vertical_instr('fadd', 32) -addpd = vec_vertical_instr('fadd', 64) -subss = vec_op_clip('fsub', 32) -subsd = vec_op_clip('fsub', 64) -subps = vec_vertical_instr('fsub', 32) -subpd = vec_vertical_instr('fsub', 64) -mulss = vec_op_clip('fmul', 32) -mulsd = vec_op_clip('fmul', 64) -mulps = vec_vertical_instr('fmul', 32) -mulpd = vec_vertical_instr('fmul', 64) -divss = vec_op_clip('fdiv', 32) -divsd = vec_op_clip('fdiv', 64) -divps = vec_vertical_instr('fdiv', 32) -divpd = vec_vertical_instr('fdiv', 64) - -# Comparisons (floating-point) - -minps = vec_vertical_instr('fmin', 32, _float_min_max) -minpd = vec_vertical_instr('fmin', 64, _float_min_max) -minss = vec_op_clip('fmin', 32, _float_min_max) -minsd = vec_op_clip('fmin', 64, _float_min_max) -maxps = vec_vertical_instr('fmax', 32, _float_min_max) -maxpd = vec_vertical_instr('fmax', 64, _float_min_max) -maxss = vec_op_clip('fmax', 32, _float_min_max) -maxsd = vec_op_clip('fmax', 64, _float_min_max) - -def _float_compare_to_mask(expr): - if expr.op == 'unord': - to_ext = m2_expr.expr_is_NaN(expr.args[0]) | m2_expr.expr_is_NaN(expr.args[1]) - elif expr.op == 'ord': - to_ext = ~m2_expr.expr_is_NaN(expr.args[0]) & ~m2_expr.expr_is_NaN(expr.args[1]) - else: - if expr.op == '==fu': - to_ext = m2_expr.expr_is_float_equal(expr.args[0], expr.args[1]) - on_NaN = m2_expr.ExprInt(0, 1) - elif expr.op == ' fp32 is needed - if double: - tmp_src = m2_expr.ExprOp('fpconvert_fp32', src[i*64:i*64 + 64]) - else: - tmp_src = src[i*32:i*32 + 32] - - e.append(m2_expr.ExprAssign( - dst[i*32:i*32 + 32], - m2_expr.ExprOp('fp_to_sint32', m2_expr.ExprOp( - 
'fpround_towardszero', - tmp_src - )))) - return e - -def cvttpd2pi(_, instr, dst, src): - return _cvtt_tpl(dst, src, [0, 1], double=True), [] - -def cvttpd2dq(_, instr, dst, src): - e = _cvtt_tpl(dst, src, [0, 1], double=True) - e.append(m2_expr.ExprAssign(dst[64:128], m2_expr.ExprInt(0, 64))) - return e, [] - -def cvttsd2si(_, instr, dst, src): - return _cvtt_tpl(dst, src, [0], double=True), [] - -def cvttps2dq(_, instr, dst, src): - return _cvtt_tpl(dst, src, [0, 1, 2, 3], double=False), [] - -def cvttps2pi(_, instr, dst, src): - return _cvtt_tpl(dst, src, [0, 1], double=False), [] - -def cvttss2si(_, instr, dst, src): - return _cvtt_tpl(dst, src, [0], double=False), [] - -def movss(_, instr, dst, src): - e = [] - if not isinstance(dst, m2_expr.ExprMem) and not isinstance(src, m2_expr.ExprMem): - # Source and Destination xmm - e.append(m2_expr.ExprAssign(dst[:32], src[:32])) - elif not isinstance(src, m2_expr.ExprMem) and isinstance(dst, m2_expr.ExprMem): - # Source XMM Destination Mem - e.append(m2_expr.ExprAssign(dst, src[:32])) - else: - # Source Mem Destination XMM - e.append(m2_expr.ExprAssign( - dst, m2_expr.ExprCompose(src, m2_expr.ExprInt(0, 96)))) - return e, [] - - -def ucomiss(_, instr, src1, src2): - e = [] - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp( - 'ucomiss_zf', src1[:32], src2[:32]))) - e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp( - 'ucomiss_pf', src1[:32], src2[:32]))) - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp( - 'ucomiss_cf', src1[:32], src2[:32]))) - - e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) - - return e, [] - -def ucomisd(_, instr, src1, src2): - e = [] - e.append(m2_expr.ExprAssign(zf, m2_expr.ExprOp( - 'ucomisd_zf', src1[:64], src2[:64]))) - e.append(m2_expr.ExprAssign(pf, m2_expr.ExprOp( - 'ucomisd_pf', src1[:64], src2[:64]))) - e.append(m2_expr.ExprAssign(cf, m2_expr.ExprOp( - 'ucomisd_cf', 
src1[:64], src2[:64]))) - - e.append(m2_expr.ExprAssign(of, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(af, m2_expr.ExprInt(0, 1))) - e.append(m2_expr.ExprAssign(nf, m2_expr.ExprInt(0, 1))) - - return e, [] - - -def pshufb(_, instr, dst, src): - e = [] - if dst.size == 64: - bit_l = 3 - elif dst.size == 128: - bit_l = 4 - else: - raise NotImplementedError("bad size") - for i in range(0, src.size, 8): - index = src[ - i:i + bit_l].zeroExtend(dst.size) << m2_expr.ExprInt(3, dst.size) - value = (dst >> index)[:8] - e.append(m2_expr.ExprAssign(dst[i:i + 8], - m2_expr.ExprCond(src[i + 7:i + 8], - m2_expr.ExprInt(0, 8), - value))) - return e, [] - - -def pshufd(_, instr, dst, src, imm): - control = int(imm) - out = [] - for i in range(4): - shift = ((control >> (i * 2)) & 3) * 32 - # shift is 2 bits long, expr.size is 128 - # => shift + 32 <= src.size - out.append(src[shift: shift + 32]) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def pshuflw(_, instr, dst, src, imm): - control = int(imm) - out = [] - for i in range(4): - shift = ((control >> (i * 2)) & 3) * 16 - out.append(src[shift: shift + 16]) - out.append(src[64:]) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def pshufhw(_, instr, dst, src, imm): - control = int(imm) - out = [src[:64]] - for i in range(4): - shift = ((control >> (i * 2)) & 3) * 16 - out.append(src[shift + 64: shift + 16 + 64]) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def ps_rl_ll(ir, instr, dst, src, op, size): - mask = {16: 0xF, - 32: 0x1F, - 64: 0x3F}[size] - mask = m2_expr.ExprInt(mask, dst.size) - - # Saturate the counter to 2**size - count = src.zeroExtend(dst.size) - count = m2_expr.ExprCond(count & expr_simp(~mask), - m2_expr.ExprInt(size, dst.size), # saturation - count, # count < 2**size - ) - count = count[:size] - if src.is_int(): - count = expr_simp(count) - - out = [] - for i in range(0, dst.size, size): - out.append(m2_expr.ExprOp(op, dst[i:i + 
size], count)) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def psrlw(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, ">>", 16) - - -def psrld(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, ">>", 32) - - -def psrlq(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, ">>", 64) - - -def psllw(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, "<<", 16) - - -def pslld(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, "<<", 32) - - -def psllq(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, "<<", 64) - - -def psraw(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, "a>>", 16) - - -def psrad(ir, instr, dst, src): - return ps_rl_ll(ir, instr, dst, src, "a>>", 32) - - -def pslldq(_, instr, dst, src): - assert src.is_int() - e = [] - count = int(src) - if count > 15: - return [m2_expr.ExprAssign(dst, m2_expr.ExprInt(0, dst.size))], [] - else: - return [m2_expr.ExprAssign(dst, dst << m2_expr.ExprInt(8 * count, dst.size))], [] - - -def psrldq(_, instr, dst, src): - assert src.is_int() - count = int(src) - if count > 15: - return [m2_expr.ExprAssign(dst, m2_expr.ExprInt(0, dst.size))], [] - else: - return [m2_expr.ExprAssign(dst, dst >> m2_expr.ExprInt(8 * count, dst.size))], [] - - -def iret(ir, instr): - """IRET implementation - XXX: only support "no-privilege change" - """ - size = instr.v_opmode() - exprs, _ = retf(ir, instr, m2_expr.ExprInt(size // 8, size=size)) - tmp = mRSP[instr.mode][:size] + m2_expr.ExprInt((2 * size) // 8, size=size) - exprs += _tpl_eflags(tmp) - return exprs, [] - - -def pcmpeq(_, instr, dst, src, size): - e = [] - for i in range(0, dst.size, size): - test = m2_expr.expr_is_equal(dst[i:i + size], src[i:i + size]) - e.append(m2_expr.ExprAssign(dst[i:i + size], - m2_expr.ExprCond(test, - m2_expr.ExprInt(-1, size), - m2_expr.ExprInt(0, size)))) - return e, [] - - -def pcmpgt(_, instr, dst, src, size): - e = [] - for i in range(0, dst.size, size): 
- test = m2_expr.expr_is_signed_greater(dst[i:i + size], src[i:i + size]) - e.append(m2_expr.ExprAssign(dst[i:i + size], - m2_expr.ExprCond(test, - m2_expr.ExprInt(-1, size), - m2_expr.ExprInt(0, size)))) - return e, [] - - -def pcmpeqb(ir, instr, dst, src): - return pcmpeq(ir, instr, dst, src, 8) - -def pcmpeqw(ir, instr, dst, src): - return pcmpeq(ir, instr, dst, src, 16) - -def pcmpeqd(ir, instr, dst, src): - return pcmpeq(ir, instr, dst, src, 32) - -def pcmpeqq(ir, instr, dst, src): - return pcmpeq(ir, instr, dst, src, 64) - - - - -def pcmpgtb(ir, instr, dst, src): - return pcmpgt(ir, instr, dst, src, 8) - -def pcmpgtw(ir, instr, dst, src): - return pcmpgt(ir, instr, dst, src, 16) - -def pcmpgtd(ir, instr, dst, src): - return pcmpgt(ir, instr, dst, src, 32) - -def pcmpgtq(ir, instr, dst, src): - return pcmpgt(ir, instr, dst, src, 64) - - - -def punpck(_, instr, dst, src, size, off): - e = [] - slices = [] - for i in range(dst.size // (2 * size)): - slices.append(dst[size * i + off: size * i + off + size]) - slices.append(src[size * i + off: size * i + off + size]) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*slices))) - return e, [] - - -def punpckhbw(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 8, dst.size // 2) - - -def punpckhwd(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 16, dst.size // 2) - - -def punpckhdq(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 32, dst.size // 2) - - -def punpckhqdq(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 64, dst.size // 2) - - -def punpcklbw(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 8, 0) - - -def punpcklwd(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 16, 0) - - -def punpckldq(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 32, 0) - - -def punpcklqdq(ir, instr, dst, src): - return punpck(ir, instr, dst, src, 64, 0) - - -def pinsr(_, instr, dst, src, imm, size): - e = [] - - mask = {8: 0xF, - 16: 0x7, - 32: 0x3, - 64: 
0x1}[size] - - sel = (int(imm) & mask) * size - e.append(m2_expr.ExprAssign(dst[sel:sel + size], src[:size])) - - return e, [] - - -def pinsrb(ir, instr, dst, src, imm): - return pinsr(ir, instr, dst, src, imm, 8) - - -def pinsrw(ir, instr, dst, src, imm): - return pinsr(ir, instr, dst, src, imm, 16) - - -def pinsrd(ir, instr, dst, src, imm): - return pinsr(ir, instr, dst, src, imm, 32) - - -def pinsrq(ir, instr, dst, src, imm): - return pinsr(ir, instr, dst, src, imm, 64) - - -def pextr(_, instr, dst, src, imm, size): - e = [] - - mask = {8: 0xF, - 16: 0x7, - 32: 0x3, - 64: 0x1}[size] - - sel = (int(imm) & mask) * size - e.append(m2_expr.ExprAssign(dst, src[sel:sel + size].zeroExtend(dst.size))) - - return e, [] - - -def pextrb(ir, instr, dst, src, imm): - return pextr(ir, instr, dst, src, imm, 8) - - -def pextrw(ir, instr, dst, src, imm): - return pextr(ir, instr, dst, src, imm, 16) - - -def pextrd(ir, instr, dst, src, imm): - return pextr(ir, instr, dst, src, imm, 32) - - -def pextrq(ir, instr, dst, src, imm): - return pextr(ir, instr, dst, src, imm, 64) - - -def unpckhps(_, instr, dst, src): - e = [] - src = m2_expr.ExprCompose(dst[64:96], src[64:96], dst[96:128], src[96:128]) - e.append(m2_expr.ExprAssign(dst, src)) - return e, [] - - -def unpckhpd(_, instr, dst, src): - e = [] - src = m2_expr.ExprCompose(dst[64:128], src[64:128]) - e.append(m2_expr.ExprAssign(dst, src)) - return e, [] - - -def unpcklps(_, instr, dst, src): - e = [] - src = m2_expr.ExprCompose(dst[0:32], src[0:32], dst[32:64], src[32:64]) - e.append(m2_expr.ExprAssign(dst, src)) - return e, [] - - -def unpcklpd(_, instr, dst, src): - e = [] - src = m2_expr.ExprCompose(dst[0:64], src[0:64]) - e.append(m2_expr.ExprAssign(dst, src)) - return e, [] - - -def movlpd(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst[:64], src[:64])) - return e, [] - - -def movlps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst[:64], src[:64])) - return e, [] - - -def movhpd(_, instr, 
dst, src): - e = [] - if src.size == 64: - e.append(m2_expr.ExprAssign(dst[64:128], src)) - elif dst.size == 64: - e.append(m2_expr.ExprAssign(dst, src[64:128])) - else: - raise RuntimeError("bad encoding!") - return e, [] - - -def movlhps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst[64:128], src[:64])) - return e, [] - - -def movhlps(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst[:64], src[64:128])) - return e, [] - - -def movdq2q(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, src[:64])) - return e, [] - - -def movq2dq(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst, src[:64].zeroExtend(dst.size))) - return e, [] - - -def sqrt_gen(_, instr, dst, src, size): - e = [] - out = [] - for i in range(src.size // size): - out.append(m2_expr.ExprOp('fsqrt', - src[i * size: (i + 1) * size])) - src = m2_expr.ExprCompose(*out) - e.append(m2_expr.ExprAssign(dst, src)) - return e, [] - - -def sqrtpd(ir, instr, dst, src): - return sqrt_gen(ir, instr, dst, src, 64) - - -def sqrtps(ir, instr, dst, src): - return sqrt_gen(ir, instr, dst, src, 32) - - -def sqrtsd(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst[:64], - m2_expr.ExprOp('fsqrt', - src[:64]))) - return e, [] - - -def sqrtss(_, instr, dst, src): - e = [] - e.append(m2_expr.ExprAssign(dst[:32], - m2_expr.ExprOp('fsqrt', - src[:32]))) - return e, [] - - -def pmovmskb(_, instr, dst, src): - e = [] - out = [] - for i in range(src.size // 8): - out.append(src[8 * i + 7:8 * (i + 1)]) - src = m2_expr.ExprCompose(*out) - e.append(m2_expr.ExprAssign(dst, src.zeroExtend(dst.size))) - return e, [] - - -def smsw(ir, instr, dst): - e = [] - LOG_X86_SEM.warning("DEFAULT SMSW %s!!", str(dst)) - e.append(m2_expr.ExprAssign(dst, m2_expr.ExprInt(0x80050033, 32)[:dst.size])) - return e, [] - - -def bndmov(ir, instr, dst, src): - # Implemented as a NOP, because BND side effects are not yet supported - return [], [] - -def palignr(ir, instr, dst, src, imm): 
- # dst.src >> imm * 8 [:dst.size] - - shift = int(imm) * 8 - if shift == 0: - result = src - elif shift == src.size: - result = dst - elif shift > src.size: - result = dst >> m2_expr.ExprInt(shift - src.size, dst.size) - else: - # shift < src.size - result = m2_expr.ExprCompose( - src[shift:], - dst[:shift], - ) - - return [m2_expr.ExprAssign(dst, result)], [] - - -def _signed_saturation(expr, dst_size): - """Saturate the expr @expr for @dst_size bit - Signed saturation return MAX_INT / MIN_INT or value depending on the value - """ - assert expr.size > dst_size - - median = 1 << (dst_size - 1) - min_int = m2_expr.ExprInt(- median, dst_size) - max_int = m2_expr.ExprInt(median - 1, dst_size) - signed = expr.msb() - value_unsigned = (expr ^ expr.mask) + m2_expr.ExprInt(1, expr.size) - # Re-use the sign bit - value = m2_expr.ExprCompose(expr[:dst_size - 1], signed) - - # Bit hack: to avoid a double signed comparison, use mask - # ie., in unsigned, 0xXY > 0x0f iff X is not null - - # if expr >s 0 - # if expr[dst_size - 1:] > 0: # bigger than max_int - # -> max_int - # else - # -> value - # else # negative - # if expr[dst_size:-1] > 0: # smaller than min_int - # -> value - # else - # -> min_int - - return m2_expr.ExprCond( - signed, - m2_expr.ExprCond(value_unsigned[dst_size - 1:], - min_int, - value), - m2_expr.ExprCond(expr[dst_size - 1:], - max_int, - value), - ) - - -def _unsigned_saturation(expr, dst_size): - """Saturate the expr @expr for @dst_size bit - Unsigned saturation return MAX_INT or value depending on the value - """ - assert expr.size > dst_size - - zero = m2_expr.ExprInt(0, dst_size) - max_int = m2_expr.ExprInt(-1, dst_size) - value = expr[:dst_size] - signed = expr.msb() - - - # Bit hack: to avoid a double signed comparison, use mask - # ie., in unsigned, 0xXY > 0x0f iff X is not null - - return m2_expr.ExprCond( - signed, - zero, - m2_expr.ExprCond(expr[dst_size:], - max_int, - value), - ) - - - -def packsswb(ir, instr, dst, src): - out = [] - for 
source in [dst, src]: - for start in range(0, dst.size, 16): - out.append(_signed_saturation(source[start:start + 16], 8)) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def packssdw(ir, instr, dst, src): - out = [] - for source in [dst, src]: - for start in range(0, dst.size, 32): - out.append(_signed_saturation(source[start:start + 32], 16)) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def packuswb(ir, instr, dst, src): - out = [] - for source in [dst, src]: - for start in range(0, dst.size, 16): - out.append(_unsigned_saturation(source[start:start + 16], 8)) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def _saturation_sub_unsigned(expr): - assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") - - # Compute the soustraction on one more bit to be able to distinguish cases: - # 0x48 - 0xd7 in 8 bit, should saturate - arg1 = expr.args[0].zeroExtend(expr.size + 1) - arg2 = expr.args[1].args[0].zeroExtend(expr.size + 1) - return _unsigned_saturation(arg1 - arg2, expr.size) - -def _saturation_sub_signed(expr): - assert expr.is_op("+") and len(expr.args) == 2 and expr.args[-1].is_op("-") - - # Compute the subtraction on two more bits, see _saturation_sub_unsigned - arg1 = expr.args[0].signExtend(expr.size + 2) - arg2 = expr.args[1].args[0].signExtend(expr.size + 2) - return _signed_saturation(arg1 - arg2, expr.size) - -def _saturation_add(expr): - assert expr.is_op("+") and len(expr.args) == 2 - - # Compute the addition on one more bit to be able to distinguish cases: - # 0x48 + 0xd7 in 8 bit, should saturate - - arg1 = expr.args[0].zeroExtend(expr.size + 1) - arg2 = expr.args[1].zeroExtend(expr.size + 1) - - # We can also use _unsigned_saturation with two additional bits (to - # distinguish minus and overflow case) - # The resulting expression being more complicated with an impossible case - # (signed=True), we rewrite the rule here - - return m2_expr.ExprCond((arg1 + 
arg2).msb(), m2_expr.ExprInt(-1, expr.size), - expr) - -def _saturation_add_signed(expr): - assert expr.is_op("+") and len(expr.args) == 2 - - # Compute the subtraction on two more bits, see _saturation_add_unsigned - - arg1 = expr.args[0].signExtend(expr.size + 2) - arg2 = expr.args[1].signExtend(expr.size + 2) - - return _signed_saturation(arg1 + arg2, expr.size) - - -# Saturate SSE operations - -psubusb = vec_vertical_instr('-', 8, _saturation_sub_unsigned) -psubusw = vec_vertical_instr('-', 16, _saturation_sub_unsigned) -paddusb = vec_vertical_instr('+', 8, _saturation_add) -paddusw = vec_vertical_instr('+', 16, _saturation_add) -psubsb = vec_vertical_instr('-', 8, _saturation_sub_signed) -psubsw = vec_vertical_instr('-', 16, _saturation_sub_signed) -paddsb = vec_vertical_instr('+', 8, _saturation_add_signed) -paddsw = vec_vertical_instr('+', 16, _saturation_add_signed) - - -# Others SSE operations - -def maskmovq(ir, instr, src, mask): - loc_next = ir.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - blks = [] - - # For each possibility, check if a write is necessary - check_labels = [m2_expr.ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - for _ in range(0, mask.size, 8)] - # If the write has to be done, do it (otherwise, nothing happen) - write_labels = [m2_expr.ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) - for _ in range(0, mask.size, 8)] - - # Build check blocks - for i, start in enumerate(range(0, mask.size, 8)): - bit = mask[start + 7: start + 8] - cur_label = check_labels[i] - next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr - write_label = write_labels[i] - check = m2_expr.ExprAssign(ir.IRDst, - m2_expr.ExprCond(bit, - write_label, - next_check_label)) - blks.append(IRBlock(cur_label.loc_key, [AssignBlock([check], instr)])) - - # Build write blocks - dst_addr = mRDI[instr.mode] - for i, start in enumerate(range(0, mask.size, 8)): - cur_label = write_labels[i] - 
next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr - write_addr = dst_addr + m2_expr.ExprInt(i, dst_addr.size) - - # @8[DI/EDI/RDI + i] = src[byte i] - write_mem = m2_expr.ExprAssign(m2_expr.ExprMem(write_addr, 8), - src[start: start + 8]) - jump = m2_expr.ExprAssign(ir.IRDst, next_check_label) - blks.append(IRBlock(cur_label.loc_key, [AssignBlock([write_mem, jump], instr)])) - - # If mask is null, bypass all - e = [m2_expr.ExprAssign(ir.IRDst, m2_expr.ExprCond(mask, - check_labels[0], - loc_next_expr))] - return e, blks - - -def emms(ir, instr): - # Implemented as a NOP - return [], [] - -def endbr64(ir, instr): - # Implemented as a NOP - return [], [] - -def endbr32(ir, instr): - # Implemented as a NOP - return [], [] - -# Common value without too many option, 0x1fa0 -STMXCSR_VALUE = 0x1fa0 -def stmxcsr(ir, instr, dst): - return [m2_expr.ExprAssign(dst, m2_expr.ExprInt(STMXCSR_VALUE, dst.size))], [] - -def ldmxcsr(ir, instr, dst): - # Implemented as a NOP - return [], [] - - -def _select4(src, control): - # Implementation inspired from Intel Intrisics Guide - # @control is already resolved (was an immediate) - - if control == 0: - return src[:32] # 0 - elif control == 1: - return src[32:64] - elif control == 2: - return src[64:96] - elif control == 3: - return src[96:] - else: - raise ValueError("Control must be on 2 bits") - - -def shufps(ir, instr, dst, src, imm8): - out = [] - control = int(imm8) - for i in range(4): - if i < 2: - source = dst - else: - source = src - out.append(_select4(source, (control >> (i * 2)) & 3)) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - - -def shufpd(ir, instr, dst, src, imm8): - out = [] - control = int(imm8) - out.append(dst[64:] if control & 1 else dst[:64]) - out.append(src[64:] if control & 2 else src[:64]) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out))], [] - -def movmskps(ir, instr, dst, src): - out = [] - for i in range(4): - out.append(src[(32 * i) 
+ 31:(32 * i) + 32]) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out).zeroExtend(dst.size))], [] - -def movmskpd(ir, instr, dst, src): - out = [] - for i in range(2): - out.append(src[(64 * i) + 63:(64 * i) + 64]) - return [m2_expr.ExprAssign(dst, m2_expr.ExprCompose(*out).zeroExtend(dst.size))], [] - - -mnemo_func = {'mov': mov, - 'xchg': xchg, - 'movzx': movzx, - 'movsx': movsx, - 'movsxd': movsx, - 'lea': lea, - 'add': add, - 'xadd': xadd, - 'adc': adc, - 'sub': sub, - 'sbb': sbb, - 'neg': neg, - 'not': l_not, - 'cmp': l_cmp, - 'xor': xor, - 'pxor': pxor, - 'or': l_or, - 'and': l_and, - 'test': l_test, - 'rol': l_rol, - 'ror': l_ror, - 'rcl': rcl, - 'rcr': rcr, - 'sar': sar, - 'shr': shr, - 'sal': shl, - 'shl': shl, - 'shld': shld, - 'cmc': cmc, - 'clc': clc, - 'stc': stc, - 'cld': cld, - 'std': std, - 'cli': cli, - 'sti': sti, - 'bsf': bsf, - 'bsr': bsr, - 'inc': inc, - 'dec': dec, - 'push': push, - 'pushw': pushw, - 'pop': pop, - 'popw': popw, - 'sete': sete, - 'setnz': setnz, - 'setl': setl, - 'setg': setg, - 'setge': setge, - 'seta': seta, - 'setae': setae, - 'setb': setb, - 'setbe': setbe, - 'setns': setns, - 'sets': sets, - 'seto': seto, - 'setp': setp, - 'setpe': setp, - 'setnp': setnp, - 'setpo': setnp, - 'setle': setle, - 'setng': setle, - 'setna': setna, - 'setnbe': setnbe, - 'setno': setno, - 'setnc': setnb, - 'setz': sete, - 'setne': setnz, - 'setnb': setae, - 'setnae': setb, - 'setc': setb, - 'setnge': setl, - 'setnl': setge, - 'setnle': setg, - 'setalc': setalc, - 'bswap': bswap, - 'cmpsb': lambda ir, instr: cmps(ir, instr, 8), - 'cmpsw': lambda ir, instr: cmps(ir, instr, 16), - 'cmpsd': lambda ir, instr: cmps(ir, instr, 32), - 'cmpsq': lambda ir, instr: cmps(ir, instr, 64), - 'scasb': lambda ir, instr: scas(ir, instr, 8), - 'scasw': lambda ir, instr: scas(ir, instr, 16), - 'scasd': lambda ir, instr: scas(ir, instr, 32), - 'scasq': lambda ir, instr: scas(ir, instr, 64), - 'pushfd': pushfd, - 'pushfq': pushfq, - 'pushfw': pushfw, - 
'popfd': popfd, - 'popfq': popfd, - 'popfw': popfw, - 'pusha': pusha, - 'pushad': pushad, - 'popad': popad, - 'popa': popa, - 'call': call, - 'ret': ret, - 'retf': retf, - 'iret': iret, - 'iretd': iret, - 'leave': leave, - 'enter': enter, - 'jmp': jmp, - 'jz': jz, - 'je': jz, - 'jcxz': jcxz, - 'jecxz': jecxz, - 'jrcxz': jrcxz, - 'jnz': jnz, - 'jp': jp, - 'jpe': jp, - 'jnp': jnp, - 'ja': ja, - 'jae': jae, - 'jb': jb, - 'jbe': jbe, - 'jg': jg, - 'jge': jge, - 'jl': jl, - 'jle': jle, - 'js': js, - 'jns': jns, - 'jo': jo, - 'jno': jno, - 'loop': loop, - 'loopne': loopne, - 'loope': loope, - 'div': div, - 'mul': mul, - 'imul': imul, - 'idiv': idiv, - - 'cbw': cbw, - 'cwde': cwde, - 'cdqe': cdqe, - - 'cwd': cwd, - 'cdq': cdq, - 'cqo': cqo, - - 'daa': daa, - 'das': das, - 'aam': aam, - 'aad': aad, - 'aaa': aaa, - 'aas': aas, - 'shrd': shrd, - 'stosb': lambda ir, instr: stos(ir, instr, 8), - 'stosw': lambda ir, instr: stos(ir, instr, 16), - 'stosd': lambda ir, instr: stos(ir, instr, 32), - 'stosq': lambda ir, instr: stos(ir, instr, 64), - - 'lodsb': lambda ir, instr: lods(ir, instr, 8), - 'lodsw': lambda ir, instr: lods(ir, instr, 16), - 'lodsd': lambda ir, instr: lods(ir, instr, 32), - 'lodsq': lambda ir, instr: lods(ir, instr, 64), - - 'movsb': lambda ir, instr: movs(ir, instr, 8), - 'movsw': lambda ir, instr: movs(ir, instr, 16), - 'movsd': movsd_dispatch, - 'movsq': lambda ir, instr: movs(ir, instr, 64), - 'fcomp': fcomp, - 'fcompp': fcompp, - 'ficomp': ficomp, - 'fucom': fucom, - 'fucomp': fucomp, - 'fucompp': fucompp, - 'comiss': comiss, - 'comisd': comisd, - 'nop': nop, - 'ud2': ud2, - 'prefetch0': prefetch0, - 'prefetch1': prefetch1, - 'prefetch2': prefetch2, - 'prefetchw': prefetchw, - 'prefetchnta': prefetchnta, - 'lfence': lfence, - 'mfence': mfence, - 'sfence': sfence, - 'fnop': nop, # XXX - 'hlt': hlt, - 'rdtsc': rdtsc, - 'fst': fst, - 'fstp': fstp, - 'fist': fist, - 'fistp': fistp, - 'fisttp': fisttp, - 'fld': fld, - 'fldz': fldz, - 'fld1': fld1, - 'fldl2t': 
fldl2t, - 'fldpi': fldpi, - 'fldln2': fldln2, - 'fldl2e': fldl2e, - 'fldlg2': fldlg2, - 'fild': fild, - 'fadd': fadd, - 'fiadd': fiadd, - 'fisub': fisub, - 'fisubr': fisubr, - 'fpatan': fpatan, - 'fprem': fprem, - 'fprem1': fprem1, - 'fninit': fninit, - 'fyl2x': fyl2x, - 'faddp': faddp, - 'fsub': fsub, - 'fsubp': fsubp, - 'fsubr': fsubr, - 'fsubrp': fsubrp, - 'fmul': fmul, - 'fimul': fimul, - 'fmulp': fmulp, - 'fdiv': fdiv, - 'fdivr': fdivr, - 'fdivrp': fdivrp, - 'fidiv': fidiv, - 'fidivr': fidivr, - 'fdivp': fdivp, - 'fxch': fxch, - 'fptan': fptan, - 'frndint': frndint, - 'fsin': fsin, - 'fcos': fcos, - 'fsincos': fsincos, - 'fscale': fscale, - 'f2xm1': f2xm1, - 'fchs': fchs, - 'fsqrt': fsqrt, - 'fabs': fabs, - 'fnstsw': fnstsw, - 'fnstcw': fnstcw, - 'fldcw': fldcw, - 'fwait': fwait, - 'fcmovb': fcmovb, - 'fcmove': fcmove, - 'fcmovbe': fcmovbe, - 'fcmovu': fcmovu, - 'fcmovnb': fcmovnb, - 'fcmovne': fcmovne, - 'fcmovnbe': fcmovnbe, - 'fcmovnu': fcmovnu, - 'fnstenv': fnstenv, - 'fldenv': fldenv, - 'sidt': sidt, - 'sldt': sldt, - 'arpl': arpl, - 'cmovz': cmovz, - 'cmove': cmovz, - 'cmovnz': cmovnz, - 'cmovpe': cmovpe, - 'cmovnp': cmovnp, - 'cmovge': cmovge, - 'cmovnl': cmovge, - 'cmovg': cmovg, - 'cmovl': cmovl, - 'cmova': cmova, - 'cmovae': cmovae, - 'cmovbe': cmovbe, - 'cmovb': cmovb, - 'cmovnge': cmovl, - 'cmovle': cmovle, - 'cmovng': cmovle, - 'cmovo': cmovo, - 'cmovno': cmovno, - 'cmovs': cmovs, - 'cmovns': cmovns, - 'icebp': icebp, - 'int': l_int, - 'xlat': xlat, - 'bt': bt, - 'cpuid': cpuid, - 'fcom': fcom, - 'ftst': ftst, - 'fxam': fxam, - 'ficom': ficom, - 'fcomi': fcomi, - 'fcomip': fcomip, - 'fucomi': fucomi, - 'fucomip': fucomip, - 'insb': lambda ir, instr: ins(ir, instr, 8), - 'insw': lambda ir, instr: ins(ir, instr, 16), - 'insd': lambda ir, instr: ins(ir, instr, 32), - 'btc': btc, - 'bts': bts, - 'btr': btr, - 'into': into, - 'in': l_in, - 'outsb': lambda ir, instr: l_outs(ir, instr, 8), - 'outsw': lambda ir, instr: l_outs(ir, instr, 16), - 'outsd': 
lambda ir, instr: l_outs(ir, instr, 32), - - 'out': l_out, - "sysenter": l_sysenter, - "syscall": l_syscall, - "cmpxchg": cmpxchg, - "cmpxchg8b": cmpxchg8b, - "lds": lds, - "les": les, - "lss": lss, - "lfs": lfs, - "lgs": lgs, - "lahf": lahf, - "sahf": sahf, - "lar": lar, - "lsl": lsl, - "fclex": fclex, - "fnclex": fnclex, - "str": l_str, - "movd": movd, - "movdqu": movdqu, - "movdqa": movdqu, - "movapd": movapd, # XXX TODO alignment check - "movupd": movapd, # XXX TODO alignment check - "movaps": movapd, # XXX TODO alignment check - "movups": movapd, # XXX TODO alignment check - "andps": andps, - "andpd": andps, - "andnps": andnps, - "andnpd": andnps, - "orps": orps, - "orpd": orps, - "xorps": xorps, - "xorpd": xorps, - - "movq": movq, - - "pminsw": pminsw, - "cvtdq2pd": cvtdq2pd, - "cvtdq2ps": cvtdq2ps, - "cvtpd2dq": cvtpd2dq, - "cvtpd2pi": cvtpd2pi, - "cvtpd2ps": cvtpd2ps, - "cvtpi2pd": cvtpi2pd, - "cvtpi2ps": cvtpi2ps, - "cvtps2dq": cvtps2dq, - "cvtps2pd": cvtps2pd, - "cvtps2pi": cvtps2pi, - "cvtsd2si": cvtsd2si, - "cvtsd2ss": cvtsd2ss, - "cvtsi2sd": cvtsi2sd, - "cvtsi2ss": cvtsi2ss, - "cvtss2sd": cvtss2sd, - "cvtss2si": cvtss2si, - "cvttpd2pi": cvttpd2pi, - "cvttpd2dq": cvttpd2dq, - "cvttps2dq": cvttps2dq, - "cvttps2pi": cvttps2pi, - "cvttsd2si": cvttsd2si, - "cvttss2si": cvttss2si, - - - "bndmov": bndmov, - - - - - "movss": movss, - - "ucomiss": ucomiss, - "ucomisd": ucomisd, - - # - # MMX/AVX/SSE operations - - # Arithmetic (integers) - # - - # Additions - # SSE - "paddb": paddb, - "paddw": paddw, - "paddd": paddd, - "paddq": paddq, - - # Substractions - # SSE - "psubb": psubb, - "psubw": psubw, - "psubd": psubd, - "psubq": psubq, - - # Multiplications - # SSE - "pmullb": pmullb, - "pmullw": pmullw, - "pmulld": pmulld, - "pmullq": pmullq, - "pmulhub": pmulhub, - "pmulhuw": pmulhuw, - "pmulhud": pmulhud, - "pmulhuq": pmulhuq, - "pmulhb": pmulhb, - "pmulhw": pmulhw, - "pmulhd": pmulhd, - "pmulhq": pmulhq, - "pmuludq": pmuludq, - - # Mix - # SSE - "pmaddwd": 
pmaddwd, - "psadbw": psadbw, - "pavgb": pavgb, - "pavgw": pavgw, - - # Arithmetic (floating-point) - # - - # Additions - # SSE - "addss": addss, - "addsd": addsd, - "addps": addps, - "addpd": addpd, - - # Substractions - # SSE - "subss": subss, - "subsd": subsd, - "subps": subps, - "subpd": subpd, - - # Multiplications - # SSE - "mulss": mulss, - "mulsd": mulsd, - "mulps": mulps, - "mulpd": mulpd, - - # Divisions - # SSE - "divss": divss, - "divsd": divsd, - "divps": divps, - "divpd": divpd, - - # Comparisons (floating-point) - # - "minps": minps, - "minpd": minpd, - "minss": minss, - "minsd": minsd, - "maxps": maxps, - "maxpd": maxpd, - "maxss": maxss, - "maxsd": maxsd, - "cmpeqps": cmpeqps, - "cmpeqpd": cmpeqpd, - "cmpeqss": cmpeqss, - "cmpeqsd": cmpeqsd, - "cmpltps": cmpltps, - "cmpltpd": cmpltpd, - "cmpltss": cmpltss, - "cmpltsd": cmpltsd, - "cmpleps": cmpleps, - "cmplepd": cmplepd, - "cmpless": cmpless, - "cmplesd": cmplesd, - "cmpunordps": cmpunordps, - "cmpunordpd": cmpunordpd, - "cmpunordss": cmpunordss, - "cmpunordsd": cmpunordsd, - "cmpneqps": cmpneqps, - "cmpneqpd": cmpneqpd, - "cmpneqss": cmpneqss, - "cmpneqsd": cmpneqsd, - "cmpnltps": cmpnltps, - "cmpnltpd": cmpnltpd, - "cmpnltss": cmpnltss, - "cmpnltsd": cmpnltsd, - "cmpnleps": cmpnleps, - "cmpnlepd": cmpnlepd, - "cmpnless": cmpnless, - "cmpnlesd": cmpnlesd, - "cmpordps": cmpordps, - "cmpordpd": cmpordpd, - "cmpordss": cmpordss, - "cmpordsd": cmpordsd, - - # Logical (floating-point) - # - - "pand": pand, - "pandn": pandn, - "por": por, - - "rdmsr": rdmsr, - "wrmsr": wrmsr, - "pshufb": pshufb, - "pshufd": pshufd, - "pshuflw": pshuflw, - "pshufhw": pshufhw, - - "psrlw": psrlw, - "psrld": psrld, - "psrlq": psrlq, - "psllw": psllw, - "pslld": pslld, - "psllq": psllq, - "pslldq": pslldq, - "psrldq": psrldq, - "psraw": psraw, - "psrad": psrad, - - "palignr": palignr, - - "pmaxub": pmaxub, - "pmaxuw": pmaxuw, - "pmaxud": pmaxud, - "pmaxsw": pmaxsw, - - "pminub": pminub, - "pminuw": pminuw, - "pminud": 
pminud, - - "pcmpeqb": pcmpeqb, - "pcmpeqw": pcmpeqw, - "pcmpeqd": pcmpeqd, - "pcmpeqq": pcmpeqq, - - "pcmpgtb": pcmpgtb, - "pcmpgtw": pcmpgtw, - "pcmpgtd": pcmpgtd, - "pcmpgtq": pcmpgtq, - - "punpckhbw": punpckhbw, - "punpckhwd": punpckhwd, - "punpckhdq": punpckhdq, - "punpckhqdq": punpckhqdq, - - - "punpcklbw": punpcklbw, - "punpcklwd": punpcklwd, - "punpckldq": punpckldq, - "punpcklqdq": punpcklqdq, - - "pinsrb": pinsrb, - "pinsrw": pinsrw, - "pinsrd": pinsrd, - "pinsrq": pinsrq, - - "pextrb": pextrb, - "pextrw": pextrw, - "pextrd": pextrd, - "pextrq": pextrq, - - "unpckhps": unpckhps, - "unpckhpd": unpckhpd, - "unpcklps": unpcklps, - "unpcklpd": unpcklpd, - - "movlpd": movlpd, - "movlps": movlps, - "movhpd": movhpd, - "movhps": movhpd, - "movlhps": movlhps, - "movhlps": movhlps, - "movdq2q": movdq2q, - "movq2dq": movq2dq, - - "sqrtpd": sqrtpd, - "sqrtps": sqrtps, - "sqrtsd": sqrtsd, - "sqrtss": sqrtss, - - "pmovmskb": pmovmskb, - - "packsswb": packsswb, - "packssdw": packssdw, - "packuswb": packuswb, - - "psubusb": psubusb, - "psubusw": psubusw, - "paddusb": paddusb, - "paddusw": paddusw, - "psubsb": psubsb, - "psubsw": psubsw, - "paddsb": paddsb, - "paddsw": paddsw, - - "smsw": smsw, - "maskmovq": maskmovq, - "maskmovdqu": maskmovq, - "emms": emms, - "shufps": shufps, - "shufpd": shufpd, - "movmskps": movmskps, - "movmskpd": movmskpd, - "stmxcsr": stmxcsr, - "ldmxcsr": ldmxcsr, - "endbr64": endbr64, - "endbr32": endbr32, - } - - -class ir_x86_16(IntermediateRepresentation): - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_x86, 16, loc_db) - self.do_stk_segm = False - self.do_ds_segm = False - self.do_str_segm = False - self.do_all_segm = False - self.pc = IP - self.sp = SP - self.IRDst = m2_expr.ExprId('IRDst', 16) - # Size of memory pointer access in IR - # 16 bit mode memory accesses may be greater than 16 bits - # 32 bit size may be enough - self.addrsize = 32 - - def mod_pc(self, instr, instr_ir, extra_ir): - pass - - def 
ExprMem(self, ptr, size): - """Generate a memory access to @ptr - The ptr is resized to a fixed size self.addrsize - - @ptr: Expr instance to the memory address - @size: size of the memory""" - - return m2_expr.ExprMem(expraddr(self.addrsize, ptr), size) - - def gen_segm_expr(self, selector, addr): - ptr = m2_expr.ExprOp( - 'segm', - selector, - addr.zeroExtend(self.addrsize) - ) - - return ptr - - def get_ir(self, instr): - args = instr.args[:] - args = [arg.replace_expr(float_replace) for arg in args] - args = fix_mem_args_size(instr, *args) - my_ss = None - if self.do_ds_segm: - my_ss = DS - if self.do_all_segm and instr.additional_info.g2.value: - my_ss = {1: CS, 2: SS, 3: DS, 4: ES, 5: FS, 6: GS}[ - instr.additional_info.g2.value] - if my_ss is not None: - for i, a in enumerate(args): - if a.is_mem() and not a.is_mem_segm(): - args[i] = self.ExprMem(m2_expr.ExprOp('segm', my_ss, - a.ptr), a.size) - - if not instr.name.lower() in mnemo_func: - raise NotImplementedError( - "Mnemonic %s not implemented" % instr.name) - - instr_ir, extra_ir = mnemo_func[ - instr.name.lower()](self, instr, *args) - self.mod_pc(instr, instr_ir, extra_ir) - instr.additional_info.except_on_instr = False - if instr.additional_info.g1.value & 6 == 0 or \ - not instr.name in repeat_mn: - return instr_ir, extra_ir - if instr.name == "MOVSD" and len(instr.args) == 2: - return instr_ir, extra_ir - - instr.additional_info.except_on_instr = True - admode = instr.v_admode() - c_reg = mRCX[instr.mode][:admode] - - zf_val = None - # set if zf is tested (cmps, scas) - for e in instr_ir: # +[updt_c]: - if e.dst == zf: - zf_val = e.src - - cond_dec = m2_expr.ExprCond(c_reg - m2_expr.ExprInt(1, c_reg.size), - m2_expr.ExprInt(0, 1), m2_expr.ExprInt(1, 1)) - # end condition - if zf_val is None: - c_cond = cond_dec - elif instr.additional_info.g1.value & 2: # REPNE and REPNZ - c_cond = cond_dec | zf - elif instr.additional_info.g1.value & 12: # REPE, REP and REPZ - c_cond = cond_dec | (zf ^ 
m2_expr.ExprInt(1, 1)) - - # gen while - loc_do, loc_do_expr = self.gen_loc_key_and_expr(self.IRDst.size) - loc_end, loc_end_expr = self.gen_loc_key_and_expr(self.IRDst.size) - loc_skip = self.get_next_loc_key(instr) - loc_skip_expr = m2_expr.ExprLoc(loc_skip, self.IRDst.size) - loc_next = self.get_next_loc_key(instr) - loc_next_expr = m2_expr.ExprLoc(loc_next, self.IRDst.size) - - fix_next_loc = {loc_next_expr: loc_end_expr} - new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_loc)) - for irblock in extra_ir] - - cond_bloc = [] - cond_bloc.append(m2_expr.ExprAssign(c_reg, - c_reg - m2_expr.ExprInt(1, - c_reg.size))) - cond_bloc.append(m2_expr.ExprAssign(self.IRDst, m2_expr.ExprCond(c_cond, - loc_skip_expr, - loc_do_expr))) - cond_bloc = IRBlock(loc_end, [AssignBlock(cond_bloc, instr)]) - e_do = instr_ir - - c = IRBlock(loc_do, [AssignBlock(e_do, instr)]) - e_n = [m2_expr.ExprAssign(self.IRDst, m2_expr.ExprCond(c_reg, loc_do_expr, - loc_skip_expr))] - return e_n, [cond_bloc, c] + new_extra_ir - - def expr_fix_regs_for_mode(self, e, mode=64): - return e.replace_expr(replace_regs[mode]) - - def expraff_fix_regs_for_mode(self, e, mode=64): - dst = self.expr_fix_regs_for_mode(e.dst, mode) - src = self.expr_fix_regs_for_mode(e.src, mode) - return m2_expr.ExprAssign(dst, src) - - def irbloc_fix_regs_for_mode(self, irblock, mode=64): - irs = [] - for assignblk in irblock: - new_assignblk = dict(assignblk) - for dst, src in viewitems(assignblk): - del new_assignblk[dst] - # Special case for 64 bits: - # If destination is a 32 bit reg, zero extend the 64 bit reg - if mode == 64: - if (isinstance(dst, m2_expr.ExprId) and - dst.size == 32 and - dst in replace_regs[64]): - src = src.zeroExtend(64) - dst = replace_regs[64][dst].arg - dst = self.expr_fix_regs_for_mode(dst, mode) - src = self.expr_fix_regs_for_mode(src, mode) - new_assignblk[dst] = src - irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(irblock.loc_key, 
irs) - - -class ir_x86_32(ir_x86_16): - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_x86, 32, loc_db) - self.do_stk_segm = False - self.do_ds_segm = False - self.do_str_segm = False - self.do_all_segm = False - self.pc = EIP - self.sp = ESP - self.IRDst = m2_expr.ExprId('IRDst', 32) - self.addrsize = 32 - - -class ir_x86_64(ir_x86_16): - - def __init__(self, loc_db=None): - IntermediateRepresentation.__init__(self, mn_x86, 64, loc_db) - self.do_stk_segm = False - self.do_ds_segm = False - self.do_str_segm = False - self.do_all_segm = False - self.pc = RIP - self.sp = RSP - self.IRDst = m2_expr.ExprId('IRDst', 64) - self.addrsize = 64 - - def mod_pc(self, instr, instr_ir, extra_ir): - # fix RIP for 64 bit - pc_fixed = {self.pc: m2_expr.ExprInt(instr.offset + instr.l, 64)} - - for i, expr in enumerate(instr_ir): - dst, src = expr.dst, expr.src - if dst != self.pc: - dst = dst.replace_expr(pc_fixed) - src = src.replace_expr(pc_fixed) - instr_ir[i] = m2_expr.ExprAssign(dst, src) - - for idx, irblock in enumerate(extra_ir): - extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ - if expr != self.pc else expr, - lambda expr: expr.replace_expr(pc_fixed)) diff --git a/miasm2/core/__init__.py b/miasm2/core/__init__.py deleted file mode 100644 index d154134b..00000000 --- a/miasm2/core/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Core components" diff --git a/miasm2/core/asm_ast.py b/miasm2/core/asm_ast.py deleted file mode 100644 index 69ff1f9c..00000000 --- a/miasm2/core/asm_ast.py +++ /dev/null @@ -1,93 +0,0 @@ -from builtins import int as int_types - -class AstNode(object): - """ - Ast node object - """ - def __neg__(self): - if isinstance(self, AstInt): - value = AstInt(-self.value) - else: - value = AstOp('-', self) - return value - - def __add__(self, other): - return AstOp('+', self, other) - - def __sub__(self, other): - return AstOp('-', self, other) - - def __div__(self, other): - return AstOp('/', self, 
other) - - def __mod__(self, other): - return AstOp('%', self, other) - - def __mul__(self, other): - return AstOp('*', self, other) - - def __lshift__(self, other): - return AstOp('<<', self, other) - - def __rshift__(self, other): - return AstOp('>>', self, other) - - def __xor__(self, other): - return AstOp('^', self, other) - - def __or__(self, other): - return AstOp('|', self, other) - - def __and__(self, other): - return AstOp('&', self, other) - - -class AstInt(AstNode): - """ - Ast integer - """ - def __init__(self, value): - self.value = value - - def __str__(self): - return "%s" % self.value - - -class AstId(AstNode): - """ - Ast Id - """ - def __init__(self, name): - self.name = name - - def __str__(self): - return "%s" % self.name - - -class AstMem(AstNode): - """ - Ast memory deref - """ - def __init__(self, ptr, size): - assert isinstance(ptr, AstNode) - assert isinstance(size, int_types) - self.ptr = ptr - self.size = size - - def __str__(self): - return "@%d[%s]" % (self.size, self.ptr) - - -class AstOp(AstNode): - """ - Ast operator - """ - def __init__(self, op, *args): - assert all(isinstance(arg, AstNode) for arg in args) - self.op = op - self.args = args - - def __str__(self): - if len(self.args) == 1: - return "(%s %s)" % (self.op, self.args[0]) - return '(' + ("%s" % self.op).join(str(x) for x in self.args) + ')' diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py deleted file mode 100644 index 811cc824..00000000 --- a/miasm2/core/asmblock.py +++ /dev/null @@ -1,1629 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import map -from builtins import range -import logging -import warnings -from collections import namedtuple -from builtins import int as int_types - -from future.utils import viewitems, viewvalues - -from miasm2.expression.expression import ExprId, ExprInt, get_expr_locs -from miasm2.expression.expression import LocKey -from miasm2.expression.simplifications import expr_simp -from miasm2.expression.modint import moduint, 
modint -from miasm2.core.utils import Disasm_Exception, pck -from miasm2.core.graph import DiGraph, DiGraphSimplifier, MatchGraphJoker -from miasm2.core.interval import interval -from miasm2.core.locationdb import LocationDB - - -log_asmblock = logging.getLogger("asmblock") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log_asmblock.addHandler(console_handler) -log_asmblock.setLevel(logging.WARNING) - - -def is_int(a): - return isinstance(a, (modint, moduint, int_types)) - - -class AsmRaw(object): - - def __init__(self, raw=b""): - self.raw = raw - - def __str__(self): - return repr(self.raw) - - def to_string(self, loc_db): - return str(self) - - -class asm_raw(AsmRaw): - - def __init__(self, raw=b""): - warnings.warn('DEPRECATION WARNING: use "AsmRaw" instead of "asm_raw"') - super(asm_label, self).__init__(raw) - - -class AsmConstraint(object): - c_to = "c_to" - c_next = "c_next" - - def __init__(self, loc_key, c_t=c_to): - # Sanity check - assert isinstance(loc_key, LocKey) - - self.loc_key = loc_key - self.c_t = c_t - - def get_label(self): - warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') - return self.loc_key - - def set_label(self, loc_key): - warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') - self.loc_key = loc_key - - label = property(get_label, set_label) - - def to_string(self, loc_db=None): - if loc_db is None: - return "%s:%s" % (self.c_t, self.loc_key) - else: - return "%s:%s" % ( - self.c_t, - loc_db.pretty_str(self.loc_key) - ) - - def __str__(self): - return self.to_string() - - -class asm_constraint(AsmConstraint): - - def __init__(self, loc_key, c_t=AsmConstraint.c_to): - warnings.warn('DEPRECATION WARNING: use "AsmConstraint" instead of "asm_constraint"') - super(asm_constraint, self).__init__(loc_key, c_t) - - -class AsmConstraintNext(AsmConstraint): - - def __init__(self, loc_key): - super(AsmConstraintNext, 
self).__init__( - loc_key, - c_t=AsmConstraint.c_next - ) - - -class asm_constraint_next(AsmConstraint): - - def __init__(self, loc_key): - warnings.warn('DEPRECATION WARNING: use "AsmConstraintNext" instead of "asm_constraint_next"') - super(asm_constraint_next, self).__init__(loc_key) - - -class AsmConstraintTo(AsmConstraint): - - def __init__(self, loc_key): - super(AsmConstraintTo, self).__init__( - loc_key, - c_t=AsmConstraint.c_to - ) - -class asm_constraint_to(AsmConstraint): - - def __init__(self, loc_key): - warnings.warn('DEPRECATION WARNING: use "AsmConstraintTo" instead of "asm_constraint_to"') - super(asm_constraint_to, self).__init__(loc_key) - - -class AsmBlock(object): - - def __init__(self, loc_key, alignment=1): - assert isinstance(loc_key, LocKey) - - self.bto = set() - self.lines = [] - self._loc_key = loc_key - self.alignment = alignment - - def get_label(self): - warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') - return self.loc_key - - loc_key = property(lambda self:self._loc_key) - label = property(get_label) - - - def to_string(self, loc_db=None): - out = [] - if loc_db is None: - out.append(str(self.loc_key)) - else: - out.append(loc_db.pretty_str(self.loc_key)) - - for instr in self.lines: - out.append(instr.to_string(loc_db)) - if self.bto: - lbls = ["->"] - for dst in self.bto: - if dst is None: - lbls.append("Unknown? 
") - else: - lbls.append(dst.to_string(loc_db) + " ") - lbls = '\t'.join(sorted(lbls)) - out.append(lbls) - return '\n'.join(out) - - def __str__(self): - return self.to_string() - - def addline(self, l): - self.lines.append(l) - - def addto(self, c): - assert isinstance(self.bto, set) - self.bto.add(c) - - def split(self, loc_db, offset): - loc_key = loc_db.get_or_create_offset_location(offset) - log_asmblock.debug('split at %x', offset) - offsets = [x.offset for x in self.lines] - offset = loc_db.get_location_offset(loc_key) - if offset not in offsets: - log_asmblock.warning( - 'cannot split bloc at %X ' % offset + - 'middle instruction? default middle') - offsets.sort() - return None - new_bloc = AsmBlock(loc_key) - i = offsets.index(offset) - - self.lines, new_bloc.lines = self.lines[:i], self.lines[i:] - flow_mod_instr = self.get_flow_instr() - log_asmblock.debug('flow mod %r', flow_mod_instr) - c = AsmConstraint(loc_key, AsmConstraint.c_next) - # move dst if flowgraph modifier was in original bloc - # (usecase: split delayslot bloc) - if flow_mod_instr: - for xx in self.bto: - log_asmblock.debug('lbl %s', xx) - c_next = set( - x for x in self.bto if x.c_t == AsmConstraint.c_next - ) - c_to = [x for x in self.bto if x.c_t != AsmConstraint.c_next] - self.bto = set([c] + c_to) - new_bloc.bto = c_next - else: - new_bloc.bto = self.bto - self.bto = set([c]) - return new_bloc - - def get_range(self): - """Returns the offset hull of an AsmBlock""" - if len(self.lines): - return (self.lines[0].offset, - self.lines[-1].offset + self.lines[-1].l) - else: - return 0, 0 - - def get_offsets(self): - return [x.offset for x in self.lines] - - def add_cst(self, loc_key, constraint_type): - """ - Add constraint between current block and block at @loc_key - @loc_key: LocKey instance of constraint target - @constraint_type: AsmConstraint c_to/c_next - """ - assert isinstance(loc_key, LocKey) - c = AsmConstraint(loc_key, constraint_type) - self.bto.add(c) - - def 
get_flow_instr(self): - if not self.lines: - return None - for i in range(-1, -1 - self.lines[0].delayslot - 1, -1): - if not 0 <= i < len(self.lines): - return None - l = self.lines[i] - if l.splitflow() or l.breakflow(): - raise NotImplementedError('not fully functional') - - def get_subcall_instr(self): - if not self.lines: - return None - delayslot = self.lines[0].delayslot - end_index = len(self.lines) - 1 - ds_max_index = max(end_index - delayslot, 0) - for i in range(end_index, ds_max_index - 1, -1): - l = self.lines[i] - if l.is_subcall(): - return l - return None - - def get_next(self): - for constraint in self.bto: - if constraint.c_t == AsmConstraint.c_next: - return constraint.loc_key - return None - - @staticmethod - def _filter_constraint(constraints): - """Sort and filter @constraints for AsmBlock.bto - @constraints: non-empty set of AsmConstraint instance - - Always the same type -> one of the constraint - c_next and c_to -> c_next - """ - # Only one constraint - if len(constraints) == 1: - return next(iter(constraints)) - - # Constraint type -> set of corresponding constraint - cbytype = {} - for cons in constraints: - cbytype.setdefault(cons.c_t, set()).add(cons) - - # Only one type -> any constraint is OK - if len(cbytype) == 1: - return next(iter(constraints)) - - # At least 2 types -> types = {c_next, c_to} - # c_to is included in c_next - return next(iter(cbytype[AsmConstraint.c_next])) - - def fix_constraints(self): - """Fix next block constraints""" - # destination -> associated constraints - dests = {} - for constraint in self.bto: - dests.setdefault(constraint.loc_key, set()).add(constraint) - - self.bto = set( - self._filter_constraint(constraints) - for constraints in viewvalues(dests) - ) - - -class asm_bloc(object): - - def __init__(self, loc_key, alignment=1): - warnings.warn('DEPRECATION WARNING: use "AsmBlock" instead of "asm_bloc"') - super(asm_bloc, self).__init__(loc_key, alignment) - - -class AsmBlockBad(AsmBlock): - - """Stand 
for a *bad* ASM block (malformed, unreachable, - not disassembled, ...)""" - - - ERROR_UNKNOWN = -1 - ERROR_CANNOT_DISASM = 0 - ERROR_NULL_STARTING_BLOCK = 1 - ERROR_FORBIDDEN = 2 - ERROR_IO = 3 - - - ERROR_TYPES = { - ERROR_UNKNOWN: "Unknown error", - ERROR_CANNOT_DISASM: "Unable to disassemble", - ERROR_NULL_STARTING_BLOCK: "Null starting block", - ERROR_FORBIDDEN: "Address forbidden by dont_dis", - ERROR_IO: "IOError", - } - - def __init__(self, loc_key=None, alignment=1, errno=ERROR_UNKNOWN, *args, **kwargs): - """Instantiate an AsmBlock_bad. - @loc_key, @alignment: same as AsmBlock.__init__ - @errno: (optional) specify a error type associated with the block - """ - super(AsmBlockBad, self).__init__(loc_key, alignment, *args, **kwargs) - self._errno = errno - - errno = property(lambda self: self._errno) - - def __str__(self): - error_txt = self.ERROR_TYPES.get(self._errno, self._errno) - return "%s\n\tBad block: %s" % ( - self.loc_key, - error_txt - ) - - def addline(self, *args, **kwargs): - raise RuntimeError("An AsmBlockBad cannot have line") - - def addto(self, *args, **kwargs): - raise RuntimeError("An AsmBlockBad cannot have bto") - - def split(self, *args, **kwargs): - raise RuntimeError("An AsmBlockBad cannot be split") - - -class asm_block_bad(AsmBlockBad): - - def __init__(self, loc_key=None, alignment=1, errno=-1, *args, **kwargs): - warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"') - super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs) - -class AsmSymbolPool(LocationDB): - """[DEPRECATED API] use 'LocationDB' instead""" - - def __init__(self, *args, **kwargs): - warnings.warn("Deprecated API, use 'LocationDB' instead") - super(AsmSymbolPool, self).__init__(*args, **kwargs) - -class asm_symbol_pool(AsmSymbolPool): - - def __init__(self): - warnings.warn('DEPRECATION WARNING: use "LocationDB" instead of "asm_symbol_pool"') - super(asm_symbol_pool, self).__init__() - - -class AsmCFG(DiGraph): - - 
"""Directed graph standing for a ASM Control Flow Graph with: - - nodes: AsmBlock - - edges: constraints between blocks, synchronized with AsmBlock's "bto" - - Specialized the .dot export and force the relation between block to be uniq, - and associated with a constraint. - - Offer helpers on AsmCFG management, such as research by loc_key, sanity - checking and mnemonic size guessing. - """ - - # Internal structure for pending management - AsmCFGPending = namedtuple("AsmCFGPending", - ["waiter", "constraint"]) - - def __init__(self, loc_db=None, *args, **kwargs): - super(AsmCFG, self).__init__(*args, **kwargs) - # Edges -> constraint - self.edges2constraint = {} - # Expected LocKey -> set( (src, dst), constraint ) - self._pendings = {} - # Loc_Key2block built on the fly - self._loc_key_to_block = {} - # loc_db - self.loc_db = loc_db - - - def copy(self): - """Copy the current graph instance""" - graph = self.__class__(self.loc_db) - return graph + self - - - # Compatibility with old list API - def append(self, *args, **kwargs): - raise DeprecationWarning("AsmCFG is a graph, use add_node") - - def remove(self, *args, **kwargs): - raise DeprecationWarning("AsmCFG is a graph, use del_node") - - def __getitem__(self, *args, **kwargs): - raise DeprecationWarning("Order of AsmCFG elements is not reliable") - - def __contains__(self, _): - """ - DEPRECATED. Use: - - loc_key in AsmCFG.nodes() to test loc_key existence - """ - raise RuntimeError("DEPRECATED") - - def __iter__(self): - """ - DEPRECATED. 
Use: - - AsmCFG.blocks() to iter on blocks - - loc_key in AsmCFG.nodes() to test loc_key existence - """ - raise RuntimeError("DEPRECATED") - - def __len__(self): - """Return the number of blocks in AsmCFG""" - return len(self._nodes) - - @property - def blocks(self): - return viewvalues(self._loc_key_to_block) - - # Manage graph with associated constraints - def add_edge(self, src, dst, constraint): - """Add an edge to the graph - @src: LocKey instance, source - @dst: LocKey instance, destination - @constraint: constraint associated to this edge - """ - # Sanity check - assert isinstance(src, LocKey) - assert isinstance(dst, LocKey) - known_cst = self.edges2constraint.get((src, dst), None) - if known_cst is not None: - assert known_cst == constraint - return - - # Add the edge to src.bto if needed - block_src = self.loc_key_to_block(src) - if block_src: - if dst not in [cons.loc_key for cons in block_src.bto]: - block_src.bto.add(AsmConstraint(dst, constraint)) - - # Add edge - self.edges2constraint[(src, dst)] = constraint - super(AsmCFG, self).add_edge(src, dst) - - def add_uniq_edge(self, src, dst, constraint): - """ - Synonym for `add_edge` - """ - self.add_edge(src, dst, constraint) - - def del_edge(self, src, dst): - """Delete the edge @src->@dst and its associated constraint""" - src_blk = self.loc_key_to_block(src) - dst_blk = self.loc_key_to_block(dst) - assert src_blk is not None - assert dst_blk is not None - # Delete from src.bto - to_remove = [cons for cons in src_blk.bto if cons.loc_key == dst] - if to_remove: - assert len(to_remove) == 1 - src_blk.bto.remove(to_remove[0]) - - # Del edge - del self.edges2constraint[(src, dst)] - super(AsmCFG, self).del_edge(src, dst) - - def del_block(self, block): - super(AsmCFG, self).del_node(block.loc_key) - del self._loc_key_to_block[block.loc_key] - - - def add_node(self, node): - assert isinstance(node, LocKey) - return super(AsmCFG, self).add_node(node) - - def add_block(self, block): - """ - Add the block 
@block to the current instance, if it is not already in - @block: AsmBlock instance - - Edges will be created for @block.bto, if destinations are already in - this instance. If not, they will be resolved when adding these - aforementioned destinations. - `self.pendings` indicates which blocks are not yet resolved. - - """ - status = super(AsmCFG, self).add_node(block.loc_key) - - if not status: - return status - - # Update waiters - if block.loc_key in self._pendings: - for bblpend in self._pendings[block.loc_key]: - self.add_edge(bblpend.waiter.loc_key, block.loc_key, bblpend.constraint) - del self._pendings[block.loc_key] - - # Synchronize edges with block destinations - self._loc_key_to_block[block.loc_key] = block - - for constraint in block.bto: - dst = self._loc_key_to_block.get(constraint.loc_key, - None) - if dst is None: - # Block is yet unknown, add it to pendings - to_add = self.AsmCFGPending(waiter=block, - constraint=constraint.c_t) - self._pendings.setdefault(constraint.loc_key, - set()).add(to_add) - else: - # Block is already in known nodes - self.add_edge(block.loc_key, dst.loc_key, constraint.c_t) - - return status - - def merge(self, graph): - """Merge with @graph, taking in account constraints""" - # Add known blocks - for block in graph.blocks: - self.add_block(block) - # Add nodes not already in it (ie. 
not linked to a block) - for node in graph.nodes(): - self.add_node(node) - # -> add_edge(x, y, constraint) - for edge in graph._edges: - # May fail if there is an incompatibility in edges constraints - # between the two graphs - self.add_edge(*edge, constraint=graph.edges2constraint[edge]) - - - def node2lines(self, node): - if self.loc_db is None: - loc_key_name = node - else: - loc_key_name = self.loc_db.pretty_str(node) - yield self.DotCellDescription(text=loc_key_name, - attr={'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey'}) - block = self._loc_key_to_block.get(node, None) - if block is None: - return - if isinstance(block, AsmBlockBad): - yield [ - self.DotCellDescription( - text=block.ERROR_TYPES.get(block._errno, - block._errno - ), - attr={}) - ] - return - for line in block.lines: - if self._dot_offset: - yield [self.DotCellDescription(text="%.8X" % line.offset, - attr={}), - self.DotCellDescription(text=line.to_string(self.loc_db), attr={})] - else: - yield self.DotCellDescription(text=line.to_string(self.loc_db), attr={}) - - def node_attr(self, node): - block = self._loc_key_to_block.get(node, None) - if isinstance(block, AsmBlockBad): - return {'style': 'filled', 'fillcolor': 'red'} - return {} - - def edge_attr(self, src, dst): - cst = self.edges2constraint.get((src, dst), None) - edge_color = "blue" - - if len(self.successors(src)) > 1: - if cst == AsmConstraint.c_next: - edge_color = "red" - else: - edge_color = "limegreen" - - return {"color": edge_color} - - def dot(self, offset=False): - """ - @offset: (optional) if set, add the corresponding offsets in each node - """ - self._dot_offset = offset - return super(AsmCFG, self).dot() - - # Helpers - @property - def pendings(self): - """Dictionary of loc_key -> set(AsmCFGPending instance) indicating - which loc_key are missing in the current instance. 
- A loc_key is missing if a block which is already in nodes has constraints - with him (thanks to its .bto) and the corresponding block is not yet in - nodes - """ - return self._pendings - - def label2block(self, loc_key): - """ - DEPRECATED: Use "loc_key_to_block" instead of "label2block" - - Return the block corresponding to loc_key @loc_key - @loc_key: LocKey instance - """ - warnings.warn('DEPRECATION WARNING: use "loc_key_to_block" instead of "label2block"') - return self.loc_key_to_block(loc_key) - - def rebuild_edges(self): - """Consider blocks '.bto' and rebuild edges according to them, ie: - - update constraint type - - add missing edge - - remove no more used edge - - This method should be called if a block's '.bto' in nodes have been - modified without notifying this instance to resynchronize edges. - """ - for block in self.blocks: - edges = [] - # Rebuild edges from bto - for constraint in block.bto: - dst = self._loc_key_to_block.get(constraint.loc_key, - None) - if dst is None: - # Missing destination, add to pendings - self._pendings.setdefault( - constraint.loc_key, - set() - ).add( - self.AsmCFGPending( - block, - constraint.c_t - ) - ) - continue - edge = (block.loc_key, dst.loc_key) - edges.append(edge) - if edge in self._edges: - # Already known edge, constraint may have changed - self.edges2constraint[edge] = constraint.c_t - else: - # An edge is missing - self.add_edge(edge[0], edge[1], constraint.c_t) - - # Remove useless edges - for succ in self.successors(block.loc_key): - edge = (block.loc_key, succ) - if edge not in edges: - self.del_edge(*edge) - - def get_bad_blocks(self): - """Iterator on AsmBlockBad elements""" - # A bad asm block is always a leaf - for loc_key in self.leaves(): - block = self._loc_key_to_block.get(loc_key, None) - if isinstance(block, AsmBlockBad): - yield block - - def get_bad_blocks_predecessors(self, strict=False): - """Iterator on loc_keys with an AsmBlockBad destination - @strict: (optional) if set, return 
loc_key with only bad - successors - """ - # Avoid returning the same block - done = set() - for badblock in self.get_bad_blocks(): - for predecessor in self.predecessors_iter(badblock.loc_key): - if predecessor not in done: - if (strict and - not all(isinstance(self._loc_key_to_block.get(block, None), AsmBlockBad) - for block in self.successors_iter(predecessor))): - continue - yield predecessor - done.add(predecessor) - - def getby_offset(self, offset): - """Return asmblock containing @offset""" - for block in self.blocks: - if block.lines[0].offset <= offset < \ - (block.lines[-1].offset + block.lines[-1].l): - return block - return None - - def loc_key_to_block(self, loc_key): - """ - Return the asmblock corresponding to loc_key @loc_key, None if unknown - loc_key - @loc_key: LocKey instance - """ - return self._loc_key_to_block.get(loc_key, None) - - def sanity_check(self): - """Do sanity checks on blocks' constraints: - * no pendings - * no multiple next constraint to same block - * no next constraint to self - """ - - if len(self._pendings) != 0: - raise RuntimeError( - "Some blocks are missing: %s" % list( - map( - str, - self._pendings - ) - ) - ) - - next_edges = { - edge: constraint - for edge, constraint in viewitems(self.edges2constraint) - if constraint == AsmConstraint.c_next - } - - for loc_key in self._nodes: - if loc_key not in self._loc_key_to_block: - raise RuntimeError("Not supported yet: every node must have a corresponding AsmBlock") - # No next constraint to self - if (loc_key, loc_key) in next_edges: - raise RuntimeError('Bad constraint: self in next') - - # No multiple next constraint to same block - pred_next = list(ploc_key - for (ploc_key, dloc_key) in next_edges - if dloc_key == loc_key) - - if len(pred_next) > 1: - raise RuntimeError("Too many next constraints for bloc %r" - "(%s)" % (loc_key, - pred_next)) - - def guess_blocks_size(self, mnemo): - """Asm and compute max block size - Add a 'size' and 'max_size' attribute on each block 
- @mnemo: metamn instance""" - for block in self.blocks: - size = 0 - for instr in block.lines: - if isinstance(instr, AsmRaw): - # for special AsmRaw, only extract len - if isinstance(instr.raw, list): - data = None - if len(instr.raw) == 0: - l = 0 - else: - l = (instr.raw[0].size // 8) * len(instr.raw) - elif isinstance(instr.raw, str): - data = instr.raw.encode() - l = len(data) - elif isinstance(instr.raw, bytes): - data = instr.raw - l = len(data) - else: - raise NotImplementedError('asm raw') - else: - # Assemble the instruction to retrieve its len. - # If the instruction uses symbol it will fail - # In this case, the max_instruction_len is used - try: - candidates = mnemo.asm(instr) - l = len(candidates[-1]) - except: - l = mnemo.max_instruction_len - data = None - instr.data = data - instr.l = l - size += l - - block.size = size - block.max_size = size - log_asmblock.info("size: %d max: %d", block.size, block.max_size) - - def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs): - """Consider @self' bto destinations and split block in @self if one of - these destinations jumps in the middle of this block. - In order to work, they must be only one block in @self per loc_key in - @loc_db (which is true if @self come from the same disasmEngine). 
- - @loc_db: LocationDB instance associated with @self'loc_keys - @dis_block_callback: (optional) if set, this callback will be called on - new block destinations - @kwargs: (optional) named arguments to pass to dis_block_callback - """ - # Get all possible destinations not yet resolved, with a resolved - # offset - block_dst = [] - for loc_key in self.pendings: - offset = loc_db.get_location_offset(loc_key) - if offset is not None: - block_dst.append(offset) - - todo = set(self.blocks) - rebuild_needed = False - - while todo: - # Find a block with a destination inside another one - cur_block = todo.pop() - range_start, range_stop = cur_block.get_range() - - for off in block_dst: - if not (off > range_start and off < range_stop): - continue - - # `cur_block` must be split at offset `off`from miasm2.core.locationdb import LocationDB - - new_b = cur_block.split(loc_db, off) - log_asmblock.debug("Split block %x", off) - if new_b is None: - log_asmblock.error("Cannot split %x!!", off) - continue - - # Remove pending from cur_block - # Links from new_b will be generated in rebuild_edges - for dst in new_b.bto: - if dst.loc_key not in self.pendings: - continue - self.pendings[dst.loc_key] = set(pending for pending in self.pendings[dst.loc_key] - if pending.waiter != cur_block) - - # The new block destinations may need to be disassembled - if dis_block_callback: - offsets_to_dis = set( - self.loc_db.get_location_offset(constraint.loc_key) - for constraint in new_b.bto - ) - dis_block_callback(cur_bloc=new_b, - offsets_to_dis=offsets_to_dis, - loc_db=loc_db, **kwargs) - - # Update structure - rebuild_needed = True - self.add_block(new_b) - - # The new block must be considered - todo.add(new_b) - range_start, range_stop = cur_block.get_range() - - # Rebuild edges to match new blocks'bto - if rebuild_needed: - self.rebuild_edges() - - def __str__(self): - out = [] - for block in self.blocks: - out.append(str(block)) - for loc_key_a, loc_key_b in self.edges(): - 
out.append("%s -> %s" % (loc_key_a, loc_key_b)) - return '\n'.join(out) - - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, hex(id(self))) - -# Out of _merge_blocks to be computed only once -_acceptable_block = lambda graph, loc_key: (not isinstance(graph.loc_key_to_block(loc_key), AsmBlockBad) and - len(graph.loc_key_to_block(loc_key).lines) > 0) -_parent = MatchGraphJoker(restrict_in=False, filt=_acceptable_block) -_son = MatchGraphJoker(restrict_out=False, filt=_acceptable_block) -_expgraph = _parent >> _son - - -def _merge_blocks(dg, graph): - """Graph simplification merging AsmBlock with one and only one son with this - son if this son has one and only one parent""" - - # Blocks to ignore, because they have been removed from the graph - to_ignore = set() - - for match in _expgraph.match(graph): - - # Get matching blocks - lbl_block, lbl_succ = match[_parent], match[_son] - block = graph.loc_key_to_block(lbl_block) - succ = graph.loc_key_to_block(lbl_succ) - - # Ignore already deleted blocks - if (block in to_ignore or - succ in to_ignore): - continue - - # Remove block last instruction if needed - last_instr = block.lines[-1] - if last_instr.delayslot > 0: - # TODO: delayslot - raise RuntimeError("Not implemented yet") - - if last_instr.is_subcall(): - continue - if last_instr.breakflow() and last_instr.dstflow(): - block.lines.pop() - - # Merge block - block.lines += succ.lines - for nextb in graph.successors_iter(lbl_succ): - graph.add_edge(lbl_block, nextb, graph.edges2constraint[(lbl_succ, nextb)]) - - graph.del_block(succ) - to_ignore.add(lbl_succ) - - -bbl_simplifier = DiGraphSimplifier() -bbl_simplifier.enable_passes([_merge_blocks]) - - -def conservative_asm(mnemo, instr, symbols, conservative): - """ - Asm instruction; - Try to keep original instruction bytes if it exists - """ - candidates = mnemo.asm(instr, symbols) - if not candidates: - raise ValueError('cannot asm:%s' % str(instr)) - if not hasattr(instr, "b"): - return 
candidates[0], candidates - if instr.b in candidates: - return instr.b, candidates - if conservative: - for c in candidates: - if len(c) == len(instr.b): - return c, candidates - return candidates[0], candidates - - -def fix_expr_val(expr, symbols): - """Resolve an expression @expr using @symbols""" - def expr_calc(e): - if isinstance(e, ExprId): - # Example: - # toto: - # .dword label - loc_key = symbols.get_name_location(e.name) - offset = symbols.get_location_offset(loc_key) - e = ExprInt(offset, e.size) - return e - result = expr.visit(expr_calc) - result = expr_simp(result) - if not isinstance(result, ExprInt): - raise RuntimeError('Cannot resolve symbol %s' % expr) - return result - - -def fix_loc_offset(loc_db, loc_key, offset, modified): - """ - Fix the @loc_key offset to @offset. If the @offset has changed, add @loc_key - to @modified - @loc_db: current loc_db - """ - loc_offset = loc_db.get_location_offset(loc_key) - if loc_offset == offset: - return - loc_db.set_location_offset(loc_key, offset, force=True) - modified.add(loc_key) - - -class BlockChain(object): - - """Manage blocks linked with an asm_constraint_next""" - - def __init__(self, loc_db, blocks): - self.loc_db = loc_db - self.blocks = blocks - self.place() - - @property - def pinned(self): - """Return True iff at least one block is pinned""" - return self.pinned_block_idx is not None - - def _set_pinned_block_idx(self): - self.pinned_block_idx = None - for i, block in enumerate(self.blocks): - loc_key = block.loc_key - if self.loc_db.get_location_offset(loc_key) is not None: - if self.pinned_block_idx is not None: - raise ValueError("Multiples pinned block detected") - self.pinned_block_idx = i - - def place(self): - """Compute BlockChain min_offset and max_offset using pinned block and - blocks' size - """ - self._set_pinned_block_idx() - self.max_size = 0 - for block in self.blocks: - self.max_size += block.max_size + block.alignment - 1 - - # Check if chain has one block pinned - if not 
self.pinned: - return - - loc = self.blocks[self.pinned_block_idx].loc_key - offset_base = self.loc_db.get_location_offset(loc) - assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) - - self.offset_min = offset_base - for block in self.blocks[:self.pinned_block_idx - 1:-1]: - self.offset_min -= block.max_size + \ - (block.alignment - block.max_size) % block.alignment - - self.offset_max = offset_base - for block in self.blocks[self.pinned_block_idx:]: - self.offset_max += block.max_size + \ - (block.alignment - block.max_size) % block.alignment - - def merge(self, chain): - """Best effort merge two block chains - Return the list of resulting blockchains""" - self.blocks += chain.blocks - self.place() - return [self] - - def fix_blocks(self, modified_loc_keys): - """Propagate a pinned to its blocks' neighbour - @modified_loc_keys: store new pinned loc_keys""" - - if not self.pinned: - raise ValueError('Trying to fix unpinned block') - - # Propagate offset to blocks before pinned block - pinned_block = self.blocks[self.pinned_block_idx] - offset = self.loc_db.get_location_offset(pinned_block.loc_key) - if offset % pinned_block.alignment != 0: - raise RuntimeError('Bad alignment') - - for block in self.blocks[:self.pinned_block_idx - 1:-1]: - new_offset = offset - block.size - new_offset = new_offset - new_offset % pinned_block.alignment - fix_loc_offset(self.loc_db, - block.loc_key, - new_offset, - modified_loc_keys) - - # Propagate offset to blocks after pinned block - offset = self.loc_db.get_location_offset(pinned_block.loc_key) + pinned_block.size - - last_block = pinned_block - for block in self.blocks[self.pinned_block_idx + 1:]: - offset += (- offset) % last_block.alignment - fix_loc_offset(self.loc_db, - block.loc_key, - offset, - modified_loc_keys) - offset += block.size - last_block = block - return modified_loc_keys - - -class BlockChainWedge(object): - - """Stand for wedges between blocks""" - - def __init__(self, loc_db, offset, 
size): - self.loc_db = loc_db - self.offset = offset - self.max_size = size - self.offset_min = offset - self.offset_max = offset + size - - def merge(self, chain): - """Best effort merge two block chains - Return the list of resulting blockchains""" - self.loc_db.set_location_offset(chain.blocks[0].loc_key, self.offset_max) - chain.place() - return [self, chain] - - -def group_constrained_blocks(loc_db, asmcfg): - """ - Return the BlockChains list built from grouped blocks in asmcfg linked by - asm_constraint_next - @asmcfg: an AsmCfg instance - """ - log_asmblock.info('group_constrained_blocks') - - # Group adjacent asmcfg - remaining_blocks = list(asmcfg.blocks) - known_block_chains = {} - - while remaining_blocks: - # Create a new block chain - block_list = [remaining_blocks.pop()] - - # Find sons in remainings blocks linked with a next constraint - while True: - # Get next block - next_loc_key = block_list[-1].get_next() - if next_loc_key is None or asmcfg.loc_key_to_block(next_loc_key) is None: - break - next_block = asmcfg.loc_key_to_block(next_loc_key) - - # Add the block at the end of the current chain - if next_block not in remaining_blocks: - break - block_list.append(next_block) - remaining_blocks.remove(next_block) - - # Check if son is in a known block group - if next_loc_key is not None and next_loc_key in known_block_chains: - block_list += known_block_chains[next_loc_key] - del known_block_chains[next_loc_key] - - known_block_chains[block_list[0].loc_key] = block_list - - out_block_chains = [] - for loc_key in known_block_chains: - chain = BlockChain(loc_db, known_block_chains[loc_key]) - out_block_chains.append(chain) - return out_block_chains - - -def get_blockchains_address_interval(blockChains, dst_interval): - """Compute the interval used by the pinned @blockChains - Check if the placed chains are in the @dst_interval""" - - allocated_interval = interval() - for chain in blockChains: - if not chain.pinned: - continue - chain_interval = 
interval([(chain.offset_min, chain.offset_max - 1)]) - if chain_interval not in dst_interval: - raise ValueError('Chain placed out of destination interval') - allocated_interval += chain_interval - return allocated_interval - - -def resolve_symbol(blockChains, loc_db, dst_interval=None): - """Place @blockChains in the @dst_interval""" - - log_asmblock.info('resolve_symbol') - if dst_interval is None: - dst_interval = interval([(0, 0xFFFFFFFFFFFFFFFF)]) - - forbidden_interval = interval( - [(-1, 0xFFFFFFFFFFFFFFFF + 1)]) - dst_interval - allocated_interval = get_blockchains_address_interval(blockChains, - dst_interval) - log_asmblock.debug('allocated interval: %s', allocated_interval) - - pinned_chains = [chain for chain in blockChains if chain.pinned] - - # Add wedge in forbidden intervals - for start, stop in forbidden_interval.intervals: - wedge = BlockChainWedge( - loc_db, offset=start, size=stop + 1 - start) - pinned_chains.append(wedge) - - # Try to place bigger blockChains first - pinned_chains.sort(key=lambda x: x.offset_min) - blockChains.sort(key=lambda x: -x.max_size) - - fixed_chains = list(pinned_chains) - - log_asmblock.debug("place chains") - for chain in blockChains: - if chain.pinned: - continue - fixed = False - for i in range(1, len(fixed_chains)): - prev_chain = fixed_chains[i - 1] - next_chain = fixed_chains[i] - - if prev_chain.offset_max + chain.max_size < next_chain.offset_min: - new_chains = prev_chain.merge(chain) - fixed_chains[i - 1:i] = new_chains - fixed = True - break - if not fixed: - raise RuntimeError('Cannot find enough space to place blocks') - - return [chain for chain in fixed_chains if isinstance(chain, BlockChain)] - - -def get_block_loc_keys(block): - """Extract loc_keys used by @block""" - symbols = set() - for instr in block.lines: - if isinstance(instr, AsmRaw): - if isinstance(instr.raw, list): - for expr in instr.raw: - symbols.update(get_expr_locs(expr)) - else: - for arg in instr.args: - 
symbols.update(get_expr_locs(arg)) - return symbols - - -def assemble_block(mnemo, block, loc_db, conservative=False): - """Assemble a @block using @loc_db - @conservative: (optional) use original bytes when possible - """ - offset_i = 0 - - for instr in block.lines: - if isinstance(instr, AsmRaw): - if isinstance(instr.raw, list): - # Fix special AsmRaw - data = b"" - for expr in instr.raw: - expr_int = fix_expr_val(expr, loc_db) - data += pck[expr_int.size](expr_int.arg) - instr.data = data - - instr.offset = offset_i - offset_i += instr.l - continue - - # Assemble an instruction - saved_args = list(instr.args) - instr.offset = loc_db.get_location_offset(block.loc_key) + offset_i - - # Replace instruction's arguments by resolved ones - instr.args = instr.resolve_args_with_symbols(loc_db) - - if instr.dstflow(): - instr.fixDstOffset() - - old_l = instr.l - cached_candidate, _ = conservative_asm(mnemo, instr, loc_db, - conservative) - - # Restore original arguments - instr.args = saved_args - - # We need to update the block size - block.size = block.size - old_l + len(cached_candidate) - instr.data = cached_candidate - instr.l = len(cached_candidate) - - offset_i += instr.l - - -def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False): - """Resolve and assemble @blockChains using @loc_db until fixed point is - reached""" - - log_asmblock.debug("asmbloc_final") - - # Init structures - blocks_using_loc_key = {} - for block in asmcfg.blocks: - exprlocs = get_block_loc_keys(block) - loc_keys = set(expr.loc_key for expr in exprlocs) - for loc_key in loc_keys: - blocks_using_loc_key.setdefault(loc_key, set()).add(block) - - block2chain = {} - for chain in blockChains: - for block in chain.blocks: - block2chain[block] = chain - - # Init worklist - blocks_to_rework = set(asmcfg.blocks) - - # Fix and re-assemble blocks until fixed point is reached - while True: - - # Propagate pinned blocks into chains - modified_loc_keys = set() - for chain in 
blockChains: - chain.fix_blocks(modified_loc_keys) - - for loc_key in modified_loc_keys: - # Retrieve block with modified reference - mod_block = asmcfg.loc_key_to_block(loc_key) - if mod_block is not None: - blocks_to_rework.add(mod_block) - - # Enqueue blocks referencing a modified loc_key - if loc_key not in blocks_using_loc_key: - continue - for block in blocks_using_loc_key[loc_key]: - blocks_to_rework.add(block) - - # No more work - if not blocks_to_rework: - break - - while blocks_to_rework: - block = blocks_to_rework.pop() - assemble_block(mnemo, block, loc_db, conservative) - - -def asmbloc_final(mnemo, blocks, blockChains, loc_db, conservative=False): - """Resolve and assemble @blockChains using @loc_db until fixed point is - reached""" - - warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"') - asmblock_final(mnemo, blocks, blockChains, loc_db, conservative) - -def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None): - """Resolve and assemble @asmcfg using @loc_db into interval - @dst_interval""" - - asmcfg.sanity_check() - - asmcfg.guess_blocks_size(mnemo) - blockChains = group_constrained_blocks(loc_db, asmcfg) - resolved_blockChains = resolve_symbol( - blockChains, - loc_db, - dst_interval - ) - - asmblock_final(mnemo, asmcfg, resolved_blockChains, loc_db) - patches = {} - output_interval = interval() - - for block in asmcfg.blocks: - offset = loc_db.get_location_offset(block.loc_key) - for instr in block.lines: - if not instr.data: - # Empty line - continue - assert len(instr.data) == instr.l - patches[offset] = instr.data - instruction_interval = interval([(offset, offset + instr.l - 1)]) - if not (instruction_interval & output_interval).empty: - raise RuntimeError("overlapping bytes %X" % int(offset)) - instr.offset = offset - offset += instr.l - return patches - - -class disasmEngine(object): - - """Disassembly engine, taking care of disassembler options and mutli-block - strategy. 
- - Engine options: - - + Object supporting membership test (offset in ..) - - dont_dis: stop the current disassembly branch if reached - - split_dis: force a basic block end if reached, - with a next constraint on its successor - - dont_dis_retcall_funcs: stop disassembly after a call to one - of the given functions - - + On/Off - - follow_call: recursively disassemble CALL destinations - - dontdis_retcall: stop on CALL return addresses - - dont_dis_nulstart_bloc: stop if a block begin with a few \x00 - - + Number - - lines_wd: maximum block's size (in number of instruction) - - blocs_wd: maximum number of distinct disassembled block - - + callback(arch, attrib, pool_bin, cur_bloc, offsets_to_dis, - loc_db) - - dis_block_callback: callback after each new disassembled block - """ - - def __init__(self, arch, attrib, bin_stream, **kwargs): - """Instantiate a new disassembly engine - @arch: targeted architecture - @attrib: architecture attribute - @bin_stream: bytes source - @kwargs: (optional) custom options - """ - self.arch = arch - self.attrib = attrib - self.bin_stream = bin_stream - self.loc_db = LocationDB() - - # Setup options - self.dont_dis = [] - self.split_dis = [] - self.follow_call = False - self.dontdis_retcall = False - self.lines_wd = None - self.blocs_wd = None - self.dis_block_callback = None - self.dont_dis_nulstart_bloc = False - self.dont_dis_retcall_funcs = set() - - # Override options if needed - self.__dict__.update(kwargs) - - def get_job_done(self): - warnings.warn("""DEPRECATION WARNING: "job_done" is not needed anymore, support is dropped.""") - return set() - - def set_job_done(self, _): - warnings.warn("""DEPRECATION WARNING: "job_done" is not needed anymore, support is dropped.""") - return - - def get_dis_bloc_callback(self): - warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") - return self.dis_block_callback - - def set_dis_bloc_callback(self, function): - warnings.warn("""DEPRECATION WARNING: 
"dis_bloc_callback" use dis_block_callback.""") - self.dis_block_callback = function - - @property - def symbol_pool(self): - warnings.warn("""DEPRECATION WARNING: use 'loc_db'""") - return self.loc_db - - # Deprecated - job_done = property(get_job_done, set_job_done) - dis_bloc_callback = property(get_dis_bloc_callback, set_dis_bloc_callback) - - def _dis_block(self, offset, job_done=None): - """Disassemble the block at offset @offset - @job_done: a set of already disassembled addresses - Return the created AsmBlock and future offsets to disassemble - """ - - if job_done is None: - job_done = set() - lines_cpt = 0 - in_delayslot = False - delayslot_count = self.arch.delayslot - offsets_to_dis = set() - add_next_offset = False - loc_key = self.loc_db.get_or_create_offset_location(offset) - cur_block = AsmBlock(loc_key) - log_asmblock.debug("dis at %X", int(offset)) - while not in_delayslot or delayslot_count > 0: - if in_delayslot: - delayslot_count -= 1 - - if offset in self.dont_dis: - if not cur_block.lines: - job_done.add(offset) - # Block is empty -> bad block - cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_FORBIDDEN) - else: - # Block is not empty, stop the desassembly pass and add a - # constraint to the next block - loc_key_cst = self.loc_db.get_or_create_offset_location(offset) - cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) - break - - if lines_cpt > 0 and offset in self.split_dis: - loc_key_cst = self.loc_db.get_or_create_offset_location(offset) - cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) - offsets_to_dis.add(offset) - break - - lines_cpt += 1 - if self.lines_wd is not None and lines_cpt > self.lines_wd: - log_asmblock.debug("lines watchdog reached at %X", int(offset)) - break - - if offset in job_done: - loc_key_cst = self.loc_db.get_or_create_offset_location(offset) - cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) - break - - off_i = offset - error = None - try: - instr = self.arch.dis(self.bin_stream, self.attrib, 
offset) - except Disasm_Exception as e: - log_asmblock.warning(e) - instr = None - error = AsmBlockBad.ERROR_CANNOT_DISASM - except IOError as e: - log_asmblock.warning(e) - instr = None - error = AsmBlockBad.ERROR_IO - - - if instr is None: - log_asmblock.warning("cannot disasm at %X", int(off_i)) - if not cur_block.lines: - job_done.add(offset) - # Block is empty -> bad block - cur_block = AsmBlockBad(loc_key, errno=error) - else: - # Block is not empty, stop the desassembly pass and add a - # constraint to the next block - loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) - cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) - break - - # XXX TODO nul start block option - if (self.dont_dis_nulstart_bloc and - not cur_block.lines and - instr.b.count(b'\x00') == instr.l): - log_asmblock.warning("reach nul instr at %X", int(off_i)) - # Block is empty -> bad block - cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) - break - - # special case: flow graph modificator in delayslot - if in_delayslot and instr and (instr.splitflow() or instr.breakflow()): - add_next_offset = True - break - - job_done.add(offset) - log_asmblock.debug("dis at %X", int(offset)) - - offset += instr.l - log_asmblock.debug(instr) - log_asmblock.debug(instr.args) - - cur_block.addline(instr) - if not instr.breakflow(): - continue - # test split - if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): - add_next_offset = True - if instr.dstflow(): - instr.dstflow2label(self.loc_db) - destinations = instr.getdstflow(self.loc_db) - known_dsts = [] - for dst in destinations: - if not dst.is_loc(): - continue - loc_key = dst.loc_key - loc_key_offset = self.loc_db.get_location_offset(loc_key) - known_dsts.append(loc_key) - if loc_key_offset in self.dont_dis_retcall_funcs: - add_next_offset = False - if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update([AsmConstraint(loc_key, AsmConstraint.c_to) for loc_key in 
known_dsts]) - - # get in delayslot mode - in_delayslot = True - delayslot_count = instr.delayslot - - for c in cur_block.bto: - loc_key_offset = self.loc_db.get_location_offset(c.loc_key) - offsets_to_dis.add(loc_key_offset) - - if add_next_offset: - loc_key_cst = self.loc_db.get_or_create_offset_location(offset) - cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) - offsets_to_dis.add(offset) - - # Fix multiple constraints - cur_block.fix_constraints() - - if self.dis_block_callback is not None: - self.dis_block_callback(mn=self.arch, attrib=self.attrib, - pool_bin=self.bin_stream, cur_bloc=cur_block, - offsets_to_dis=offsets_to_dis, - loc_db=self.loc_db, - # Deprecated API - symbol_pool=self.loc_db) - return cur_block, offsets_to_dis - - def dis_block(self, offset): - """Disassemble the block at offset @offset and return the created - AsmBlock - @offset: targeted offset to disassemble - """ - current_block, _ = self._dis_block(offset) - return current_block - - def dis_bloc(self, offset): - """ - DEPRECATED function - Use dis_block instead of dis_bloc - """ - warnings.warn('DEPRECATION WARNING: use "dis_block" instead of "dis_bloc"') - return self.dis_block(offset) - - def dis_multiblock(self, offset, blocks=None): - """Disassemble every block reachable from @offset regarding - specific disasmEngine conditions - Return an AsmCFG instance containing disassembled blocks - @offset: starting offset - @blocks: (optional) AsmCFG instance of already disassembled blocks to - merge with - """ - log_asmblock.info("dis bloc all") - job_done = set() - if blocks is None: - blocks = AsmCFG(self.loc_db) - todo = [offset] - - bloc_cpt = 0 - while len(todo): - bloc_cpt += 1 - if self.blocs_wd is not None and bloc_cpt > self.blocs_wd: - log_asmblock.debug("blocks watchdog reached at %X", int(offset)) - break - - target_offset = int(todo.pop(0)) - if (target_offset is None or - target_offset in job_done): - continue - cur_block, nexts = self._dis_block(target_offset, job_done) - 
todo += nexts - blocks.add_block(cur_block) - - blocks.apply_splitting(self.loc_db, - dis_block_callback=self.dis_block_callback, - mn=self.arch, attrib=self.attrib, - pool_bin=self.bin_stream) - return blocks - - def dis_multibloc(self, offset, blocs=None): - """ - DEPRECATED function - Use dis_multiblock instead of dis_multibloc - """ - warnings.warn('DEPRECATION WARNING: use "dis_multiblock" instead of "dis_multibloc"') - return self.dis_multiblock(offset, blocs) - - def dis_instr(self, offset): - """Disassemble one instruction at offset @offset and return the - corresponding instruction instance - @offset: targeted offset to disassemble - """ - old_lineswd = self.lines_wd - self.lines_wd = 1 - try: - block = self.dis_block(offset) - finally: - self.lines_wd = old_lineswd - - instr = block.lines[0] - return instr diff --git a/miasm2/core/bin_stream.py b/miasm2/core/bin_stream.py deleted file mode 100644 index 4977e2ae..00000000 --- a/miasm2/core/bin_stream.py +++ /dev/null @@ -1,316 +0,0 @@ -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# - -from builtins import str -from future.utils import PY3 - -from miasm2.core.utils import BIG_ENDIAN, LITTLE_ENDIAN -from miasm2.core.utils import upck8le, upck16le, upck32le, upck64le -from miasm2.core.utils import upck8be, upck16be, upck32be, upck64be - - -class bin_stream(object): - - # Cache must be initialized by entering atomic mode - _cache = None - CACHE_SIZE = 10000 - # By default, no atomic mode - _atomic_mode = False - - def __init__(self, *args, **kargs): - self.endianness = LITTLE_ENDIAN - - def __repr__(self): - return "<%s !!>" % self.__class__.__name__ - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def hexdump(self, offset, l): - return - - def enter_atomic_mode(self): - """Enter atomic mode. In this mode, read may be cached""" - assert not self._atomic_mode - self._atomic_mode = True - self._cache = {} - - def leave_atomic_mode(self): - """Leave atomic mode""" - assert self._atomic_mode - self._atomic_mode = False - self._cache = None - - def _getbytes(self, start, length): - return self.bin[start:start + length] - - def getbytes(self, start, l=1): - """Return the bytes from the bit stream - @start: starting offset (in byte) - @l: (optional) number of bytes to read - - Wrapper on _getbytes, with atomic mode handling. 
- """ - if self._atomic_mode: - val = self._cache.get((start,l), None) - if val is None: - val = self._getbytes(start, l) - self._cache[(start,l)] = val - else: - val = self._getbytes(start, l) - return val - - def getbits(self, start, n): - """Return the bits from the bit stream - @start: the offset in bits - @n: number of bits to read - """ - # Trivial case - if n == 0: - return 0 - - # Get initial bytes - if n > self.getlen() * 8: - raise IOError('not enough bits %r %r' % (n, len(self.bin) * 8)) - byte_start = start // 8 - byte_stop = (start + n + 7) // 8 - temp = self.getbytes(byte_start, byte_stop - byte_start) - if not temp: - raise IOError('cannot get bytes') - - # Init - start = start % 8 - out = 0 - while n: - # Get needed bits, working on maximum 8 bits at a time - cur_byte_idx = start // 8 - new_bits = ord(temp[cur_byte_idx:cur_byte_idx + 1]) - to_keep = 8 - start % 8 - new_bits &= (1 << to_keep) - 1 - cur_len = min(to_keep, n) - new_bits >>= (to_keep - cur_len) - - # Update output - out <<= cur_len - out |= new_bits - - # Update counters - n -= cur_len - start += cur_len - return out - - def get_u8(self, addr, endianness=None): - """ - Return u8 from address @addr - endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN - """ - if endianness is None: - endianness = self.endianness - data = self.getbytes(addr, 1) - return data - - def get_u16(self, addr, endianness=None): - """ - Return u16 from address @addr - endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN - """ - if endianness is None: - endianness = self.endianness - data = self.getbytes(addr, 2) - if endianness == LITTLE_ENDIAN: - return upck16le(data) - else: - return upck32be(data) - - def get_u32(self, addr, endianness=None): - """ - Return u32 from address @addr - endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN - """ - if endianness is None: - endianness = self.endianness - data = self.getbytes(addr, 4) - if endianness == LITTLE_ENDIAN: - return upck32le(data) - else: - return upck32be(data) - - def 
get_u64(self, addr, endianness=None): - """ - Return u64 from address @addr - endianness: Optional: LITTLE_ENDIAN/BIG_ENDIAN - """ - if endianness is None: - endianness = self.endianness - data = self.getbytes(addr, 8) - if endianness == LITTLE_ENDIAN: - return upck64le(data) - else: - return upck64be(data) - - -class bin_stream_str(bin_stream): - - def __init__(self, input_str=b"", offset=0, base_address=0, shift=None): - bin_stream.__init__(self) - if shift is not None: - raise DeprecationWarning("use base_address instead of shift") - self.bin = input_str - self.offset = offset - self.base_address = base_address - self.l = len(input_str) - - def _getbytes(self, start, l=1): - if start + l - self.base_address > self.l: - raise IOError("not enough bytes in str") - if start - self.base_address < 0: - raise IOError("Negative offset") - - return super(bin_stream_str, self)._getbytes(start - self.base_address, l) - - def readbs(self, l=1): - if self.offset + l - self.base_address > self.l: - raise IOError("not enough bytes in str") - if self.offset - self.base_address < 0: - raise IOError("Negative offset") - self.offset += l - return self.bin[self.offset - l - self.base_address:self.offset - self.base_address] - - def __bytes__(self): - return self.bin[self.offset - self.base_address:] - - def setoffset(self, val): - self.offset = val - - def getlen(self): - return self.l - (self.offset - self.base_address) - - -class bin_stream_file(bin_stream): - - def __init__(self, binary, offset=0, base_address=0, shift=None): - bin_stream.__init__(self) - if shift is not None: - raise DeprecationWarning("use base_address instead of shift") - self.bin = binary - self.bin.seek(0, 2) - self.base_address = base_address - self.l = self.bin.tell() - self.offset = offset - - def getoffset(self): - return self.bin.tell() + self.base_address - - def setoffset(self, val): - self.bin.seek(val - self.base_address) - offset = property(getoffset, setoffset) - - def readbs(self, l=1): - if 
self.offset + l - self.base_address > self.l: - raise IOError("not enough bytes in file") - if self.offset - self.base_address < 0: - raise IOError("Negative offset") - return self.bin.read(l) - - def __bytes__(self): - return self.bin.read() - - def getlen(self): - return self.l - (self.offset - self.base_address) - - -class bin_stream_container(bin_stream): - - def __init__(self, binary, offset=0): - bin_stream.__init__(self) - self.bin = binary - self.l = binary.virt.max_addr() - self.offset = offset - - def is_addr_in(self, ad): - return self.bin.virt.is_addr_in(ad) - - def getlen(self): - return self.l - - def readbs(self, l=1): - if self.offset + l > self.l: - raise IOError("not enough bytes") - if self.offset < 0: - raise IOError("Negative offset") - self.offset += l - return self.bin.virt.get(self.offset - l, self.offset) - - def _getbytes(self, start, l=1): - try: - return self.bin.virt.get(start, start + l) - except ValueError: - raise IOError("cannot get bytes") - - def __bytes__(self): - return self.bin.virt.get(self.offset, self.offset + self.l) - - def setoffset(self, val): - self.offset = val - - -class bin_stream_pe(bin_stream_container): - def __init__(self, binary, *args, **kwargs): - super(bin_stream_pe, self).__init__(binary, *args, **kwargs) - self.endianness = binary._sex - - -class bin_stream_elf(bin_stream_container): - def __init__(self, binary, *args, **kwargs): - super(bin_stream_elf, self).__init__(binary, *args, **kwargs) - self.endianness = binary.sex - - -class bin_stream_vm(bin_stream): - - def __init__(self, vm, offset=0, base_offset=0): - self.offset = offset - self.base_offset = base_offset - self.vm = vm - if self.vm.is_little_endian(): - self.endianness = LITTLE_ENDIAN - else: - self.endianness = BIG_ENDIAN - - def getlen(self): - return 0xFFFFFFFFFFFFFFFF - - def _getbytes(self, start, l=1): - try: - s = self.vm.get_mem(start + self.base_offset, l) - except: - raise IOError('cannot get mem ad', hex(start)) - return s - - def 
readbs(self, l=1): - try: - s = self.vm.get_mem(self.offset + self.base_offset, l) - except: - raise IOError('cannot get mem ad', hex(self.offset)) - self.offset += l - return s - - def setoffset(self, val): - self.offset = val diff --git a/miasm2/core/bin_stream_ida.py b/miasm2/core/bin_stream_ida.py deleted file mode 100644 index 44cf9367..00000000 --- a/miasm2/core/bin_stream_ida.py +++ /dev/null @@ -1,45 +0,0 @@ -from builtins import range -from idc import Byte, SegEnd -from idautils import Segments -from idaapi import is_mapped - -from miasm2.core.utils import int_to_byte -from miasm2.core.bin_stream import bin_stream_str - - -class bin_stream_ida(bin_stream_str): - """ - bin_stream implementation for IDA - - Don't generate xrange using address computation: - It can raise error on overflow 7FFFFFFF with 32 bit python - """ - def _getbytes(self, start, l=1): - out = [] - for ad in range(l): - offset = ad + start + self.base_address - if not is_mapped(offset): - raise IOError("not enough bytes") - out.append(int_to_byte(Byte(offset))) - return b''.join(out) - - def readbs(self, l=1): - if self.offset + l > self.l: - raise IOError("not enough bytes") - content = self.getbytes(self.offset) - self.offset += l - return content - - def __str__(self): - raise NotImplementedError('Not fully functional') - - def setoffset(self, val): - self.offset = val - - def getlen(self): - # Lazy version - if hasattr(self, "_getlen"): - return self._getlen - max_addr = SegEnd(list(Segments())[-1] - (self.offset - self.base_address)) - self._getlen = max_addr - return max_addr diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py deleted file mode 100644 index c24b693d..00000000 --- a/miasm2/core/cpu.py +++ /dev/null @@ -1,1713 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range -import re -import struct -import logging -from collections import defaultdict - - -from future.utils import viewitems, viewvalues - -import pyparsing - -from miasm2.core.utils import decode_hex 
-import miasm2.expression.expression as m2_expr -from miasm2.core.bin_stream import bin_stream, bin_stream_str -from miasm2.core.utils import Disasm_Exception -from miasm2.expression.simplifications import expr_simp -from miasm2.core.locationdb import LocationDB - - -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp -from future.utils import with_metaclass - -log = logging.getLogger("cpuhelper") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - - -class bitobj(object): - - def __init__(self, s=b""): - if not s: - bits = [] - else: - bits = [int(x) for x in bin(int(encode_hex(s), 16))[2:]] - if len(bits) % 8: - bits = [0 for x in range(8 - (len(bits) % 8))] + bits - self.bits = bits - self.offset = 0 - - def __len__(self): - return len(self.bits) - self.offset - - def getbits(self, n): - if not n: - return 0 - if n > len(self.bits) - self.offset: - raise ValueError('not enough bits %r %r' % (n, len(self.bits))) - b = self.bits[self.offset:self.offset + n] - b = int("".join(str(x) for x in b), 2) - self.offset += n - return b - - def putbits(self, b, n): - if not n: - return - bits = list(bin(b)[2:]) - bits = [int(x) for x in bits] - bits = [0 for x in range(n - len(bits))] + bits - self.bits += bits - - def tostring(self): - if len(self.bits) % 8: - raise ValueError( - 'num bits must be 8 bit aligned: %d' % len(self.bits) - ) - b = int("".join(str(x) for x in self.bits), 2) - b = "%X" % b - b = '0' * (len(self.bits) // 4 - len(b)) + b - b = decode_hex(b.encode()) - return b - - def reset(self): - self.offset = 0 - - def copy_state(self): - b = self.__class__() - b.bits = self.bits - b.offset = self.offset - return b - - -def literal_list(l): - l = l[:] - l.sort() - l = l[::-1] - o = pyparsing.Literal(l[0]) - for x in l[1:]: - o |= pyparsing.Literal(x) - return o - - -class reg_info(object): - - def __init__(self, 
reg_str, reg_expr): - self.str = reg_str - self.expr = reg_expr - self.parser = literal_list(reg_str).setParseAction(self.cb_parse) - - def cb_parse(self, tokens): - assert len(tokens) == 1 - i = self.str.index(tokens[0]) - reg = self.expr[i] - result = AstId(reg) - return result - - def reg2expr(self, s): - i = self.str.index(s[0]) - return self.expr[i] - - def expr2regi(self, e): - return self.expr.index(e) - - -class reg_info_dct(object): - - def __init__(self, reg_expr): - self.dct_str_inv = dict((v.name, k) for k, v in viewitems(reg_expr)) - self.dct_expr = reg_expr - self.dct_expr_inv = dict((v, k) for k, v in viewitems(reg_expr)) - reg_str = [v.name for v in viewvalues(reg_expr)] - self.parser = literal_list(reg_str).setParseAction(self.cb_parse) - - def cb_parse(self, tokens): - assert len(tokens) == 1 - i = self.dct_str_inv[tokens[0]] - reg = self.dct_expr[i] - result = AstId(reg) - return result - - def reg2expr(self, s): - i = self.dct_str_inv[s[0]] - return self.dct_expr[i] - - def expr2regi(self, e): - return self.dct_expr_inv[e] - - -def gen_reg(reg_name, sz=32): - """Gen reg expr and parser""" - reg = m2_expr.ExprId(reg_name, sz) - reginfo = reg_info([reg_name], [reg]) - return reg, reginfo - - -def gen_reg_bs(reg_name, reg_info, base_cls): - """ - Generate: - class bs_reg_name(base_cls): - reg = reg_info - - bs_reg_name = bs(l=0, cls=(bs_reg_name,)) - """ - - bs_name = "bs_%s" % reg_name - cls = type(bs_name, base_cls, {'reg': reg_info}) - - bs_obj = bs(l=0, cls=(cls,)) - - return cls, bs_obj - - -def gen_regs(rnames, env, sz=32): - regs_str = [] - regs_expr = [] - regs_init = [] - for rname in rnames: - r = m2_expr.ExprId(rname, sz) - r_init = m2_expr.ExprId(rname+'_init', sz) - regs_str.append(rname) - regs_expr.append(r) - regs_init.append(r_init) - env[rname] = r - - reginfo = reg_info(regs_str, regs_expr) - return regs_expr, regs_init, reginfo - - -LPARENTHESIS = pyparsing.Literal("(") -RPARENTHESIS = pyparsing.Literal(")") - - -def 
int2expr(tokens): - v = tokens[0] - return (m2_expr.ExprInt, v) - - -def parse_op(tokens): - v = tokens[0] - return (m2_expr.ExprOp, v) - - -def parse_id(tokens): - v = tokens[0] - return (m2_expr.ExprId, v) - - -def ast_parse_op(tokens): - if len(tokens) == 1: - return tokens[0] - if len(tokens) == 2: - if tokens[0] in ['-', '+', '!']: - return m2_expr.ExprOp(tokens[0], tokens[1]) - if len(tokens) == 3: - if tokens[1] == '-': - # a - b => a + (-b) - tokens[1] = '+' - tokens[2] = - tokens[2] - return m2_expr.ExprOp(tokens[1], tokens[0], tokens[2]) - tokens = tokens[::-1] - while len(tokens) >= 3: - o1, op, o2 = tokens.pop(), tokens.pop(), tokens.pop() - if op == '-': - # a - b => a + (-b) - op = '+' - o2 = - o2 - e = m2_expr.ExprOp(op, o1, o2) - tokens.append(e) - if len(tokens) != 1: - raise NotImplementedError('strange op') - return tokens[0] - - -def ast_id2expr(a): - return m2_expr.ExprId(a, 32) - - -def ast_int2expr(a): - return m2_expr.ExprInt(a, 32) - - -def neg_int(tokens): - x = -tokens[0] - return x - - -integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda tokens: int(tokens[0])) -hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) -hex_int = pyparsing.Combine(hex_word).setParseAction(lambda tokens: int(tokens[0], 16)) - -# str_int = (Optional('-') + (hex_int | integer)) -str_int_pos = (hex_int | integer) -str_int_neg = (pyparsing.Suppress('-') + \ - (hex_int | integer)).setParseAction(neg_int) - -str_int = str_int_pos | str_int_neg -str_int.setParseAction(int2expr) - -logicop = pyparsing.oneOf('& | ^ >> << <<< >>>') -signop = pyparsing.oneOf('+ -') -multop = pyparsing.oneOf('* / %') -plusop = pyparsing.oneOf('+ -') - - -########################## - -def literal_list(l): - l = l[:] - l.sort() - l = l[::-1] - o = pyparsing.Literal(l[0]) - for x in l[1:]: - o |= pyparsing.Literal(x) - return o - - -def cb_int(tokens): - assert len(tokens) == 1 - integer = AstInt(tokens[0]) - return integer - - -def cb_parse_id(tokens): - assert 
len(tokens) == 1 - reg = tokens[0] - return AstId(reg) - - -def cb_op_not(tokens): - tokens = tokens[0] - assert len(tokens) == 2 - assert tokens[0] == "!" - result = AstOp("!", tokens[1]) - return result - - -def merge_ops(tokens, op): - args = [] - if len(tokens) >= 3: - args = [tokens.pop(0)] - i = 0 - while i < len(tokens): - op_tmp = tokens[i] - arg = tokens[i+1] - i += 2 - if op_tmp != op: - raise ValueError("Bad operator") - args.append(arg) - result = AstOp(op, *args) - return result - - -def cb_op_and(tokens): - result = merge_ops(tokens[0], "&") - return result - - -def cb_op_xor(tokens): - result = merge_ops(tokens[0], "^") - return result - - -def cb_op_sign(tokens): - assert len(tokens) == 1 - op, value = tokens[0] - return -value - - -def cb_op_div(tokens): - tokens = tokens[0] - assert len(tokens) == 3 - assert tokens[1] == "/" - result = AstOp("/", tokens[0], tokens[2]) - return result - - -def cb_op_plusminus(tokens): - tokens = tokens[0] - if len(tokens) == 3: - # binary op - assert isinstance(tokens[0], AstNode) - assert isinstance(tokens[2], AstNode) - op, args = tokens[1], [tokens[0], tokens[2]] - elif len(tokens) > 3: - args = [tokens.pop(0)] - i = 0 - while i < len(tokens): - op = tokens[i] - arg = tokens[i+1] - i += 2 - if op == '-': - arg = -arg - elif op == '+': - pass - else: - raise ValueError("Bad operator") - args.append(arg) - op = '+' - else: - raise ValueError("Parsing error") - assert all(isinstance(arg, AstNode) for arg in args) - result = AstOp(op, *args) - return result - - -def cb_op_mul(tokens): - tokens = tokens[0] - assert len(tokens) == 3 - assert isinstance(tokens[0], AstNode) - assert isinstance(tokens[2], AstNode) - - # binary op - op, args = tokens[1], [tokens[0], tokens[2]] - result = AstOp(op, *args) - return result - - -integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda tokens: int(tokens[0])) -hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) -hex_int = 
pyparsing.Combine(hex_word).setParseAction(lambda tokens: int(tokens[0], 16)) - -str_int_pos = (hex_int | integer) - -str_int = str_int_pos -str_int.setParseAction(cb_int) - -notop = pyparsing.oneOf('!') -andop = pyparsing.oneOf('&') -orop = pyparsing.oneOf('|') -xorop = pyparsing.oneOf('^') -shiftop = pyparsing.oneOf('>> <<') -rotop = pyparsing.oneOf('<<< >>>') -signop = pyparsing.oneOf('+ -') -mulop = pyparsing.oneOf('*') -plusop = pyparsing.oneOf('+ -') -divop = pyparsing.oneOf('/') - - -variable = pyparsing.Word(pyparsing.alphas + "_$.", pyparsing.alphanums + "_") -variable.setParseAction(cb_parse_id) -operand = str_int | variable - -base_expr = pyparsing.operatorPrecedence(operand, - [(notop, 1, pyparsing.opAssoc.RIGHT, cb_op_not), - (andop, 2, pyparsing.opAssoc.RIGHT, cb_op_and), - (xorop, 2, pyparsing.opAssoc.RIGHT, cb_op_xor), - (signop, 1, pyparsing.opAssoc.RIGHT, cb_op_sign), - (mulop, 2, pyparsing.opAssoc.RIGHT, cb_op_mul), - (divop, 2, pyparsing.opAssoc.RIGHT, cb_op_div), - (plusop, 2, pyparsing.opAssoc.LEFT, cb_op_plusminus), - ]) - - -default_prio = 0x1337 - - -def isbin(s): - return re.match('[0-1]+$', s) - - -def int2bin(i, l): - s = '0' * l + bin(i)[2:] - return s[-l:] - - -def myror32(v, r): - return ((v & 0xFFFFFFFF) >> r) | ((v << (32 - r)) & 0xFFFFFFFF) - - -def myrol32(v, r): - return ((v & 0xFFFFFFFF) >> (32 - r)) | ((v << r) & 0xFFFFFFFF) - - -class bs(object): - all_new_c = {} - prio = default_prio - - def __init__(self, strbits=None, l=None, cls=None, - fname=None, order=0, flen=None, **kargs): - if fname is None: - fname = hex(id(str((strbits, l, cls, fname, order, flen, kargs)))) - if strbits is None: - strbits = "" # "X"*l - elif l is None: - l = len(strbits) - if strbits and isbin(strbits): - value = int(strbits, 2) - elif 'default_val' in kargs: - value = int(kargs['default_val'], 2) - else: - value = None - allbits = list(strbits) - allbits.reverse() - fbits = 0 - fmask = 0 - while allbits: - a = allbits.pop() - if a == " ": - 
continue - fbits <<= 1 - fmask <<= 1 - if a in '01': - a = int(a) - fbits |= a - fmask |= 1 - lmask = (1 << l) - 1 - # gen conditional field - if cls: - for b in cls: - if 'flen' in b.__dict__: - flen = getattr(b, 'flen') - - self.strbits = strbits - self.l = l - self.cls = cls - self.fname = fname - self.order = order - self.fbits = fbits - self.fmask = fmask - self.flen = flen - self.value = value - self.kargs = kargs - - lmask = property(lambda self:(1 << self.l) - 1) - - def __getitem__(self, item): - return getattr(self, item) - - def __repr__(self): - o = self.__class__.__name__ - if self.fname: - o += "_%s" % self.fname - o += "_%(strbits)s" % self - if self.cls: - o += '_' + '_'.join([x.__name__ for x in self.cls]) - return o - - def gen(self, parent): - c_name = 'nbsi' - if self.cls: - c_name += '_' + '_'.join([x.__name__ for x in self.cls]) - bases = list(self.cls) - else: - bases = [] - # bsi added at end of list - # used to use first function of added class - bases += [bsi] - k = c_name, tuple(bases) - if k in self.all_new_c: - new_c = self.all_new_c[k] - else: - new_c = type(c_name, tuple(bases), {}) - self.all_new_c[k] = new_c - c = new_c(parent, - self.strbits, self.l, self.cls, - self.fname, self.order, self.lmask, self.fbits, - self.fmask, self.value, self.flen, **self.kargs) - return c - - def check_fbits(self, v): - return v & self.fmask == self.fbits - - @classmethod - def flen(cls, v): - raise NotImplementedError('not fully functional') - - -class dum_arg(object): - - def __init__(self, e=None): - self.expr = e - - -class bsopt(bs): - - def ispresent(self): - return True - - -class bsi(object): - - def __init__(self, parent, strbits, l, cls, fname, order, - lmask, fbits, fmask, value, flen, **kargs): - self.parent = parent - self.strbits = strbits - self.l = l - self.cls = cls - self.fname = fname - self.order = order - self.fbits = fbits - self.fmask = fmask - self.flen = flen - self.value = value - self.kargs = kargs - 
self.__dict__.update(self.kargs) - - lmask = property(lambda self:(1 << self.l) - 1) - - def decode(self, v): - self.value = v & self.lmask - return True - - def encode(self): - return True - - def clone(self): - s = self.__class__(self.parent, - self.strbits, self.l, self.cls, - self.fname, self.order, self.lmask, self.fbits, - self.fmask, self.value, self.flen, **self.kargs) - s.__dict__.update(self.kargs) - if hasattr(self, 'expr'): - s.expr = self.expr - return s - - def __hash__(self): - kargs = [] - for k, v in list(viewitems(self.kargs)): - if isinstance(v, list): - v = tuple(v) - kargs.append((k, v)) - l = [self.strbits, self.l, self.cls, - self.fname, self.order, self.lmask, self.fbits, - self.fmask, self.value] # + kargs - - return hash(tuple(l)) - - -class bs_divert(object): - prio = default_prio - - def __init__(self, **kargs): - self.args = kargs - - def __getattr__(self, item): - if item in self.__dict__: - return self.__dict__[item] - elif item in self.args: - return self.args.get(item) - else: - raise AttributeError - - -class bs_name(bs_divert): - prio = 1 - - def divert(self, i, candidates): - out = [] - for cls, _, bases, dct, fields in candidates: - for new_name, value in viewitems(self.args['name']): - nfields = fields[:] - s = int2bin(value, self.args['l']) - args = dict(self.args) - args.update({'strbits': s}) - f = bs(**args) - nfields[i] = f - ndct = dict(dct) - ndct['name'] = new_name - out.append((cls, new_name, bases, ndct, nfields)) - return out - - -class bs_mod_name(bs_divert): - prio = 2 - - def divert(self, i, candidates): - out = [] - for cls, _, bases, dct, fields in candidates: - tab = self.args['mn_mod'] - if isinstance(tab, list): - tmp = {} - for j, v in enumerate(tab): - tmp[j] = v - tab = tmp - for value, new_name in viewitems(tab): - nfields = fields[:] - s = int2bin(value, self.args['l']) - args = dict(self.args) - args.update({'strbits': s}) - f = bs(**args) - nfields[i] = f - ndct = dict(dct) - ndct['name'] = 
self.modname(ndct['name'], value) - out.append((cls, new_name, bases, ndct, nfields)) - return out - - def modname(self, name, i): - return name + self.args['mn_mod'][i] - - -class bs_cond(bsi): - pass - - -class bs_swapargs(bs_divert): - - def divert(self, i, candidates): - out = [] - for cls, name, bases, dct, fields in candidates: - # args not permuted - ndct = dict(dct) - nfields = fields[:] - # gen fix field - f = gen_bsint(0, self.args['l'], self.args) - nfields[i] = f - out.append((cls, name, bases, ndct, nfields)) - - # args permuted - ndct = dict(dct) - nfields = fields[:] - ap = ndct['args_permut'][:] - a = ap.pop(0) - b = ap.pop(0) - ndct['args_permut'] = [b, a] + ap - # gen fix field - f = gen_bsint(1, self.args['l'], self.args) - nfields[i] = f - - out.append((cls, name, bases, ndct, nfields)) - return out - - -class m_arg(object): - - def fromstring(self, text, loc_db, parser_result=None): - if parser_result: - e, start, stop = parser_result[self.parser] - self.expr = e - return start, stop - try: - v, start, stop = next(self.parser.scanString(text)) - except StopIteration: - return None, None - arg = v[0] - expr = self.asm_ast_to_expr(arg, loc_db) - self.expr = expr - return start, stop - - def asm_ast_to_expr(self, arg, loc_db, **kwargs): - raise NotImplementedError("Virtual") - - -class m_reg(m_arg): - prio = default_prio - - @property - def parser(self): - return self.reg.parser - - def decode(self, v): - self.expr = self.reg.expr[0] - return True - - def encode(self): - return self.expr == self.reg.expr[0] - - -class reg_noarg(object): - reg_info = None - parser = None - - def fromstring(self, text, loc_db, parser_result=None): - if parser_result: - e, start, stop = parser_result[self.parser] - self.expr = e - return start, stop - try: - v, start, stop = next(self.parser.scanString(text)) - except StopIteration: - return None, None - arg = v[0] - expr = self.parses_to_expr(arg, loc_db) - self.expr = expr - return start, stop - - def decode(self, 
v): - v = v & self.lmask - if v >= len(self.reg_info.expr): - return False - self.expr = self.reg_info.expr[v] - return True - - def encode(self): - if not self.expr in self.reg_info.expr: - log.debug("cannot encode reg %r", self.expr) - return False - self.value = self.reg_info.expr.index(self.expr) - if self.value > self.lmask: - log.debug("cannot encode field value %x %x", - self.value, self.lmask) - return False - return True - - def check_fbits(self, v): - return v & self.fmask == self.fbits - - -class mn_prefix(object): - pass - - -def swap16(v): - return struct.unpack('H', v))[0] - - -def swap32(v): - return struct.unpack('I', v))[0] - - -def perm_inv(p): - o = [None for x in range(len(p))] - for i, x in enumerate(p): - o[x] = i - return o - - -def gen_bsint(value, l, args): - s = int2bin(value, l) - args = dict(args) - args.update({'strbits': s}) - f = bs(**args) - return f - -total_scans = 0 - - -def branch2nodes(branch, nodes=None): - if nodes is None: - nodes = [] - for k, v in viewitems(branch): - if not isinstance(v, dict): - continue - for k2 in v: - nodes.append((k, k2)) - branch2nodes(v, nodes) - - -def factor_one_bit(tree): - if isinstance(tree, set): - return tree - new_keys = defaultdict(lambda: defaultdict(dict)) - if len(tree) == 1: - return tree - for k, v in viewitems(tree): - if k == "mn": - new_keys[k] = v - continue - l, fmask, fbits, fname, flen = k - if flen is not None or l <= 1: - new_keys[k] = v - continue - cfmask = fmask >> (l - 1) - nfmask = fmask & ((1 << (l - 1)) - 1) - cfbits = fbits >> (l - 1) - nfbits = fbits & ((1 << (l - 1)) - 1) - ck = 1, cfmask, cfbits, None, flen - nk = l - 1, nfmask, nfbits, fname, flen - if nk in new_keys[ck]: - raise NotImplementedError('not fully functional') - new_keys[ck][nk] = v - for k, v in list(viewitems(new_keys)): - new_keys[k] = factor_one_bit(v) - # try factor sons - if len(new_keys) != 1: - return new_keys - subtree = next(iter(viewvalues(new_keys))) - if len(subtree) != 1: - return 
new_keys - if next(iter(subtree)) == 'mn': - return new_keys - - return new_keys - - -def factor_fields(tree): - if not isinstance(tree, dict): - return tree - if len(tree) != 1: - return tree - # merge - k1, v1 = next(iter(viewitems(tree))) - if k1 == "mn": - return tree - l1, fmask1, fbits1, fname1, flen1 = k1 - if fname1 is not None: - return tree - if flen1 is not None: - return tree - - if not isinstance(v1, dict): - return tree - if len(v1) != 1: - return tree - k2, v2 = next(iter(viewitems(v1))) - if k2 == "mn": - return tree - l2, fmask2, fbits2, fname2, flen2 = k2 - if fname2 is not None: - return tree - if flen2 is not None: - return tree - l = l1 + l2 - fmask = (fmask1 << l2) | fmask2 - fbits = (fbits1 << l2) | fbits2 - fname = fname2 - flen = flen2 - k = l, fmask, fbits, fname, flen - new_keys = {k: v2} - return new_keys - - -def factor_fields_all(tree): - if not isinstance(tree, dict): - return tree - new_keys = {} - for k, v in viewitems(tree): - v = factor_fields(v) - new_keys[k] = factor_fields_all(v) - return new_keys - - -def graph_tree(tree): - nodes = [] - branch2nodes(tree, nodes) - - out = """ - digraph G { - """ - for a, b in nodes: - if b == 'mn': - continue - out += "%s -> %s;\n" % (id(a), id(b)) - out += "}" - open('graph.txt', 'w').write(out) - - -def add_candidate_to_tree(tree, c): - branch = tree - for f in c.fields: - if f.l == 0: - continue - node = f.l, f.fmask, f.fbits, f.fname, f.flen - - if not node in branch: - branch[node] = {} - branch = branch[node] - if not 'mn' in branch: - branch['mn'] = set() - branch['mn'].add(c) - - -def add_candidate(bases, c): - add_candidate_to_tree(bases[0].bintree, c) - - -def getfieldby_name(fields, fname): - f = [x for x in fields if hasattr(x, 'fname') and x.fname == fname] - if len(f) != 1: - raise ValueError('more than one field with name: %s' % fname) - return f[0] - - -def getfieldindexby_name(fields, fname): - for i, f in enumerate(fields): - if hasattr(f, 'fname') and f.fname == fname: - 
return f, i - return None - - -class metamn(type): - - def __new__(mcs, name, bases, dct): - if name == "cls_mn" or name.startswith('mn_'): - return type.__new__(mcs, name, bases, dct) - alias = dct.get('alias', False) - - fields = bases[0].mod_fields(dct['fields']) - if not 'name' in dct: - dct["name"] = bases[0].getmn(name) - if 'args' in dct: - # special case for permuted arguments - o = [] - p = [] - for i, a in enumerate(dct['args']): - o.append((i, a)) - if a in fields: - p.append((fields.index(a), a)) - p.sort() - p = [x[1] for x in p] - p = [dct['args'].index(x) for x in p] - dct['args_permut'] = perm_inv(p) - # order fields - f_ordered = [x for x in enumerate(fields)] - f_ordered.sort(key=lambda x: (x[1].prio, x[0])) - candidates = bases[0].gen_modes(mcs, name, bases, dct, fields) - for i, fc in f_ordered: - if isinstance(fc, bs_divert): - candidates = fc.divert(i, candidates) - for cls, name, bases, dct, fields in candidates: - ndct = dict(dct) - fields = [f for f in fields if f] - ndct['fields'] = fields - ndct['mn_len'] = sum([x.l for x in fields]) - c = type.__new__(cls, name, bases, ndct) - c.alias = alias - c.check_mnemo(fields) - c.num = bases[0].num - bases[0].num += 1 - bases[0].all_mn.append(c) - mode = dct['mode'] - bases[0].all_mn_mode[mode].append(c) - bases[0].all_mn_name[c.name].append(c) - i = c() - i.init_class() - bases[0].all_mn_inst[c].append(i) - add_candidate(bases, c) - # gen byte lookup - o = "" - for f in i.fields_order: - if not isinstance(f, bsi): - raise ValueError('f is not bsi') - if f.l == 0: - continue - o += f.strbits - return c - - -class instruction(object): - __slots__ = ["name", "mode", "args", - "l", "b", "offset", "data", - "additional_info", "delayslot"] - - def __init__(self, name, mode, args, additional_info=None): - self.name = name - self.mode = mode - self.args = args - self.additional_info = additional_info - self.offset = None - self.l = None - self.b = None - - def gen_args(self, args): - out = ', 
'.join([str(x) for x in args]) - return out - - def __str__(self): - return self.to_string() - - def to_string(self, loc_db=None): - o = "%-10s " % self.name - args = [] - for i, arg in enumerate(self.args): - if not isinstance(arg, m2_expr.Expr): - raise ValueError('zarb arg type') - x = self.arg2str(arg, i, loc_db) - args.append(x) - o += self.gen_args(args) - return o - - def get_asm_offset(self, expr): - return m2_expr.ExprInt(self.offset, expr.size) - - def get_asm_next_offset(self, expr): - return m2_expr.ExprInt(self.offset+self.l, expr.size) - - def resolve_args_with_symbols(self, symbols=None): - if symbols is None: - symbols = LocationDB() - args_out = [] - for expr in self.args: - # try to resolve symbols using symbols (0 for default value) - loc_keys = m2_expr.get_expr_locs(expr) - fixed_expr = {} - for exprloc in loc_keys: - loc_key = exprloc.loc_key - names = symbols.get_location_names(loc_key) - # special symbols - if b'$' in names: - fixed_expr[exprloc] = self.get_asm_offset(exprloc) - continue - if b'_' in names: - fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) - continue - arg_int = symbols.get_location_offset(loc_key) - if arg_int is not None: - fixed_expr[exprloc] = m2_expr.ExprInt(arg_int, exprloc.size) - continue - if not names: - raise ValueError('Unresolved symbol: %r' % exprloc) - - offset = symbols.get_location_offset(loc_key) - if offset is None: - raise ValueError( - 'The offset of loc_key "%s" cannot be determined' % names - ) - else: - # Fix symbol with its offset - size = exprloc.size - if size is None: - default_size = self.get_symbol_size(exprloc, symbols) - size = default_size - value = m2_expr.ExprInt(offset, size) - fixed_expr[exprloc] = value - - expr = expr.replace_expr(fixed_expr) - expr = expr_simp(expr) - args_out.append(expr) - return args_out - - def get_info(self, c): - return - - -class cls_mn(with_metaclass(metamn, object)): - args_symb = [] - instruction = instruction - # Block's offset alignment - alignment = 
1 - - @classmethod - def guess_mnemo(cls, bs, attrib, pre_dis_info, offset): - candidates = [] - - candidates = set() - - fname_values = pre_dis_info - todo = [ - (dict(fname_values), branch, offset * 8) - for branch in list(viewitems(cls.bintree)) - ] - for fname_values, branch, offset_b in todo: - (l, fmask, fbits, fname, flen), vals = branch - - if flen is not None: - l = flen(attrib, fname_values) - if l is not None: - try: - v = cls.getbits(bs, attrib, offset_b, l) - except IOError: - # Raised if offset is out of bound - continue - offset_b += l - if v & fmask != fbits: - continue - if fname is not None and not fname in fname_values: - fname_values[fname] = v - for nb, v in viewitems(vals): - if 'mn' in nb: - candidates.update(v) - else: - todo.append((dict(fname_values), (nb, v), offset_b)) - - return [c for c in candidates] - - def reset_class(self): - for f in self.fields_order: - if f.strbits and isbin(f.strbits): - f.value = int(f.strbits, 2) - elif 'default_val' in f.kargs: - f.value = int(f.kargs['default_val'], 2) - else: - f.value = None - if f.fname: - setattr(self, f.fname, f) - - def init_class(self): - args = [] - fields_order = [] - to_decode = [] - off = 0 - for i, fc in enumerate(self.fields): - f = fc.gen(self) - f.offset = off - off += f.l - fields_order.append(f) - to_decode.append((i, f)) - - if isinstance(f, m_arg): - args.append(f) - if f.fname: - setattr(self, f.fname, f) - if hasattr(self, 'args_permut'): - args = [args[self.args_permut[i]] - for i in range(len(self.args_permut))] - to_decode.sort(key=lambda x: (x[1].order, x[0])) - to_decode = [fields_order.index(f[1]) for f in to_decode] - self.args = args - self.fields_order = fields_order - self.to_decode = to_decode - - def add_pre_dis_info(self, prefix=None): - return True - - @classmethod - def getbits(cls, bs, attrib, offset_b, l): - return bs.getbits(offset_b, l) - - @classmethod - def getbytes(cls, bs, offset, l): - return bs.getbytes(offset, l) - - @classmethod - def 
pre_dis(cls, v_o, attrib, offset): - return {}, v_o, attrib, offset, 0 - - def post_dis(self): - return self - - @classmethod - def check_mnemo(cls, fields): - pass - - @classmethod - def mod_fields(cls, fields): - return fields - - @classmethod - def dis(cls, bs_o, mode_o = None, offset=0): - if not isinstance(bs_o, bin_stream): - bs_o = bin_stream_str(bs_o) - - bs_o.enter_atomic_mode() - - offset_o = offset - try: - pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis( - bs_o, mode_o, offset) - except: - bs_o.leave_atomic_mode() - raise - candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset) - if not candidates: - bs_o.leave_atomic_mode() - raise Disasm_Exception('cannot disasm (guess) at %X' % offset) - - out = [] - out_c = [] - if hasattr(bs, 'getlen'): - bs_l = bs.getlen() - else: - bs_l = len(bs) - - alias = False - for c in candidates: - log.debug("*" * 40, mode, c.mode) - log.debug(c.fields) - - c = cls.all_mn_inst[c][0] - - c.reset_class() - c.mode = mode - - if not c.add_pre_dis_info(pre_dis_info): - continue - - todo = {} - getok = True - fname_values = dict(pre_dis_info) - offset_b = offset * 8 - - total_l = 0 - for i, f in enumerate(c.fields_order): - if f.flen is not None: - l = f.flen(mode, fname_values) - else: - l = f.l - if l is not None: - total_l += l - f.l = l - f.is_present = True - log.debug("FIELD %s %s %s %s", f.__class__, f.fname, - offset_b, l) - if bs_l * 8 - offset_b < l: - getok = False - break - try: - bv = cls.getbits(bs, mode, offset_b, l) - except: - bs_o.leave_atomic_mode() - raise - offset_b += l - if not f.fname in fname_values: - fname_values[f.fname] = bv - todo[i] = bv - else: - f.is_present = False - todo[i] = None - - if not getok: - continue - - c.l = prefix_len + total_l // 8 - for i in c.to_decode: - f = c.fields_order[i] - if f.is_present: - ret = f.decode(todo[i]) - if not ret: - log.debug("cannot decode %r", f) - break - - if not ret: - continue - for a in c.args: - a.expr = expr_simp(a.expr) - - c.b = 
cls.getbytes(bs, offset_o, c.l) - c.offset = offset_o - c = c.post_dis() - if c is None: - continue - c_args = [a.expr for a in c.args] - instr = cls.instruction(c.name, mode, c_args, - additional_info=c.additional_info()) - instr.l = prefix_len + total_l // 8 - instr.b = cls.getbytes(bs, offset_o, instr.l) - instr.offset = offset_o - instr.get_info(c) - if c.alias: - alias = True - out.append(instr) - out_c.append(c) - - bs_o.leave_atomic_mode() - - if not out: - raise Disasm_Exception('cannot disasm at %X' % offset_o) - if len(out) != 1: - if not alias: - log.warning('dis multiple args ret default') - - for i, o in enumerate(out_c): - if o.alias: - return out[i] - raise NotImplementedError( - 'Multiple disas: \n' + - "\n".join(str(x) for x in out) - ) - return out[0] - - @classmethod - def fromstring(cls, text, loc_db, mode = None): - global total_scans - name = re.search('(\S+)', text).groups() - if not name: - raise ValueError('cannot find name', text) - name = name[0] - - if not name in cls.all_mn_name: - raise ValueError('unknown name', name) - clist = [x for x in cls.all_mn_name[name]] - out = [] - out_args = [] - parsers = defaultdict(dict) - - for cc in clist: - for c in cls.get_cls_instance(cc, mode): - args_expr = [] - args_str = text[len(name):].strip(' ') - - start = 0 - cannot_parse = False - len_o = len(args_str) - - for i, f in enumerate(c.args): - start_i = len_o - len(args_str) - if type(f.parser) == tuple: - parser = f.parser - else: - parser = (f.parser,) - for p in parser: - if p in parsers[(i, start_i)]: - continue - try: - total_scans += 1 - v, start, stop = next(p.scanString(args_str)) - except StopIteration: - v, start, stop = [None], None, None - if start != 0: - v, start, stop = [None], None, None - if v != [None]: - v = f.asm_ast_to_expr(v[0], loc_db) - if v is None: - v, start, stop = [None], None, None - parsers[(i, start_i)][p] = v, start, stop - start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)]) - if start != 0: - 
log.debug("cannot fromstring %r", args_str) - cannot_parse = True - break - if f.expr is None: - raise NotImplementedError('not fully functional') - f.expr = expr_simp(f.expr) - args_expr.append(f.expr) - args_str = args_str[stop:].strip(' ') - if args_str.startswith(','): - args_str = args_str[1:] - args_str = args_str.strip(' ') - if args_str: - cannot_parse = True - if cannot_parse: - continue - - out.append(c) - out_args.append(args_expr) - break - - if len(out) == 0: - raise ValueError('cannot fromstring %r' % text) - if len(out) != 1: - log.debug('fromstring multiple args ret default') - c = out[0] - c_args = out_args[0] - - instr = cls.instruction(c.name, mode, c_args, - additional_info=c.additional_info()) - return instr - - def dup_info(self, infos): - return - - @classmethod - def get_cls_instance(cls, cc, mode, infos=None): - c = cls.all_mn_inst[cc][0] - - c.reset_class() - c.add_pre_dis_info() - c.dup_info(infos) - - c.mode = mode - yield c - - @classmethod - def asm(cls, instr, symbols=None): - """ - Re asm instruction by searching mnemo using name and args. 
We then - can modify args and get the hex of a modified instruction - """ - clist = cls.all_mn_name[instr.name] - clist = [x for x in clist] - vals = [] - candidates = [] - args = instr.resolve_args_with_symbols(symbols) - - for cc in clist: - - for c in cls.get_cls_instance( - cc, instr.mode, instr.additional_info): - - cannot_parse = False - if len(c.args) != len(instr.args): - continue - - # only fix args expr - for i in range(len(c.args)): - c.args[i].expr = args[i] - - v = c.value(instr.mode) - if not v: - log.debug("cannot encode %r", c) - cannot_parse = True - if cannot_parse: - continue - vals += v - candidates.append((c, v)) - if len(vals) == 0: - raise ValueError( - 'cannot asm %r %r' % - (instr.name, [str(x) for x in instr.args]) - ) - if len(vals) != 1: - log.debug('asm multiple args ret default') - - vals = cls.filter_asm_candidates(instr, candidates) - return vals - - @classmethod - def filter_asm_candidates(cls, instr, candidates): - o = [] - for _, v in candidates: - o += v - o.sort(key=len) - return o - - def value(self, mode): - todo = [(0, 0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] - - result = [] - done = [] - - while todo: - index, cur_len, to_decode = todo.pop() - # TEST XXX - for _, f in to_decode: - setattr(self, f.fname, f) - if (index, [x[1].value for x in to_decode]) in done: - continue - done.append((index, [x[1].value for x in to_decode])) - - can_encode = True - for i, f in to_decode[index:]: - f.parent.l = cur_len - ret = f.encode() - if not ret: - log.debug('cannot encode %r', f) - can_encode = False - break - - if f.value is not None and f.l: - assert f.value <= f.lmask - cur_len += f.l - index += 1 - if ret is True: - continue - - for _ in ret: - o = [] - if ((index, cur_len, [xx[1].value for xx in to_decode]) in todo or - (index, cur_len, [xx[1].value for xx in to_decode]) in done): - raise NotImplementedError('not fully functional') - - for p, f in to_decode: - fnew = f.clone() - o.append((p, fnew)) - 
todo.append((index, cur_len, o)) - can_encode = False - - break - if not can_encode: - continue - result.append(to_decode) - - return self.decoded2bytes(result) - - def encodefields(self, decoded): - bits = bitobj() - for _, f in decoded: - setattr(self, f.fname, f) - - if f.value is None: - continue - bits.putbits(f.value, f.l) - - return bits.tostring() - - def decoded2bytes(self, result): - if not result: - return [] - - out = [] - for decoded in result: - decoded.sort() - - o = self.encodefields(decoded) - if o is None: - continue - out.append(o) - out = list(set(out)) - return out - - def gen_args(self, args): - out = ', '.join([str(x) for x in args]) - return out - - def args2str(self): - args = [] - for arg in self.args: - # XXX todo test - if not (isinstance(arg, m2_expr.Expr) or - isinstance(arg.expr, m2_expr.Expr)): - raise ValueError('zarb arg type') - x = str(arg) - args.append(x) - return args - - def __str__(self): - o = "%-10s " % self.name - args = [] - for arg in self.args: - # XXX todo test - if not (isinstance(arg, m2_expr.Expr) or - isinstance(arg.expr, m2_expr.Expr)): - raise ValueError('zarb arg type') - x = str(arg) - args.append(x) - - o += self.gen_args(args) - return o - - def parse_prefix(self, v): - return 0 - - def set_dst_symbol(self, loc_db): - dst = self.getdstflow(loc_db) - args = [] - for d in dst: - if isinstance(d, m2_expr.ExprInt): - l = loc_db.get_or_create_offset_location(int(d)) - - a = m2_expr.ExprId(l.name, d.size) - else: - a = d - args.append(a) - self.args_symb = args - - def getdstflow(self, loc_db): - return [self.args[0].expr] - - -class imm_noarg(object): - intsize = 32 - intmask = (1 << intsize) - 1 - - def int2expr(self, v): - if (v & ~self.intmask) != 0: - return None - return m2_expr.ExprInt(v, self.intsize) - - def expr2int(self, e): - if not isinstance(e, m2_expr.ExprInt): - return None - v = int(e) - if v & ~self.intmask != 0: - return None - return v - - def fromstring(self, text, loc_db, parser_result=None): 
- if parser_result: - e, start, stop = parser_result[self.parser] - else: - try: - e, start, stop = next(self.parser.scanString(text)) - except StopIteration: - return None, None - if e == [None]: - return None, None - - assert(isinstance(e, m2_expr.Expr)) - if isinstance(e, tuple): - self.expr = self.int2expr(e[1]) - elif isinstance(e, m2_expr.Expr): - self.expr = e - else: - raise TypeError('zarb expr') - if self.expr is None: - log.debug('cannot fromstring int %r', text) - return None, None - return start, stop - - def decodeval(self, v): - return v - - def encodeval(self, v): - if v > self.lmask: - return False - return v - - def decode(self, v): - v = v & self.lmask - v = self.decodeval(v) - e = self.int2expr(v) - if not e: - return False - self.expr = e - return True - - def encode(self): - v = self.expr2int(self.expr) - if v is None: - return False - v = self.encodeval(v) - if v is False: - return False - if v > self.lmask: - return False - self.value = v - return True - - -class imm08_noarg(object): - int2expr = lambda self, x: m2_expr.ExprInt(x, 8) - - -class imm16_noarg(object): - int2expr = lambda self, x: m2_expr.ExprInt(x, 16) - - -class imm32_noarg(object): - int2expr = lambda self, x: m2_expr.ExprInt(x, 32) - - -class imm64_noarg(object): - int2expr = lambda self, x: m2_expr.ExprInt(x, 64) - - -class int32_noarg(imm_noarg): - intsize = 32 - intmask = (1 << intsize) - 1 - - def decode(self, v): - v = sign_ext(v, self.l, self.intsize) - v = self.decodeval(v) - self.expr = self.int2expr(v) - return True - - def encode(self): - if not isinstance(self.expr, m2_expr.ExprInt): - return False - v = int(self.expr) - if sign_ext(v & self.lmask, self.l, self.intsize) != v: - return False - v = self.encodeval(v & self.lmask) - if v is False: - return False - self.value = v & self.lmask - return True - -class bs8(bs): - prio = default_prio - - def __init__(self, v, cls=None, fname=None, **kargs): - super(bs8, self).__init__(int2bin(v, 8), 8, - cls=cls, 
fname=fname, **kargs) - - - - -def swap_uint(size, i): - if size == 8: - return i & 0xff - elif size == 16: - return struct.unpack('H', i & 0xffff))[0] - elif size == 32: - return struct.unpack('I', i & 0xffffffff))[0] - elif size == 64: - return struct.unpack('Q', i & 0xffffffffffffffff))[0] - raise ValueError('unknown int len %r' % size) - - -def swap_sint(size, i): - if size == 8: - return i - elif size == 16: - return struct.unpack('H', i & 0xffff))[0] - elif size == 32: - return struct.unpack('I', i & 0xffffffff))[0] - elif size == 64: - return struct.unpack('Q', i & 0xffffffffffffffff))[0] - raise ValueError('unknown int len %r' % size) - - -def sign_ext(v, s_in, s_out): - assert(s_in <= s_out) - v &= (1 << s_in) - 1 - sign_in = v & (1 << (s_in - 1)) - if not sign_in: - return v - m = (1 << (s_out)) - 1 - m ^= (1 << s_in) - 1 - v |= m - return v diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py deleted file mode 100644 index 94c96f7e..00000000 --- a/miasm2/core/ctypesmngr.py +++ /dev/null @@ -1,771 +0,0 @@ -import re - -from pycparser import c_parser, c_ast - -RE_HASH_CMT = re.compile(r'^#\s*\d+.*$', flags=re.MULTILINE) - -# Ref: ISO/IEC 9899:TC2 -# http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf - - -def c_to_ast(parser, c_str): - """Transform a @c_str into a C ast - Note: will ignore lines containing code refs ie: - # 23 "miasm.h" - - @parser: pycparser instance - @c_str: c string - """ - - new_str = re.sub(RE_HASH_CMT, "", c_str) - return parser.parse(new_str, filename='') - - -class CTypeBase(object): - """Object to represent the 3 forms of C type: - * object types - * function types - * incomplete types - """ - - def __init__(self): - self.__repr = str(self) - self.__hash = hash(self.__repr) - - @property - def _typerepr(self): - return self.__repr - - def __eq__(self, other): - raise NotImplementedError("Abstract method") - - def __ne__(self, other): - return not self.__eq__(other) - - def eq_base(self, other): - """Trivial 
common equality test""" - return self.__class__ == other.__class__ - - def __hash__(self): - return self.__hash - - def __repr__(self): - return self._typerepr - - -class CTypeId(CTypeBase): - """C type id: - int - unsigned int - """ - - def __init__(self, *names): - # Type specifier order does not matter - # so the canonical form is ordered - self.names = tuple(sorted(names)) - super(CTypeId, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.names)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.names == other.names) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" % ', '.join(self.names) - - -class CTypeArray(CTypeBase): - """C type for array: - typedef int XXX[4]; - """ - - def __init__(self, target, size): - assert isinstance(target, CTypeBase) - self.target = target - self.size = size - super(CTypeArray, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.target, self.size)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.target == other.target and - self.size == other.size) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" % (self.size, str(self.target)) - - -class CTypePtr(CTypeBase): - """C type for pointer: - typedef int* XXX; - """ - - def __init__(self, target): - assert isinstance(target, CTypeBase) - self.target = target - super(CTypePtr, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.target)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.target == other.target) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" % str(self.target) - - -class CTypeStruct(CTypeBase): - """C type for structure""" - - def __init__(self, name, fields=None): - assert name is not None - self.name = name - if fields is None: - fields = () - for field_name, field in fields: - assert field_name 
is not None - assert isinstance(field, CTypeBase) - self.fields = tuple(fields) - super(CTypeStruct, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.name, self.fields)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.name == other.name and - self.fields == other.fields) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - out = [] - out.append("" % self.name) - for name, field in self.fields: - out.append("\t%-10s %s" % (name, field)) - return '\n'.join(out) - - -class CTypeUnion(CTypeBase): - """C type for union""" - - def __init__(self, name, fields=None): - assert name is not None - self.name = name - if fields is None: - fields = [] - for field_name, field in fields: - assert field_name is not None - assert isinstance(field, CTypeBase) - self.fields = tuple(fields) - super(CTypeUnion, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.name, self.fields)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.name == other.name and - self.fields == other.fields) - - def __str__(self): - out = [] - out.append("" % self.name) - for name, field in self.fields: - out.append("\t%-10s %s" % (name, field)) - return '\n'.join(out) - - -class CTypeEnum(CTypeBase): - """C type for enums""" - - def __init__(self, name): - self.name = name - super(CTypeEnum, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.name)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.name == other.name) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" % self.name - - -class CTypeFunc(CTypeBase): - """C type for enums""" - - def __init__(self, name, abi=None, type_ret=None, args=None): - if type_ret: - assert isinstance(type_ret, CTypeBase) - if args: - for arg_name, arg in args: - assert isinstance(arg, CTypeBase) - args = tuple(args) - else: - args = tuple() - self.name = name - 
self.abi = abi - self.type_ret = type_ret - self.args = args - super(CTypeFunc, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.name, self.abi, - self.type_ret, self.args)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.name == other.name and - self.abi == other.abi and - self.type_ret == other.type_ret and - self.args == other.args) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" % (self.type_ret, - self.abi, - self.name, - ", ".join(["%s %s" % (name, arg) for (name, arg) in self.args])) - - -class CTypeEllipsis(CTypeBase): - """C type for ellipsis argument (...)""" - - def __hash__(self): - return hash((self.__class__)) - - def __eq__(self, other): - return self.eq_base(other) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" - - -class CTypeSizeof(CTypeBase): - """C type for sizeof""" - - def __init__(self, target): - self.target = target - super(CTypeSizeof, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.target)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.target == other.target) - - def __ne__(self, other): - return not self.__eq__(other) - - def __str__(self): - return "" % self.target - - -class CTypeOp(CTypeBase): - """C type for operator (+ * ...)""" - - def __init__(self, operator, *args): - self.operator = operator - self.args = tuple(args) - super(CTypeOp, self).__init__() - - def __hash__(self): - return hash((self.__class__, self.operator, self.args)) - - def __eq__(self, other): - return (self.eq_base(other) and - self.operator == other.operator and - self.args == other.args) - - def __str__(self): - return "" % (self.operator, - ', '.join([str(arg) for arg in self.args])) - - -class FuncNameIdentifier(c_ast.NodeVisitor): - """Visit an c_ast to find IdentifierType""" - - def __init__(self): - super(FuncNameIdentifier, self).__init__() - self.node_name 
= None - - def visit_TypeDecl(self, node): - """Retrieve the name in a function declaration: - Only one IdentifierType is present""" - self.node_name = node - - -class CAstTypes(object): - """Store all defined C types and typedefs""" - INTERNAL_PREFIX = "__GENTYPE__" - ANONYMOUS_PREFIX = "__ANONYMOUS__" - - def __init__(self, knowntypes=None, knowntypedefs=None): - if knowntypes is None: - knowntypes = {} - if knowntypedefs is None: - knowntypedefs = {} - - self._types = dict(knowntypes) - self._typedefs = dict(knowntypedefs) - self.cpt = 0 - self.loc_to_decl_info = {} - self.parser = c_parser.CParser() - self._cpt_decl = 0 - - - self.ast_to_typeid_rules = { - c_ast.Struct: self.ast_to_typeid_struct, - c_ast.Union: self.ast_to_typeid_union, - c_ast.IdentifierType: self.ast_to_typeid_identifiertype, - c_ast.TypeDecl: self.ast_to_typeid_typedecl, - c_ast.Decl: self.ast_to_typeid_decl, - c_ast.Typename: self.ast_to_typeid_typename, - c_ast.FuncDecl: self.ast_to_typeid_funcdecl, - c_ast.Enum: self.ast_to_typeid_enum, - c_ast.PtrDecl: self.ast_to_typeid_ptrdecl, - c_ast.EllipsisParam: self.ast_to_typeid_ellipsisparam, - c_ast.ArrayDecl: self.ast_to_typeid_arraydecl, - } - - self.ast_parse_rules = { - c_ast.Struct: self.ast_parse_struct, - c_ast.Union: self.ast_parse_union, - c_ast.Typedef: self.ast_parse_typedef, - c_ast.TypeDecl: self.ast_parse_typedecl, - c_ast.IdentifierType: self.ast_parse_identifiertype, - c_ast.Decl: self.ast_parse_decl, - c_ast.PtrDecl: self.ast_parse_ptrdecl, - c_ast.Enum: self.ast_parse_enum, - c_ast.ArrayDecl: self.ast_parse_arraydecl, - c_ast.FuncDecl: self.ast_parse_funcdecl, - c_ast.FuncDef: self.ast_parse_funcdef, - c_ast.Pragma: self.ast_parse_pragma, - } - - def gen_uniq_name(self): - """Generate uniq name for unnamed strucs/union""" - cpt = self.cpt - self.cpt += 1 - return self.INTERNAL_PREFIX + "%d" % cpt - - def gen_anon_name(self): - """Generate name for anonymous strucs/union""" - cpt = self.cpt - self.cpt += 1 - return 
self.ANONYMOUS_PREFIX + "%d" % cpt - - def is_generated_name(self, name): - """Return True if the name is internal""" - return name.startswith(self.INTERNAL_PREFIX) - - def is_anonymous_name(self, name): - """Return True if the name is anonymous""" - return name.startswith(self.ANONYMOUS_PREFIX) - - def add_type(self, type_id, type_obj): - """Add new C type - @type_id: Type descriptor (CTypeBase instance) - @type_obj: Obj* instance""" - assert isinstance(type_id, CTypeBase) - if type_id in self._types: - assert self._types[type_id] == type_obj - else: - self._types[type_id] = type_obj - - def add_typedef(self, type_new, type_src): - """Add new typedef - @type_new: CTypeBase instance of the new type name - @type_src: CTypeBase instance of the target type""" - assert isinstance(type_src, CTypeBase) - self._typedefs[type_new] = type_src - - def get_type(self, type_id): - """Get ObjC corresponding to the @type_id - @type_id: Type descriptor (CTypeBase instance) - """ - assert isinstance(type_id, CTypeBase) - if isinstance(type_id, CTypePtr): - subobj = self.get_type(type_id.target) - return CTypePtr(subobj) - if type_id in self._types: - return self._types[type_id] - elif type_id in self._typedefs: - return self.get_type(self._typedefs[type_id]) - return type_id - - def is_known_type(self, type_id): - """Return true if @type_id is known - @type_id: Type descriptor (CTypeBase instance) - """ - if isinstance(type_id, CTypePtr): - return self.is_known_type(type_id.target) - if type_id in self._types: - return True - if type_id in self._typedefs: - return self.is_known_type(self._typedefs[type_id]) - return False - - def add_c_decl_from_ast(self, ast): - """ - Adds types from a C ast - @ast: C ast - """ - self.ast_parse_declarations(ast) - - - def digest_decl(self, c_str): - - char_id = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" - - - # Seek deck - index_decl = [] - index = 0 - for decl in ['__cdecl__', '__stdcall__']: - index = 0 - while True: - 
index = c_str.find(decl, index) - if index == -1: - break - decl_off = index - decl_len = len(decl) - - index = index+len(decl) - while c_str[index] not in char_id: - index += 1 - - id_start = index - - while c_str[index] in char_id: - index += 1 - id_stop = index - - name = c_str[id_start:id_stop] - index_decl.append((decl_off, decl_len, id_start, id_stop, decl, )) - - index_decl.sort() - - # Remove decl - off = 0 - offsets = [] - for decl_off, decl_len, id_start, id_stop, decl in index_decl: - decl_off -= off - c_str = c_str[:decl_off] + c_str[decl_off+decl_len:] - off += decl_len - offsets.append((id_start-off, id_stop-off, decl)) - - index = 0 - lineno = 1 - - # Index to lineno, column - for id_start, id_stop, decl in offsets: - nbr = c_str.count('\n', index, id_start) - lineno += nbr - last_cr = c_str.rfind('\n', 0, id_start) - # column starts at 1 - column = id_start - last_cr - index = id_start - self.loc_to_decl_info[(lineno, column)] = decl - return c_str - - - def add_c_decl(self, c_str): - """ - Adds types from a C string types declaring - Note: will ignore lines containing code refs ie: - '# 23 "miasm.h"' - Returns the C ast - @c_str: C string containing C types declarations - """ - c_str = self.digest_decl(c_str) - - ast = c_to_ast(self.parser, c_str) - self.add_c_decl_from_ast(ast) - - return ast - - def ast_eval_int(self, ast): - """Eval a C ast object integer - - @ast: parsed pycparser.c_ast object - """ - - if isinstance(ast, c_ast.BinaryOp): - left = self.ast_eval_int(ast.left) - right = self.ast_eval_int(ast.right) - is_pure_int = (isinstance(left, int) and - isinstance(right, int)) - - if is_pure_int: - if ast.op == '*': - result = left * right - elif ast.op == '/': - assert left % right == 0 - result = left // right - elif ast.op == '+': - result = left + right - elif ast.op == '-': - result = left - right - elif ast.op == '<<': - result = left << right - elif ast.op == '>>': - result = left >> right - else: - raise NotImplementedError("Not 
implemented!") - else: - result = CTypeOp(ast.op, left, right) - - elif isinstance(ast, c_ast.UnaryOp): - if ast.op == 'sizeof' and isinstance(ast.expr, c_ast.Typename): - subobj = self.ast_to_typeid(ast.expr) - result = CTypeSizeof(subobj) - else: - raise NotImplementedError("Not implemented!") - - elif isinstance(ast, c_ast.Constant): - result = int(ast.value, 0) - elif isinstance(ast, c_ast.Cast): - # TODO: Can trunc integers? - result = self.ast_eval_int(ast.expr) - else: - raise NotImplementedError("Not implemented!") - return result - - def ast_to_typeid_struct(self, ast): - """Return the CTypeBase of an Struct ast""" - name = self.gen_uniq_name() if ast.name is None else ast.name - args = [] - if ast.decls: - for arg in ast.decls: - if arg.name is None: - arg_name = self.gen_anon_name() - else: - arg_name = arg.name - args.append((arg_name, self.ast_to_typeid(arg))) - decl = CTypeStruct(name, args) - return decl - - def ast_to_typeid_union(self, ast): - """Return the CTypeBase of an Union ast""" - name = self.gen_uniq_name() if ast.name is None else ast.name - args = [] - if ast.decls: - for arg in ast.decls: - if arg.name is None: - arg_name = self.gen_anon_name() - else: - arg_name = arg.name - args.append((arg_name, self.ast_to_typeid(arg))) - decl = CTypeUnion(name, args) - return decl - - def ast_to_typeid_identifiertype(self, ast): - """Return the CTypeBase of an IdentifierType ast""" - return CTypeId(*ast.names) - - def ast_to_typeid_typedecl(self, ast): - """Return the CTypeBase of a TypeDecl ast""" - return self.ast_to_typeid(ast.type) - - def ast_to_typeid_decl(self, ast): - """Return the CTypeBase of a Decl ast""" - return self.ast_to_typeid(ast.type) - - def ast_to_typeid_typename(self, ast): - """Return the CTypeBase of a TypeName ast""" - return self.ast_to_typeid(ast.type) - - def get_funcname(self, ast): - """Return the name of a function declaration ast""" - funcnameid = FuncNameIdentifier() - funcnameid.visit(ast) - node_name = 
funcnameid.node_name - if node_name.coord is not None: - lineno, column = node_name.coord.line, node_name.coord.column - decl_info = self.loc_to_decl_info.get((lineno, column), None) - else: - decl_info = None - return node_name.declname, decl_info - - def ast_to_typeid_funcdecl(self, ast): - """Return the CTypeBase of an FuncDecl ast""" - type_ret = self.ast_to_typeid(ast.type) - name, decl_info = self.get_funcname(ast.type) - if ast.args: - args = [] - for arg in ast.args.params: - typeid = self.ast_to_typeid(arg) - if isinstance(typeid, CTypeEllipsis): - arg_name = None - else: - arg_name = arg.name - args.append((arg_name, typeid)) - else: - args = [] - - obj = CTypeFunc(name, decl_info, type_ret, args) - decl = CTypeFunc(name) - if not self.is_known_type(decl): - self.add_type(decl, obj) - return obj - - def ast_to_typeid_enum(self, ast): - """Return the CTypeBase of an Enum ast""" - name = self.gen_uniq_name() if ast.name is None else ast.name - return CTypeEnum(name) - - def ast_to_typeid_ptrdecl(self, ast): - """Return the CTypeBase of a PtrDecl ast""" - return CTypePtr(self.ast_to_typeid(ast.type)) - - def ast_to_typeid_ellipsisparam(self, _): - """Return the CTypeBase of an EllipsisParam ast""" - return CTypeEllipsis() - - def ast_to_typeid_arraydecl(self, ast): - """Return the CTypeBase of an ArrayDecl ast""" - target = self.ast_to_typeid(ast.type) - if ast.dim is None: - value = None - else: - value = self.ast_eval_int(ast.dim) - return CTypeArray(target, value) - - def ast_to_typeid(self, ast): - """Return the CTypeBase of the @ast - @ast: pycparser.c_ast instance""" - cls = ast.__class__ - if not cls in self.ast_to_typeid_rules: - raise NotImplementedError("Strange type %r" % ast) - return self.ast_to_typeid_rules[cls](ast) - - # Ast parse type declarators - - def ast_parse_decl(self, ast): - """Parse ast Decl""" - return self.ast_parse_declaration(ast.type) - - def ast_parse_typedecl(self, ast): - """Parse ast Typedecl""" - return 
self.ast_parse_declaration(ast.type) - - def ast_parse_struct(self, ast): - """Parse ast Struct""" - obj = self.ast_to_typeid(ast) - if ast.decls and ast.name is not None: - # Add struct to types if named - decl = CTypeStruct(ast.name) - if not self.is_known_type(decl): - self.add_type(decl, obj) - return obj - - def ast_parse_union(self, ast): - """Parse ast Union""" - obj = self.ast_to_typeid(ast) - if ast.decls and ast.name is not None: - # Add union to types if named - decl = CTypeUnion(ast.name) - if not self.is_known_type(decl): - self.add_type(decl, obj) - return obj - - def ast_parse_typedef(self, ast): - """Parse ast TypeDef""" - decl = CTypeId(ast.name) - obj = self.ast_parse_declaration(ast.type) - if (isinstance(obj, (CTypeStruct, CTypeUnion)) and - self.is_generated_name(obj.name)): - # Add typedef name to default name - # for a question of clarity - obj.name += "__%s" % ast.name - self.add_typedef(decl, obj) - # Typedef does not return any object - return None - - def ast_parse_identifiertype(self, ast): - """Parse ast IdentifierType""" - return CTypeId(*ast.names) - - def ast_parse_ptrdecl(self, ast): - """Parse ast PtrDecl""" - return CTypePtr(self.ast_parse_declaration(ast.type)) - - def ast_parse_enum(self, ast): - """Parse ast Enum""" - return self.ast_to_typeid(ast) - - def ast_parse_arraydecl(self, ast): - """Parse ast ArrayDecl""" - return self.ast_to_typeid(ast) - - def ast_parse_funcdecl(self, ast): - """Parse ast FuncDecl""" - return self.ast_to_typeid(ast) - - def ast_parse_funcdef(self, ast): - """Parse ast FuncDef""" - return self.ast_to_typeid(ast.decl) - - def ast_parse_pragma(self, _): - """Prama does not return any object""" - return None - - def ast_parse_declaration(self, ast): - """Add one ast type declaration to the type manager - (packed style in type manager) - - @ast: parsed pycparser.c_ast object - """ - cls = ast.__class__ - if not cls in self.ast_parse_rules: - raise NotImplementedError("Strange declaration %r" % cls) - 
return self.ast_parse_rules[cls](ast) - - def ast_parse_declarations(self, ast): - """Add ast types declaration to the type manager - (packed style in type manager) - - @ast: parsed pycparser.c_ast object - """ - for ext in ast.ext: - ret = self.ast_parse_declaration(ext) - - def parse_c_type(self, c_str): - """Parse a C string representing a C type and return the associated - Miasm C object. - @c_str: C string of a C type - """ - - new_str = "%s __MIASM_INTERNAL_%s;" % (c_str, self._cpt_decl) - ret = self.parser.cparser.parse(input=new_str, lexer=self.parser.clex) - self._cpt_decl += 1 - return ret diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py deleted file mode 100644 index f585379b..00000000 --- a/miasm2/core/graph.py +++ /dev/null @@ -1,1017 +0,0 @@ -from collections import defaultdict, namedtuple - -from future.utils import viewitems, viewvalues -import re - - -class DiGraph(object): - - """Implementation of directed graph""" - - # Stand for a cell in a dot node rendering - DotCellDescription = namedtuple("DotCellDescription", - ["text", "attr"]) - - def __init__(self): - self._nodes = set() - self._edges = [] - # N -> Nodes N2 with a edge (N -> N2) - self._nodes_succ = {} - # N -> Nodes N2 with a edge (N2 -> N) - self._nodes_pred = {} - - def __repr__(self): - out = [] - for node in self._nodes: - out.append(str(node)) - for src, dst in self._edges: - out.append("%s -> %s" % (src, dst)) - return '\n'.join(out) - - def nodes(self): - return self._nodes - - def edges(self): - return self._edges - - def merge(self, graph): - """Merge the current graph with @graph - @graph: DiGraph instance - """ - for node in graph._nodes: - self.add_node(node) - for edge in graph._edges: - self.add_edge(*edge) - - def __add__(self, graph): - """Wrapper on `.merge`""" - self.merge(graph) - return self - - def copy(self): - """Copy the current graph instance""" - graph = self.__class__() - return graph + self - - def __eq__(self, graph): - if not isinstance(graph, 
self.__class__): - return False - if self._nodes != graph.nodes(): - return False - return sorted(self._edges) == sorted(graph.edges()) - - def __ne__(self, other): - return not self.__eq__(other) - - def add_node(self, node): - """Add the node @node to the graph. - If the node was already present, return False. - Otherwise, return True - """ - if node in self._nodes: - return False - self._nodes.add(node) - self._nodes_succ[node] = [] - self._nodes_pred[node] = [] - return True - - def del_node(self, node): - """Delete the @node of the graph; Also delete every edge to/from this - @node""" - - if node in self._nodes: - self._nodes.remove(node) - for pred in self.predecessors(node): - self.del_edge(pred, node) - for succ in self.successors(node): - self.del_edge(node, succ) - - def add_edge(self, src, dst): - if not src in self._nodes: - self.add_node(src) - if not dst in self._nodes: - self.add_node(dst) - self._edges.append((src, dst)) - self._nodes_succ[src].append(dst) - self._nodes_pred[dst].append(src) - - def add_uniq_edge(self, src, dst): - """Add an edge from @src to @dst if it doesn't already exist""" - if (src not in self._nodes_succ or - dst not in self._nodes_succ[src]): - self.add_edge(src, dst) - - def del_edge(self, src, dst): - self._edges.remove((src, dst)) - self._nodes_succ[src].remove(dst) - self._nodes_pred[dst].remove(src) - - def discard_edge(self, src, dst): - """Remove edge between @src and @dst if it exits""" - if (src, dst) in self._edges: - self.del_edge(src, dst) - - def predecessors_iter(self, node): - if not node in self._nodes_pred: - return - for n_pred in self._nodes_pred[node]: - yield n_pred - - def predecessors(self, node): - return [x for x in self.predecessors_iter(node)] - - def successors_iter(self, node): - if not node in self._nodes_succ: - return - for n_suc in self._nodes_succ[node]: - yield n_suc - - def successors(self, node): - return [x for x in self.successors_iter(node)] - - def leaves_iter(self): - for node in 
self._nodes: - if not self._nodes_succ[node]: - yield node - - def leaves(self): - return [x for x in self.leaves_iter()] - - def heads_iter(self): - for node in self._nodes: - if not self._nodes_pred[node]: - yield node - - def heads(self): - return [x for x in self.heads_iter()] - - def find_path(self, src, dst, cycles_count=0, done=None): - """ - Searches for paths from @src to @dst - @src: loc_key of basic block from which it should start - @dst: loc_key of basic block where it should stop - @cycles_count: maximum number of times a basic block can be processed - @done: dictionary of already processed loc_keys, it's value is number of times it was processed - @out: list of paths from @src to @dst - """ - if done is None: - done = {} - if dst in done and done[dst] > cycles_count: - return [[]] - if src == dst: - return [[src]] - out = [] - for node in self.predecessors(dst): - done_n = dict(done) - done_n[dst] = done_n.get(dst, 0) + 1 - for path in self.find_path(src, node, cycles_count, done_n): - if path and path[0] == src: - out.append(path + [dst]) - return out - - def find_path_from_src(self, src, dst, cycles_count=0, done=None): - """ - This function does the same as function find_path. - But it searches the paths from src to dst, not vice versa like find_path. - This approach might be more efficient in some cases. 
- @src: loc_key of basic block from which it should start - @dst: loc_key of basic block where it should stop - @cycles_count: maximum number of times a basic block can be processed - @done: dictionary of already processed loc_keys, it's value is number of times it was processed - @out: list of paths from @src to @dst - """ - - if done is None: - done = {} - if src == dst: - return [[src]] - if src in done and done[src] > cycles_count: - return [[]] - out = [] - for node in self.successors(src): - done_n = dict(done) - done_n[src] = done_n.get(src, 0) + 1 - for path in self.find_path_from_src(node, dst, cycles_count, done_n): - if path and path[len(path)-1] == dst: - out.append([src] + path) - return out - - def nodeid(self, node): - """ - Returns uniq id for a @node - @node: a node of the graph - """ - return hash(node) & 0xFFFFFFFFFFFFFFFF - - def node2lines(self, node): - """ - Returns an iterator on cells of the dot @node. - A DotCellDescription or a list of DotCellDescription are accepted - @node: a node of the graph - """ - yield self.DotCellDescription(text=str(node), attr={}) - - def node_attr(self, node): - """ - Returns a dictionary of the @node's attributes - @node: a node of the graph - """ - return {} - - def edge_attr(self, src, dst): - """ - Return a dictionary of attributes for the edge between @src and @dst - @src: the source node of the edge - @dst: the destination node of the edge - """ - return {} - - @staticmethod - def _fix_chars(token): - return "&#%04d;" % ord(token.group()) - - @staticmethod - def _attr2str(default_attr, attr): - return ' '.join( - '%s="%s"' % (name, value) - for name, value in - viewitems(dict(default_attr, - **attr)) - ) - - def dot(self): - """Render dot graph with HTML""" - - escape_chars = re.compile('[' + re.escape('{}') + '&|<>' + ']') - td_attr = {'align': 'left'} - nodes_attr = {'shape': 'Mrecord', - 'fontname': 'Courier New'} - - out = ["digraph asm_graph {"] - - # Generate basic nodes - out_nodes = [] - for node 
in self.nodes(): - node_id = self.nodeid(node) - out_node = '%s [\n' % node_id - out_node += self._attr2str(nodes_attr, self.node_attr(node)) - out_node += 'label =<' - - node_html_lines = [] - - for lineDesc in self.node2lines(node): - out_render = "" - if isinstance(lineDesc, self.DotCellDescription): - lineDesc = [lineDesc] - for col in lineDesc: - out_render += "" % ( - self._attr2str(td_attr, col.attr), - escape_chars.sub(self._fix_chars, str(col.text))) - node_html_lines.append(out_render) - - node_html_lines = ('' + - ('').join(node_html_lines) + - '') - - out_node += node_html_lines + "
%s
> ];" - out_nodes.append(out_node) - - out += out_nodes - - # Generate links - for src, dst in self.edges(): - attrs = self.edge_attr(src, dst) - - attrs = ' '.join( - '%s="%s"' % (name, value) - for name, value in viewitems(attrs) - ) - - out.append('%s -> %s' % (self.nodeid(src), self.nodeid(dst)) + - '[' + attrs + '];') - - out.append("}") - return '\n'.join(out) - - @staticmethod - def _reachable_nodes(head, next_cb): - """Generic algorithm to compute all nodes reachable from/to node - @head""" - - todo = set([head]) - reachable = set() - while todo: - node = todo.pop() - if node in reachable: - continue - reachable.add(node) - yield node - for next_node in next_cb(node): - todo.add(next_node) - - def predecessors_stop_node_iter(self, node, head): - if node == head: - return - for next_node in self.predecessors_iter(node): - yield next_node - - def reachable_sons(self, head): - """Compute all nodes reachable from node @head. Each son is an - immediate successor of an arbitrary, already yielded son of @head""" - return self._reachable_nodes(head, self.successors_iter) - - def reachable_parents(self, leaf): - """Compute all parents of node @leaf. Each parent is an immediate - predecessor of an arbitrary, already yielded parent of @leaf""" - return self._reachable_nodes(leaf, self.predecessors_iter) - - def reachable_parents_stop_node(self, leaf, head): - """Compute all parents of node @leaf. Each parent is an immediate - predecessor of an arbitrary, already yielded parent of @leaf. - Do not compute reachables past @head node""" - return self._reachable_nodes( - leaf, - lambda node_cur: self.predecessors_stop_node_iter( - node_cur, head - ) - ) - - - @staticmethod - def _compute_generic_dominators(head, reachable_cb, prev_cb, next_cb): - """Generic algorithm to compute either the dominators or postdominators - of the graph. 
- @head: the head/leaf of the graph - @reachable_cb: sons/parents of the head/leaf - @prev_cb: return predecessors/successors of a node - @next_cb: return successors/predecessors of a node - """ - - nodes = set(reachable_cb(head)) - dominators = {} - for node in nodes: - dominators[node] = set(nodes) - - dominators[head] = set([head]) - todo = set(nodes) - - while todo: - node = todo.pop() - - # Heads state must not be changed - if node == head: - continue - - # Compute intersection of all predecessors'dominators - new_dom = None - for pred in prev_cb(node): - if not pred in nodes: - continue - if new_dom is None: - new_dom = set(dominators[pred]) - new_dom.intersection_update(dominators[pred]) - - # We are not a head to we have at least one dominator - assert(new_dom is not None) - - new_dom.update(set([node])) - - # If intersection has changed, add sons to the todo list - if new_dom == dominators[node]: - continue - - dominators[node] = new_dom - for succ in next_cb(node): - todo.add(succ) - return dominators - - def compute_dominators(self, head): - """Compute the dominators of the graph""" - return self._compute_generic_dominators(head, - self.reachable_sons, - self.predecessors_iter, - self.successors_iter) - - def compute_postdominators(self, leaf): - """Compute the postdominators of the graph""" - return self._compute_generic_dominators(leaf, - self.reachable_parents, - self.successors_iter, - self.predecessors_iter) - - - - - def compute_dominator_tree(self, head): - """ - Computes the dominator tree of a graph - :param head: head of graph - :return: DiGraph - """ - idoms = self.compute_immediate_dominators(head) - dominator_tree = DiGraph() - for node in idoms: - dominator_tree.add_edge(idoms[node], node) - - return dominator_tree - - @staticmethod - def _walk_generic_dominator(node, gen_dominators, succ_cb): - """Generic algorithm to return an iterator of the ordered list of - @node's dominators/post_dominator. 
- - The function doesn't return the self reference in dominators. - @node: The start node - @gen_dominators: The dictionary containing at least node's - dominators/post_dominators - @succ_cb: return predecessors/successors of a node - - """ - # Init - done = set() - if node not in gen_dominators: - # We are in a branch which doesn't reach head - return - node_gen_dominators = set(gen_dominators[node]) - todo = set([node]) - - # Avoid working on itself - node_gen_dominators.remove(node) - - # For each level - while node_gen_dominators: - new_node = None - - # Worklist pattern - while todo: - node = todo.pop() - if node in done: - continue - if node in node_gen_dominators: - new_node = node - break - - # Avoid loops - done.add(node) - - # Look for the next level - for pred in succ_cb(node): - todo.add(pred) - - # Return the node; it's the next starting point - assert(new_node is not None) - yield new_node - node_gen_dominators.remove(new_node) - todo = set([new_node]) - - def walk_dominators(self, node, dominators): - """Return an iterator of the ordered list of @node's dominators - The function doesn't return the self reference in dominators. - @node: The start node - @dominators: The dictionary containing at least node's dominators - """ - return self._walk_generic_dominator(node, - dominators, - self.predecessors_iter) - - def walk_postdominators(self, node, postdominators): - """Return an iterator of the ordered list of @node's postdominators - The function doesn't return the self reference in postdominators. 
- @node: The start node - @postdominators: The dictionary containing at least node's - postdominators - - """ - return self._walk_generic_dominator(node, - postdominators, - self.successors_iter) - - def compute_immediate_dominators(self, head): - """Compute the immediate dominators of the graph""" - dominators = self.compute_dominators(head) - idoms = {} - - for node in dominators: - for predecessor in self.walk_dominators(node, dominators): - if predecessor in dominators[node] and node != predecessor: - idoms[node] = predecessor - break - return idoms - - def compute_immediate_postdominators(self,tail): - """Compute the immediate postdominators of the graph""" - postdominators = self.compute_postdominators(tail) - ipdoms = {} - - for node in postdominators: - for successor in self.walk_postdominators(node, postdominators): - if successor in postdominators[node] and node != successor: - ipdoms[node] = successor - break - return ipdoms - - def compute_dominance_frontier(self, head): - """ - Compute the dominance frontier of the graph - - Source: Cooper, Keith D., Timothy J. Harvey, and Ken Kennedy. - "A simple, fast dominance algorithm." - Software Practice & Experience 4 (2001), p. 9 - """ - idoms = self.compute_immediate_dominators(head) - frontier = {} - - for node in idoms: - if len(self._nodes_pred[node]) >= 2: - for predecessor in self.predecessors_iter(node): - runner = predecessor - if runner not in idoms: - continue - while runner != idoms[node]: - if runner not in frontier: - frontier[runner] = set() - - frontier[runner].add(node) - runner = idoms[runner] - return frontier - - def _walk_generic_first(self, head, flag, succ_cb): - """ - Generic algorithm to compute breadth or depth first search - for a node. 
- @head: the head of the graph - @flag: denotes if @todo is used as queue or stack - @succ_cb: returns a node's predecessors/successors - :return: next node - """ - todo = [head] - done = set() - - while todo: - node = todo.pop(flag) - if node in done: - continue - done.add(node) - - for succ in succ_cb(node): - todo.append(succ) - - yield node - - def walk_breadth_first_forward(self, head): - """Performs a breadth first search on the graph from @head""" - return self._walk_generic_first(head, 0, self.successors_iter) - - def walk_depth_first_forward(self, head): - """Performs a depth first search on the graph from @head""" - return self._walk_generic_first(head, -1, self.successors_iter) - - def walk_breadth_first_backward(self, head): - """Performs a breadth first search on the reversed graph from @head""" - return self._walk_generic_first(head, 0, self.predecessors_iter) - - def walk_depth_first_backward(self, head): - """Performs a depth first search on the reversed graph from @head""" - return self._walk_generic_first(head, -1, self.predecessors_iter) - - def has_loop(self): - """Return True if the graph contains at least a cycle""" - todo = list(self.nodes()) - # tested nodes - done = set() - # current DFS nodes - current = set() - while todo: - node = todo.pop() - if node in done: - continue - - if node in current: - # DFS branch end - for succ in self.successors_iter(node): - if succ in current: - return True - # A node cannot be in current AND in done - current.remove(node) - done.add(node) - else: - # Launch DFS from node - todo.append(node) - current.add(node) - todo += self.successors(node) - - return False - - def compute_natural_loops(self, head): - """ - Computes all natural loops in the graph. - - Source: Aho, Alfred V., Lam, Monica S., Sethi, R. and Jeffrey Ullman. 
- "Compilers: Principles, Techniques, & Tools, Second Edition" - Pearson/Addison Wesley (2007), Chapter 9.6.6 - :param head: head of the graph - :return: yield a tuple of the form (back edge, loop body) - """ - for a, b in self.compute_back_edges(head): - body = self._compute_natural_loop_body(b, a) - yield ((a, b), body) - - def compute_back_edges(self, head): - """ - Computes all back edges from a node to a - dominator in the graph. - :param head: head of graph - :return: yield a back edge - """ - dominators = self.compute_dominators(head) - - # traverse graph - for node in self.walk_depth_first_forward(head): - for successor in self.successors_iter(node): - # check for a back edge to a dominator - if successor in dominators[node]: - edge = (node, successor) - yield edge - - def _compute_natural_loop_body(self, head, leaf): - """ - Computes the body of a natural loop by a depth-first - search on the reversed control flow graph. - :param head: leaf of the loop - :param leaf: header of the loop - :return: set containing loop body - """ - todo = [leaf] - done = {head} - - while todo: - node = todo.pop() - if node in done: - continue - done.add(node) - - for predecessor in self.predecessors_iter(node): - todo.append(predecessor) - return done - - def compute_strongly_connected_components(self): - """ - Partitions the graph into strongly connected components. - - Iterative implementation of Gabow's path-based SCC algorithm. - Source: Gabow, Harold N. - "Path-based depth-first search for strong and biconnected components." - Information Processing Letters 74.3 (2000), pp. 
109--110 - - The iterative implementation is inspired by Mark Dickinson's - code: - http://code.activestate.com/recipes/ - 578507-strongly-connected-components-of-a-directed-graph/ - :return: yield a strongly connected component - """ - stack = [] - boundaries = [] - counter = len(self.nodes()) - - # init index with 0 - index = {v: 0 for v in self.nodes()} - - # state machine for worklist algorithm - VISIT, HANDLE_RECURSION, MERGE = 0, 1, 2 - NodeState = namedtuple('NodeState', ['state', 'node']) - - for node in self.nodes(): - # next node if node was already visited - if index[node]: - continue - - todo = [NodeState(VISIT, node)] - done = set() - - while todo: - current = todo.pop() - - if current.node in done: - continue - - # node is unvisited - if current.state == VISIT: - stack.append(current.node) - index[current.node] = len(stack) - boundaries.append(index[current.node]) - - todo.append(NodeState(MERGE, current.node)) - # follow successors - for successor in self.successors_iter(current.node): - todo.append(NodeState(HANDLE_RECURSION, successor)) - - # iterative handling of recursion algorithm - elif current.state == HANDLE_RECURSION: - # visit unvisited successor - if index[current.node] == 0: - todo.append(NodeState(VISIT, current.node)) - else: - # contract cycle if necessary - while index[current.node] < boundaries[-1]: - boundaries.pop() - - # merge strongly connected component - else: - if index[current.node] == boundaries[-1]: - boundaries.pop() - counter += 1 - scc = set() - - while index[current.node] <= len(stack): - popped = stack.pop() - index[popped] = counter - scc.add(popped) - - done.add(current.node) - - yield scc - - -class DiGraphSimplifier(object): - - """Wrapper on graph simplification passes. - - Instance handle passes lists. 
- """ - - def __init__(self): - self.passes = [] - - def enable_passes(self, passes): - """Add @passes to passes to applied - @passes: sequence of function (DiGraphSimplifier, DiGraph) -> None - """ - self.passes += passes - - def apply_simp(self, graph): - """Apply enabled simplifications on graph @graph - @graph: DiGraph instance - """ - while True: - new_graph = graph.copy() - for simp_func in self.passes: - simp_func(self, new_graph) - - if new_graph == graph: - break - graph = new_graph - return new_graph - - def __call__(self, graph): - """Wrapper on 'apply_simp'""" - return self.apply_simp(graph) - - -class MatchGraphJoker(object): - - """MatchGraphJoker are joker nodes of MatchGraph, that is to say nodes which - stand for any node. Restrictions can be added to jokers. - - If j1, j2 and j3 are MatchGraphJoker, one can quickly build a matcher for - the pattern: - | - +----v----+ - | (j1) | - +----+----+ - | - +----v----+ - | (j2) |<---+ - +----+--+-+ | - | +------+ - +----v----+ - | (j3) | - +----+----+ - | - v - Using: - >>> matcher = j1 >> j2 >> j3 - >>> matcher += j2 >> j2 - Or: - >>> matcher = j1 >> j2 >> j2 >> j3 - - """ - - def __init__(self, restrict_in=True, restrict_out=True, filt=None, - name=None): - """Instantiate a MatchGraphJoker, with restrictions - @restrict_in: (optional) if set, the number of predecessors of the - matched node must be the same than the joker node in the - associated MatchGraph - @restrict_out: (optional) counterpart of @restrict_in for successors - @filt: (optional) function(graph, node) -> boolean for filtering - candidate node - @name: (optional) helper for displaying the current joker - """ - if filt is None: - filt = lambda graph, node: True - self.filt = filt - if name is None: - name = str(id(self)) - self._name = name - self.restrict_in = restrict_in - self.restrict_out = restrict_out - - def __rshift__(self, joker): - """Helper for describing a MatchGraph from @joker - J1 >> J2 stands for an edge going to J2 from J1 
- @joker: MatchGraphJoker instance - """ - assert isinstance(joker, MatchGraphJoker) - - graph = MatchGraph() - graph.add_node(self) - graph.add_node(joker) - graph.add_edge(self, joker) - - # For future "A >> B" idiom construction - graph._last_node = joker - - return graph - - def __str__(self): - info = [] - if not self.restrict_in: - info.append("In:*") - if not self.restrict_out: - info.append("Out:*") - return "Joker %s %s" % (self._name, - "(%s)" % " ".join(info) if info else "") - - -class MatchGraph(DiGraph): - - """MatchGraph intends to be the counterpart of match_expr, but for DiGraph - - This class provides API to match a given DiGraph pattern, with addidionnal - restrictions. - The implemented algorithm is a naive approach. - - The recommended way to instantiate a MatchGraph is the use of - MatchGraphJoker. - """ - - def __init__(self, *args, **kwargs): - super(MatchGraph, self).__init__(*args, **kwargs) - # Construction helper - self._last_node = None - - # Construction helpers - def __rshift__(self, joker): - """Construction helper, adding @joker to the current graph as a son of - _last_node - @joker: MatchGraphJoker instance""" - assert isinstance(joker, MatchGraphJoker) - assert isinstance(self._last_node, MatchGraphJoker) - - self.add_node(joker) - self.add_edge(self._last_node, joker) - self._last_node = joker - return self - - def __add__(self, graph): - """Construction helper, merging @graph with self - @graph: MatchGraph instance - """ - assert isinstance(graph, MatchGraph) - - # Reset helpers flag - self._last_node = None - graph._last_node = None - - # Merge graph into self - for node in graph.nodes(): - self.add_node(node) - for edge in graph.edges(): - self.add_edge(*edge) - - return self - - # Graph matching - def _check_node(self, candidate, expected, graph, partial_sol=None): - """Check if @candidate can stand for @expected in @graph, given @partial_sol - @candidate: @graph's node - @expected: MatchGraphJoker instance - @graph: DiGraph 
instance - @partial_sol: (optional) dictionary of MatchGraphJoker -> @graph's node - standing for a partial solution - """ - # Avoid having 2 different joker for the same node - if partial_sol and candidate in viewvalues(partial_sol): - return False - - # Check lambda filtering - if not expected.filt(graph, candidate): - return False - - # Check arity - # If filter_in/out, then arity must be the same - # Otherwise, arity of the candidate must be at least equal - if ((expected.restrict_in == True and - len(self.predecessors(expected)) != len(graph.predecessors(candidate))) or - (expected.restrict_in == False and - len(self.predecessors(expected)) > len(graph.predecessors(candidate)))): - return False - if ((expected.restrict_out == True and - len(self.successors(expected)) != len(graph.successors(candidate))) or - (expected.restrict_out == False and - len(self.successors(expected)) > len(graph.successors(candidate)))): - return False - - # Check edges with partial solution if any - if not partial_sol: - return True - for pred in self.predecessors(expected): - if (pred in partial_sol and - partial_sol[pred] not in graph.predecessors(candidate)): - return False - - for succ in self.successors(expected): - if (succ in partial_sol and - partial_sol[succ] not in graph.successors(candidate)): - return False - - # All checks OK - return True - - def _propagate_sol(self, node, partial_sol, graph, todo, propagator): - """ - Try to extend the current @partial_sol by propagating the solution using - @propagator on @node. 
- New solutions are added to @todo - """ - real_node = partial_sol[node] - for candidate in propagator(self, node): - # Edge already in the partial solution, skip it - if candidate in partial_sol: - continue - - # Check candidate - for candidate_real in propagator(graph, real_node): - if self._check_node(candidate_real, candidate, graph, - partial_sol): - temp_sol = partial_sol.copy() - temp_sol[candidate] = candidate_real - if temp_sol not in todo: - todo.append(temp_sol) - - @staticmethod - def _propagate_successors(graph, node): - """Propagate through @node successors in @graph""" - return graph.successors_iter(node) - - @staticmethod - def _propagate_predecessors(graph, node): - """Propagate through @node predecessors in @graph""" - return graph.predecessors_iter(node) - - def match(self, graph): - """Naive subgraph matching between graph and self. - Iterator on matching solution, as dictionary MatchGraphJoker -> @graph - @graph: DiGraph instance - In order to obtained correct and complete results, @graph must be - connected. 
- """ - # Partial solution: nodes corrects, edges between these nodes corrects - # A partial solution is a dictionary MatchGraphJoker -> @graph's node - todo = list() # Dictionnaries containing partial solution - done = list() # Already computed partial solutions - - # Elect first candidates - to_match = next(iter(self._nodes)) - for node in graph.nodes(): - if self._check_node(node, to_match, graph): - to_add = {to_match: node} - if to_add not in todo: - todo.append(to_add) - - while todo: - # When a partial_sol is computed, if more precise partial solutions - # are found, they will be added to 'todo' - # -> using last entry of todo first performs a "depth first" - # approach on solutions - # -> the algorithm may converge faster to a solution, a desired - # behavior while doing graph simplification (stopping after one - # sol) - partial_sol = todo.pop() - - # Avoid infinite loop and recurrent work - if partial_sol in done: - continue - done.append(partial_sol) - - # If all nodes are matching, this is a potential solution - if len(partial_sol) == len(self._nodes): - yield partial_sol - continue - - # Find node to tests using edges - for node in partial_sol: - self._propagate_sol(node, partial_sol, graph, todo, - MatchGraph._propagate_successors) - self._propagate_sol(node, partial_sol, graph, todo, - MatchGraph._propagate_predecessors) diff --git a/miasm2/core/interval.py b/miasm2/core/interval.py deleted file mode 100644 index 06dc546f..00000000 --- a/miasm2/core/interval.py +++ /dev/null @@ -1,259 +0,0 @@ -from __future__ import print_function - -INT_EQ = 0 # Equivalent -INT_B_IN_A = 1 # B in A -INT_A_IN_B = -1 # A in B -INT_DISJOIN = 2 # Disjoint -INT_JOIN = 3 # Overlap -INT_JOIN_AB = 4 # B starts at the end of A -INT_JOIN_BA = 5 # A starts at the end of B - - -def cmp_interval(inter1, inter2): - """Compare @inter1 and @inter2 and returns the associated INT_* case - @inter1, @inter2: interval instance - """ - if inter1 == inter2: - return INT_EQ - - 
inter1_start, inter1_stop = inter1 - inter2_start, inter2_stop = inter2 - result = INT_JOIN - if inter1_start <= inter2_start and inter1_stop >= inter2_stop: - result = INT_B_IN_A - if inter2_start <= inter1_start and inter2_stop >= inter1_stop: - result = INT_A_IN_B - if inter1_stop + 1 == inter2_start: - result = INT_JOIN_AB - if inter2_stop + 1 == inter1_start: - result = INT_JOIN_BA - if inter1_start > inter2_stop + 1 or inter2_start > inter1_stop + 1: - result = INT_DISJOIN - return result - - -class interval(object): - """Stands for intervals with integer bounds - - Offers common methods to work with interval""" - - def __init__(self, bounds=None): - """Instance an interval object - @bounds: (optional) list of (int, int) and/or interval instance - """ - if bounds is None: - bounds = [] - elif isinstance(bounds, interval): - bounds = bounds.intervals - self.is_cannon = False - self.intervals = bounds - self.cannon() - - def __iter__(self): - """Iterate on intervals""" - for inter in self.intervals: - yield inter - - @staticmethod - def cannon_list(tmp): - """ - Return a cannonizes list of intervals - @tmp: list of (int, int) - """ - tmp = sorted([x for x in tmp if x[0] <= x[1]]) - out = [] - if not tmp: - return out - out.append(tmp.pop()) - while tmp: - x = tmp.pop() - rez = cmp_interval(out[-1], x) - - if rez == INT_EQ: - continue - elif rez == INT_DISJOIN: - out.append(x) - elif rez == INT_B_IN_A: - continue - elif rez in [INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]: - u, v = x - while out and cmp_interval(out[-1], (u, v)) in [ - INT_JOIN, INT_JOIN_AB, INT_JOIN_BA, INT_A_IN_B]: - u = min(u, out[-1][0]) - v = max(v, out[-1][1]) - out.pop() - out.append((u, v)) - else: - raise ValueError('unknown state', rez) - return out[::-1] - - def cannon(self): - "Apply .cannon_list() on self contained intervals" - if self.is_cannon is True: - return - self.intervals = interval.cannon_list(self.intervals) - self.is_cannon = True - - def __repr__(self): - if 
self.intervals: - o = " U ".join(["[0x%X 0x%X]" % (x[0], x[1]) - for x in self.intervals]) - else: - o = "[]" - return o - - def __contains__(self, other): - if isinstance(other, interval): - for intervalB in other.intervals: - is_in = False - for intervalA in self.intervals: - if cmp_interval(intervalA, intervalB) in [INT_EQ, INT_B_IN_A]: - is_in = True - break - if not is_in: - return False - return True - else: - for intervalA in self.intervals: - if intervalA[0] <= other <= intervalA[1]: - return True - return False - - def __eq__(self, i): - return self.intervals == i.intervals - - def __ne__(self, other): - return not self.__eq__(other) - - def __add__(self, i): - if isinstance(i, interval): - i = i.intervals - i = interval(self.intervals + i) - return i - - def __sub__(self, v): - to_test = self.intervals[:] - i = -1 - to_del = v.intervals[:] - while i < len(to_test) - 1: - i += 1 - x = to_test[i] - if x[0] > x[1]: - del to_test[i] - i -= 1 - continue - - while to_del and to_del[0][1] < x[0]: - del to_del[0] - - for y in to_del: - if y[0] > x[1]: - break - rez = cmp_interval(x, y) - if rez == INT_DISJOIN: - continue - elif rez == INT_EQ: - del to_test[i] - i -= 1 - break - elif rez == INT_A_IN_B: - del to_test[i] - i -= 1 - break - elif rez == INT_B_IN_A: - del to_test[i] - i1 = (x[0], y[0] - 1) - i2 = (y[1] + 1, x[1]) - to_test[i:i] = [i1, i2] - i -= 1 - break - elif rez in [INT_JOIN_AB, INT_JOIN_BA]: - continue - elif rez == INT_JOIN: - del to_test[i] - if x[0] < y[0]: - to_test[i:i] = [(x[0], y[0] - 1)] - else: - to_test[i:i] = [(y[1] + 1, x[1])] - i -= 1 - break - else: - raise ValueError('unknown state', rez) - return interval(to_test) - - def __and__(self, v): - out = [] - for x in self.intervals: - if x[0] > x[1]: - continue - for y in v.intervals: - rez = cmp_interval(x, y) - - if rez == INT_DISJOIN: - continue - elif rez == INT_EQ: - out.append(x) - continue - elif rez == INT_A_IN_B: - out.append(x) - continue - elif rez == INT_B_IN_A: - 
out.append(y) - continue - elif rez == INT_JOIN_AB: - continue - elif rez == INT_JOIN_BA: - continue - elif rez == INT_JOIN: - if x[0] < y[0]: - out.append((y[0], x[1])) - else: - out.append((x[0], y[1])) - continue - else: - raise ValueError('unknown state', rez) - return interval(out) - - def hull(self): - "Return the first and the last bounds of intervals" - if not self.intervals: - return None, None - return self.intervals[0][0], self.intervals[-1][1] - - - @property - def empty(self): - """Return True iff the interval is empty""" - return not self.intervals - - def show(self, img_x=1350, img_y=20, dry_run=False): - """ - show image representing the interval - """ - try: - import Image - import ImageDraw - except ImportError: - print('cannot import python PIL imaging') - return - - img = Image.new('RGB', (img_x, img_y), (100, 100, 100)) - draw = ImageDraw.Draw(img) - i_min, i_max = self.hull() - - print(hex(i_min), hex(i_max)) - - addr2x = lambda addr: ((addr - i_min) * img_x) // (i_max - i_min) - for a, b in self.intervals: - draw.rectangle((addr2x(a), 0, addr2x(b), img_y), (200, 0, 0)) - - if dry_run is False: - img.show() - - @property - def length(self): - """ - Return the cumulated length of intervals - """ - # Do not use __len__ because we may return a value > 32 bits - return sum((stop - start + 1) for start, stop in self.intervals) diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py deleted file mode 100644 index 906a247a..00000000 --- a/miasm2/core/locationdb.py +++ /dev/null @@ -1,500 +0,0 @@ -import warnings -from builtins import int as int_types - -from functools import reduce -from future.utils import viewitems, viewvalues - -from miasm2.core.utils import printable, force_bytes -from miasm2.expression.expression import LocKey, ExprLoc -from miasm2.expression.modint import moduint, modint - - -def is_int(a): - return isinstance(a, (int_types, moduint, modint)) - - -class LocationDB(object): - """ - LocationDB is a "database" of 
information associated to location. - - An entry in a LocationDB is uniquely identified with a LocKey. - Additional information which can be associated with a LocKey are: - - an offset (uniq per LocationDB) - - several names (each are uniqs per LocationDB) - - As a schema: - loc_key 1 <-> 0..1 offset - 1 <-> 0..n name - - >>> loc_db = LocationDB() - # Add a location with no additional information - >>> loc_key1 = loc_db.add_location() - # Add a location with an offset - >>> loc_key2 = loc_db.add_location(offset=0x1234) - # Add a location with several names - >>> loc_key3 = loc_db.add_location(name="first_name") - >>> loc_db.add_location_name(loc_key3, "second_name") - # Associate an offset to an existing location - >>> loc_db.set_location_offset(loc_key3, 0x5678) - # Remove a name from an existing location - >>> loc_db.remove_location_name(loc_key3, "second_name") - - # Get back offset - >>> loc_db.get_location_offset(loc_key1) - None - >>> loc_db.get_location_offset(loc_key2) - 0x1234 - >>> loc_db.get_location_offset("first_name") - 0x5678 - - # Display a location - >>> loc_db.pretty_str(loc_key1) - loc_key_1 - >>> loc_db.pretty_str(loc_key2) - loc_1234 - >>> loc_db.pretty_str(loc_key3) - first_name - """ - - def __init__(self): - # Known LocKeys - self._loc_keys = set() - - # Association tables - self._loc_key_to_offset = {} - self._loc_key_to_names = {} - self._name_to_loc_key = {} - self._offset_to_loc_key = {} - - # Counter for new LocKey generation - self._loc_key_num = 0 - - def get_location_offset(self, loc_key): - """ - Return the offset of @loc_key if any, None otherwise. 
- @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - return self._loc_key_to_offset.get(loc_key) - - def get_location_names(self, loc_key): - """ - Return the frozenset of names associated to @loc_key - @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - return frozenset(self._loc_key_to_names.get(loc_key, set())) - - def get_name_location(self, name): - """ - Return the LocKey of @name if any, None otherwise. - @name: target name - """ - name = force_bytes(name) - return self._name_to_loc_key.get(name) - - def get_or_create_name_location(self, name): - """ - Return the LocKey of @name if any, create one otherwise. - @name: target name - """ - name = force_bytes(name) - loc_key = self._name_to_loc_key.get(name) - if loc_key is not None: - return loc_key - return self.add_location(name=name) - - def get_offset_location(self, offset): - """ - Return the LocKey of @offset if any, None otherwise. - @offset: target offset - """ - return self._offset_to_loc_key.get(offset) - - def get_or_create_offset_location(self, offset): - """ - Return the LocKey of @offset if any, create one otherwise. - @offset: target offset - """ - loc_key = self._offset_to_loc_key.get(offset) - if loc_key is not None: - return loc_key - return self.add_location(offset=offset) - - def get_name_offset(self, name): - """ - Return the offset of @name if any, None otherwise. 
- @name: target name - """ - name = force_bytes(name) - loc_key = self.get_name_location(name) - if loc_key is None: - return None - return self.get_location_offset(loc_key) - - def add_location_name(self, loc_key, name): - """Associate a name @name to a given @loc_key - @name: str instance - @loc_key: LocKey instance - """ - name = force_bytes(name) - assert loc_key in self._loc_keys - already_existing_loc = self._name_to_loc_key.get(name) - if already_existing_loc is not None and already_existing_loc != loc_key: - raise KeyError("%r is already associated to a different loc_key " - "(%r)" % (name, already_existing_loc)) - self._loc_key_to_names.setdefault(loc_key, set()).add(name) - self._name_to_loc_key[name] = loc_key - - def remove_location_name(self, loc_key, name): - """Disassociate a name @name from a given @loc_key - Fail if @name is not already associated to @loc_key - @name: str instance - @loc_key: LocKey instance - """ - assert loc_key in self._loc_keys - name = force_bytes(name) - already_existing_loc = self._name_to_loc_key.get(name) - if already_existing_loc is None: - raise KeyError("%r is not already associated" % name) - if already_existing_loc != loc_key: - raise KeyError("%r is already associated to a different loc_key " - "(%r)" % (name, already_existing_loc)) - del self._name_to_loc_key[name] - self._loc_key_to_names[loc_key].remove(name) - - def set_location_offset(self, loc_key, offset, force=False): - """Associate the offset @offset to an LocKey @loc_key - - If @force is set, override silently. 
Otherwise, if an offset is already - associated to @loc_key, an error will be raised - """ - assert loc_key in self._loc_keys - already_existing_loc = self.get_offset_location(offset) - if already_existing_loc is not None and already_existing_loc != loc_key: - raise KeyError("%r is already associated to a different loc_key " - "(%r)" % (offset, already_existing_loc)) - already_existing_off = self._loc_key_to_offset.get(loc_key) - if (already_existing_off is not None and - already_existing_off != offset): - if not force: - raise ValueError( - "%r already has an offset (0x%x). Use 'force=True'" - " for silent overriding" % ( - loc_key, already_existing_off - )) - else: - self.unset_location_offset(loc_key) - self._offset_to_loc_key[offset] = loc_key - self._loc_key_to_offset[loc_key] = offset - - def unset_location_offset(self, loc_key): - """Disassociate LocKey @loc_key's offset - - Fail if there is already no offset associate with it - @loc_key: LocKey - """ - assert loc_key in self._loc_keys - already_existing_off = self._loc_key_to_offset.get(loc_key) - if already_existing_off is None: - raise ValueError("%r already has no offset" % (loc_key)) - del self._offset_to_loc_key[already_existing_off] - del self._loc_key_to_offset[loc_key] - - def consistency_check(self): - """Ensure internal structures are consistent with each others""" - assert set(self._loc_key_to_names).issubset(self._loc_keys) - assert set(self._loc_key_to_offset).issubset(self._loc_keys) - assert self._loc_key_to_offset == {v: k for k, v in viewitems(self._offset_to_loc_key)} - assert reduce( - lambda x, y:x.union(y), - viewvalues(self._loc_key_to_names), - set(), - ) == set(self._name_to_loc_key) - for name, loc_key in viewitems(self._name_to_loc_key): - assert name in self._loc_key_to_names[loc_key] - - def find_free_name(self, name): - """ - If @name is not known in DB, return it - Else append an index to it corresponding to the next unknown name - - @name: string - """ - name = 
force_bytes(name) - if self.get_name_location(name) is None: - return name - i = 0 - while True: - new_name = "%s_%d" % (name, i) - if self.get_name_location(new_name) is None: - return new_name - i += 1 - - def add_location(self, name=None, offset=None, strict=True): - """Add a new location in the locationDB. Returns the corresponding LocKey. - If @name is set, also associate a name to this new location. - If @offset is set, also associate an offset to this new location. - - Strict mode (set by @strict, default): - If a location with @offset or @name already exists, an error will be - raised. - Otherwise: - If a location with @offset or @name already exists, the corresponding - LocKey may be updated and will be returned. - """ - - name = force_bytes(name) - # Deprecation handling - if is_int(name): - assert offset is None or offset == name - warnings.warn("Deprecated API: use 'add_location(offset=)' instead." - " An additional 'name=' can be provided to also " - "associate a name (there is no more default name)") - offset = name - name = None - - # Argument cleaning - offset_loc_key = None - if offset is not None: - offset = int(offset) - offset_loc_key = self.get_offset_location(offset) - - # Test for collisions - name_loc_key = None - if name is not None: - name_loc_key = self.get_name_location(name) - - if strict: - if name_loc_key is not None: - raise ValueError("An entry for %r already exists (%r), and " - "strict mode is enabled" % ( - name, name_loc_key - )) - if offset_loc_key is not None: - raise ValueError("An entry for 0x%x already exists (%r), and " - "strict mode is enabled" % ( - offset, offset_loc_key - )) - else: - # Non-strict mode - if name_loc_key is not None: - known_offset = self.get_offset_location(name_loc_key) - if known_offset is None: - if offset is not None: - self.set_location_offset(name_loc_key, offset) - elif known_offset != offset: - raise ValueError( - "Location with name '%s' already have an offset: 0x%x " - "(!= 0x%x)" % (name, 
offset, known_offset) - ) - # Name already known, same offset -> nothing to do - return name_loc_key - - elif offset_loc_key is not None: - if name is not None: - # Check for already known name are checked above - return self.add_location_name(offset_loc_key, name) - # Offset already known, no name specified - return offset_loc_key - - # No collision, this is a brand new location - loc_key = LocKey(self._loc_key_num) - self._loc_key_num += 1 - self._loc_keys.add(loc_key) - - if offset is not None: - assert offset not in self._offset_to_loc_key - self._offset_to_loc_key[offset] = loc_key - self._loc_key_to_offset[loc_key] = offset - - if name is not None: - self._name_to_loc_key[name] = loc_key - self._loc_key_to_names[loc_key] = set([name]) - - return loc_key - - def remove_location(self, loc_key): - """ - Delete the location corresponding to @loc_key - @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - if loc_key not in self._loc_keys: - raise KeyError("Unknown loc_key %r" % loc_key) - names = self._loc_key_to_names.pop(loc_key, []) - for name in names: - del self._name_to_loc_key[name] - offset = self._loc_key_to_offset.pop(loc_key, None) - self._offset_to_loc_key.pop(offset, None) - self._loc_keys.remove(loc_key) - - def pretty_str(self, loc_key): - """Return a human readable version of @loc_key, according to information - available in this LocationDB instance""" - names = self.get_location_names(loc_key) - new_names = set() - for name in names: - try: - name = name.decode() - except AttributeError: - pass - new_names.add(name) - names = new_names - if names: - return ",".join(names) - offset = self.get_location_offset(loc_key) - if offset is not None: - return "loc_%x" % offset - return str(loc_key) - - @property - def loc_keys(self): - """Return all loc_keys""" - return self._loc_keys - - @property - def names(self): - """Return all known names""" - return list(self._name_to_loc_key) - - @property - def offsets(self): - """Return all known 
offsets""" - return list(self._offset_to_loc_key) - - def __str__(self): - out = [] - for loc_key in self._loc_keys: - names = self.get_location_names(loc_key) - offset = self.get_location_offset(loc_key) - out.append( - "%s: %s - %s" % ( - loc_key, - "0x%x" % offset if offset is not None else None, - ",".join(printable(name) for name in names) - ) - ) - return "\n".join(out) - - def merge(self, location_db): - """Merge with another LocationDB @location_db - - WARNING: old reference to @location_db information (such as LocKeys) - must be retrieved from the updated version of this instance. The - dedicated "get_*" APIs may be used for this task - """ - # A simple merge is not doable here, because LocKey will certainly - # collides - - for foreign_loc_key in location_db.loc_keys: - foreign_names = location_db.get_location_names(foreign_loc_key) - foreign_offset = location_db.get_location_offset(foreign_loc_key) - if foreign_names: - init_name = list(foreign_names)[0] - else: - init_name = None - loc_key = self.add_location(offset=foreign_offset, name=init_name, - strict=False) - cur_names = self.get_location_names(loc_key) - for name in foreign_names: - if name not in cur_names and name != init_name: - self.add_location_name(loc_key, name=name) - - def canonize_to_exprloc(self, expr): - """ - If expr is ExprInt, return ExprLoc with corresponding loc_key - Else, return expr - - @expr: Expr instance - """ - if expr.is_int(): - loc_key = self.get_or_create_offset_location(int(expr)) - ret = ExprLoc(loc_key, expr.size) - return ret - return expr - - # Deprecated APIs - @property - def items(self): - """Return all loc_keys""" - warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') - return list(self._loc_keys) - - def __getitem__(self, item): - warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' - '"get_offset_location"') - if item in self._name_to_loc_key: - return self._name_to_loc_key[item] - if item in self._offset_to_loc_key: - return 
self._offset_to_loc_key[item] - raise KeyError('unknown symbol %r' % item) - - def __contains__(self, item): - warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' - '"get_offset_location", or ".offsets" or ".names"') - return item in self._name_to_loc_key or item in self._offset_to_loc_key - - def loc_key_to_name(self, loc_key): - """[DEPRECATED API], see 'get_location_names'""" - warnings.warn("Deprecated API: use 'get_location_names'") - return sorted(self.get_location_names(loc_key))[0] - - def loc_key_to_offset(self, loc_key): - """[DEPRECATED API], see 'get_location_offset'""" - warnings.warn("Deprecated API: use 'get_location_offset'") - return self.get_location_offset(loc_key) - - def remove_loc_key(self, loc_key): - """[DEPRECATED API], see 'remove_location'""" - warnings.warn("Deprecated API: use 'remove_location'") - self.remove_location(loc_key) - - def del_loc_key_offset(self, loc_key): - """[DEPRECATED API], see 'unset_location_offset'""" - warnings.warn("Deprecated API: use 'unset_location_offset'") - self.unset_location_offset(loc_key) - - def getby_offset(self, offset): - """[DEPRECATED API], see 'get_offset_location'""" - warnings.warn("Deprecated API: use 'get_offset_location'") - return self.get_offset_location(offset) - - def getby_name(self, name): - """[DEPRECATED API], see 'get_name_location'""" - warnings.warn("Deprecated API: use 'get_name_location'") - return self.get_name_location(name) - - def getby_offset_create(self, offset): - """[DEPRECATED API], see 'get_or_create_offset_location'""" - warnings.warn("Deprecated API: use 'get_or_create_offset_location'") - return self.get_or_create_offset_location(offset) - - def getby_name_create(self, name): - """[DEPRECATED API], see 'get_or_create_name_location'""" - warnings.warn("Deprecated API: use 'get_or_create_name_location'") - return self.get_or_create_name_location(name) - - def rename_location(self, loc_key, newname): - """[DEPRECATED API], see 'add_name_location' and 
'remove_location_name' - """ - warnings.warn("Deprecated API: use 'add_location_name' and " - "'remove_location_name'") - for name in self.get_location_names(loc_key): - self.remove_location_name(loc_key, name) - self.add_location_name(loc_key, name) - - def set_offset(self, loc_key, offset): - """[DEPRECATED API], see 'set_location_offset'""" - warnings.warn("Deprecated API: use 'set_location_offset'") - self.set_location_offset(loc_key, offset, force=True) - - def gen_loc_key(self): - """[DEPRECATED API], see 'add_location'""" - warnings.warn("Deprecated API: use 'add_location'") - return self.add_location() - - def str_loc_key(self, loc_key): - """[DEPRECATED API], see 'pretty_str'""" - warnings.warn("Deprecated API: use 'pretty_str'") - return self.pretty_str(loc_key) diff --git a/miasm2/core/objc.py b/miasm2/core/objc.py deleted file mode 100644 index 30b00682..00000000 --- a/miasm2/core/objc.py +++ /dev/null @@ -1,1761 +0,0 @@ -""" -C helper for Miasm: -* raw C to Miasm expression -* Miasm expression to raw C -* Miasm expression to C type -""" - -from builtins import zip -from builtins import int as int_types - -import warnings -from pycparser import c_parser, c_ast -from functools import total_ordering - -from miasm2.core.utils import cmp_elts -from miasm2.expression.expression_reduce import ExprReducer -from miasm2.expression.expression import ExprInt, ExprId, ExprOp, ExprMem - -from miasm2.core.ctypesmngr import CTypeUnion, CTypeStruct, CTypeId, CTypePtr,\ - CTypeArray, CTypeOp, CTypeSizeof, CTypeEnum, CTypeFunc, CTypeEllipsis - - -PADDING_TYPE_NAME = "___padding___" - -def missing_definition(objtype): - warnings.warn("Null size type: Missing definition? %r" % objtype) - -""" -Display C type -source: "The C Programming Language - 2nd Edition - Ritchie Kernighan.pdf" -p. 
124 -""" - -def objc_to_str(objc, result=None): - if result is None: - result = "" - while True: - if isinstance(objc, ObjCArray): - result += "[%d]" % objc.elems - objc = objc.objtype - elif isinstance(objc, ObjCPtr): - if not result and isinstance(objc.objtype, ObjCFunc): - result = objc.objtype.name - if isinstance(objc.objtype, (ObjCPtr, ObjCDecl, ObjCStruct, ObjCUnion)): - result = "*%s" % result - else: - result = "(*%s)" % result - - objc = objc.objtype - elif isinstance(objc, (ObjCDecl, ObjCStruct, ObjCUnion)): - if result: - result = "%s %s" % (objc, result) - else: - result = str(objc) - break - elif isinstance(objc, ObjCFunc): - args_str = [] - for name, arg in objc.args: - args_str.append(objc_to_str(arg, name)) - args = ", ".join(args_str) - result += "(%s)" % args - objc = objc.type_ret - elif isinstance(objc, ObjCInt): - return "int" - elif isinstance(objc, ObjCEllipsis): - return "..." - else: - raise TypeError("Unknown c type") - return result - - -@total_ordering -class ObjC(object): - """Generic ObjC""" - - def __init__(self, align, size): - self._align = align - self._size = size - - @property - def align(self): - """Alignment (in bytes) of the C object""" - return self._align - - @property - def size(self): - """Size (in bytes) of the C object""" - return self._size - - def cmp_base(self, other): - assert self.__class__ in OBJC_PRIO - assert other.__class__ in OBJC_PRIO - - if OBJC_PRIO[self.__class__] != OBJC_PRIO[other.__class__]: - return cmp_elts( - OBJC_PRIO[self.__class__], - OBJC_PRIO[other.__class__] - ) - if self.align != other.align: - return cmp_elts(self.align, other.align) - return cmp_elts(self.size, other.size) - - def __hash__(self): - return hash((self.__class__, self._align, self._size)) - - def __str__(self): - return objc_to_str(self) - - def __eq__(self, other): - return self.cmp_base(other) == 0 - - def __ne__(self, other): - # required Python 2.7.14 - return not self == other - - def __lt__(self, other): - return 
self.cmp_base(other) < 0 - - -@total_ordering -class ObjCDecl(ObjC): - """C Declaration identified""" - - def __init__(self, name, align, size): - super(ObjCDecl, self).__init__(align, size) - self._name = name - - name = property(lambda self: self._name) - - def __hash__(self): - return hash((super(ObjCDecl, self).__hash__(), self._name)) - - def __repr__(self): - return '<%s %s>' % (self.__class__.__name__, self.name) - - def __str__(self): - return str(self.name) - - def __eq__(self, other): - ret = self.cmp_base(other) - if ret: - return False - return self.name == other.name - - def __lt__(self, other): - ret = self.cmp_base(other) - if ret: - if ret < 0: - return True - return False - return self.name < other.name - - -class ObjCInt(ObjC): - """C integer""" - - def __init__(self): - super(ObjCInt, self).__init__(None, None) - - def __str__(self): - return 'int' - - -@total_ordering -class ObjCPtr(ObjC): - """C Pointer""" - - def __init__(self, objtype, void_p_align, void_p_size): - """Init ObjCPtr - - @objtype: pointer target ObjC - @void_p_align: pointer alignment (in bytes) - @void_p_size: pointer size (in bytes) - """ - - super(ObjCPtr, self).__init__(void_p_align, void_p_size) - self._lock = False - - self.objtype = objtype - if objtype is None: - self._lock = False - - def get_objtype(self): - assert self._lock is True - return self._objtype - - def set_objtype(self, objtype): - assert self._lock is False - self._lock = True - self._objtype = objtype - - objtype = property(get_objtype, set_objtype) - - def __hash__(self): - # Don't try to hash on an unlocked Ptr (still mutable) - assert self._lock - return hash((super(ObjCPtr, self).__hash__(), hash(self._objtype))) - - def __repr__(self): - return '<%s %r>' % ( - self.__class__.__name__, - self.objtype.__class__ - ) - - def __eq__(self, other): - ret = self.cmp_base(other) - if ret: - return False - return self.objtype == other.objtype - - def __lt__(self, other): - ret = self.cmp_base(other) - if ret: 
- if ret < 0: - return True - return False - return self.objtype < other.objtype - - -@total_ordering -class ObjCArray(ObjC): - """C array (test[XX])""" - - def __init__(self, objtype, elems): - """Init ObjCArray - - @objtype: pointer target ObjC - @elems: number of elements in the array - """ - - super(ObjCArray, self).__init__(objtype.align, elems * objtype.size) - self._elems = elems - self._objtype = objtype - - objtype = property(lambda self: self._objtype) - elems = property(lambda self: self._elems) - - def __hash__(self): - return hash((super(ObjCArray, self).__hash__(), self._elems, hash(self._objtype))) - - def __repr__(self): - return '<%r[%d]>' % (self.objtype, self.elems) - - def __eq__(self, other): - ret = self.cmp_base(other) - if ret: - return False - if self.objtype != other.objtype: - return False - return self.elems == other.elems - - def __lt__(self, other): - ret = self.cmp_base(other) - if ret > 0: - return False - if self.objtype > other.objtype: - return False - return self.elems < other.elems - -@total_ordering -class ObjCStruct(ObjC): - """C object for structures""" - - def __init__(self, name, align, size, fields): - super(ObjCStruct, self).__init__(align, size) - self._name = name - self._fields = tuple(fields) - - name = property(lambda self: self._name) - fields = property(lambda self: self._fields) - - def __hash__(self): - return hash((super(ObjCStruct, self).__hash__(), self._name)) - - def __repr__(self): - out = [] - out.append("Struct %s: (align: %d)" % (self.name, self.align)) - out.append(" off sz name") - for name, objtype, offset, size in self.fields: - out.append(" 0x%-3x %-3d %-10s %r" % - (offset, size, name, objtype.__class__.__name__)) - return '\n'.join(out) - - def __str__(self): - return 'struct %s' % (self.name) - - def __eq__(self, other): - ret = self.cmp_base(other) - if ret: - return False - return self.name == other.name - - def __lt__(self, other): - ret = self.cmp_base(other) - if ret: - if ret < 0: - return 
True - return False - return self.name < other.name - - -@total_ordering -class ObjCUnion(ObjC): - """C object for unions""" - - def __init__(self, name, align, size, fields): - super(ObjCUnion, self).__init__(align, size) - self._name = name - self._fields = tuple(fields) - - name = property(lambda self: self._name) - fields = property(lambda self: self._fields) - - def __hash__(self): - return hash((super(ObjCUnion, self).__hash__(), self._name)) - - def __repr__(self): - out = [] - out.append("Union %s: (align: %d)" % (self.name, self.align)) - out.append(" off sz name") - for name, objtype, offset, size in self.fields: - out.append(" 0x%-3x %-3d %-10s %r" % - (offset, size, name, objtype)) - return '\n'.join(out) - - def __str__(self): - return 'union %s' % (self.name) - - def __eq__(self, other): - ret = self.cmp_base(other) - if ret: - return False - return self.name == other.name - - def __lt__(self, other): - ret = self.cmp_base(other) - if ret: - if ret < 0: - return True - return False - return self.name < other.name - -class ObjCEllipsis(ObjC): - """C integer""" - - def __init__(self): - super(ObjCEllipsis, self).__init__(None, None) - - align = property(lambda self: self._align) - size = property(lambda self: self._size) - -@total_ordering -class ObjCFunc(ObjC): - """C object for Functions""" - - def __init__(self, name, abi, type_ret, args, void_p_align, void_p_size): - super(ObjCFunc, self).__init__(void_p_align, void_p_size) - self._name = name - self._abi = abi - self._type_ret = type_ret - self._args = tuple(args) - - args = property(lambda self: self._args) - type_ret = property(lambda self: self._type_ret) - abi = property(lambda self: self._abi) - name = property(lambda self: self._name) - - def __hash__(self): - return hash((super(ObjCFunc, self).__hash__(), hash(self._args), self._name)) - - def __repr__(self): - return "<%s %s>" % ( - self.__class__.__name__, - self.name - ) - - def __str__(self): - out = [] - out.append("Function (%s) %s: 
(align: %d)" % (self.abi, self.name, self.align)) - out.append(" ret: %s" % (str(self.type_ret))) - out.append(" Args:") - for name, arg in self.args: - out.append(" %s %s" % (name, arg)) - return '\n'.join(out) - - def __eq__(self, other): - ret = self.cmp_base(other) - if ret: - return False - return self.name == other.name - - def __lt__(self, other): - ret = self.cmp_base(other) - if ret: - if ret < 0: - return True - return False - return self.name < other.name - -OBJC_PRIO = { - ObjC: 0, - ObjCDecl:1, - ObjCInt:2, - ObjCPtr:3, - ObjCArray:4, - ObjCStruct:5, - ObjCUnion:6, - ObjCEllipsis:7, - ObjCFunc:8, -} - - -def access_simplifier(expr): - """Expression visitor to simplify a C access represented in Miasm - - @expr: Miasm expression representing the C access - - Example: - - IN: (In c: ['*(&((&((*(ptr_Test)).a))[0]))']) - [ExprOp('deref', ExprOp('addr', ExprOp('[]', ExprOp('addr', - ExprOp('field', ExprOp('deref', ExprId('ptr_Test', 64)), - ExprId('a', 64))), ExprInt(0x0, 64))))] - - OUT: (In c: ['(ptr_Test)->a']) - [ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64))] - """ - - if (expr.is_op("addr") and - expr.args[0].is_op("[]") and - expr.args[0].args[1] == ExprInt(0, 64)): - return expr.args[0].args[0] - elif (expr.is_op("[]") and - expr.args[0].is_op("addr") and - expr.args[1] == ExprInt(0, 64)): - return expr.args[0].args[0] - elif (expr.is_op("addr") and - expr.args[0].is_op("deref")): - return expr.args[0].args[0] - elif (expr.is_op("deref") and - expr.args[0].is_op("addr")): - return expr.args[0].args[0] - elif (expr.is_op("field") and - expr.args[0].is_op("deref")): - return ExprOp("->", expr.args[0].args[0], expr.args[1]) - return expr - - -def access_str(expr): - """Return the C string of a C access represented in Miasm - - @expr: Miasm expression representing the C access - - In: - ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64)) - OUT: - '(ptr_Test)->a' - """ - - if isinstance(expr, ExprId): - out = str(expr) - elif isinstance(expr, 
ExprInt): - out = str(int(expr)) - elif expr.is_op("addr"): - out = "&(%s)" % access_str(expr.args[0]) - elif expr.is_op("deref"): - out = "*(%s)" % access_str(expr.args[0]) - elif expr.is_op("field"): - out = "(%s).%s" % (access_str(expr.args[0]), access_str(expr.args[1])) - elif expr.is_op("->"): - out = "(%s)->%s" % (access_str(expr.args[0]), access_str(expr.args[1])) - elif expr.is_op("[]"): - out = "(%s)[%s]" % (access_str(expr.args[0]), access_str(expr.args[1])) - else: - raise RuntimeError("unknown op") - - return out - - -class CGen(object): - """Generic object to represent a C expression""" - - default_size = 64 - - - def __init__(self, ctype): - self._ctype = ctype - - @property - def ctype(self): - """Type (ObjC instance) of the current object""" - return self._ctype - - def __hash__(self): - return hash(self.__class__) - - def __eq__(self, other): - return (self.__class__ == other.__class__ and - self._ctype == other.ctype) - - def __ne__(self, other): - return not self.__eq__(other) - - def to_c(self): - """Generate corresponding C""" - - raise NotImplementedError("Virtual") - - def to_expr(self): - """Generate Miasm expression representing the C access""" - - raise NotImplementedError("Virtual") - - -class CGenInt(CGen): - """Int C object""" - - def __init__(self, integer): - assert isinstance(integer, int_types) - self._integer = integer - super(CGenInt, self).__init__(ObjCInt()) - - @property - def integer(self): - """Value of the object""" - return self._integer - - def __hash__(self): - return hash((super(CGenInt, self).__hash__(), self._integer)) - - def __eq__(self, other): - return (super(CGenInt, self).__eq__(other) and - self._integer == other.integer) - - def __ne__(self, other): - return not self.__eq__(other) - - def to_c(self): - """Generate corresponding C""" - - return "0x%X" % self.integer - - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, - self.integer) - - def to_expr(self): - """Generate Miasm expression 
representing the C access""" - - return ExprInt(self.integer, self.default_size) - - -class CGenId(CGen): - """ID of a C object""" - - def __init__(self, ctype, name): - self._name = name - assert isinstance(name, str) - super(CGenId, self).__init__(ctype) - - @property - def name(self): - """Name of the Id""" - return self._name - - def __hash__(self): - return hash((super(CGenId, self).__hash__(), self._name)) - - def __eq__(self, other): - return (super(CGenId, self).__eq__(other) and - self._name == other.name) - - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, - self.name) - - def to_c(self): - """Generate corresponding C""" - - return "%s" % (self.name) - - def to_expr(self): - """Generate Miasm expression representing the C access""" - - return ExprId(self.name, self.default_size) - - -class CGenField(CGen): - """ - Field of a C struct/union - - IN: - - struct (not ptr struct) - - field name - OUT: - - input type of the field => output type - - X[] => X[] - - X => X* - """ - - def __init__(self, struct, field, fieldtype, void_p_align, void_p_size): - self._struct = struct - self._field = field - assert isinstance(field, str) - if isinstance(fieldtype, ObjCArray): - ctype = fieldtype - else: - ctype = ObjCPtr(fieldtype, void_p_align, void_p_size) - super(CGenField, self).__init__(ctype) - - @property - def struct(self): - """Structure containing the field""" - return self._struct - - @property - def field(self): - """Field name""" - return self._field - - def __hash__(self): - return hash((super(CGenField, self).__hash__(), self._struct, self._field)) - - def __eq__(self, other): - return (super(CGenField, self).__eq__(other) and - self._struct == other.struct and - self._field == other.field) - - def to_c(self): - """Generate corresponding C""" - - if isinstance(self.ctype, ObjCArray): - return "(%s).%s" % (self.struct.to_c(), self.field) - elif isinstance(self.ctype, ObjCPtr): - return "&((%s).%s)" % (self.struct.to_c(), self.field) - 
else: - raise RuntimeError("Strange case") - - def __repr__(self): - return "<%s %s %s>" % (self.__class__.__name__, - self.struct, - self.field) - - def to_expr(self): - """Generate Miasm expression representing the C access""" - - if isinstance(self.ctype, ObjCArray): - return ExprOp("field", - self.struct.to_expr(), - ExprId(self.field, self.default_size)) - elif isinstance(self.ctype, ObjCPtr): - return ExprOp("addr", - ExprOp("field", - self.struct.to_expr(), - ExprId(self.field, self.default_size))) - else: - raise RuntimeError("Strange case") - - -class CGenArray(CGen): - """ - C Array - - This object does *not* deref the source, it only do object casting. - - IN: - - obj - OUT: - - X* => X* - - ..[][] => ..[] - - X[] => X* - """ - - def __init__(self, base, elems, void_p_align, void_p_size): - ctype = base.ctype - if isinstance(ctype, ObjCPtr): - pass - elif isinstance(ctype, ObjCArray) and isinstance(ctype.objtype, ObjCArray): - ctype = ctype.objtype - elif isinstance(ctype, ObjCArray): - ctype = ObjCPtr(ctype.objtype, void_p_align, void_p_size) - else: - raise TypeError("Strange case") - self._base = base - self._elems = elems - super(CGenArray, self).__init__(ctype) - - @property - def base(self): - """Base object supporting the array""" - return self._base - - @property - def elems(self): - """Number of elements in the array""" - return self._elems - - def __hash__(self): - return hash((super(CGenArray, self).__hash__(), self._base, self._elems)) - - def __eq__(self, other): - return (super(CGenField, self).__eq__(other) and - self._base == other.base and - self._elems == other.elems) - - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, - self.base) - - def to_c(self): - """Generate corresponding C""" - - if isinstance(self.ctype, ObjCPtr): - out_str = "&((%s)[%d])" % (self.base.to_c(), self.elems) - elif isinstance(self.ctype, ObjCArray): - out_str = "(%s)[%d]" % (self.base.to_c(), self.elems) - else: - raise RuntimeError("Strange 
case") - return out_str - - def to_expr(self): - """Generate Miasm expression representing the C access""" - - if isinstance(self.ctype, ObjCPtr): - return ExprOp("addr", - ExprOp("[]", - self.base.to_expr(), - ExprInt(self.elems, self.default_size))) - elif isinstance(self.ctype, ObjCArray): - return ExprOp("[]", - self.base.to_expr(), - ExprInt(self.elems, self.default_size)) - else: - raise RuntimeError("Strange case") - - -class CGenDeref(CGen): - """ - C dereference - - IN: - - ptr - OUT: - - X* => X - """ - - def __init__(self, ptr): - assert isinstance(ptr.ctype, ObjCPtr) - self._ptr = ptr - super(CGenDeref, self).__init__(ptr.ctype.objtype) - - @property - def ptr(self): - """Pointer object""" - return self._ptr - - def __hash__(self): - return hash((super(CGenDeref, self).__hash__(), self._ptr)) - - def __eq__(self, other): - return (super(CGenField, self).__eq__(other) and - self._ptr == other.ptr) - - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, - self.ptr) - - def to_c(self): - """Generate corresponding C""" - - if not isinstance(self.ptr.ctype, ObjCPtr): - raise RuntimeError() - return "*(%s)" % (self.ptr.to_c()) - - def to_expr(self): - """Generate Miasm expression representing the C access""" - - if not isinstance(self.ptr.ctype, ObjCPtr): - raise RuntimeError() - return ExprOp("deref", self.ptr.to_expr()) - - -def ast_get_c_access_expr(ast, expr_types, lvl=0): - """Transform C ast object into a C Miasm expression - - @ast: parsed pycparser.c_ast object - @expr_types: a dictionary linking ID names to their types - @lvl: actual recursion level - - Example: - - IN: - StructRef: -> - ID: ptr_Test - ID: a - - OUT: - ExprOp('->', ExprId('ptr_Test', 64), ExprId('a', 64)) - """ - - if isinstance(ast, c_ast.Constant): - obj = ExprInt(int(ast.value), 64) - elif isinstance(ast, c_ast.StructRef): - name, field = ast.name, ast.field.name - name = ast_get_c_access_expr(name, expr_types) - if ast.type == "->": - s_name = name - s_field = 
ExprId(field, 64) - obj = ExprOp('->', s_name, s_field) - elif ast.type == ".": - s_name = name - s_field = ExprId(field, 64) - obj = ExprOp("field", s_name, s_field) - else: - raise RuntimeError("Unknown struct access") - elif isinstance(ast, c_ast.UnaryOp) and ast.op == "&": - tmp = ast_get_c_access_expr(ast.expr, expr_types, lvl + 1) - obj = ExprOp("addr", tmp) - elif isinstance(ast, c_ast.ArrayRef): - tmp = ast_get_c_access_expr(ast.name, expr_types, lvl + 1) - index = ast_get_c_access_expr(ast.subscript, expr_types, lvl + 1) - obj = ExprOp("[]", tmp, index) - elif isinstance(ast, c_ast.ID): - assert ast.name in expr_types - obj = ExprId(ast.name, 64) - elif isinstance(ast, c_ast.UnaryOp) and ast.op == "*": - tmp = ast_get_c_access_expr(ast.expr, expr_types, lvl + 1) - obj = ExprOp("deref", tmp) - else: - raise NotImplementedError("Unknown type") - return obj - - -def parse_access(c_access): - """Parse C access - - @c_access: C access string - """ - - main = ''' - int main() { - %s; - } - ''' % c_access - - parser = c_parser.CParser() - node = parser.parse(main, filename='') - access = node.ext[-1].body.block_items[0] - return access - - -class ExprToAccessC(ExprReducer): - """ - Generate the C access object(s) for a given native Miasm expression - Example: - IN: - @32[ptr_Test] - OUT: - [> a>>>] - - An expression may be represented by multiple accessor (due to unions). - """ - - def __init__(self, expr_types, types_mngr, enforce_strict_access=True): - """Init GenCAccess - - @expr_types: a dictionary linking ID names to their types - @types_mngr: types manager - @enforce_strict_access: If false, generate access even on expression - pointing to a middle of an object. 
If true, raise exception if such a - pointer is encountered - """ - - self.expr_types = expr_types - self.types_mngr = types_mngr - self.enforce_strict_access = enforce_strict_access - - def updt_expr_types(self, expr_types): - """Update expr_types - @expr_types: Dictionary associating name to type - """ - - self.expr_types = expr_types - - def cgen_access(self, cgenobj, base_type, offset, deref, lvl=0): - """Return the access(es) which lead to the element at @offset of an - object of type @base_type - - In case of no @deref, stops recursion as soon as we reached the base of - an object. - In other cases, we need to go down to the final dereferenced object - - @cgenobj: current object access - @base_type: type of main object - @offset: offset (in bytes) of the target sub object - @deref: get type for a pointer or a deref - @lvl: actual recursion level - - - IN: - - base_type: struct Toto{ - int a - int b - } - - base_name: var - - 4 - OUT: - - CGenField(var, b) - - - - IN: - - base_type: int a - - 0 - OUT: - - CGenAddr(a) - - IN: - - base_type: X = int* a - - 0 - OUT: - - CGenAddr(X) - - IN: - - X = int* a - - 8 - OUT: - - ASSERT - - - IN: - - struct toto{ - int a - int b[10] - } - - 8 - OUT: - - CGenArray(CGenField(toto, b), 1) - """ - if base_type.size == 0: - missing_definition(base_type) - return set() - - - void_type = self.types_mngr.void_ptr - if isinstance(base_type, ObjCStruct): - if not 0 <= offset < base_type.size: - return set() - - if offset == 0 and not deref: - # In this case, return the struct* - return set([cgenobj]) - - for fieldname, subtype, field_offset, size in base_type.fields: - if not field_offset <= offset < field_offset + size: - continue - fieldptr = CGenField(CGenDeref(cgenobj), fieldname, subtype, - void_type.align, void_type.size) - new_type = self.cgen_access(fieldptr, subtype, - offset - field_offset, - deref, lvl + 1) - break - else: - return set() - elif isinstance(base_type, ObjCArray): - if base_type.objtype.size == 0: - 
missing_definition(base_type.objtype) - return set() - element_num = offset // (base_type.objtype.size) - field_offset = offset % base_type.objtype.size - if element_num >= base_type.elems: - return set() - if offset == 0 and not deref: - # In this case, return the array - return set([cgenobj]) - - curobj = CGenArray(cgenobj, element_num, - void_type.align, - void_type.size) - if field_offset == 0: - # We point to the start of the sub object, - # return it directly - return set([curobj]) - new_type = self.cgen_access(curobj, base_type.objtype, - field_offset, deref, lvl + 1) - - elif isinstance(base_type, ObjCDecl): - if self.enforce_strict_access and offset % base_type.size != 0: - return set() - elem_num = offset // base_type.size - - nobj = CGenArray(cgenobj, elem_num, - void_type.align, void_type.size) - new_type = set([nobj]) - - elif isinstance(base_type, ObjCUnion): - if offset == 0 and not deref: - # In this case, return the struct* - return set([cgenobj]) - - out = set() - for fieldname, objtype, field_offset, size in base_type.fields: - if not field_offset <= offset < field_offset + size: - continue - field = CGenField(CGenDeref(cgenobj), fieldname, objtype, - void_type.align, void_type.size) - out.update(self.cgen_access(field, objtype, - offset - field_offset, - deref, lvl + 1)) - new_type = out - - elif isinstance(base_type, ObjCPtr): - elem_num = offset // base_type.size - if self.enforce_strict_access and offset % base_type.size != 0: - return set() - nobj = CGenArray(cgenobj, elem_num, - void_type.align, void_type.size) - new_type = set([nobj]) - - else: - raise NotImplementedError("deref type %r" % base_type) - return new_type - - def reduce_known_expr(self, node, ctxt, **kwargs): - """Generate access for known expr""" - if node.expr in ctxt: - objcs = ctxt[node.expr] - return set(CGenId(objc, str(node.expr)) for objc in objcs) - return None - - def reduce_int(self, node, **kwargs): - """Generate access for ExprInt""" - - if not 
isinstance(node.expr, ExprInt): - return None - return set([CGenInt(int(node.expr))]) - - def get_solo_type(self, node): - """Return the type of the @node if it has only one possible type, - different from not None. In other cases, return None. - """ - if node.info is None or len(node.info) != 1: - return None - return type(list(node.info)[0].ctype) - - def reduce_op(self, node, lvl=0, **kwargs): - """Generate access for ExprOp""" - if not node.expr.is_op("+") or len(node.args) != 2: - return None - type_arg1 = self.get_solo_type(node.args[1]) - if type_arg1 != ObjCInt: - return None - arg0, arg1 = node.args - if arg0.info is None: - return None - void_type = self.types_mngr.void_ptr - out = set() - if not arg1.expr.is_int(): - return None - ptr_offset = int(arg1.expr) - for info in arg0.info: - if isinstance(info.ctype, ObjCArray): - field_type = info.ctype - elif isinstance(info.ctype, ObjCPtr): - field_type = info.ctype.objtype - else: - continue - target_type = info.ctype.objtype - - # Array-like: int* ptr; ptr[1] = X - out.update(self.cgen_access(info, field_type, ptr_offset, False, lvl)) - return out - - def reduce_mem(self, node, lvl=0, **kwargs): - """Generate access for ExprMem: - * @NN[ptr] -> elem (type) - * @64[ptr>] -> ptr - * @32[ptr] -> struct.00 - """ - - if not isinstance(node.expr, ExprMem): - return None - if node.ptr.info is None: - return None - assert isinstance(node.ptr.info, set) - void_type = self.types_mngr.void_ptr - found = set() - for subcgenobj in node.ptr.info: - if isinstance(subcgenobj.ctype, ObjCArray): - nobj = CGenArray(subcgenobj, 0, - void_type.align, - void_type.size) - target = nobj.ctype.objtype - for finalcgenobj in self.cgen_access(nobj, target, 0, True, lvl): - assert isinstance(finalcgenobj.ctype, ObjCPtr) - if self.enforce_strict_access and finalcgenobj.ctype.objtype.size != node.expr.size // 8: - continue - found.add(CGenDeref(finalcgenobj)) - - elif isinstance(subcgenobj.ctype, ObjCPtr): - target = 
subcgenobj.ctype.objtype - # target : type(elem) - if isinstance(target, (ObjCStruct, ObjCUnion)): - for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl): - target = finalcgenobj.ctype.objtype - if self.enforce_strict_access and target.size != node.expr.size // 8: - continue - found.add(CGenDeref(finalcgenobj)) - elif isinstance(target, ObjCArray): - if self.enforce_strict_access and subcgenobj.ctype.size != node.expr.size // 8: - continue - found.update(self.cgen_access(CGenDeref(subcgenobj), target, - 0, False, lvl)) - else: - if self.enforce_strict_access and target.size != node.expr.size // 8: - continue - found.add(CGenDeref(subcgenobj)) - if not found: - return None - return found - - reduction_rules = [reduce_known_expr, - reduce_int, - reduce_op, - reduce_mem, - ] - - def get_accesses(self, expr, expr_context=None): - """Generate C access(es) for the native Miasm expression @expr - @expr: native Miasm expression - @expr_context: a dictionary linking known expressions to their - types. An expression is linked to a tuple of types. 
- """ - if expr_context is None: - expr_context = self.expr_types - ret = self.reduce(expr, ctxt=expr_context) - if ret.info is None: - return set() - return ret.info - - -class ExprCToExpr(ExprReducer): - """Translate a Miasm expression (representing a C access) into a native - Miasm expression and its C type: - - Example: - - IN: ((ptr_struct -> f_mini) field x) - OUT: @32[ptr_struct + 0x80], int - - - Tricky cases: - Struct S0 { - int x; - int y[0x10]; - } - - Struct S1 { - int a; - S0 toto; - } - - S1* ptr; - - Case 1: - ptr->toto => ptr + 0x4 - &(ptr->toto) => ptr + 0x4 - - Case 2: - (ptr->toto).x => @32[ptr + 0x4] - &((ptr->toto).x) => ptr + 0x4 - - Case 3: - (ptr->toto).y => ptr + 0x8 - &((ptr->toto).y) => ptr + 0x8 - - Case 4: - (ptr->toto).y[1] => @32[ptr + 0x8 + 0x4] - &((ptr->toto).y[1]) => ptr + 0x8 + 0x4 - - """ - - def __init__(self, expr_types, types_mngr): - """Init ExprCAccess - - @expr_types: a dictionary linking ID names to their types - @types_mngr: types manager - """ - - self.expr_types = expr_types - self.types_mngr = types_mngr - - def updt_expr_types(self, expr_types): - """Update expr_types - @expr_types: Dictionary associating name to type - """ - - self.expr_types = expr_types - - CST = "CST" - - def reduce_known_expr(self, node, ctxt, **kwargs): - """Reduce known expressions""" - if str(node.expr) in ctxt: - objc = ctxt[str(node.expr)] - out = (node.expr, objc) - elif node.expr.is_id(): - out = (node.expr, None) - else: - out = None - return out - - def reduce_int(self, node, **kwargs): - """Reduce ExprInt""" - - if not isinstance(node.expr, ExprInt): - return None - return self.CST - - def reduce_op_memberof(self, node, **kwargs): - """Reduce -> operator""" - - if not node.expr.is_op('->'): - return None - assert len(node.args) == 2 - out = [] - assert isinstance(node.args[1].expr, ExprId) - field = node.args[1].expr.name - src, src_type = node.args[0].info - if src_type is None: - return None - assert isinstance(src_type, (ObjCPtr, 
ObjCArray)) - struct_dst = src_type.objtype - assert isinstance(struct_dst, ObjCStruct) - - found = False - for name, objtype, offset, _ in struct_dst.fields: - if name != field: - continue - expr = src + ExprInt(offset, src.size) - if isinstance(objtype, (ObjCArray, ObjCStruct, ObjCUnion)): - pass - else: - expr = ExprMem(expr, objtype.size * 8) - assert not found - found = True - out = (expr, objtype) - assert found - return out - - def reduce_op_field(self, node, **kwargs): - """Reduce field operator (Struct or Union)""" - - if not node.expr.is_op('field'): - return None - assert len(node.args) == 2 - out = [] - assert isinstance(node.args[1].expr, ExprId) - field = node.args[1].expr.name - src, src_type = node.args[0].info - struct_dst = src_type - - if isinstance(struct_dst, ObjCStruct): - found = False - for name, objtype, offset, _ in struct_dst.fields: - if name != field: - continue - expr = src + ExprInt(offset, src.size) - if isinstance(objtype, ObjCArray): - # Case 4 - pass - elif isinstance(objtype, (ObjCStruct, ObjCUnion)): - # Case 1 - pass - else: - # Case 2 - expr = ExprMem(expr, objtype.size * 8) - assert not found - found = True - out = (expr, objtype) - elif isinstance(struct_dst, ObjCUnion): - found = False - for name, objtype, offset, _ in struct_dst.fields: - if name != field: - continue - expr = src + ExprInt(offset, src.size) - if isinstance(objtype, ObjCArray): - # Case 4 - pass - elif isinstance(objtype, (ObjCStruct, ObjCUnion)): - # Case 1 - pass - else: - # Case 2 - expr = ExprMem(expr, objtype.size * 8) - assert not found - found = True - out = (expr, objtype) - else: - raise NotImplementedError("unknown ObjC") - assert found - return out - - def reduce_op_array(self, node, **kwargs): - """Reduce array operator""" - - if not node.expr.is_op('[]'): - return None - assert len(node.args) == 2 - out = [] - assert isinstance(node.args[1].expr, ExprInt) - cst = node.args[1].expr - src, src_type = node.args[0].info - objtype = src_type.objtype 
- expr = src + cst * ExprInt(objtype.size, cst.size) - if isinstance(src_type, ObjCPtr): - if isinstance(objtype, ObjCArray): - final = objtype.objtype - expr = src + cst * ExprInt(final.size, cst.size) - objtype = final - expr = ExprMem(expr, final.size * 8) - found = True - else: - expr = ExprMem(expr, objtype.size * 8) - found = True - elif isinstance(src_type, ObjCArray): - if isinstance(objtype, ObjCArray): - final = objtype - found = True - elif isinstance(objtype, ObjCStruct): - found = True - else: - expr = ExprMem(expr, objtype.size * 8) - found = True - else: - raise NotImplementedError("Unknown access" % node.expr) - assert found - out = (expr, objtype) - return out - - def reduce_op_addr(self, node, **kwargs): - """Reduce addr operator""" - - if not node.expr.is_op('addr'): - return None - assert len(node.args) == 1 - out = [] - src, src_type = node.args[0].info - - void_type = self.types_mngr.void_ptr - - if isinstance(src_type, ObjCArray): - out = (src.arg, ObjCPtr(src_type.objtype, - void_type.align, void_type.size)) - elif isinstance(src, ExprMem): - out = (src.ptr, ObjCPtr(src_type, - void_type.align, void_type.size)) - elif isinstance(src_type, ObjCStruct): - out = (src, ObjCPtr(src_type, - void_type.align, void_type.size)) - elif isinstance(src_type, ObjCUnion): - out = (src, ObjCPtr(src_type, - void_type.align, void_type.size)) - else: - raise NotImplementedError("unk type") - return out - - def reduce_op_deref(self, node, **kwargs): - """Reduce deref operator""" - - if not node.expr.is_op('deref'): - return None - out = [] - src, src_type = node.args[0].info - assert isinstance(src_type, (ObjCPtr, ObjCArray)) - void_type = self.types_mngr.void_ptr - if isinstance(src_type, ObjCPtr): - if isinstance(src_type.objtype, ObjCArray): - size = void_type.size*8 - else: - size = src_type.objtype.size * 8 - out = (ExprMem(src, size), (src_type.objtype)) - else: - size = src_type.objtype.size * 8 - out = (ExprMem(src, size), (src_type.objtype)) - return 
out - - reduction_rules = [reduce_known_expr, - reduce_int, - reduce_op_memberof, - reduce_op_field, - reduce_op_array, - reduce_op_addr, - reduce_op_deref, - ] - - def get_expr(self, expr, c_context): - """Translate a Miasm expression @expr (representing a C access) into a - tuple composed of a native Miasm expression and its C type. - @expr: Miasm expression (representing a C access) - @c_context: a dictionary linking known tokens (strings) to their - types. A token is linked to only one type. - """ - ret = self.reduce(expr, ctxt=c_context) - if ret.info is None: - return (None, None) - return ret.info - - -class CTypesManager(object): - """Represent a C object, without any layout information""" - - def __init__(self, types_ast, leaf_types): - self.types_ast = types_ast - self.leaf_types = leaf_types - - @property - def void_ptr(self): - """Retrieve a void* objc""" - return self.leaf_types.types.get(CTypePtr(CTypeId('void'))) - - @property - def padding(self): - """Retrieve a padding ctype""" - return CTypeId(PADDING_TYPE_NAME) - - def _get_objc(self, type_id, resolved=None, to_fix=None, lvl=0): - if resolved is None: - resolved = {} - if to_fix is None: - to_fix = [] - if type_id in resolved: - return resolved[type_id] - type_id = self.types_ast.get_type(type_id) - fixed = True - if isinstance(type_id, CTypeId): - out = self.leaf_types.types.get(type_id, None) - assert out is not None - elif isinstance(type_id, CTypeUnion): - args = [] - align_max, size_max = 0, 0 - for name, field in type_id.fields: - objc = self._get_objc(field, resolved, to_fix, lvl + 1) - resolved[field] = objc - align_max = max(align_max, objc.align) - size_max = max(size_max, objc.size) - args.append((name, objc, 0, objc.size)) - - align, size = self.union_compute_align_size(align_max, size_max) - out = ObjCUnion(type_id.name, align, size, args) - - elif isinstance(type_id, CTypeStruct): - align_max, size_max = 0, 0 - - args = [] - offset, align_max = 0, 1 - pad_index = 0 - for name, field 
in type_id.fields: - objc = self._get_objc(field, resolved, to_fix, lvl + 1) - resolved[field] = objc - align_max = max(align_max, objc.align) - new_offset = self.struct_compute_field_offset(objc, offset) - if new_offset - offset: - pad_name = "__PAD__%d__" % pad_index - pad_index += 1 - size = new_offset - offset - pad_objc = self._get_objc(CTypeArray(self.padding, size), resolved, to_fix, lvl + 1) - args.append((pad_name, pad_objc, offset, pad_objc.size)) - offset = new_offset - args.append((name, objc, offset, objc.size)) - offset += objc.size - - align, size = self.struct_compute_align_size(align_max, offset) - out = ObjCStruct(type_id.name, align, size, args) - - elif isinstance(type_id, CTypePtr): - target = type_id.target - out = ObjCPtr(None, self.void_ptr.align, self.void_ptr.size) - fixed = False - - elif isinstance(type_id, CTypeArray): - target = type_id.target - objc = self._get_objc(target, resolved, to_fix, lvl + 1) - resolved[target] = objc - if type_id.size is None: - # case: toto[] - # return ObjCPtr - out = ObjCPtr(objc, self.void_ptr.align, self.void_ptr.size) - else: - size = self.size_to_int(type_id.size) - if size is None: - raise RuntimeError('Enable to compute objc size') - else: - out = ObjCArray(objc, size) - assert out.size is not None and out.align is not None - elif isinstance(type_id, CTypeEnum): - # Enum are integer - return self.leaf_types.types.get(CTypeId('int')) - elif isinstance(type_id, CTypeFunc): - type_ret = self._get_objc( - type_id.type_ret, resolved, to_fix, lvl + 1) - resolved[type_id.type_ret] = type_ret - args = [] - for name, arg in type_id.args: - objc = self._get_objc(arg, resolved, to_fix, lvl + 1) - resolved[arg] = objc - args.append((name, objc)) - out = ObjCFunc(type_id.name, type_id.abi, type_ret, args, - self.void_ptr.align, self.void_ptr.size) - elif isinstance(type_id, CTypeEllipsis): - out = ObjCEllipsis() - else: - raise TypeError("Unknown type %r" % type_id.__class__) - if not isinstance(out, 
ObjCEllipsis): - assert out.align is not None and out.size is not None - - if fixed: - resolved[type_id] = out - else: - to_fix.append((type_id, out)) - return out - - def get_objc(self, type_id): - """Get the ObjC corresponding to the CType @type_id - @type_id: CTypeBase instance""" - resolved = {} - to_fix = [] - out = self._get_objc(type_id, resolved, to_fix) - # Fix sub objects - while to_fix: - type_id, objc_to_fix = to_fix.pop() - objc = self._get_objc(type_id.target, resolved, to_fix) - objc_to_fix.objtype = objc - self.check_objc(out) - return out - - def check_objc(self, objc, done=None): - """Ensure each sub ObjC is resolved - @objc: ObjC instance""" - if done is None: - done = set() - if objc in done: - return True - done.add(objc) - if isinstance(objc, (ObjCDecl, ObjCInt, ObjCEllipsis)): - return True - elif isinstance(objc, (ObjCPtr, ObjCArray)): - assert self.check_objc(objc.objtype, done) - return True - elif isinstance(objc, (ObjCStruct, ObjCUnion)): - for _, field, _, _ in objc.fields: - assert self.check_objc(field, done) - return True - elif isinstance(objc, ObjCFunc): - assert self.check_objc(objc.type_ret, done) - for name, arg in objc.args: - assert self.check_objc(arg, done) - return True - else: - assert False - - def size_to_int(self, size): - """Resolve an array size - @size: CTypeOp or integer""" - if isinstance(size, CTypeOp): - assert len(size.args) == 2 - arg0, arg1 = [self.size_to_int(arg) for arg in size.args] - if size.operator == "+": - return arg0 + arg1 - elif size.operator == "-": - return arg0 - arg1 - elif size.operator == "*": - return arg0 * arg1 - elif size.operator == "/": - return arg0 // arg1 - elif size.operator == "<<": - return arg0 << arg1 - elif size.operator == ">>": - return arg0 >> arg1 - else: - raise ValueError("Unknown operator %s" % size.operator) - elif isinstance(size, int_types): - return size - elif isinstance(size, CTypeSizeof): - obj = self._get_objc(size.target) - return obj.size - else: - raise 
TypeError("Unknown size type") - - def struct_compute_field_offset(self, obj, offset): - """Compute the offset of the field @obj in the current structure""" - raise NotImplementedError("Abstract method") - - def struct_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current structure""" - raise NotImplementedError("Abstract method") - - def union_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current union""" - raise NotImplementedError("Abstract method") - - -class CTypesManagerNotPacked(CTypesManager): - """Store defined C types (not packed)""" - - def struct_compute_field_offset(self, obj, offset): - """Compute the offset of the field @obj in the current structure - (not packed)""" - - if obj.align > 1: - offset = (offset + obj.align - 1) & ~(obj.align - 1) - return offset - - def struct_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current structure - (not packed)""" - if align_max > 1: - size = (size + align_max - 1) & ~(align_max - 1) - return align_max, size - - def union_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current union - (not packed)""" - return align_max, size - - -class CTypesManagerPacked(CTypesManager): - """Store defined C types (packed form)""" - - def struct_compute_field_offset(self, _, offset): - """Compute the offset of the field @obj in the current structure - (packed form)""" - return offset - - def struct_compute_align_size(self, _, size): - """Compute the alignment and size of the current structure - (packed form)""" - return 1, size - - def union_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current union - (packed form)""" - return 1, size - - -class CHandler(object): - """ - C manipulator for Miasm - Miasm expr <-> C - """ - - exprCToExpr_cls = ExprCToExpr - exprToAccessC_cls = ExprToAccessC - - def __init__(self, types_mngr, 
expr_types=None, - C_types=None, - simplify_c=access_simplifier, - enforce_strict_access=True): - self.exprc2expr = self.exprCToExpr_cls(expr_types, types_mngr) - self.access_c_gen = self.exprToAccessC_cls(expr_types, - types_mngr, - enforce_strict_access) - self.types_mngr = types_mngr - self.simplify_c = simplify_c - if expr_types is None: - expr_types = {} - self.expr_types = expr_types - if C_types is None: - C_types = {} - self.C_types = C_types - - def updt_expr_types(self, expr_types): - """Update expr_types - @expr_types: Dictionary associating name to type - """ - - self.expr_types = expr_types - self.exprc2expr.updt_expr_types(expr_types) - self.access_c_gen.updt_expr_types(expr_types) - - def expr_to_c_access(self, expr, expr_context=None): - """Generate the C access object(s) for a given native Miasm expression. - @expr: Miasm expression - @expr_context: a dictionary linking known expressions to a set of types - """ - - if expr_context is None: - expr_context = self.expr_types - return self.access_c_gen.get_accesses(expr, expr_context) - - - def expr_to_c_and_types(self, expr, expr_context=None): - """Generate the C access string and corresponding type for a given - native Miasm expression. 
- @expr_context: a dictionary linking known expressions to a set of types - """ - - accesses = set() - for access in self.expr_to_c_access(expr, expr_context): - c_str = access_str(access.to_expr().visit(self.simplify_c)) - accesses.add((c_str, access.ctype)) - return accesses - - def expr_to_c(self, expr, expr_context=None): - """Convert a Miasm @expr into it's C equivalent string - @expr_context: a dictionary linking known expressions to a set of types - """ - - return set(access[0] - for access in self.expr_to_c_and_types(expr, expr_context)) - - def expr_to_types(self, expr, expr_context=None): - """Get the possible types of the Miasm @expr - @expr_context: a dictionary linking known expressions to a set of types - """ - - return set(access.ctype - for access in self.expr_to_c_access(expr, expr_context)) - - def c_to_expr_and_type(self, c_str, c_context=None): - """Convert a C string expression to a Miasm expression and it's - corresponding c type - @c_str: C string - @c_context: (optional) dictionary linking known tokens (strings) to its - type. - """ - - ast = parse_access(c_str) - if c_context is None: - c_context = self.C_types - access_c = ast_get_c_access_expr(ast, c_context) - return self.exprc2expr.get_expr(access_c, c_context) - - def c_to_expr(self, c_str, c_context=None): - """Convert a C string expression to a Miasm expression - @c_str: C string - @c_context: (optional) dictionary linking known tokens (strings) to its - type. - """ - - if c_context is None: - c_context = self.C_types - expr, _ = self.c_to_expr_and_type(c_str, c_context) - return expr - - def c_to_type(self, c_str, c_context=None): - """Get the type of a C string expression - @expr: Miasm expression - @c_context: (optional) dictionary linking known tokens (strings) to its - type. 
- """ - - if c_context is None: - c_context = self.C_types - _, ctype = self.c_to_expr_and_type(c_str, c_context) - return ctype - - -class CLeafTypes(object): - """Define C types sizes/alignment for a given architecture""" - pass diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py deleted file mode 100644 index e9982503..00000000 --- a/miasm2/core/parse_asm.py +++ /dev/null @@ -1,305 +0,0 @@ -#-*- coding:utf-8 -*- -import re -import codecs -from builtins import range - -from miasm2.expression.expression import ExprId, ExprInt, ExprOp, LocKey -import miasm2.core.asmblock as asmblock -from miasm2.core.cpu import instruction, base_expr -from miasm2.core.asm_ast import AstInt, AstId, AstOp - -declarator = {'byte': 8, - 'word': 16, - 'dword': 32, - 'qword': 64, - 'long': 32, - } - -size2pck = {8: 'B', - 16: 'H', - 32: 'I', - 64: 'Q', - } - -EMPTY_RE = re.compile(r'\s*$') -COMMENT_RE = re.compile(r'\s*;\S*') -LOCAL_LABEL_RE = re.compile(r'\s*(\.L\S+)\s*:') -DIRECTIVE_START_RE = re.compile(r'\s*\.') -DIRECTIVE_RE = re.compile(r'\s*\.(\S+)') -LABEL_RE = re.compile(r'\s*(\S+)\s*:') -FORGET_LABEL_RE = re.compile(r'\s*\.LF[BE]\d\s*:') - - -class Directive(object): - - """Stand for Directive""" - - pass - -class DirectiveAlign(Directive): - - """Stand for alignment representation""" - - def __init__(self, alignment=1): - self.alignment = alignment - - def __str__(self): - return "Alignment %s" % self.alignment - - -class DirectiveSplit(Directive): - - """Stand for alignment representation""" - - pass - - -class DirectiveDontSplit(Directive): - - """Stand for alignment representation""" - - pass - - -def guess_next_new_label(loc_db): - """Generate a new label - @loc_db: the LocationDB instance""" - i = 0 - gen_name = b"loc_%.8X" - while True: - name = gen_name % i - label = loc_db.get_name_location(name) - if label is None: - return loc_db.add_location(name) - i += 1 - - -STATE_NO_BLOC = 0 -STATE_IN_BLOC = 1 - - -def asm_ast_to_expr_with_size(arg, loc_db, size): - 
if isinstance(arg, AstId): - return ExprId(arg.name.encode(), size) - if isinstance(arg, AstOp): - args = [asm_ast_to_expr_with_size(tmp, loc_db, size) for tmp in arg.args] - return ExprOp(arg.op, *args) - if isinstance(arg, AstInt): - return ExprInt(arg.value, size) - return None - -def parse_txt(mnemo, attrib, txt, loc_db=None): - """Parse an assembly listing. Returns a couple (asmcfg, loc_db), where - asmcfg is an AsmCfg instance and loc_db the associated LocationDB - - @mnemo: architecture used - @attrib: architecture attribute - @txt: assembly listing - @loc_db: (optional) the LocationDB instance used to handle labels - of the listing - - """ - - if loc_db is None: - loc_db = asmblock.LocationDB() - - C_NEXT = asmblock.AsmConstraint.c_next - C_TO = asmblock.AsmConstraint.c_to - - lines = [] - # parse each line - for line in txt.split('\n'): - # empty - if EMPTY_RE.match(line): - continue - # comment - if COMMENT_RE.match(line): - continue - # labels to forget - if FORGET_LABEL_RE.match(line): - continue - # label beginning with .L - match_re = LABEL_RE.match(line) - if match_re: - label_name = match_re.group(1).encode() - label = loc_db.get_or_create_name_location(label_name) - lines.append(label) - continue - # directive - if DIRECTIVE_START_RE.match(line): - match_re = DIRECTIVE_RE.match(line) - directive = match_re.group(1) - if directive in ['text', 'data', 'bss']: - continue - if directive in ['string', 'ascii']: - # XXX HACK - line = line.replace(r'\n', '\n').replace(r'\r', '\r') - raw = line[line.find(r'"') + 1:line.rfind(r'"')] - raw = codecs.escape_decode(raw)[0] - if directive == 'string': - raw += b"\x00" - lines.append(asmblock.AsmRaw(raw)) - continue - if directive == 'ustring': - # XXX HACK - line = line.replace(r'\n', '\n').replace(r'\r', '\r') - raw = line[line.find(r'"') + 1:line.rfind(r'"')] + "\x00" - raw = codecs.escape_decode(raw)[0] - out = b'' - for i in range(len(raw)): - out += raw[i:i+1] + b'\x00' - lines.append(asmblock.AsmRaw(out)) 
- continue - if directive in declarator: - data_raw = line[match_re.end():].split(' ', 1)[1] - data_raw = data_raw.split(',') - size = declarator[directive] - expr_list = [] - - # parser - - for element in data_raw: - element = element.strip() - element_parsed = base_expr.parseString(element)[0] - element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size) - expr_list.append(element_expr) - - raw_data = asmblock.AsmRaw(expr_list) - raw_data.element_size = size - lines.append(raw_data) - continue - if directive == 'comm': - # TODO - continue - if directive == 'split': # custom command - lines.append(DirectiveSplit()) - continue - if directive == 'dontsplit': # custom command - lines.append(DirectiveDontSplit()) - continue - if directive == "align": - align_value = int(line[match_re.end():], 0) - lines.append(DirectiveAlign(align_value)) - continue - if directive in ['file', 'intel_syntax', 'globl', 'local', - 'type', 'size', 'align', 'ident', 'section']: - continue - if directive[0:4] == 'cfi_': - continue - - raise ValueError("unknown directive %s" % directive) - - # label - match_re = LABEL_RE.match(line) - if match_re: - label_name = match_re.group(1).encode() - label = loc_db.get_or_create_name_location(label_name) - lines.append(label) - continue - - # code - if ';' in line: - line = line[:line.find(';')] - line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, loc_db, attrib) - lines.append(instr) - - asmblock.log_asmblock.info("___pre asm oki___") - # make asmcfg - - cur_block = None - state = STATE_NO_BLOC - i = 0 - asmcfg = asmblock.AsmCFG(loc_db) - block_to_nlink = None - delayslot = 0 - while i < len(lines): - if delayslot: - delayslot -= 1 - if delayslot == 0: - state = STATE_NO_BLOC - line = lines[i] - # no current block - if state == STATE_NO_BLOC: - if isinstance(line, DirectiveDontSplit): - block_to_nlink = cur_block - i += 1 - continue - elif isinstance(line, DirectiveSplit): - block_to_nlink = None - i += 1 - continue - elif 
not isinstance(line, LocKey): - # First line must be a label. If it's not the case, generate - # it. - loc = guess_next_new_label(loc_db) - cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment) - else: - cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment) - i += 1 - # Generate the current bloc - asmcfg.add_block(cur_block) - state = STATE_IN_BLOC - if block_to_nlink: - block_to_nlink.addto( - asmblock.AsmConstraint( - cur_block.loc_key, - C_NEXT - ) - ) - block_to_nlink = None - continue - - # in block - elif state == STATE_IN_BLOC: - if isinstance(line, DirectiveSplit): - state = STATE_NO_BLOC - block_to_nlink = None - elif isinstance(line, DirectiveDontSplit): - state = STATE_NO_BLOC - block_to_nlink = cur_block - elif isinstance(line, DirectiveAlign): - cur_block.alignment = line.alignment - elif isinstance(line, asmblock.AsmRaw): - cur_block.addline(line) - block_to_nlink = cur_block - elif isinstance(line, LocKey): - if block_to_nlink: - cur_block.addto( - asmblock.AsmConstraint(line, C_NEXT) - ) - block_to_nlink = None - state = STATE_NO_BLOC - continue - # instruction - elif isinstance(line, instruction): - cur_block.addline(line) - block_to_nlink = cur_block - if not line.breakflow(): - i += 1 - continue - if delayslot: - raise RuntimeError("Cannot have breakflow in delayslot") - if line.dstflow(): - for dst in line.getdstflow(loc_db): - if not isinstance(dst, ExprId): - continue - if dst in mnemo.regs.all_regs_ids: - continue - cur_block.addto(asmblock.AsmConstraint(dst.name, C_TO)) - - if not line.splitflow(): - block_to_nlink = None - - delayslot = line.delayslot + 1 - else: - raise RuntimeError("unknown class %s" % line.__class__) - i += 1 - - for block in asmcfg.blocks: - # Fix multiple constraints - block.fix_constraints() - - # Log block - asmblock.log_asmblock.info(block) - return asmcfg, loc_db diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py deleted file mode 100644 index 8ea4c4ac..00000000 --- 
a/miasm2/core/sembuilder.py +++ /dev/null @@ -1,355 +0,0 @@ -"Helper to quickly build instruction's semantic side effects" - -import inspect -import ast -import re - -from future.utils import PY3 - -import miasm2.expression.expression as m2_expr -from miasm2.ir.ir import IRBlock, AssignBlock - - -class MiasmTransformer(ast.NodeTransformer): - """AST visitor translating DSL to Miasm expression - - memX[Y] -> ExprMem(Y, X) - iX(Y) -> ExprIntX(Y) - X if Y else Z -> ExprCond(Y, X, Z) - 'X'(Y) -> ExprOp('X', Y) - ('X' % Y)(Z) -> ExprOp('X' % Y, Z) - {a, b} -> ExprCompose(((a, 0, a.size), (b, a.size, a.size + b.size))) - """ - - # Parsers - parse_integer = re.compile("^i([0-9]+)$") - parse_mem = re.compile("^mem([0-9]+)$") - - # Visitors - def visit_Call(self, node): - """iX(Y) -> ExprIntX(Y), - 'X'(Y) -> ExprOp('X', Y), ('X' % Y)(Z) -> ExprOp('X' % Y, Z)""" - - # Recursive visit - node = self.generic_visit(node) - if isinstance(node.func, ast.Name): - # iX(Y) -> ExprInt(Y, X) - fc_name = node.func.id - - # Match the function name - new_name = fc_name - integer = self.parse_integer.search(fc_name) - - # Do replacement - if integer is not None: - size = int(integer.groups()[0]) - new_name = "ExprInt" - # Replace in the node - node.func.id = new_name - node.args.append(ast.Num(n=size)) - - elif (isinstance(node.func, ast.Str) or - (isinstance(node.func, ast.BinOp) and - isinstance(node.func.op, ast.Mod) and - isinstance(node.func.left, ast.Str))): - # 'op'(args...) -> ExprOp('op', args...) - # ('op' % (fmt))(args...) -> ExprOp('op' % (fmt), args...) 
- op_name = node.func - - # Do replacement - node.func = ast.Name(id="ExprOp", ctx=ast.Load()) - node.args[0:0] = [op_name] - - return node - - def visit_Subscript(self, node): - """memX[Y] -> ExprMem(Y, X)""" - - # Recursive visit - node = self.generic_visit(node) - - # Detect the syntax - if not isinstance(node.value, ast.Name): - return node - name = node.value.id - mem = self.parse_mem.search(name) - if mem is None: - return node - - # Do replacement - addr = self.visit(node.slice.value) - call = ast.Call(func=ast.Name(id='ExprMem', ctx=ast.Load()), - args=[addr, ast.Num(n=int(mem.groups()[0]))], - keywords=[], starargs=None, kwargs=None) - return call - - def visit_IfExp(self, node): - """X if Y else Z -> ExprCond(Y, X, Z)""" - # Recursive visit - node = self.generic_visit(node) - - # Build the new ExprCond - call = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), - args=[self.visit(node.test), - self.visit(node.body), - self.visit(node.orelse)], - keywords=[], starargs=None, kwargs=None) - return call - - def visit_Set(self, node): - "{a, b} -> ExprCompose(a, b)" - if len(node.elts) == 0: - return node - - # Recursive visit - node = self.generic_visit(node) - - return ast.Call(func=ast.Name(id='ExprCompose', - ctx=ast.Load()), - args=node.elts, - keywords=[], - starargs=None, - kwargs=None) - -if PY3: - def get_arg_name(name): - return name.arg - def gen_arg(name, ctx): - return ast.arg(arg=name, ctx=ctx) -else: - def get_arg_name(name): - return name.id - def gen_arg(name, ctx): - return ast.Name(id=name, ctx=ctx) - - -class SemBuilder(object): - """Helper for building instruction's semantic side effects method - - This class provides a decorator @parse to use on them. 
- The context in which the function will be parsed must be supplied on - instantiation - """ - - def __init__(self, ctx): - """Create a SemBuilder - @ctx: context dictionary used during parsing - """ - # Init - self.transformer = MiasmTransformer() - self._ctx = dict(m2_expr.__dict__) - self._ctx["IRBlock"] = IRBlock - self._ctx["AssignBlock"] = AssignBlock - self._functions = {} - - # Update context - self._ctx.update(ctx) - - @property - def functions(self): - """Return a dictionary name -> func of parsed functions""" - return self._functions.copy() - - @staticmethod - def _create_labels(loc_else=False): - """Return the AST standing for label creations - @loc_else (optional): if set, create a label 'loc_else'""" - loc_end = "loc_end = ir.get_next_loc_key(instr)" - loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" - out = ast.parse(loc_end).body - out += ast.parse(loc_end_expr).body - loc_if = "loc_if = ir.loc_db.add_location()" - loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" - out += ast.parse(loc_if).body - out += ast.parse(loc_if_expr).body - if loc_else: - loc_else = "loc_else = ir.loc_db.add_location()" - loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" - out += ast.parse(loc_else).body - out += ast.parse(loc_else_expr).body - return out - - def _parse_body(self, body, argument_names): - """Recursive function transforming a @body to a block expression - Return: - - AST to append to body (real python statements) - - a list of blocks, ie list of affblock, ie list of ExprAssign (AST)""" - - # Init - ## Real instructions - real_body = [] - ## Final blocks - blocks = [[[]]] - - for statement in body: - - if isinstance(statement, ast.Assign): - src = self.transformer.visit(statement.value) - dst = self.transformer.visit(statement.targets[0]) - - if (isinstance(dst, ast.Name) and - dst.id not in argument_names and - dst.id not in self._ctx and - dst.id not in self._local_ctx): - - # Real variable declaration - 
statement.value = src - real_body.append(statement) - self._local_ctx[dst.id] = src - continue - - dst.ctx = ast.Load() - - res = ast.Call(func=ast.Name(id='ExprAssign', - ctx=ast.Load()), - args=[dst, src], - keywords=[], - starargs=None, - kwargs=None) - - blocks[-1][-1].append(res) - - elif (isinstance(statement, ast.Expr) and - isinstance(statement.value, ast.Str)): - # String (docstring, comment, ...) -> keep it - real_body.append(statement) - - elif isinstance(statement, ast.If): - # Create jumps : ir.IRDst = loc_if if cond else loc_end - # if .. else .. are also handled - cond = statement.test - real_body += self._create_labels(loc_else=True) - - loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load()) - loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load()) - loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \ - if statement.orelse else loc_end - dst = ast.Call(func=ast.Name(id='ExprCond', - ctx=ast.Load()), - args=[cond, - loc_if, - loc_else], - keywords=[], - starargs=None, - kwargs=None) - - if (isinstance(cond, ast.UnaryOp) and - isinstance(cond.op, ast.Not)): - ## if not cond -> switch exprCond - dst.args[1:] = dst.args[1:][::-1] - dst.args[0] = cond.operand - - IRDst = ast.Attribute(value=ast.Name(id='ir', - ctx=ast.Load()), - attr='IRDst', ctx=ast.Load()) - blocks[-1][-1].append(ast.Call(func=ast.Name(id='ExprAssign', - ctx=ast.Load()), - args=[IRDst, dst], - keywords=[], - starargs=None, - kwargs=None)) - - # Create the new blocks - elements = [(statement.body, 'loc_if')] - if statement.orelse: - elements.append((statement.orelse, 'loc_else')) - for content, loc_name in elements: - sub_blocks, sub_body = self._parse_body(content, - argument_names) - if len(sub_blocks) > 1: - raise RuntimeError("Imbricated if unimplemented") - - ## Close the last block - jmp_end = ast.Call(func=ast.Name(id='ExprAssign', - ctx=ast.Load()), - args=[IRDst, loc_end], - keywords=[], - starargs=None, - kwargs=None) - sub_blocks[-1][-1].append(jmp_end) - - - instr = 
ast.Name(id='instr', ctx=ast.Load()) - effects = ast.List(elts=sub_blocks[-1][-1], - ctx=ast.Load()) - assignblk = ast.Call(func=ast.Name(id='AssignBlock', - ctx=ast.Load()), - args=[effects, instr], - keywords=[], - starargs=None, - kwargs=None) - - - ## Replace the block with a call to 'IRBlock' - loc_if_name = ast.Name(id=loc_name, ctx=ast.Load()) - - assignblks = ast.List(elts=[assignblk], - ctx=ast.Load()) - - sub_blocks[-1] = ast.Call(func=ast.Name(id='IRBlock', - ctx=ast.Load()), - args=[loc_if_name, - assignblks], - keywords=[], - starargs=None, - kwargs=None) - blocks += sub_blocks - real_body += sub_body - - # Prepare a new block for following statement - blocks.append([[]]) - - else: - # TODO: real var, +=, /=, -=, <<=, >>=, if/else, ... - raise RuntimeError("Unimplemented %s" % statement) - - return blocks, real_body - - def parse(self, func): - """Function decorator, returning a correct method from a pseudo-Python - one""" - - # Get the function AST - parsed = ast.parse(inspect.getsource(func)) - fc_ast = parsed.body[0] - argument_names = [get_arg_name(name) for name in fc_ast.args.args] - - # Init local cache - self._local_ctx = {} - - # Translate (blocks[0][0] is the current instr) - blocks, body = self._parse_body(fc_ast.body, argument_names) - - # Build the new function - fc_ast.args.args[0:0] = [ - gen_arg('ir', ast.Param()), - gen_arg('instr', ast.Param()) - ] - cur_instr = blocks[0][0] - if len(blocks[-1][0]) == 0: - ## Last block can be empty - blocks.pop() - other_blocks = blocks[1:] - body.append(ast.Return(value=ast.Tuple(elts=[ast.List(elts=cur_instr, - ctx=ast.Load()), - ast.List(elts=other_blocks, - ctx=ast.Load())], - ctx=ast.Load()))) - - ret = ast.Module([ast.FunctionDef(name=fc_ast.name, - args=fc_ast.args, - body=body, - decorator_list=[])]) - - # To display the generated function, use codegen.to_source - # codegen: https://github.com/andreif/codegen - - # Compile according to the context - fixed = ast.fix_missing_locations(ret) - 
codeobj = compile(fixed, '', 'exec') - ctx = self._ctx.copy() - eval(codeobj, ctx) - - # Get the function back - self._functions[fc_ast.name] = ctx[fc_ast.name] - return ctx[fc_ast.name] diff --git a/miasm2/core/types.py b/miasm2/core/types.py deleted file mode 100644 index b915c27f..00000000 --- a/miasm2/core/types.py +++ /dev/null @@ -1,1693 +0,0 @@ -"""This module provides classes to manipulate pure C types as well as their -representation in memory. A typical usecase is to use this module to -easily manipylate structures backed by a VmMngr object (a miasm sandbox virtual -memory): - - class ListNode(MemStruct): - fields = [ - ("next", Ptr(", ),]; creates fields that correspond to - certain bits of the field; analogous to a Union of Bits (see Bits below) - - Str: a character string, with an encoding; not directly mapped to a C - type, it is a higher level notion provided for ease of use - - Void: analogous to C void, can be a placeholder in void*-style cases. - - Self: special marker to reference a Struct inside itself (FIXME: to - remove?) - -And some less common types: - - - Bits: mask only some bits of a Num - - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a - standard C type) - -For each type, the `.lval` property returns a MemType subclass that -allows to access the field in memory. 
- - -The easiest way to use the API to declare and manipulate new structures is to -subclass MemStruct and define a list of (, ): - - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -And access the fields: - - mstruct = MyStruct(jitter.vm, addr) - mstruct.num = 3 - assert mstruct.num == 3 - mstruct.other.val = addr2 - # Also works: - mstruct.other = addr2 - mstruct.other.deref = OtherStruct(jitter.vm, addr) - -MemUnion and MemBitField can also be subclassed, the `fields` field being -in the format expected by, respectively, Union and BitField. - -The `addr` argument can be omitted if an allocator is set, in which case the -structure will be automatically allocated in memory: - - my_heap = miasm2.os_dep.common.heap() - # the allocator is a func(VmMngr) -> integer_address - set_allocator(my_heap) - -Note that some structures (e.g. MemStr or MemArray) do not have a static -size and cannot be allocated automatically. -""" - -from builtins import range, zip -from builtins import int as int_types -import itertools -import logging -import struct -from future.utils import PY3 -from future.utils import viewitems, with_metaclass - -log = logging.getLogger(__name__) -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - -# Cache for dynamically generated MemTypes -DYN_MEM_STRUCT_CACHE = {} - -def set_allocator(alloc_func): - """Shorthand to set the default allocator of MemType. See - MemType.set_allocator doc for more information. 
- """ - MemType.set_allocator(alloc_func) - - -# Helpers - -def to_type(obj): - """If possible, return the Type associated with @obj, otherwise raises - a ValueError. - - Works with a Type instance (returns obj) or a MemType subclass or instance - (returns obj.get_type()). - """ - # obj is a python type - if isinstance(obj, type): - if issubclass(obj, MemType): - if obj.get_type() is None: - raise ValueError("%r has no static type; use a subclasses " - "with a non null _type or use a " - "Type instance" % obj) - return obj.get_type() - # obj is not not a type - else: - if isinstance(obj, Type): - return obj - elif isinstance(obj, MemType): - return obj.get_type() - raise ValueError("%r is not a Type or a MemType" % obj) - -def indent(s, size=4): - """Indent a string with @size spaces""" - return ' '*size + ('\n' + ' '*size).join(s.split('\n')) - - -# String generic getter/setter/len-er -# TODO: make miasm2.os_dep.common and jitter ones use these ones - -def get_str(vm, addr, enc, max_char=None, end=u'\x00'): - """Get a @end (by default '\\x00') terminated @enc encoded string from a - VmMngr. - - For example: - - get_str(vm, addr, "ascii") will read "foo\\x00" in memory and - return u"foo" - - get_str(vm, addr, "utf-16le") will read "f\\x00o\\x00o\\x00\\x00\\x00" - in memory and return u"foo" as well. - - Setting @max_char= and @end='' allows to read non null terminated strings - from memory. - - @vm: VmMngr instance - @addr: the address at which to read the string - @enc: the encoding of the string to read. - @max_char: max number of bytes to get in memory - @end: the unencoded ending sequence of the string, by default "\\x00". - Unencoded here means that the actual ending sequence that this function - will look for is end.encode(enc), not directly @end. 
- """ - s = [] - end_char= end.encode(enc) - step = len(end_char) - i = 0 - while max_char is None or i < max_char: - c = vm.get_mem(addr + i, step) - if c == end_char: - break - s.append(c) - i += step - return b''.join(s).decode(enc) - -def raw_str(s, enc, end=u'\x00'): - """Returns a string representing @s as an @end (by default \\x00) - terminated @enc encoded string. - - @s: the unicode str to serialize - @enc: the encoding to apply to @s and @end before serialization. - @end: the ending string/character to append to the string _before encoding_ - and serialization (by default '\\x00') - """ - return (s + end).encode(enc) - -def set_str(vm, addr, s, enc, end=u'\x00'): - """Encode a string to an @end (by default \\x00) terminated @enc encoded - string and set it in a VmMngr memory. - - @vm: VmMngr instance - @addr: start address to serialize the string to - @s: the unicode str to serialize - @enc: the encoding to apply to @s and @end before serialization. - @end: the ending string/character to append to the string _before encoding_ - and serialization (by default '\\x00') - """ - s = raw_str(s, enc, end=end) - vm.set_mem(addr, s) - -def raw_len(py_unic_str, enc, end=u'\x00'): - """Returns the length in bytes of @py_unic_str in memory (once @end has been - added and the full str has been encoded). It returns exactly the room - necessary to call set_str with similar arguments. - - @py_unic_str: the unicode str to work with - @enc: the encoding to encode @py_unic_str to - @end: the ending string/character to append to the string _before encoding_ - (by default \\x00) - """ - return len(raw_str(py_unic_str, enc)) - -def enc_triplet(enc, max_char=None, end=u'\x00'): - """Returns a triplet of functions (get_str_enc, set_str_enc, raw_len_enc) - for a given encoding (as needed by Str to add an encoding). 
The prototypes - are: - - - get_str_end: same as get_str without the @enc argument - - set_str_end: same as set_str without the @enc argument - - raw_len_enc: same as raw_len without the @enc argument - """ - return ( - lambda vm, addr, max_char=max_char, end=end: \ - get_str(vm, addr, enc, max_char=max_char, end=end), - lambda vm, addr, s, end=end: set_str(vm, addr, s, enc, end=end), - lambda s, end=end: raw_len(s, enc, end=end), - ) - - -# Type classes - -class Type(object): - """Base class to provide methods to describe a type, as well as how to set - and get fields from virtual mem. - - Each Type subclass is linked to a MemType subclass (e.g. Struct to - MemStruct, Ptr to MemPtr, etc.). - - When nothing is specified, MemValue is used to access the type in memory. - MemValue instances have one `.val` field, setting and getting it call - the set and get of the Type. - - Subclasses can either override _pack and _unpack, or get and set if data - serialization requires more work (see Struct implementation for an example). - - TODO: move any trace of vm and addr out of these classes? - """ - - _self_type = None - - def _pack(self, val): - """Serializes the python value @val to a raw str""" - raise NotImplementedError() - - def _unpack(self, raw_str): - """Deserializes a raw str to an object representing the python value - of this field. - """ - raise NotImplementedError() - - def set(self, vm, addr, val): - """Set a VmMngr memory from a value. - - @vm: VmMngr instance - @addr: the start address in memory to set - @val: the python value to serialize in @vm at @addr - """ - raw = self._pack(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size) - return self._unpack(raw) - - @property - def lval(self): - """Returns a class with a (vm, addr) constructor that allows to - interact with this type in memory. 
- - In compilation terms, it returns a class allowing to instantiate an - lvalue of this type. - - @return: a MemType subclass. - """ - if self in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[self] - pinned_type = self._build_pinned_type() - DYN_MEM_STRUCT_CACHE[self] = pinned_type - return pinned_type - - def _build_pinned_type(self): - """Builds the MemType subclass allowing to interact with this type. - - Called by self.lval when it is not in cache. - """ - pinned_base_class = self._get_pinned_base_class() - pinned_type = type( - "Mem%r" % self, - (pinned_base_class,), - {'_type': self} - ) - return pinned_type - - def _get_pinned_base_class(self): - """Return the MemType subclass that maps this type in memory""" - return MemValue - - def _get_self_type(self): - """Used for the Self trick.""" - return self._self_type - - def _set_self_type(self, self_type): - """If this field refers to MemSelf/Self, replace it with @self_type - (a Type instance) when using it. Generally not used outside this - module. - """ - self._self_type = self_type - - @property - def size(self): - """Return the size in bytes of the serialized version of this field""" - raise NotImplementedError() - - def __len__(self): - return self.size - - def __neq__(self, other): - return not self == other - - def __eq__(self, other): - raise NotImplementedError("Abstract method") - - def __ne__(self, other): - return not self == other - - -class RawStruct(Type): - """Dumb struct.pack/unpack field. Mainly used to factorize code. - - Value is a tuple corresponding to the struct @fmt passed to the constructor. 
- """ - - def __init__(self, fmt): - self._fmt = fmt - - def _pack(self, fields): - return struct.pack(self._fmt, *fields) - - def _unpack(self, raw_str): - return struct.unpack(self._fmt, raw_str) - - @property - def size(self): - return struct.calcsize(self._fmt) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self._fmt) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._fmt == other._fmt - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.__class__, self._fmt)) - - -class Num(RawStruct): - """Represents a number (integer or float). The number is encoded with - a struct-style format which must represent only one value. - - TODO: use u32, i16, etc. for format. - """ - - def _pack(self, number): - return super(Num, self)._pack([number]) - - def _unpack(self, raw_str): - upck = super(Num, self)._unpack(raw_str) - if len(upck) != 1: - raise ValueError("Num format string unpacks to multiple values, " - "should be 1") - return upck[0] - - -class Ptr(Num): - """Special case of number of which value indicates the address of a - MemType. - - Mapped to MemPtr (see its doc for more info): - - assert isinstance(mystruct.ptr, MemPtr) - mystruct.ptr = 0x4000 # Assign the Ptr numeric value - mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value - assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value - mystruct.ptr.deref # Get the pointed MemType - mystruct.ptr.deref = other # Set the pointed MemType - """ - - def __init__(self, fmt, dst_type, *type_args, **type_kwargs): - """ - @fmt: (str) Num compatible format that will be the Ptr representation - in memory - @dst_type: (MemType or Type) the Type this Ptr points to. - If a Type is given, it is transformed into a MemType with - TheType.lval. - *type_args, **type_kwargs: arguments to pass to the the pointed - MemType when instantiating it (e.g. for MemStr encoding or - MemArray field_type). 
- """ - if (not isinstance(dst_type, Type) and - not (isinstance(dst_type, type) and - issubclass(dst_type, MemType)) and - not dst_type == MemSelf): - raise ValueError("dst_type of Ptr must be a MemType type, a " - "Type instance, the MemSelf marker or a class " - "name.") - super(Ptr, self).__init__(fmt) - if isinstance(dst_type, Type): - # Patch the field to propagate the MemSelf replacement - dst_type._get_self_type = lambda: self._get_self_type() - # dst_type cannot be patched here, since _get_self_type of the outer - # class has not yet been set. Patching dst_type involves calling - # dst_type.lval, which will only return a type that does not point - # on MemSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.lval is not valid here, it is done - # lazily in _fix_dst_type - self._dst_type = dst_type - self._type_args = type_args - self._type_kwargs = type_kwargs - - def _fix_dst_type(self): - if self._dst_type in [MemSelf, SELF_TYPE_INSTANCE]: - if self._get_self_type() is not None: - self._dst_type = self._get_self_type() - else: - raise ValueError("Unsupported usecase for (Mem)Self, sorry") - self._dst_type = to_type(self._dst_type) - - @property - def dst_type(self): - """Return the type (MemType subtype) this Ptr points to.""" - self._fix_dst_type() - return self._dst_type - - def set(self, vm, addr, val): - """A Ptr field can be set with a MemPtr or an int""" - if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): - self.set_val(vm, addr, val.val) - else: - super(Ptr, self).set(vm, addr, val) - - def get(self, vm, addr): - return self.lval(vm, addr) - - def get_val(self, vm, addr): - """Get the numeric value of a Ptr""" - return super(Ptr, self).get(vm, addr) - - def set_val(self, vm, addr, val): - """Set the numeric value of a Ptr""" - return super(Ptr, self).set(vm, addr, val) - - def deref_get(self, vm, addr): - """Deserializes the data in @vm 
(VmMngr) at @addr to self.dst_type. - Equivalent to a pointer dereference rvalue in C. - """ - dst_addr = self.get_val(vm, addr) - return self.dst_type.lval(vm, dst_addr, - *self._type_args, **self._type_kwargs) - - def deref_set(self, vm, addr, val): - """Serializes the @val MemType subclass instance in @vm (VmMngr) at - @addr. Equivalent to a pointer dereference assignment in C. - """ - # Sanity check - if self.dst_type != val.get_type(): - log.warning("Original type was %s, overridden by value of type %s", - self._dst_type.__name__, val.__class__.__name__) - - # Actual job - dst_addr = self.get_val(vm, addr) - vm.set_mem(dst_addr, bytes(val)) - - def _get_pinned_base_class(self): - return MemPtr - - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.dst_type) - - def __eq__(self, other): - return super(Ptr, self).__eq__(other) and \ - self.dst_type == other.dst_type and \ - self._type_args == other._type_args and \ - self._type_kwargs == other._type_kwargs - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((super(Ptr, self).__hash__(), self.dst_type, - self._type_args)) - - -class Struct(Type): - """Equivalent to a C struct type. Composed of a name, and a - (, ) list describing the fields - of the struct. - - Mapped to MemStruct. - - NOTE: The `.lval` property of Struct creates classes on the fly. If an - equivalent structure is created by subclassing MemStruct, an exception - is raised to prevent creating multiple classes designating the same type. - - Example: - s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - - Toto1 = s.lval - - # This raises an exception, because it describes the same structure as - # Toto1 - class Toto(MemStruct): - fields = [("f1", Num("I")), ("f2", Num("I"))] - - Anonymous Struct, Union or BitField can be used if their field name - evaluates to False ("" or None). 
Such anonymous Struct field will generate - fields to the parent Struct, e.g.: - bla = Struct("Bla", [ - ("a", Num("B")), - ("", Union([("b1", Num("B")), ("b2", Num("H"))])), - ("", Struct("", [("c1", Num("B")), ("c2", Num("B"))])), - ] - Will have a b1, b2 and c1, c2 field directly accessible. The anonymous - fields are renamed to "__anon_", with an incremented number. - - In such case, bla.fields will not contain b1, b2, c1 and c2 (only the 3 - actual fields, with the anonymous ones renamed), but bla.all_fields will - return the 3 fields + b1, b2, c1 and c2 (and an information telling if it - has been generated from an anonymous Struct/Union). - - bla.get_field(vm, addr, "b1") will work. - """ - - def __init__(self, name, fields): - self.name = name - # generates self._fields and self._fields_desc - self._gen_fields(fields) - - def _gen_fields(self, fields): - """Precompute useful metadata on self.fields.""" - self._fields_desc = {} - offset = 0 - - # Build a proper (name, Field()) list, handling cases where the user - # supplies a MemType subclass instead of a Type instance - real_fields = [] - uniq_count = 0 - for fname, field in fields: - field = to_type(field) - - # For reflexion - field._set_self_type(self) - - # Anonymous Struct/Union - if not fname and isinstance(field, Struct): - # Generate field information - updated_fields = { - name: { - # Same field type than the anon field subfield - 'field': fd['field'], - # But the current offset is added - 'offset': fd['offset'] + offset, - } - for name, fd in viewitems(field._fields_desc) - } - - # Add the newly generated fields from the anon field - self._fields_desc.update(updated_fields) - real_fields += [(name, fld, True) - for name, fld in field.fields] - - # Rename the anonymous field - fname = '__anon_%x' % uniq_count - uniq_count += 1 - - self._fields_desc[fname] = {"field": field, "offset": offset} - real_fields.append((fname, field, False)) - offset = self._next_offset(field, offset) - - # fields is 
immutable - self._fields = tuple(real_fields) - - def _next_offset(self, field, orig_offset): - return orig_offset + field.size - - @property - def fields(self): - """Returns a sequence of (name, field) describing the fields of this - Struct, in order of offset. - - Fields generated from anonymous Unions or Structs are excluded from - this sequence. - """ - return tuple((name, field) for name, field, anon in self._fields - if not anon) - - @property - def all_fields(self): - """Returns a sequence of (, , ), - where is_anon is True when a field is generated from an anonymous - Struct or Union, and False for the fields that have been provided as is. - """ - return self._fields - - def set(self, vm, addr, val): - raw = bytes(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - return self.lval(vm, addr) - - def get_field(self, vm, addr, name): - """Get a field value by @name and base structure @addr in @vm VmMngr.""" - if name not in self._fields_desc: - raise ValueError("'%s' type has no field '%s'" % (self, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - return field.get(vm, addr + offset) - - def set_field(self, vm, addr, name, val): - """Set a field value by @name and base structure @addr in @vm VmMngr. - @val is the python value corresponding to this field type. 
- """ - if name not in self._fields_desc: - raise AttributeError("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - field.set(vm, addr + offset, val) - - @property - def size(self): - return sum(field.size for _, field in self.fields) - - def get_offset(self, field_name): - """ - @field_name: (str, optional) the name of the field to get the - offset of - """ - if field_name not in self._fields_desc: - raise ValueError("This structure has no %s field" % field_name) - return self._fields_desc[field_name]['offset'] - - def get_field_type(self, name): - """Return the Type subclass instance describing field @name.""" - return self._fields_desc[name]['field'] - - def _get_pinned_base_class(self): - return MemStruct - - def __repr__(self): - return "struct %s" % self.name - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.fields == other.fields and \ - self.name == other.name - - def __ne__(self, other): - return not self == other - - def __hash__(self): - # Only hash name, not fields, because if a field is a Ptr to this - # Struct type, an infinite loop occurs - return hash((self.__class__, self.name)) - - -class Union(Struct): - """Represents a C union. - - Allows to put multiple fields at the same offset in a MemStruct, - similar to unions in C. The Union will have the size of the largest of its - fields. - - Mapped to MemUnion. 
- - Example: - - class Example(MemStruct): - fields = [("uni", Union([ - ("f1", Num("= self.size): - raise IndexError("Index %s out of bounds" % idx) - - def _get_pinned_base_class(self): - if self.is_sized(): - return MemSizedArray - else: - return MemArray - - def __repr__(self): - return "[%r; %s]" % (self.field_type, self.array_len or "unsized") - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.field_type == other.field_type and \ - self.array_len == other.array_len - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.__class__, self.field_type, self.array_len)) - - -class Bits(Type): - """Helper class for BitField, not very useful on its own. Represents some - bits of a Num. - - The @backing_num is used to know how to serialize/deserialize data in vm, - but getting/setting this fields only assign bits from @bit_offset to - @bit_offset + @bits. Masking and shifting is handled by the class, the aim - is to provide a transparent way to set and get some bits of a num. 
- """ - - def __init__(self, backing_num, bits, bit_offset): - if not isinstance(backing_num, Num): - raise ValueError("backing_num should be a Num instance") - self._num = backing_num - self._bits = bits - self._bit_offset = bit_offset - - def set(self, vm, addr, val): - val_mask = (1 << self._bits) - 1 - val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size * 8 - - full_num_mask = (1 << num_size) - 1 - num_mask = (~(val_mask << self._bit_offset)) & full_num_mask - - num_val = self._num.get(vm, addr) - res_val = (num_val & num_mask) | val_shifted - self._num.set(vm, addr, res_val) - - def get(self, vm, addr): - val_mask = (1 << self._bits) - 1 - num_val = self._num.get(vm, addr) - res_val = (num_val >> self._bit_offset) & val_mask - return res_val - - @property - def size(self): - return self._num.size - - @property - def bit_size(self): - """Number of bits read/written by this class""" - return self._bits - - @property - def bit_offset(self): - """Offset in bits (beginning at 0, the LSB) from which to read/write - bits. - """ - return self._bit_offset - - def __repr__(self): - return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, - self._bit_offset, self._bit_offset + self._bits) - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and self._bits == other._bits and \ - self._bit_offset == other._bit_offset - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.__class__, self._num, self._bits, self._bit_offset)) - - -class BitField(Union): - """A C-like bitfield. - - Constructed with a list [(, )] and a - @backing_num. The @backing_num is a Num instance that determines the total - size of the bitfield and the way the bits are serialized/deserialized (big - endian int, little endian short...). Can be seen (and implemented) as a - Union of Bits fields. - - Mapped to MemBitField. 
- - Creates fields that allow to access the bitfield fields easily. Example: - - class Example(MemStruct): - fields = [("bf", BitField(Num("B"), [ - ("f1", 2), - ("f2", 4), - ("f3", 1) - ]) - )] - - ex = Example(vm, addr) - ex.memset() - ex.f2 = 2 - ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated - assert ex.f1 == 3 - assert ex.f2 == 2 - assert ex.f3 == 0 # previously memset() - assert ex.bf == 3 + 2 << 2 - """ - - def __init__(self, backing_num, bit_list): - """@backing num: Num instance, @bit_list: [(name, n_bits)]""" - self._num = backing_num - fields = [] - offset = 0 - for name, bits in bit_list: - fields.append((name, Bits(self._num, bits, offset))) - offset += bits - if offset > self._num.size == 8: - raise ValueError("sum of bit lengths is > to the backing num size") - super(BitField, self).__init__(fields) - - def set(self, vm, addr, val): - self._num.set(vm, addr, val) - - def _get_pinned_base_class(self): - return MemBitField - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and super(BitField, self).__eq__(other) - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((super(BitField, self).__hash__(), self._num)) - - def __repr__(self): - fields_repr = ', '.join("%s: %r" % (name, field.bit_size) - for name, field in self.fields) - return "%s(%s)" % (self.__class__.__name__, fields_repr) - - -class Str(Type): - """A string type that handles encoding. This type is unsized (no static - size). - - The @encoding is passed to the constructor, and is one of the keys of - Str.encodings, currently: - - ascii - - latin1 - - ansi (= latin1) - - utf8 (= utf-8le) - - utf16 (= utf-16le, Windows UCS-2 compatible) - New encodings can be added with Str.add_encoding. - If an unknown encoding is passed to the constructor, Str will try to add it - to the available ones with Str.add_encoding. - - Mapped to MemStr. 
- """ - - # Dict of {name: (getter, setter, raw_len)} - # Where: - # - getter(vm, addr) -> unicode - # - setter(vm, addr, unicode) - # - raw_len(unicode_str) -> int (length of the str value one encoded in - # memory) - # See enc_triplet() - # - # NOTE: this appears like it could be implemented only with - # (getter, raw_str), but this would cause trouble for length-prefixed str - # encoding (Pascal-style strings). - encodings = { - "ascii": enc_triplet("ascii"), - "latin1": enc_triplet("latin1"), - "ansi": enc_triplet("latin1"), - "utf8": enc_triplet("utf8"), - "utf16": enc_triplet("utf-16le"), - } - - def __init__(self, encoding="ansi"): - if encoding not in self.encodings: - self.add_encoding(encoding) - self._enc = encoding - - @classmethod - def add_encoding(cls, enc_name, str_enc=None, getter=None, setter=None, - raw_len=None): - """Add an available Str encoding. - - @enc_name: the name that will be used to designate this encoding in the - Str constructor - @str_end: (optional) the actual str encoding name if it differs from - @enc_name - @getter: (optional) func(vm, addr) -> unicode, to force usage of this - function to retrieve the str from memory - @setter: (optional) func(vm, addr, unicode), to force usage of this - function to set the str in memory - @raw_len: (optional) func(unicode_str) -> int (length of the str value - one encoded in memory), to force usage of this function to compute - the length of this string once in memory - """ - default = enc_triplet(str_enc or enc_name) - actual = ( - getter or default[0], - setter or default[1], - raw_len or default[2], - ) - cls.encodings[enc_name] = actual - - def get(self, vm, addr): - """Set the string value in memory""" - get_str = self.encodings[self.enc][0] - return get_str(vm, addr) - - def set(self, vm, addr, s): - """Get the string value from memory""" - set_str = self.encodings[self.enc][1] - set_str(vm, addr, s) - - @property - def size(self): - """This type is unsized.""" - raise ValueError("Str is 
unsized") - - def value_size(self, py_str): - """Returns the in-memory size of a @py_str for this Str type (handles - encoding, i.e. will not return the same size for "utf16" and "ansi"). - """ - raw_len = self.encodings[self.enc][2] - return raw_len(py_str) - - @property - def enc(self): - """This Str's encoding name (as a str).""" - return self._enc - - def _get_pinned_base_class(self): - return MemStr - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.enc) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._enc == other._enc - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash((self.__class__, self._enc)) - - -class Void(Type): - """Represents the C void type. - - Mapped to MemVoid. - """ - - def _build_pinned_type(self): - return MemVoid - - def __eq__(self, other): - return self.__class__ == other.__class__ - - def __ne__(self, other): - return not self == other - - def __hash__(self): - return hash(self.__class__) - - def __repr__(self): - return self.__class__.__name__ - - -class Self(Void): - """Special marker to reference a type inside itself. - - Mapped to MemSelf. - - Example: - class ListNode(MemStruct): - fields = [ - ("next", Ptr(" allocated_address - allocator = None - - _type = None - - def __init__(self, vm, addr=None, type_=None): - self._vm = vm - if addr is None: - self._addr = self.alloc(vm, self.get_size()) - else: - self._addr = addr - if type_ is not None: - self._type = type_ - if self._type is None: - raise ValueError("Subclass MemType and define cls._type or pass " - "a type to the constructor") - - @classmethod - def alloc(cls, vm, size): - """Returns an allocated page of size @size if cls.allocator is set. - Raises ValueError otherwise. - """ - if cls.allocator is None: - raise ValueError("Cannot provide None address to MemType() if" - "%s.set_allocator has not been called." 
- % __name__) - return cls.allocator(vm, size) - - @classmethod - def set_allocator(cls, alloc_func): - """Set an allocator for this class; allows to instantiate statically - sized MemTypes (i.e. sizeof() is implemented) without specifying the - address (the object is allocated by @alloc_func in the vm). - - You may call set_allocator on specific MemType classes if you want - to use a different allocator. - - @alloc_func: func(VmMngr) -> integer_address - """ - cls.allocator = alloc_func - - def get_addr(self, field=None): - """Return the address of this MemType or one of its fields. - - @field: (str, optional) used by subclasses to specify the name or index - of the field to get the address of - """ - if field is not None: - raise NotImplementedError("Getting a field's address is not " - "implemented for this class.") - return self._addr - - @classmethod - def get_type(cls): - """Returns the Type subclass instance representing the C type of this - MemType. - """ - return cls._type - - @classmethod - def sizeof(cls): - """Return the static size of this type. By default, it is the size - of the underlying Type. - """ - return cls._type.size - - def get_size(self): - """Return the dynamic size of this structure (e.g. the size of an - instance). Defaults to sizeof for this base class. - - For example, MemStr defines get_size but not sizeof, as an instance - has a fixed size (at least its value has), but all the instance do not - have the same size. - """ - return self.sizeof() - - def memset(self, byte=b'\x00'): - """Fill the memory space of this MemType with @byte ('\x00' by - default). The size is retrieved with self.get_size() (dynamic size). - """ - # TODO: multibyte patterns - if not isinstance(byte, bytes) or len(byte) != 1: - raise ValueError("byte must be a 1-lengthed str") - self._vm.set_mem(self.get_addr(), byte * self.get_size()) - - def cast(self, other_type): - """Cast this MemType to another MemType (same address, same vm, - but different type). 
Return the casted MemType. - - @other_type: either a Type instance (other_type.lval is used) or a - MemType subclass - """ - if isinstance(other_type, Type): - other_type = other_type.lval - return other_type(self._vm, self.get_addr()) - - def cast_field(self, field, other_type, *type_args, **type_kwargs): - """ABSTRACT: Same as cast, but the address of the returned MemType - is the address at which @field is in the current MemType. - - @field: field specification, for example its name for a struct, or an - index in an array. See the subclass doc. - @other_type: either a Type instance (other_type.lval is used) or a - MemType subclass - """ - raise NotImplementedError("Abstract") - - def raw(self): - """Raw binary (str) representation of the MemType as it is in - memory. - """ - return self._vm.get_mem(self.get_addr(), self.get_size()) - - def __len__(self): - return self.get_size() - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __bytes__(self): - return self.raw() - - def __repr__(self): - return "Mem%r" % self._type - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.get_type() == other.get_type() and \ - bytes(self) == bytes(other) - - def __ne__(self, other): - return not self == other - - -class MemValue(MemType): - """Simple MemType that gets and sets the Type through the `.val` - attribute. - """ - - @property - def val(self): - return self._type.get(self._vm, self._addr) - - @val.setter - def val(self, value): - self._type.set(self._vm, self._addr, value) - - def __repr__(self): - return "%r: %r" % (self.__class__, self.val) - - -class MemStruct(with_metaclass(_MetaMemStruct, MemType)): - """Base class to easily implement VmMngr backed C-like structures in miasm. - Represents a structure in virtual memory. - - The mechanism is the following: - - set a "fields" class field to be a list of - (, ) - - instances of this class will have properties to interact with these - fields. 
- - Example: - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - - mstruct = MyStruct(vm, addr) - - # Field assignment modifies virtual memory - mstruct.num = 3 - assert mstruct.num == 3 - memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), - 4))[0] - assert memval == mstruct.num - - # Memset sets the whole structure - mstruct.memset() - assert mstruct.num == 0 - mstruct.memset('\x11') - assert mstruct.num == 0x11111111 - - other = OtherStruct(vm, addr2) - mstruct.other = other.get_addr() - assert mstruct.other.val == other.get_addr() - assert mstruct.other.deref == other - assert mstruct.other.deref.foo == 0x1234 - - Note that: - MyStruct = Struct("MyStruct", ).lval - is equivalent to the previous MyStruct declaration. - - See the various Type-s doc for more information. See MemStruct.gen_fields - doc for more information on how to handle recursive types and cyclic - dependencies. - """ - fields = None - - def get_addr(self, field_name=None): - """ - @field_name: (str, optional) the name of the field to get the - address of - """ - if field_name is not None: - offset = self._type.get_offset(field_name) - else: - offset = 0 - return self._addr + offset - - @classmethod - def get_offset(cls, field_name): - """Shorthand for self.get_type().get_offset(field_name).""" - return cls.get_type().get_offset(field_name) - - def get_field(self, name): - """Get a field value by name. - - useless most of the time since fields are accessible via self.. - """ - return self._type.get_field(self._vm, self.get_addr(), name) - - def set_field(self, name, val): - """Set a field value by name. @val is the python value corresponding to - this field type. 
- - useless most of the time since fields are accessible via self.. - """ - return self._type.set_field(self._vm, self.get_addr(), name, val) - - def cast_field(self, field, other_type): - """In this implementation, @field is a field name""" - if isinstance(other_type, Type): - other_type = other_type.lval - return other_type(self._vm, self.get_addr(field)) - - # Field generation method, voluntarily public to be able to gen fields - # after class definition - @classmethod - def gen_fields(cls, fields=None): - """Generate the fields of this class (so that they can be accessed with - self.) from a @fields list, as described in the class doc. - - Useful in case of a type cyclic dependency. For example, the following - is not possible in python: - - class A(MemStruct): - fields = [("b", Ptr("I", B))] - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - With gen_fields, the following is the legal equivalent: - - class A(MemStruct): - pass - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - A.gen_fields([("b", Ptr("I", B))]) - """ - if fields is not None: - if cls.fields is not None: - raise ValueError("Cannot regen fields of a class. Setting " - "cls.fields at class definition and calling " - "gen_fields are mutually exclusive.") - cls.fields = fields - - if cls._type is None: - if cls.fields is None: - raise ValueError("Cannot create a MemStruct subclass without" - " a cls._type or a cls.fields") - cls._type = cls._gen_type(cls.fields) - - if cls._type in DYN_MEM_STRUCT_CACHE: - # FIXME: Maybe a warning would be better? - raise RuntimeError("Another MemType has the same type as this " - "one. Use it instead.") - - # Register this class so that another one will not be created when - # calling cls._type.lval - DYN_MEM_STRUCT_CACHE[cls._type] = cls - - cls._gen_attributes() - - @classmethod - def _gen_attributes(cls): - # Generate self. 
getter and setters - for name, _, _ in cls._type.all_fields: - setattr(cls, name, property( - lambda self, name=name: self.get_field(name), - lambda self, val, name=name: self.set_field(name, val) - )) - - @classmethod - def _gen_type(cls, fields): - return Struct(cls.__name__, fields) - - def __repr__(self): - out = [] - for name, field in self._type.fields: - val_repr = repr(self.get_field(name)) - if '\n' in val_repr: - val_repr = '\n' + indent(val_repr, 4) - out.append("%s: %r = %s" % (name, field, val_repr)) - return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) - - -class MemUnion(MemStruct): - """Same as MemStruct but all fields have a 0 offset in the struct.""" - @classmethod - def _gen_type(cls, fields): - return Union(fields) - - -class MemBitField(MemUnion): - """MemUnion of Bits(...) fields.""" - @classmethod - def _gen_type(cls, fields): - return BitField(fields) - - -class MemSelf(MemStruct): - """Special Marker class for reference to current class in a Ptr or Array - (mostly Array of Ptr). See Self doc. - """ - def __repr__(self): - return self.__class__.__name__ - - -class MemVoid(MemType): - """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when - casted to another type. Allows to implement C's "void*" pattern. 
- """ - _type = Void() - - def __repr__(self): - return self.__class__.__name__ - - -class MemPtr(MemValue): - """Mem version of a Ptr, provides two properties: - - val, to set and get the numeric value of the Ptr - - deref, to set and get the pointed type - """ - @property - def val(self): - return self._type.get_val(self._vm, self._addr) - - @val.setter - def val(self, value): - return self._type.set_val(self._vm, self._addr, value) - - @property - def deref(self): - return self._type.deref_get(self._vm, self._addr) - - @deref.setter - def deref(self, val): - return self._type.deref_set(self._vm, self._addr, val) - - def __repr__(self): - return "*%s" % hex(self.val) - - -class MemStr(MemValue): - """Implements a string representation in memory. - - The string value can be got or set (with python str/unicode) through the - self.val attribute. String encoding/decoding is handled by the class, - - This type is dynamically sized only (get_size is implemented, not sizeof). - """ - - def get_size(self): - """This get_size implementation is quite unsafe: it reads the string - underneath to determine the size, it may therefore read a lot of memory - and provoke mem faults (analogous to strlen). - """ - val = self.val - return self.get_type().value_size(val) - - @classmethod - def from_str(cls, vm, py_str): - """Allocates a MemStr with the global allocator with value py_str. - Raises a ValueError if allocator is not set. - """ - size = cls._type.value_size(py_str) - addr = cls.alloc(vm, size) - memstr = cls(vm, addr) - memstr.val = py_str - return memstr - - def raw(self): - raw = self._vm.get_mem(self.get_addr(), self.get_size()) - return raw - - def __repr__(self): - return "%r: %r" % (self.__class__, self.val) - - -class MemArray(MemType): - """An unsized array of type @field_type (a Type subclass instance). - This class has no static or dynamic size. 
- - It can be indexed for setting and getting elements, example: - - array = Array(Num("I")).lval(vm, addr)) - array[2] = 5 - array[4:8] = [0, 1, 2, 3] - print array[20] - """ - - @property - def field_type(self): - """Return the Type subclass instance that represents the type of - this MemArray items. - """ - return self.get_type().field_type - - def get_addr(self, idx=0): - return self._addr + self.get_type().get_offset(idx) - - @classmethod - def get_offset(cls, idx): - """Shorthand for self.get_type().get_offset(idx).""" - return cls.get_type().get_offset(idx) - - def __getitem__(self, idx): - return self.get_type().get_item(self._vm, self._addr, idx) - - def __setitem__(self, idx, item): - self.get_type().set_item(self._vm, self._addr, idx, item) - - def raw(self): - raise ValueError("%s is unsized, which prevents from getting its full " - "raw representation. Use MemSizedArray instead." % - self.__class__) - - def __repr__(self): - return "[%r, ...] [%r]" % (self[0], self.field_type) - - -class MemSizedArray(MemArray): - """A fixed size MemArray. - - This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).lval. 
- """ - - @property - def array_len(self): - """The length, in number of elements, of this array.""" - return self.get_type().array_len - - def get_size(self): - return self.get_type().size - - def __iter__(self): - for i in range(self.get_type().array_len): - yield self[i] - - def raw(self): - return self._vm.get_mem(self.get_addr(), self.get_size()) - - def __repr__(self): - item_reprs = [repr(item) for item in self] - if self.array_len > 0 and '\n' in item_reprs[0]: - items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' - else: - items = ', '.join(item_reprs) - return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) - diff --git a/miasm2/core/utils.py b/miasm2/core/utils.py deleted file mode 100644 index 9856d4f2..00000000 --- a/miasm2/core/utils.py +++ /dev/null @@ -1,234 +0,0 @@ -from __future__ import print_function -from builtins import range -import struct -import inspect -from collections import MutableMapping as DictMixin - -from operator import itemgetter -import codecs - -from future.utils import viewitems - -import collections - -upck8 = lambda x: struct.unpack('B', x)[0] -upck16 = lambda x: struct.unpack('H', x)[0] -upck32 = lambda x: struct.unpack('I', x)[0] -upck64 = lambda x: struct.unpack('Q', x)[0] -pck8 = lambda x: struct.pack('B', x) -pck16 = lambda x: struct.pack('H', x) -pck32 = lambda x: struct.pack('I', x) -pck64 = lambda x: struct.pack('Q', x) - -# Little endian -upck8le = lambda x: struct.unpack('B', x)[0] -upck16be = lambda x: struct.unpack('>H', x)[0] -upck32be = lambda x: struct.unpack('>I', x)[0] -upck64be = lambda x: struct.unpack('>Q', x)[0] -pck8be = lambda x: struct.pack('>B', x) -pck16be = lambda x: struct.pack('>H', x) -pck32be = lambda x: struct.pack('>I', x) -pck64be = lambda x: struct.pack('>Q', x) - - -LITTLE_ENDIAN = 1 -BIG_ENDIAN = 2 - - -pck = {8: pck8, - 16: pck16, - 32: pck32, - 64: pck64} - - -def get_caller_name(caller_num=0): - """Get the nth caller's name - @caller_num: 0 = the caller of 
get_caller_name, 1 = next parent, ...""" - pystk = inspect.stack() - if len(pystk) > 1 + caller_num: - return pystk[1 + caller_num][3] - else: - return "Bad caller num" - - -def whoami(): - """Returns the caller's name""" - return get_caller_name(1) - - -class Disasm_Exception(Exception): - pass - - -def printable(string): - if isinstance(string, bytes): - return "".join( - c.decode() if b" " <= c < b"~" else "." - for c in (string[i:i+1] for i in range(len(string))) - ) - return string - - -def force_bytes(value): - try: - return value.encode() - except AttributeError: - return value - - -def iterbytes(string): - for i in range(len(string)): - yield string[i:i+1] - - -def int_to_byte(value): - return struct.pack('B', value) - -def cmp_elts(elt1, elt2): - return (elt1 > elt2) - (elt1 < elt2) - - -_DECODE_HEX = codecs.getdecoder("hex_codec") -_ENCODE_HEX = codecs.getencoder("hex_codec") - -def decode_hex(value): - return _DECODE_HEX(value)[0] - -def encode_hex(value): - return _ENCODE_HEX(value)[0] - - -def hexdump(src, length=16): - lines = [] - for c in range(0, len(src), length): - chars = src[c:c + length] - hexa = ' '.join("%02x" % ord(x) for x in iterbytes(chars)) - printable = ''.join( - x.decode() if 32 <= ord(x) <= 126 else '.' for x in iterbytes(chars) - ) - lines.append("%04x %-*s %s\n" % (c, length * 3, hexa, printable)) - print(''.join(lines)) - - -# stackoverflow.com/questions/2912231 -class keydefaultdict(collections.defaultdict): - - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - value = self[key] = self.default_factory(key) - return value - - -class BoundedDict(DictMixin): - - """Limited in size dictionary. - - To reduce combinatory cost, once an upper limit @max_size is reached, - @max_size - @min_size elements are suppressed. - The targeted elements are the less accessed. 
- - One can define a callback called when an element is removed - """ - - def __init__(self, max_size, min_size=None, initialdata=None, - delete_cb=None): - """Create a BoundedDict - @max_size: maximum size of the dictionary - @min_size: (optional) number of most used element to keep when resizing - @initialdata: (optional) dict instance with initial data - @delete_cb: (optional) callback called when an element is removed - """ - self._data = initialdata.copy() if initialdata else {} - self._min_size = min_size if min_size else max_size // 3 - self._max_size = max_size - self._size = len(self._data) - # Do not use collections.Counter as it is quite slow - self._counter = {k: 1 for k in self._data} - self._delete_cb = delete_cb - - def __setitem__(self, asked_key, value): - if asked_key not in self._data: - # Update internal size and use's counter - self._size += 1 - - # Bound can only be reached on a new element - if (self._size >= self._max_size): - most_common = sorted( - viewitems(self._counter), - key=itemgetter(1), - reverse=True - ) - - # Handle callback - if self._delete_cb is not None: - for key, _ in most_common[self._min_size - 1:]: - self._delete_cb(key) - - # Keep only the most @_min_size used - self._data = {key: self._data[key] - for key, _ in most_common[:self._min_size - 1]} - self._size = self._min_size - - # Reset use's counter - self._counter = {k: 1 for k in self._data} - - # Avoid rechecking in dict: set to 1 here, add 1 otherwise - self._counter[asked_key] = 1 - else: - self._counter[asked_key] += 1 - - self._data[asked_key] = value - - def __contains__(self, key): - # Do not call has_key to avoid adding function call overhead - return key in self._data - - def has_key(self, key): - return key in self._data - - def keys(self): - "Return the list of dict's keys" - return list(self._data) - - @property - def data(self): - "Return the current instance as a dictionary" - return self._data - - def __getitem__(self, key): - # Retrieve data first to 
raise the proper exception on error - data = self._data[key] - # Should never raise, since the key is in self._data - self._counter[key] += 1 - return data - - def __delitem__(self, key): - if self._delete_cb is not None: - self._delete_cb(key) - del self._data[key] - self._size -= 1 - del self._counter[key] - - def __del__(self): - """Ensure the callback is called when last reference is lost""" - if self._delete_cb: - for key in self._data: - self._delete_cb(key) - - - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) diff --git a/miasm2/expression/__init__.py b/miasm2/expression/__init__.py deleted file mode 100644 index 67f567f7..00000000 --- a/miasm2/expression/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -"Intermediate language implementation" diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py deleted file mode 100644 index 03febbfd..00000000 --- a/miasm2/expression/expression.py +++ /dev/null @@ -1,2035 +0,0 @@ -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# These module implements Miasm IR components and basic operations related. 
-# IR components are : -# - ExprInt -# - ExprId -# - ExprLoc -# - ExprAssign -# - ExprCond -# - ExprMem -# - ExprOp -# - ExprSlice -# - ExprCompose -# - - -from builtins import zip -from builtins import range -import warnings -import itertools -from builtins import int as int_types -from functools import cmp_to_key, total_ordering -from future.utils import viewitems - -from miasm2.core.utils import force_bytes, cmp_elts -from miasm2.expression.modint import mod_size2uint, is_modint, size2mask, \ - define_uint -from miasm2.core.graph import DiGraph -from functools import reduce - -# Define tokens -TOK_INF = "<" -TOK_INF_SIGNED = TOK_INF + "s" -TOK_INF_UNSIGNED = TOK_INF + "u" -TOK_INF_EQUAL = "<=" -TOK_INF_EQUAL_SIGNED = TOK_INF_EQUAL + "s" -TOK_INF_EQUAL_UNSIGNED = TOK_INF_EQUAL + "u" -TOK_EQUAL = "==" -TOK_POS = "pos" -TOK_POS_STRICT = "Spos" - -# Hashing constants -EXPRINT = 1 -EXPRID = 2 -EXPRLOC = 3 -EXPRASSIGN = 4 -EXPRCOND = 5 -EXPRMEM = 6 -EXPROP = 7 -EXPRSLICE = 8 -EXPRCOMPOSE = 9 - - -priorities_list = [ - [ '+' ], - [ '*', '/', '%' ], - [ '**' ], - [ '-' ], # Unary '-', associativity with + not handled -] - -# dictionary from 'op' to priority, derived from above -priorities = dict((op, prio) - for prio, l in enumerate(priorities_list) - for op in l) -PRIORITY_MAX = len(priorities_list) - 1 - -def should_parenthesize_child(child, parent): - if (isinstance(child, ExprId) or isinstance(child, ExprInt) or - isinstance(child, ExprCompose) or isinstance(child, ExprMem) or - isinstance(child, ExprSlice)): - return False - elif isinstance(child, ExprOp) and not child.is_infix(): - return False - elif (isinstance(child, ExprCond) or isinstance(parent, ExprSlice)): - return True - elif (isinstance(child, ExprOp) and isinstance(parent, ExprOp)): - pri_child = priorities.get(child.op, -1) - pri_parent = priorities.get(parent.op, PRIORITY_MAX + 1) - return pri_child < pri_parent - else: - return True - -def str_protected_child(child, parent): - return ("(%s)" % child) 
if should_parenthesize_child(child, parent) else str(child) - -def visit_chk(visitor): - "Function decorator launching callback on Expression visit" - def wrapped(expr, callback, test_visit=lambda x: True): - if (test_visit is not None) and (not test_visit(expr)): - return expr - expr_new = visitor(expr, callback, test_visit) - if expr_new is None: - return None - expr_new2 = callback(expr_new) - return expr_new2 - return wrapped - - -# Expression display - - -class DiGraphExpr(DiGraph): - - """Enhanced graph for Expression display - Expression are displayed as a tree with node and edge labeled - with only relevant information""" - - def node2str(self, node): - if isinstance(node, ExprOp): - return node.op - elif isinstance(node, ExprId): - return node.name - elif isinstance(node, ExprLoc): - return "%s" % node.loc_key - elif isinstance(node, ExprMem): - return "@%d" % node.size - elif isinstance(node, ExprCompose): - return "{ %d }" % node.size - elif isinstance(node, ExprCond): - return "? %d" % node.size - elif isinstance(node, ExprSlice): - return "[%d:%d]" % (node.start, node.stop) - return str(node) - - def edge2str(self, nfrom, nto): - if isinstance(nfrom, ExprCompose): - for i in nfrom.args: - if i[0] == nto: - return "[%s, %s]" % (i[1], i[2]) - elif isinstance(nfrom, ExprCond): - if nfrom.cond == nto: - return "?" 
- elif nfrom.src1 == nto: - return "True" - elif nfrom.src2 == nto: - return "False" - - return "" - - -@total_ordering -class LocKey(object): - def __init__(self, key): - self._key = key - - key = property(lambda self: self._key) - - def __hash__(self): - return hash(self._key) - - def __eq__(self, other): - if self is other: - return True - if self.__class__ is not other.__class__: - return False - return self.key == other.key - - def __ne__(self, other): - # required Python 2.7.14 - return not self == other - - def __lt__(self, other): - return self.key < other.key - - def __repr__(self): - return "<%s %d>" % (self.__class__.__name__, self._key) - - def __str__(self): - return "loc_key_%d" % self.key - -# IR definitions - -class Expr(object): - - "Parent class for Miasm Expressions" - - __slots__ = ["_hash", "_repr", "_size"] - - args2expr = {} - canon_exprs = set() - use_singleton = True - - def set_size(self, _): - raise ValueError('size is not mutable') - - def __init__(self, size): - """Instantiate an Expr with size @size - @size: int - """ - # Common attribute - self._size = size - - # Lazy cache needs - self._hash = None - self._repr = None - - size = property(lambda self: self._size) - - @staticmethod - def get_object(expr_cls, args): - if not expr_cls.use_singleton: - return object.__new__(expr_cls) - - expr = Expr.args2expr.get((expr_cls, args)) - if expr is None: - expr = object.__new__(expr_cls) - Expr.args2expr[(expr_cls, args)] = expr - return expr - - def get_is_canon(self): - return self in Expr.canon_exprs - - def set_is_canon(self, value): - assert value is True - Expr.canon_exprs.add(self) - - is_canon = property(get_is_canon, set_is_canon) - - # Common operations - - def __str__(self): - raise NotImplementedError("Abstract Method") - - def __getitem__(self, i): - if not isinstance(i, slice): - raise TypeError("Expression: Bad slice: %s" % i) - start, stop, step = i.indices(self.size) - if step != 1: - raise ValueError("Expression: Bad slice: 
%s" % i) - return ExprSlice(self, start, stop) - - def get_size(self): - raise DeprecationWarning("use X.size instead of X.get_size()") - - def is_function_call(self): - """Returns true if the considered Expr is a function call - """ - return False - - def __repr__(self): - if self._repr is None: - self._repr = self._exprrepr() - return self._repr - - def __hash__(self): - if self._hash is None: - self._hash = self._exprhash() - return self._hash - - def __eq__(self, other): - if self is other: - return True - elif self.use_singleton: - # In case of Singleton, pointer comparison is sufficient - # Avoid computation of hash and repr - return False - - if self.__class__ is not other.__class__: - return False - if hash(self) != hash(other): - return False - return repr(self) == repr(other) - - def __ne__(self, other): - return not self.__eq__(other) - - def __add__(self, other): - return ExprOp('+', self, other) - - def __sub__(self, other): - return ExprOp('+', self, ExprOp('-', other)) - - def __div__(self, other): - return ExprOp('/', self, other) - - def __floordiv__(self, other): - return self.__div__(other) - - def __mod__(self, other): - return ExprOp('%', self, other) - - def __mul__(self, other): - return ExprOp('*', self, other) - - def __lshift__(self, other): - return ExprOp('<<', self, other) - - def __rshift__(self, other): - return ExprOp('>>', self, other) - - def __xor__(self, other): - return ExprOp('^', self, other) - - def __or__(self, other): - return ExprOp('|', self, other) - - def __and__(self, other): - return ExprOp('&', self, other) - - def __neg__(self): - return ExprOp('-', self) - - def __pow__(self, other): - return ExprOp("**", self, other) - - def __invert__(self): - return ExprOp('^', self, self.mask) - - def copy(self): - "Deep copy of the expression" - return self.visit(lambda x: x) - - def __deepcopy__(self, _): - return self.copy() - - def replace_expr(self, dct): - """Find and replace sub expression using dct - @dct: dictionary 
associating replaced Expr to its new Expr value - """ - return self.visit(lambda expr: dct.get(expr, expr)) - - def canonize(self): - "Canonize the Expression" - - def must_canon(expr): - return not expr.is_canon - - def canonize_visitor(expr): - if expr.is_canon: - return expr - if isinstance(expr, ExprOp): - if expr.is_associative(): - # ((a+b) + c) => (a + b + c) - args = [] - for arg in expr.args: - if isinstance(arg, ExprOp) and expr.op == arg.op: - args += arg.args - else: - args.append(arg) - args = canonize_expr_list(args) - new_e = ExprOp(expr.op, *args) - else: - new_e = expr - else: - new_e = expr - new_e.is_canon = True - return new_e - - return self.visit(canonize_visitor, must_canon) - - def msb(self): - "Return the Most Significant Bit" - return self[self.size - 1:self.size] - - def zeroExtend(self, size): - """Zero extend to size - @size: int - """ - assert self.size <= size - if self.size == size: - return self - return ExprOp('zeroExt_%d' % size, self) - - def signExtend(self, size): - """Sign extend to size - @size: int - """ - assert self.size <= size - if self.size == size: - return self - return ExprOp('signExt_%d' % size, self) - - def graph_recursive(self, graph): - """Recursive method used by graph - @graph: miasm2.core.graph.DiGraph instance - Update @graph instance to include sons - This is an Abstract method""" - - raise ValueError("Abstract method") - - def graph(self): - """Return a DiGraph instance standing for Expr tree - Instance's display functions have been override for better visibility - Wrapper on graph_recursive""" - - # Create recursively the graph - graph = DiGraphExpr() - self.graph_recursive(graph) - - return graph - - def set_mask(self, value): - raise ValueError('mask is not mutable') - - mask = property(lambda self: ExprInt(-1, self.size)) - - def is_int(self, value=None): - return False - - def is_id(self, name=None): - return False - - def is_loc(self, label=None): - return False - - def is_aff(self): - return False - 
- def is_cond(self): - return False - - def is_mem(self): - return False - - def is_op(self, op=None): - return False - - def is_slice(self, start=None, stop=None): - return False - - def is_compose(self): - return False - - def is_op_segm(self): - """Returns True if is ExprOp and op == 'segm'""" - return False - - def is_mem_segm(self): - """Returns True if is ExprMem and ptr is_op_segm""" - return False - -class ExprInt(Expr): - - """An ExprInt represent a constant in Miasm IR. - - Some use cases: - - Constant 0x42 - - Constant -0x30 - - Constant 0x12345678 on 32bits - """ - - __slots__ = Expr.__slots__ + ["_arg"] - - - def __init__(self, arg, size): - """Create an ExprInt from a modint or num/size - @arg: 'intable' number - @size: int size""" - super(ExprInt, self).__init__(size) - # Work for ._arg is done in __new__ - - arg = property(lambda self: self._arg) - - def __reduce__(self): - state = int(self._arg), self._size - return self.__class__, state - - def __new__(cls, arg, size): - """Create an ExprInt from a modint or num/size - @arg: 'intable' number - @size: int size""" - - if is_modint(arg): - assert size == arg.size - # Avoid a common blunder - assert not isinstance(arg, ExprInt) - - # Ensure arg is always a moduint - arg = int(arg) - if size not in mod_size2uint: - define_uint(size) - arg = mod_size2uint[size](arg) - - # Get the Singleton instance - expr = Expr.get_object(cls, (arg, size)) - - # Save parameters (__init__ is called with parameters unchanged) - expr._arg = arg - return expr - - def _get_int(self): - "Return self integer representation" - return int(self._arg & size2mask(self._size)) - - def __str__(self): - if self._arg < 0: - return str("-0x%X" % (- self._get_int())) - else: - return str("0x%X" % self._get_int()) - - def get_r(self, mem_read=False, cst_read=False): - if cst_read: - return set([self]) - else: - return set() - - def get_w(self): - return set() - - def _exprhash(self): - return hash((EXPRINT, self._arg, self._size)) - - 
def _exprrepr(self): - return "%s(0x%X, %d)" % (self.__class__.__name__, self._get_int(), - self._size) - - def __contains__(self, expr): - return self == expr - - @visit_chk - def visit(self, callback, test_visit=None): - return self - - def copy(self): - return ExprInt(self._arg, self._size) - - def depth(self): - return 1 - - def graph_recursive(self, graph): - graph.add_node(self) - - def __int__(self): - return int(self.arg) - - def __long__(self): - return int(self.arg) - - def is_int(self, value=None): - if value is not None and self._arg != value: - return False - return True - - -class ExprId(Expr): - - """An ExprId represent an identifier in Miasm IR. - - Some use cases: - - EAX register - - 'start' offset - - variable v1 - """ - - __slots__ = Expr.__slots__ + ["_name"] - - def __init__(self, name, size=None): - """Create an identifier - @name: str, identifier's name - @size: int, identifier's size - """ - if size is None: - warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') - size = 32 - assert isinstance(name, (str, bytes)) - super(ExprId, self).__init__(size) - self._name = name - - name = property(lambda self: self._name) - - def __reduce__(self): - state = self._name, self._size - return self.__class__, state - - def __new__(cls, name, size=None): - if size is None: - warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') - size = 32 - return Expr.get_object(cls, (name, size)) - - def __str__(self): - return str(self._name) - - def get_r(self, mem_read=False, cst_read=False): - return set([self]) - - def get_w(self): - return set([self]) - - def _exprhash(self): - return hash((EXPRID, self._name, self._size)) - - def _exprrepr(self): - return "%s(%r, %d)" % (self.__class__.__name__, self._name, self._size) - - def __contains__(self, expr): - return self == expr - - @visit_chk - def visit(self, callback, test_visit=None): - return self - - def copy(self): - return 
ExprId(self._name, self._size) - - def depth(self): - return 1 - - def graph_recursive(self, graph): - graph.add_node(self) - - def is_id(self, name=None): - if name is not None and self._name != name: - return False - return True - - -class ExprLoc(Expr): - - """An ExprLoc represent a Label in Miasm IR. - """ - - __slots__ = Expr.__slots__ + ["_loc_key"] - - def __init__(self, loc_key, size): - """Create an identifier - @loc_key: int, label loc_key - @size: int, identifier's size - """ - assert isinstance(loc_key, LocKey) - super(ExprLoc, self).__init__(size) - self._loc_key = loc_key - - loc_key= property(lambda self: self._loc_key) - - def __reduce__(self): - state = self._loc_key, self._size - return self.__class__, state - - def __new__(cls, loc_key, size): - return Expr.get_object(cls, (loc_key, size)) - - def __str__(self): - return str(self._loc_key) - - def get_r(self, mem_read=False, cst_read=False): - return set() - - def get_w(self): - return set() - - def _exprhash(self): - return hash((EXPRLOC, self._loc_key, self._size)) - - def _exprrepr(self): - return "%s(%r, %d)" % (self.__class__.__name__, self._loc_key, self._size) - - def __contains__(self, expr): - return self == expr - - @visit_chk - def visit(self, callback, test_visit=None): - return self - - def copy(self): - return ExprLoc(self._loc_key, self._size) - - def depth(self): - return 1 - - def graph_recursive(self, graph): - graph.add_node(self) - - def is_loc(self, loc_key=None): - if loc_key is not None and self._loc_key != loc_key: - return False - return True - - -class ExprAssign(Expr): - - """An ExprAssign represent an assignment from an Expression to another one. 
- - Some use cases: - - var1 <- 2 - """ - - __slots__ = Expr.__slots__ + ["_dst", "_src"] - - def __init__(self, dst, src): - """Create an ExprAssign for dst <- src - @dst: Expr, assignment destination - @src: Expr, assignment source - """ - # dst & src must be Expr - assert isinstance(dst, Expr) - assert isinstance(src, Expr) - - if dst.size != src.size: - raise ValueError( - "sanitycheck: ExprAssign args must have same size! %s" % - ([(str(arg), arg.size) for arg in [dst, src]])) - - super(ExprAssign, self).__init__(self.dst.size) - - dst = property(lambda self: self._dst) - src = property(lambda self: self._src) - - - def __reduce__(self): - state = self._dst, self._src - return self.__class__, state - - def __new__(cls, dst, src): - if dst.is_slice() and dst.arg.size == src.size: - new_dst, new_src = dst.arg, src - elif dst.is_slice(): - # Complete the source with missing slice parts - new_dst = dst.arg - rest = [(ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) - for r in dst.slice_rest()] - all_a = [(src, dst.start, dst.stop)] + rest - all_a.sort(key=lambda x: x[1]) - args = [expr for (expr, _, _) in all_a] - new_src = ExprCompose(*args) - else: - new_dst, new_src = dst, src - expr = Expr.get_object(cls, (new_dst, new_src)) - expr._dst, expr._src = new_dst, new_src - return expr - - def __str__(self): - return "%s = %s" % (str(self._dst), str(self._src)) - - def get_r(self, mem_read=False, cst_read=False): - elements = self._src.get_r(mem_read, cst_read) - if isinstance(self._dst, ExprMem) and mem_read: - elements.update(self._dst.ptr.get_r(mem_read, cst_read)) - return elements - - def get_w(self): - if isinstance(self._dst, ExprMem): - return set([self._dst]) # [memreg] - else: - return self._dst.get_w() - - def _exprhash(self): - return hash((EXPRASSIGN, hash(self._dst), hash(self._src))) - - def _exprrepr(self): - return "%s(%r, %r)" % (self.__class__.__name__, self._dst, self._src) - - def __contains__(self, expr): - return (self == expr or - 
self._src.__contains__(expr) or - self._dst.__contains__(expr)) - - @visit_chk - def visit(self, callback, test_visit=None): - dst, src = self._dst.visit(callback, test_visit), self._src.visit(callback, test_visit) - if dst == self._dst and src == self._src: - return self - else: - return ExprAssign(dst, src) - - def copy(self): - return ExprAssign(self._dst.copy(), self._src.copy()) - - def depth(self): - return max(self._src.depth(), self._dst.depth()) + 1 - - def graph_recursive(self, graph): - graph.add_node(self) - for arg in [self._src, self._dst]: - arg.graph_recursive(graph) - graph.add_uniq_edge(self, arg) - - def is_aff(self): - return True - - -class ExprAff(ExprAssign): - """ - DEPRECATED class. - Use ExprAssign instead of ExprAff - """ - - def __init__(self, dst, src): - warnings.warn('DEPRECATION WARNING: use ExprAssign instead of ExprAff') - super(ExprAff, self).__init__(dst, src) - - -class ExprCond(Expr): - - """An ExprCond stand for a condition on an Expr - - Use cases: - - var1 < var2 - - min(var1, var2) - - if (cond) then ... else ... 
- """ - - __slots__ = Expr.__slots__ + ["_cond", "_src1", "_src2"] - - def __init__(self, cond, src1, src2): - """Create an ExprCond - @cond: Expr, condition - @src1: Expr, value if condition is evaled to not zero - @src2: Expr, value if condition is evaled zero - """ - - # cond, src1, src2 must be Expr - assert isinstance(cond, Expr) - assert isinstance(src1, Expr) - assert isinstance(src2, Expr) - - self._cond, self._src1, self._src2 = cond, src1, src2 - assert src1.size == src2.size - super(ExprCond, self).__init__(self.src1.size) - - cond = property(lambda self: self._cond) - src1 = property(lambda self: self._src1) - src2 = property(lambda self: self._src2) - - def __reduce__(self): - state = self._cond, self._src1, self._src2 - return self.__class__, state - - def __new__(cls, cond, src1, src2): - return Expr.get_object(cls, (cond, src1, src2)) - - def __str__(self): - return "%s?(%s,%s)" % (str_protected_child(self._cond, self), str(self._src1), str(self._src2)) - - def get_r(self, mem_read=False, cst_read=False): - out_src1 = self.src1.get_r(mem_read, cst_read) - out_src2 = self.src2.get_r(mem_read, cst_read) - return self.cond.get_r(mem_read, - cst_read).union(out_src1).union(out_src2) - - def get_w(self): - return set() - - def _exprhash(self): - return hash((EXPRCOND, hash(self.cond), - hash(self._src1), hash(self._src2))) - - def _exprrepr(self): - return "%s(%r, %r, %r)" % (self.__class__.__name__, - self._cond, self._src1, self._src2) - - def __contains__(self, expr): - return (self == expr or - self.cond.__contains__(expr) or - self.src1.__contains__(expr) or - self.src2.__contains__(expr)) - - @visit_chk - def visit(self, callback, test_visit=None): - cond = self._cond.visit(callback, test_visit) - src1 = self._src1.visit(callback, test_visit) - src2 = self._src2.visit(callback, test_visit) - if cond == self._cond and src1 == self._src1 and src2 == self._src2: - return self - return ExprCond(cond, src1, src2) - - def copy(self): - return 
ExprCond(self._cond.copy(), - self._src1.copy(), - self._src2.copy()) - - def depth(self): - return max(self._cond.depth(), - self._src1.depth(), - self._src2.depth()) + 1 - - def graph_recursive(self, graph): - graph.add_node(self) - for arg in [self._cond, self._src1, self._src2]: - arg.graph_recursive(graph) - graph.add_uniq_edge(self, arg) - - def is_cond(self): - return True - - -class ExprMem(Expr): - - """An ExprMem stand for a memory access - - Use cases: - - Memory read - - Memory write - """ - - __slots__ = Expr.__slots__ + ["_ptr"] - - def __init__(self, ptr, size=None): - """Create an ExprMem - @ptr: Expr, memory access address - @size: int, memory access size - """ - if size is None: - warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprMem(ptr, SIZE)') - size = 32 - - # ptr must be Expr - assert isinstance(ptr, Expr) - assert isinstance(size, int_types) - - if not isinstance(ptr, Expr): - raise ValueError( - 'ExprMem: ptr must be an Expr (not %s)' % type(ptr)) - - super(ExprMem, self).__init__(size) - self._ptr = ptr - - def get_arg(self): - warnings.warn('DEPRECATION WARNING: use exprmem.ptr instead of exprmem.arg') - return self.ptr - - def set_arg(self, value): - warnings.warn('DEPRECATION WARNING: use exprmem.ptr instead of exprmem.arg') - self.ptr = value - - ptr = property(lambda self: self._ptr) - arg = property(get_arg, set_arg) - - def __reduce__(self): - state = self._ptr, self._size - return self.__class__, state - - def __new__(cls, ptr, size=None): - if size is None: - warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprMem(ptr, SIZE)') - size = 32 - - return Expr.get_object(cls, (ptr, size)) - - def __str__(self): - return "@%d[%s]" % (self.size, str(self.ptr)) - - def get_r(self, mem_read=False, cst_read=False): - if mem_read: - return set(self._ptr.get_r(mem_read, cst_read).union(set([self]))) - else: - return set([self]) - - def get_w(self): - return set([self]) # [memreg] - - def 
_exprhash(self): - return hash((EXPRMEM, hash(self._ptr), self._size)) - - def _exprrepr(self): - return "%s(%r, %r)" % (self.__class__.__name__, - self._ptr, self._size) - - def __contains__(self, expr): - return self == expr or self._ptr.__contains__(expr) - - @visit_chk - def visit(self, callback, test_visit=None): - ptr = self._ptr.visit(callback, test_visit) - if ptr == self._ptr: - return self - return ExprMem(ptr, self.size) - - def copy(self): - ptr = self.ptr.copy() - return ExprMem(ptr, size=self.size) - - def is_mem_segm(self): - """Returns True if is ExprMem and ptr is_op_segm""" - return self._ptr.is_op_segm() - - def depth(self): - return self._ptr.depth() + 1 - - def graph_recursive(self, graph): - graph.add_node(self) - self._ptr.graph_recursive(graph) - graph.add_uniq_edge(self, self._ptr) - - def is_mem(self): - return True - - -class ExprOp(Expr): - - """An ExprOp stand for an operation between Expr - - Use cases: - - var1 XOR var2 - - var1 + var2 + var3 - - parity bit(var1) - """ - - __slots__ = Expr.__slots__ + ["_op", "_args"] - - def __init__(self, op, *args): - """Create an ExprOp - @op: str, operation - @*args: Expr, operand list - """ - - # args must be Expr - assert all(isinstance(arg, Expr) for arg in args) - - sizes = set([arg.size for arg in args]) - - if len(sizes) != 1: - # Special cases : operande sizes can differ - if op not in [ - "segm", - "FLAG_EQ_ADDWC", "FLAG_EQ_SUBWC", - "FLAG_SIGN_ADDWC", "FLAG_SIGN_SUBWC", - "FLAG_ADDWC_CF", "FLAG_ADDWC_OF", - "FLAG_SUBWC_CF", "FLAG_SUBWC_OF", - - ]: - raise ValueError( - "sanitycheck: ExprOp args must have same size! 
%s" % - ([(str(arg), arg.size) for arg in args])) - - if not isinstance(op, str): - raise ValueError("ExprOp: 'op' argument must be a string") - - assert isinstance(args, tuple) - self._op, self._args = op, args - - # Set size for special cases - if self._op in [ - TOK_EQUAL, 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', - 'fxam_c0', 'fxam_c1', 'fxam_c2', 'fxam_c3', - "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", - "ucomiss_zf", "ucomiss_pf", "ucomiss_cf", - "ucomisd_zf", "ucomisd_pf", "ucomisd_cf"]: - size = 1 - elif self._op in [TOK_INF, TOK_INF_SIGNED, - TOK_INF_UNSIGNED, TOK_INF_EQUAL, - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, - TOK_EQUAL, TOK_POS, - TOK_POS_STRICT, - ]: - size = 1 - elif self._op.startswith("fp_to_sint"): - size = int(self._op[len("fp_to_sint"):]) - elif self._op.startswith("fpconvert_fp"): - size = int(self._op[len("fpconvert_fp"):]) - elif self._op in [ - "FLAG_ADD_CF", "FLAG_SUB_CF", - "FLAG_ADD_OF", "FLAG_SUB_OF", - "FLAG_EQ", "FLAG_EQ_CMP", - "FLAG_SIGN_SUB", "FLAG_SIGN_ADD", - "FLAG_EQ_AND", - "FLAG_EQ_ADDWC", "FLAG_EQ_SUBWC", - "FLAG_SIGN_ADDWC", "FLAG_SIGN_SUBWC", - "FLAG_ADDWC_CF", "FLAG_ADDWC_OF", - "FLAG_SUBWC_CF", "FLAG_SUBWC_OF", - ]: - size = 1 - - elif self._op.startswith('signExt_'): - size = int(self._op[8:]) - elif self._op.startswith('zeroExt_'): - size = int(self._op[8:]) - elif self._op in ['segm']: - size = self._args[1].size - else: - if None in sizes: - size = None - else: - # All arguments have the same size - size = list(sizes)[0] - - super(ExprOp, self).__init__(size) - - op = property(lambda self: self._op) - args = property(lambda self: self._args) - - def __reduce__(self): - state = tuple([self._op] + list(self._args)) - return self.__class__, state - - def __new__(cls, op, *args): - return Expr.get_object(cls, (op, args)) - - def __str__(self): - if self._op == '-': # Unary minus - return '-' + str_protected_child(self._args[0], self) - if self.is_associative() or self.is_infix(): - 
return (' ' + self._op + ' ').join([str_protected_child(arg, self) - for arg in self._args]) - return (self._op + '(' + - ', '.join([str(arg) for arg in self._args]) + ')') - - def get_r(self, mem_read=False, cst_read=False): - return reduce(lambda elements, arg: - elements.union(arg.get_r(mem_read, cst_read)), self._args, set()) - - def get_w(self): - raise ValueError('op cannot be written!', self) - - def _exprhash(self): - h_hargs = [hash(arg) for arg in self._args] - return hash((EXPROP, self._op, tuple(h_hargs))) - - def _exprrepr(self): - return "%s(%r, %s)" % (self.__class__.__name__, self._op, - ', '.join(repr(arg) for arg in self._args)) - - def __contains__(self, expr): - if self == expr: - return True - for arg in self._args: - if arg.__contains__(expr): - return True - return False - - def is_function_call(self): - return self._op.startswith('call') - - def is_infix(self): - return self._op in [ - '-', '+', '*', '^', '&', '|', '>>', '<<', - 'a>>', '>>>', '<<<', '/', '%', '**', - TOK_INF_UNSIGNED, - TOK_INF_SIGNED, - TOK_INF_EQUAL_UNSIGNED, - TOK_INF_EQUAL_SIGNED, - TOK_EQUAL - ] - - def is_associative(self): - "Return True iff current operation is associative" - return (self._op in ['+', '*', '^', '&', '|']) - - def is_commutative(self): - "Return True iff current operation is commutative" - return (self._op in ['+', '*', '^', '&', '|']) - - @visit_chk - def visit(self, callback, test_visit=None): - args = [arg.visit(callback, test_visit) for arg in self._args] - modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) - if modified: - return ExprOp(self._op, *args) - return self - - def copy(self): - args = [arg.copy() for arg in self._args] - return ExprOp(self._op, *args) - - def depth(self): - depth = [arg.depth() for arg in self._args] - return max(depth) + 1 - - def graph_recursive(self, graph): - graph.add_node(self) - for arg in self._args: - arg.graph_recursive(graph) - graph.add_uniq_edge(self, arg) - - def is_op(self, op=None): - 
if op is None: - return True - return self.op == op - - def is_op_segm(self): - """Returns True if is ExprOp and op == 'segm'""" - return self.is_op('segm') - -class ExprSlice(Expr): - - __slots__ = Expr.__slots__ + ["_arg", "_start", "_stop"] - - def __init__(self, arg, start, stop): - - # arg must be Expr - assert isinstance(arg, Expr) - assert isinstance(start, int_types) - assert isinstance(stop, int_types) - assert start < stop - - self._arg, self._start, self._stop = arg, start, stop - super(ExprSlice, self).__init__(self._stop - self._start) - - arg = property(lambda self: self._arg) - start = property(lambda self: self._start) - stop = property(lambda self: self._stop) - - def __reduce__(self): - state = self._arg, self._start, self._stop - return self.__class__, state - - def __new__(cls, arg, start, stop): - return Expr.get_object(cls, (arg, start, stop)) - - def __str__(self): - return "%s[%d:%d]" % (str_protected_child(self._arg, self), self._start, self._stop) - - def get_r(self, mem_read=False, cst_read=False): - return self._arg.get_r(mem_read, cst_read) - - def get_w(self): - return self._arg.get_w() - - def _exprhash(self): - return hash((EXPRSLICE, hash(self._arg), self._start, self._stop)) - - def _exprrepr(self): - return "%s(%r, %d, %d)" % (self.__class__.__name__, self._arg, - self._start, self._stop) - - def __contains__(self, expr): - if self == expr: - return True - return self._arg.__contains__(expr) - - @visit_chk - def visit(self, callback, test_visit=None): - arg = self._arg.visit(callback, test_visit) - if arg == self._arg: - return self - return ExprSlice(arg, self._start, self._stop) - - def copy(self): - return ExprSlice(self._arg.copy(), self._start, self._stop) - - def depth(self): - return self._arg.depth() + 1 - - def slice_rest(self): - "Return the completion of the current slice" - size = self._arg.size - if self._start >= size or self._stop > size: - raise ValueError('bad slice rest %s %s %s' % - (size, self._start, 
self._stop)) - - if self._start == self._stop: - return [(0, size)] - - rest = [] - if self._start != 0: - rest.append((0, self._start)) - if self._stop < size: - rest.append((self._stop, size)) - - return rest - - def graph_recursive(self, graph): - graph.add_node(self) - self._arg.graph_recursive(graph) - graph.add_uniq_edge(self, self._arg) - - def is_slice(self, start=None, stop=None): - if start is not None and self._start != start: - return False - if stop is not None and self._stop != stop: - return False - return True - - -class ExprCompose(Expr): - - """ - Compose is like a hambuger. It concatenate Expressions - """ - - __slots__ = Expr.__slots__ + ["_args"] - - def __init__(self, *args): - """Create an ExprCompose - The ExprCompose is contiguous and starts at 0 - @args: [Expr, Expr, ...] - DEPRECATED: - @args: [(Expr, int, int), (Expr, int, int), ...] - """ - - # args must be Expr - assert all(isinstance(arg, Expr) for arg in args) - - assert isinstance(args, tuple) - self._args = args - super(ExprCompose, self).__init__(sum(arg.size for arg in args)) - - args = property(lambda self: self._args) - - def __reduce__(self): - state = self._args - return self.__class__, state - - def __new__(cls, *args): - return Expr.get_object(cls, args) - - def __str__(self): - return '{' + ', '.join(["%s %s %s" % (arg, idx, idx + arg.size) for idx, arg in self.iter_args()]) + '}' - - def get_r(self, mem_read=False, cst_read=False): - return reduce(lambda elements, arg: - elements.union(arg.get_r(mem_read, cst_read)), self._args, set()) - - def get_w(self): - return reduce(lambda elements, arg: - elements.union(arg.get_w()), self._args, set()) - - def _exprhash(self): - h_args = [EXPRCOMPOSE] + [hash(arg) for arg in self._args] - return hash(tuple(h_args)) - - def _exprrepr(self): - return "%s%r" % (self.__class__.__name__, self._args) - - def __contains__(self, expr): - if self == expr: - return True - for arg in self._args: - if arg == expr: - return True - if 
arg.__contains__(expr): - return True - return False - - @visit_chk - def visit(self, callback, test_visit=None): - args = [arg.visit(callback, test_visit) for arg in self._args] - modified = any([arg != arg_new for arg, arg_new in zip(self._args, args)]) - if modified: - return ExprCompose(*args) - return self - - def copy(self): - args = [arg.copy() for arg in self._args] - return ExprCompose(*args) - - def depth(self): - depth = [arg.depth() for arg in self._args] - return max(depth) + 1 - - def graph_recursive(self, graph): - graph.add_node(self) - for arg in self.args: - arg.graph_recursive(graph) - graph.add_uniq_edge(self, arg) - - def iter_args(self): - index = 0 - for arg in self._args: - yield index, arg - index += arg.size - - def is_compose(self): - return True - -# Expression order for comparison -EXPR_ORDER_DICT = { - ExprId: 1, - ExprLoc: 2, - ExprCond: 3, - ExprMem: 4, - ExprOp: 5, - ExprSlice: 6, - ExprCompose: 7, - ExprInt: 8, -} - - -def compare_exprs_compose(expr1, expr2): - # Sort by start bit address, then expr, then stop bit address - ret = cmp_elts(expr1[1], expr2[1]) - if ret: - return ret - ret = compare_exprs(expr1[0], expr2[0]) - if ret: - return ret - ret = cmp_elts(expr1[2], expr2[2]) - return ret - - -def compare_expr_list_compose(l1_e, l2_e): - # Sort by list elements in incremental order, then by list size - for i in range(min(len(l1_e), len(l2_e))): - ret = compare_exprs(l1_e[i], l2_e[i]) - if ret: - return ret - return cmp_elts(len(l1_e), len(l2_e)) - - -def compare_expr_list(l1_e, l2_e): - # Sort by list elements in incremental order, then by list size - for i in range(min(len(l1_e), len(l2_e))): - ret = compare_exprs(l1_e[i], l2_e[i]) - if ret: - return ret - return cmp_elts(len(l1_e), len(l2_e)) - - -def compare_exprs(expr1, expr2): - """Compare 2 expressions for canonization - @expr1: Expr - @expr2: Expr - 0 => == - 1 => expr1 > expr2 - -1 => expr1 < expr2 - """ - cls1 = expr1.__class__ - cls2 = expr2.__class__ - if cls1 != 
cls2: - return cmp_elts(EXPR_ORDER_DICT[cls1], EXPR_ORDER_DICT[cls2]) - if expr1 == expr2: - return 0 - if cls1 == ExprInt: - ret = cmp_elts(expr1.size, expr2.size) - if ret != 0: - return ret - return cmp_elts(expr1.arg, expr2.arg) - elif cls1 == ExprId: - name1 = force_bytes(expr1.name) - name2 = force_bytes(expr2.name) - ret = cmp_elts(name1, name2) - if ret: - return ret - return cmp_elts(expr1.size, expr2.size) - elif cls1 == ExprLoc: - ret = cmp_elts(expr1.loc_key, expr2.loc_key) - if ret: - return ret - return cmp_elts(expr1.size, expr2.size) - elif cls1 == ExprAssign: - raise NotImplementedError( - "Comparison from an ExprAssign not yet implemented" - ) - elif cls2 == ExprCond: - ret = compare_exprs(expr1.cond, expr2.cond) - if ret: - return ret - ret = compare_exprs(expr1.src1, expr2.src1) - if ret: - return ret - ret = compare_exprs(expr1.src2, expr2.src2) - return ret - elif cls1 == ExprMem: - ret = compare_exprs(expr1.ptr, expr2.ptr) - if ret: - return ret - return cmp_elts(expr1.size, expr2.size) - elif cls1 == ExprOp: - if expr1.op != expr2.op: - return cmp_elts(expr1.op, expr2.op) - return compare_expr_list(expr1.args, expr2.args) - elif cls1 == ExprSlice: - ret = compare_exprs(expr1.arg, expr2.arg) - if ret: - return ret - ret = cmp_elts(expr1.start, expr2.start) - if ret: - return ret - ret = cmp_elts(expr1.stop, expr2.stop) - return ret - elif cls1 == ExprCompose: - return compare_expr_list_compose(expr1.args, expr2.args) - raise NotImplementedError( - "Comparison between %r %r not implemented" % (expr1, expr2) - ) - - -def canonize_expr_list(expr_list): - return sorted(expr_list, key=cmp_to_key(compare_exprs)) - - -def canonize_expr_list_compose(expr_list): - return sorted(expr_list, key=cmp_to_key(compare_exprs_compose)) - -# Generate ExprInt with common size - - -def ExprInt1(i): - warnings.warn('DEPRECATION WARNING: use ExprInt(i, 1) instead of '\ - 'ExprInt1(i))') - return ExprInt(i, 1) - - -def ExprInt8(i): - warnings.warn('DEPRECATION 
WARNING: use ExprInt(i, 8) instead of '\ - 'ExprInt8(i))') - return ExprInt(i, 8) - - -def ExprInt16(i): - warnings.warn('DEPRECATION WARNING: use ExprInt(i, 16) instead of '\ - 'ExprInt16(i))') - return ExprInt(i, 16) - - -def ExprInt32(i): - warnings.warn('DEPRECATION WARNING: use ExprInt(i, 32) instead of '\ - 'ExprInt32(i))') - return ExprInt(i, 32) - - -def ExprInt64(i): - warnings.warn('DEPRECATION WARNING: use ExprInt(i, 64) instead of '\ - 'ExprInt64(i))') - return ExprInt(i, 64) - - -def ExprInt_from(expr, i): - "Generate ExprInt with size equal to expression" - warnings.warn('DEPRECATION WARNING: use ExprInt(i, expr.size) instead of'\ - 'ExprInt_from(expr, i))') - return ExprInt(i, expr.size) - - -def get_expr_ids_visit(expr, ids): - """Visitor to retrieve ExprId in @expr - @expr: Expr""" - if expr.is_id(): - ids.add(expr) - return expr - - -def get_expr_locs_visit(expr, locs): - """Visitor to retrieve ExprLoc in @expr - @expr: Expr""" - if expr.is_loc(): - locs.add(expr) - return expr - - -def get_expr_ids(expr): - """Retrieve ExprId in @expr - @expr: Expr""" - ids = set() - expr.visit(lambda x: get_expr_ids_visit(x, ids)) - return ids - - -def get_expr_locs(expr): - """Retrieve ExprLoc in @expr - @expr: Expr""" - locs = set() - expr.visit(lambda x: get_expr_locs_visit(x, locs)) - return locs - - -def test_set(expr, pattern, tks, result): - """Test if v can correspond to e. If so, update the context in result. - Otherwise, return False - @expr : Expr to match - @pattern : pattern Expr - @tks : list of ExprId, available jokers - @result : dictionary of ExprId -> Expr, current context - """ - - if not pattern in tks: - return expr == pattern - if pattern in result and result[pattern] != expr: - return False - result[pattern] = expr - return result - - -def match_expr(expr, pattern, tks, result=None): - """Try to match the @pattern expression with the pattern @expr with @tks jokers. - Result is output dictionary with matching joker values. 
- @expr : Expr pattern - @pattern : Targeted Expr to match - @tks : list of ExprId, available jokers - @result : dictionary of ExprId -> Expr, output matching context - """ - - if result is None: - result = {} - - if pattern in tks: - # pattern is a Joker - return test_set(expr, pattern, tks, result) - - if expr.is_int(): - return test_set(expr, pattern, tks, result) - - elif expr.is_id(): - return test_set(expr, pattern, tks, result) - - elif expr.is_loc(): - return test_set(expr, pattern, tks, result) - - elif expr.is_op(): - - # expr need to be the same operation than pattern - if not pattern.is_op(): - return False - if expr.op != pattern.op: - return False - if len(expr.args) != len(pattern.args): - return False - - # Perform permutation only if the current operation is commutative - if expr.is_commutative(): - permutations = itertools.permutations(expr.args) - else: - permutations = [expr.args] - - # For each permutations of arguments - for permut in permutations: - good = True - # We need to use a copy of result to not override it - myresult = dict(result) - for sub_expr, sub_pattern in zip(permut, pattern.args): - ret = match_expr(sub_expr, sub_pattern, tks, myresult) - # If the current permutation do not match EVERY terms - if ret is False: - good = False - break - if good is True: - # We found a possibility - for joker, value in viewitems(myresult): - # Updating result in place (to keep pointer in recursion) - result[joker] = value - return result - return False - - # Recursive tests - - elif expr.is_mem(): - if not pattern.is_mem(): - return False - if expr.size != pattern.size: - return False - return match_expr(expr.ptr, pattern.ptr, tks, result) - - elif expr.is_slice(): - if not pattern.is_slice(): - return False - if expr.start != pattern.start or expr.stop != pattern.stop: - return False - return match_expr(expr.arg, pattern.arg, tks, result) - - elif expr.is_cond(): - if not pattern.is_cond(): - return False - if match_expr(expr.cond, 
pattern.cond, tks, result) is False: - return False - if match_expr(expr.src1, pattern.src1, tks, result) is False: - return False - if match_expr(expr.src2, pattern.src2, tks, result) is False: - return False - return result - - elif expr.is_compose(): - if not pattern.is_compose(): - return False - for sub_expr, sub_pattern in zip(expr.args, pattern.args): - if match_expr(sub_expr, sub_pattern, tks, result) is False: - return False - return result - - elif expr.is_aff(): - if not pattern.is_aff(): - return False - if match_expr(expr.src, pattern.src, tks, result) is False: - return False - if match_expr(expr.dst, pattern.dst, tks, result) is False: - return False - return result - - else: - raise NotImplementedError("match_expr: Unknown type: %s" % type(expr)) - - -def MatchExpr(expr, pattern, tks, result=None): - warnings.warn('DEPRECATION WARNING: use match_expr instead of MatchExpr') - return match_expr(expr, pattern, tks, result) - - -def get_rw(exprs): - o_r = set() - o_w = set() - for expr in exprs: - o_r.update(expr.get_r(mem_read=True)) - for expr in exprs: - o_w.update(expr.get_w()) - return o_r, o_w - - -def get_list_rw(exprs, mem_read=False, cst_read=True): - """Return list of read/write reg/cst/mem for each @exprs - @exprs: list of expressions - @mem_read: walk though memory accesses - @cst_read: retrieve constants - """ - list_rw = [] - # cst_num = 0 - for expr in exprs: - o_r = set() - o_w = set() - # get r/w - o_r.update(expr.get_r(mem_read=mem_read, cst_read=cst_read)) - if isinstance(expr.dst, ExprMem): - o_r.update(expr.dst.arg.get_r(mem_read=mem_read, cst_read=cst_read)) - o_w.update(expr.get_w()) - # each cst is indexed - o_r_rw = set() - for read in o_r: - o_r_rw.add(read) - o_r = o_r_rw - list_rw.append((o_r, o_w)) - - return list_rw - - -def get_expr_ops(expr): - """Retrieve operators of an @expr - @expr: Expr""" - def visit_getops(expr, out=None): - if out is None: - out = set() - if isinstance(expr, ExprOp): - out.add(expr.op) - return 
expr - ops = set() - expr.visit(lambda x: visit_getops(x, ops)) - return ops - - -def get_expr_mem(expr): - """Retrieve memory accesses of an @expr - @expr: Expr""" - def visit_getmem(expr, out=None): - if out is None: - out = set() - if isinstance(expr, ExprMem): - out.add(expr) - return expr - ops = set() - expr.visit(lambda x: visit_getmem(x, ops)) - return ops - - -def _expr_compute_cf(op1, op2): - """ - Get carry flag of @op1 - @op2 - Ref: x86 cf flag - @op1: Expression - @op2: Expression - """ - res = op1 - op2 - cf = (((op1 ^ op2) ^ res) ^ ((op1 ^ res) & (op1 ^ op2))).msb() - return cf - -def _expr_compute_of(op1, op2): - """ - Get overflow flag of @op1 - @op2 - Ref: x86 of flag - @op1: Expression - @op2: Expression - """ - res = op1 - op2 - of = (((op1 ^ res) & (op1 ^ op2))).msb() - return of - -def _expr_compute_zf(op1, op2): - """ - Get zero flag of @op1 - @op2 - @op1: Expression - @op2: Expression - """ - res = op1 - op2 - zf = ExprCond(res, - ExprInt(0, 1), - ExprInt(1, 1)) - return zf - - -def _expr_compute_nf(op1, op2): - """ - Get negative (or sign) flag of @op1 - @op2 - @op1: Expression - @op2: Expression - """ - res = op1 - op2 - nf = res.msb() - return nf - - -def expr_is_equal(op1, op2): - """ - if op1 == op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - zf = _expr_compute_zf(op1, op2) - return zf - - -def expr_is_not_equal(op1, op2): - """ - if op1 != op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - zf = _expr_compute_zf(op1, op2) - return ~zf - - -def expr_is_unsigned_greater(op1, op2): - """ - UNSIGNED cmp - if op1 > op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - cf = _expr_compute_cf(op1, op2) - zf = _expr_compute_zf(op1, op2) - return ~(cf | zf) - - -def expr_is_unsigned_greater_or_equal(op1, op2): - """ - Unsigned cmp - if op1 >= op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - cf = _expr_compute_cf(op1, op2) - return ~cf - - -def expr_is_unsigned_lower(op1, 
op2): - """ - Unsigned cmp - if op1 < op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - cf = _expr_compute_cf(op1, op2) - return cf - - -def expr_is_unsigned_lower_or_equal(op1, op2): - """ - Unsigned cmp - if op1 <= op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - cf = _expr_compute_cf(op1, op2) - zf = _expr_compute_zf(op1, op2) - return cf | zf - - -def expr_is_signed_greater(op1, op2): - """ - Signed cmp - if op1 > op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - nf = _expr_compute_nf(op1, op2) - of = _expr_compute_of(op1, op2) - zf = _expr_compute_zf(op1, op2) - return ~(zf | (nf ^ of)) - - -def expr_is_signed_greater_or_equal(op1, op2): - """ - Signed cmp - if op1 > op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - nf = _expr_compute_nf(op1, op2) - of = _expr_compute_of(op1, op2) - return ~(nf ^ of) - - -def expr_is_signed_lower(op1, op2): - """ - Signed cmp - if op1 < op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - nf = _expr_compute_nf(op1, op2) - of = _expr_compute_of(op1, op2) - return nf ^ of - - -def expr_is_signed_lower_or_equal(op1, op2): - """ - Signed cmp - if op1 <= op2: - Return ExprInt(1, 1) - else: - Return ExprInt(0, 1) - """ - - nf = _expr_compute_nf(op1, op2) - of = _expr_compute_of(op1, op2) - zf = _expr_compute_zf(op1, op2) - return zf | (nf ^ of) - -# sign bit | exponent | significand -size_to_IEEE754_info = { - 16: { - "exponent": 5, - "significand": 10, - }, - 32: { - "exponent": 8, - "significand": 23, - }, - 64: { - "exponent": 11, - "significand": 52, - }, -} - -def expr_is_NaN(expr): - """Return 1 or 0 on 1 bit if expr represent a NaN value according to IEEE754 - """ - info = size_to_IEEE754_info[expr.size] - exponent = expr[info["significand"]: info["significand"] + info["exponent"]] - - # exponent is full of 1s and significand is not NULL - return ExprCond(exponent - ExprInt(-1, exponent.size), - ExprInt(0, 1), - 
ExprCond(expr[:info["significand"]], ExprInt(1, 1), - ExprInt(0, 1))) - - -def expr_is_infinite(expr): - """Return 1 or 0 on 1 bit if expr represent an infinite value according to - IEEE754 - """ - info = size_to_IEEE754_info[expr.size] - exponent = expr[info["significand"]: info["significand"] + info["exponent"]] - - # exponent is full of 1s and significand is NULL - return ExprCond(exponent - ExprInt(-1, exponent.size), - ExprInt(0, 1), - ExprCond(expr[:info["significand"]], ExprInt(0, 1), - ExprInt(1, 1))) - - -def expr_is_IEEE754_zero(expr): - """Return 1 or 0 on 1 bit if expr represent a zero value according to - IEEE754 - """ - # Sign is the msb - expr_no_sign = expr[:expr.size - 1] - return ExprCond(expr_no_sign, ExprInt(0, 1), ExprInt(1, 1)) - - -def expr_is_IEEE754_denormal(expr): - """Return 1 or 0 on 1 bit if expr represent a denormalized value according - to IEEE754 - """ - info = size_to_IEEE754_info[expr.size] - exponent = expr[info["significand"]: info["significand"] + info["exponent"]] - # exponent is full of 0s - return ExprCond(exponent, ExprInt(0, 1), ExprInt(1, 1)) - - -def expr_is_qNaN(expr): - """Return 1 or 0 on 1 bit if expr represent a qNaN (quiet) value according to - IEEE754 - """ - info = size_to_IEEE754_info[expr.size] - significand_top = expr[info["significand"]: info["significand"] + 1] - return expr_is_NaN(expr) & significand_top - - -def expr_is_sNaN(expr): - """Return 1 or 0 on 1 bit if expr represent a sNaN (signalling) value according - to IEEE754 - """ - info = size_to_IEEE754_info[expr.size] - significand_top = expr[info["significand"]: info["significand"] + 1] - return expr_is_NaN(expr) & ~significand_top - - -def expr_is_float_lower(op1, op2): - """Return 1 on 1 bit if @op1 < @op2, 0 otherwise. 
- /!\ Assume @op1 and @op2 are not NaN - Comparison is the floating point one, defined in IEEE754 - """ - sign1, sign2 = op1.msb(), op2.msb() - magn1, magn2 = op1[:-1], op2[:-1] - return ExprCond(sign1 ^ sign2, - # Sign different, only the sign matters - sign1, # sign1 ? op1 < op2 : op1 >= op2 - # Sign equals, the result is inversed for negatives - sign1 ^ (expr_is_unsigned_lower(magn1, magn2))) - - -def expr_is_float_equal(op1, op2): - """Return 1 on 1 bit if @op1 == @op2, 0 otherwise. - /!\ Assume @op1 and @op2 are not NaN - Comparison is the floating point one, defined in IEEE754 - """ - sign1, sign2 = op1.msb(), op2.msb() - magn1, magn2 = op1[:-1], op2[:-1] - return ExprCond(magn1 ^ magn2, - ExprInt(0, 1), - ExprCond(magn1, - # magn1 == magn2, are the signal equals? - ~(sign1 ^ sign2), - # Special case: -0.0 == +0.0 - ExprInt(1, 1)) - ) diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py deleted file mode 100644 index a50e0d5b..00000000 --- a/miasm2/expression/expression_helper.py +++ /dev/null @@ -1,628 +0,0 @@ -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# - -# Expressions manipulation functions -from builtins import range -import itertools -import collections -import random -import string -import warnings - -from future.utils import viewitems, viewvalues - -import miasm2.expression.expression as m2_expr - - -def parity(a): - tmp = (a) & 0xFF - cpt = 1 - while tmp != 0: - cpt ^= tmp & 1 - tmp >>= 1 - return cpt - - -def merge_sliceto_slice(expr): - """ - Apply basic factorisation on ExprCompose sub components - @expr: ExprCompose - """ - - out_args = [] - last_index = 0 - for index, arg in expr.iter_args(): - # Init - if len(out_args) == 0: - out_args.append(arg) - continue - - last_value = out_args[-1] - # Consecutive - - if last_index + last_value.size == index: - # Merge consecutive integers - if (isinstance(arg, m2_expr.ExprInt) and - isinstance(last_value, m2_expr.ExprInt)): - new_size = last_value.size + arg.size - value = int(arg) << last_value.size - value |= int(last_value) - out_args[-1] = m2_expr.ExprInt(value, size=new_size) - continue - - # Merge consecuvite slice - elif (isinstance(arg, m2_expr.ExprSlice) and - isinstance(last_value, m2_expr.ExprSlice)): - value = arg.arg - if (last_value.arg == value and - last_value.stop == arg.start): - out_args[-1] = value[last_value.start:arg.stop] - continue - - # Unmergeable - last_index = index - out_args.append(arg) - - return out_args - - -op_propag_cst = ['+', '*', '^', '&', '|', '>>', - '<<', "a>>", ">>>", "<<<", - "/", "%", 'sdiv', 'smod', 'umod', 'udiv','**'] - - -def is_pure_int(e): - """ - return True if expr is only composed with integers - /!\ ExprCond returns True is src1 and src2 are integers - """ - def modify_cond(e): - if isinstance(e, m2_expr.ExprCond): - return e.src1 | e.src2 - return e - - def find_int(e, s): - if isinstance(e, m2_expr.ExprId) or isinstance(e, m2_expr.ExprMem): - s.add(e) - return e - s = set() - new_e = e.visit(modify_cond) - new_e.visit(lambda x: find_int(x, s)) - if s: - return False - return True - - -def 
is_int_or_cond_src_int(e): - if isinstance(e, m2_expr.ExprInt): - return True - if isinstance(e, m2_expr.ExprCond): - return (isinstance(e.src1, m2_expr.ExprInt) and - isinstance(e.src2, m2_expr.ExprInt)) - return False - - -def fast_unify(seq, idfun=None): - # order preserving unifying list function - if idfun is None: - idfun = lambda x: x - seen = {} - result = [] - for item in seq: - marker = idfun(item) - - if marker in seen: - continue - seen[marker] = 1 - result.append(item) - return result - -def get_missing_interval(all_intervals, i_min=0, i_max=32): - """Return a list of missing interval in all_interval - @all_interval: list of (int, int) - @i_min: int, minimal missing interval bound - @i_max: int, maximal missing interval bound""" - - my_intervals = all_intervals[:] - my_intervals.sort() - my_intervals.append((i_max, i_max)) - - missing_i = [] - last_pos = i_min - for start, stop in my_intervals: - if last_pos != start: - missing_i.append((last_pos, start)) - last_pos = stop - return missing_i - - -class Variables_Identifier(object): - """Identify variables in an expression. 
- Returns: - - variables with their corresponding values - - original expression with variables translated - """ - - def __init__(self, expr, var_prefix="v"): - """Set the expression @expr to handle and launch variable identification - process - @expr: Expr instance - @var_prefix: (optional) prefix of the variable name, default is 'v'""" - - # Init - self.var_indice = itertools.count() - self.var_asked = set() - self._vars = {} # VarID -> Expr - self.var_prefix = var_prefix - - # Launch recurrence - self.find_variables_rec(expr) - - # Compute inter-variable dependencies - has_change = True - while has_change: - has_change = False - for var_id, var_value in list(viewitems(self._vars)): - cur = var_value - - # Do not replace with itself - to_replace = { - v_val:v_id - for v_id, v_val in viewitems(self._vars) - if v_id != var_id - } - var_value = var_value.replace_expr(to_replace) - - if cur != var_value: - # Force @self._vars update - has_change = True - self._vars[var_id] = var_value - break - - # Replace in the original equation - self._equation = expr.replace_expr( - { - v_val: v_id for v_id, v_val - in viewitems(self._vars) - } - ) - - # Compute variables dependencies - self._vars_ordered = collections.OrderedDict() - todo = set(self._vars) - needs = {} - - ## Build initial needs - for var_id, var_expr in viewitems(self._vars): - ### Handle corner cases while using Variable Identifier on an - ### already computed equation - needs[var_id] = [ - var_name - for var_name in var_expr.get_r(mem_read=True) - if self.is_var_identifier(var_name) and \ - var_name in todo and \ - var_name != var_id - ] - - ## Build order list - while todo: - done = set() - for var_id in todo: - all_met = True - for need in needs[var_id]: - if need not in self._vars_ordered: - # A dependency is not met - all_met = False - break - if not all_met: - continue - - # All dependencies are already met, add current - self._vars_ordered[var_id] = self._vars[var_id] - done.add(var_id) - - # Update the 
todo list - for element_done in done: - todo.remove(element_done) - - def is_var_identifier(self, expr): - "Return True iff @expr is a variable identifier" - if not isinstance(expr, m2_expr.ExprId): - return False - return expr in self._vars - - def find_variables_rec(self, expr): - """Recursive method called by find_variable to expand @expr. - Set @var_names and @var_values. - This implementation is faster than an expression visitor because - we do not rebuild each expression. - """ - - if (expr in self.var_asked): - # Expr has already been asked - if expr not in viewvalues(self._vars): - # Create var - identifier = m2_expr.ExprId( - "%s%s" % ( - self.var_prefix, - next(self.var_indice) - ), - size = expr.size - ) - self._vars[identifier] = expr - - # Recursion stop case - return - else: - # First time for @expr - self.var_asked.add(expr) - - if isinstance(expr, m2_expr.ExprOp): - for a in expr.args: - self.find_variables_rec(a) - - elif isinstance(expr, m2_expr.ExprInt): - pass - - elif isinstance(expr, m2_expr.ExprId): - pass - - elif isinstance(expr, m2_expr.ExprLoc): - pass - - elif isinstance(expr, m2_expr.ExprMem): - self.find_variables_rec(expr.ptr) - - elif isinstance(expr, m2_expr.ExprCompose): - for arg in expr.args: - self.find_variables_rec(arg) - - elif isinstance(expr, m2_expr.ExprSlice): - self.find_variables_rec(expr.arg) - - elif isinstance(expr, m2_expr.ExprCond): - self.find_variables_rec(expr.cond) - self.find_variables_rec(expr.src1) - self.find_variables_rec(expr.src2) - - else: - raise NotImplementedError("Type not handled: %s" % expr) - - @property - def vars(self): - return self._vars_ordered - - @property - def equation(self): - return self._equation - - def __str__(self): - "Display variables and final equation" - out = "" - for var_id, var_expr in viewitems(self.vars): - out += "%s = %s\n" % (var_id, var_expr) - out += "Final: %s" % self.equation - return out - - -class ExprRandom(object): - """Return an expression randomly generated""" 
- - # Identifiers length - identifier_len = 5 - # Identifiers' name charset - identifier_charset = string.ascii_letters - # Number max value - number_max = 0xFFFFFFFF - # Available operations - operations_by_args_number = {1: ["-"], - 2: ["<<", "<<<", ">>", ">>>"], - "2+": ["+", "*", "&", "|", "^"], - } - # Maximum number of argument for operations - operations_max_args_number = 5 - # If set, output expression is a perfect tree - perfect_tree = True - # Max argument size in slice, relative to slice size - slice_add_size = 10 - # Maximum number of layer in compose - compose_max_layer = 5 - # Maximum size of memory address in bits - memory_max_address_size = 32 - # Re-use already generated elements to mimic a more realistic behavior - reuse_element = True - generated_elements = {} # (depth, size) -> [Expr] - - @classmethod - def identifier(cls, size=32): - """Return a random identifier - @size: (optional) identifier size - """ - return m2_expr.ExprId("".join([random.choice(cls.identifier_charset) - for _ in range(cls.identifier_len)]), - size=size) - - @classmethod - def number(cls, size=32): - """Return a random number - @size: (optional) number max bits - """ - num = random.randint(0, cls.number_max % (2**size)) - return m2_expr.ExprInt(num, size) - - @classmethod - def atomic(cls, size=32): - """Return an atomic Expression - @size: (optional) Expr size - """ - available_funcs = [cls.identifier, cls.number] - return random.choice(available_funcs)(size=size) - - @classmethod - def operation(cls, size=32, depth=1): - """Return an ExprOp - @size: (optional) Operation size - @depth: (optional) Expression depth - """ - operand_type = random.choice(list(cls.operations_by_args_number)) - if isinstance(operand_type, str) and "+" in operand_type: - number_args = random.randint( - int(operand_type[:-1]), - cls.operations_max_args_number - ) - else: - number_args = operand_type - - args = [cls._gen(size=size, depth=depth - 1) - for _ in range(number_args)] - operand = 
random.choice(cls.operations_by_args_number[operand_type]) - return m2_expr.ExprOp(operand, - *args) - - @classmethod - def slice(cls, size=32, depth=1): - """Return an ExprSlice - @size: (optional) Operation size - @depth: (optional) Expression depth - """ - start = random.randint(0, size) - stop = start + size - return cls._gen(size=random.randint(stop, stop + cls.slice_add_size), - depth=depth - 1)[start:stop] - - @classmethod - def compose(cls, size=32, depth=1): - """Return an ExprCompose - @size: (optional) Operation size - @depth: (optional) Expression depth - """ - # First layer - upper_bound = random.randint(1, size) - args = [cls._gen(size=upper_bound, depth=depth - 1)] - - # Next layers - while (upper_bound < size): - if len(args) == (cls.compose_max_layer - 1): - # We reach the maximum size - new_upper_bound = size - else: - new_upper_bound = random.randint(upper_bound + 1, size) - - args.append(cls._gen(size=new_upper_bound - upper_bound)) - upper_bound = new_upper_bound - return m2_expr.ExprCompose(*args) - - @classmethod - def memory(cls, size=32, depth=1): - """Return an ExprMem - @size: (optional) Operation size - @depth: (optional) Expression depth - """ - - address_size = random.randint(1, cls.memory_max_address_size) - return m2_expr.ExprMem(cls._gen(size=address_size, - depth=depth - 1), - size=size) - - @classmethod - def _gen(cls, size=32, depth=1): - """Internal function for generating sub-expression according to options - @size: (optional) Operation size - @depth: (optional) Expression depth - /!\ @generated_elements is left modified - """ - # Perfect tree handling - if not cls.perfect_tree: - depth = random.randint(max(0, depth - 2), depth) - - # Element re-use - if cls.reuse_element and random.choice([True, False]) and \ - (depth, size) in cls.generated_elements: - return random.choice(cls.generated_elements[(depth, size)]) - - # Recursion stop - if depth == 0: - return cls.atomic(size=size) - - # Build a more complex expression - 
available_funcs = [cls.operation, cls.slice, cls.compose, cls.memory] - gen = random.choice(available_funcs)(size=size, depth=depth) - - # Save it - new_value = cls.generated_elements.get((depth, size), []) + [gen] - cls.generated_elements[(depth, size)] = new_value - return gen - - @classmethod - def get(cls, size=32, depth=1, clean=True): - """Return a randomly generated expression - @size: (optional) Operation size - @depth: (optional) Expression depth - @clean: (optional) Clean expression cache between two calls - """ - # Init state - if clean: - cls.generated_elements = {} - - # Get an element - got = cls._gen(size=size, depth=depth) - - # Clear state - if clean: - cls.generated_elements = {} - - return got - -def expr_cmpu(arg1, arg2): - """ - Returns a one bit long Expression: - * 1 if @arg1 is strictly greater than @arg2 (unsigned) - * 0 otherwise. - """ - warnings.warn('DEPRECATION WARNING: use "expr_is_unsigned_greater" instead"') - return m2_expr.expr_is_unsigned_greater(arg1, arg2) - -def expr_cmps(arg1, arg2): - """ - Returns a one bit long Expression: - * 1 if @arg1 is strictly greater than @arg2 (signed) - * 0 otherwise. 
- """ - warnings.warn('DEPRECATION WARNING: use "expr_is_signed_greater" instead"') - return m2_expr.expr_is_signed_greater(arg1, arg2) - - -class CondConstraint(object): - - """Stand for a constraint on an Expr""" - - # str of the associated operator - operator = "" - - def __init__(self, expr): - self.expr = expr - - def __repr__(self): - return "<%s %s 0>" % (self.expr, self.operator) - - def to_constraint(self): - """Transform itself into a constraint using Expr""" - raise NotImplementedError("Abstract method") - - -class CondConstraintZero(CondConstraint): - - """Stand for a constraint like 'A == 0'""" - operator = m2_expr.TOK_EQUAL - - def to_constraint(self): - return m2_expr.ExprAssign(self.expr, m2_expr.ExprInt(0, self.expr.size)) - - -class CondConstraintNotZero(CondConstraint): - - """Stand for a constraint like 'A != 0'""" - operator = "!=" - - def to_constraint(self): - cst1, cst2 = m2_expr.ExprInt(0, 1), m2_expr.ExprInt(1, 1) - return m2_expr.ExprAssign(cst1, m2_expr.ExprCond(self.expr, cst1, cst2)) - - -ConstrainedValue = collections.namedtuple("ConstrainedValue", - ["constraints", "value"]) - - -class ConstrainedValues(set): - - """Set of ConstrainedValue""" - - def __str__(self): - out = [] - for sol in self: - out.append("%s with constraints:" % sol.value) - for constraint in sol.constraints: - out.append("\t%s" % constraint) - return "\n".join(out) - - -def possible_values(expr): - """Return possible values for expression @expr, associated with their - condition constraint as a ConstrainedValues instance - @expr: Expr instance - """ - - consvals = ConstrainedValues() - - # Terminal expression - if (isinstance(expr, m2_expr.ExprInt) or - isinstance(expr, m2_expr.ExprId) or - isinstance(expr, m2_expr.ExprLoc)): - consvals.add(ConstrainedValue(frozenset(), expr)) - # Unary expression - elif isinstance(expr, m2_expr.ExprSlice): - consvals.update(ConstrainedValue(consval.constraints, - consval.value[expr.start:expr.stop]) - for consval in 
possible_values(expr.arg)) - elif isinstance(expr, m2_expr.ExprMem): - consvals.update(ConstrainedValue(consval.constraints, - m2_expr.ExprMem(consval.value, - expr.size)) - for consval in possible_values(expr.ptr)) - elif isinstance(expr, m2_expr.ExprAssign): - consvals.update(possible_values(expr.src)) - # Special case: constraint insertion - elif isinstance(expr, m2_expr.ExprCond): - src1cond = CondConstraintNotZero(expr.cond) - src2cond = CondConstraintZero(expr.cond) - consvals.update(ConstrainedValue(consval.constraints.union([src1cond]), - consval.value) - for consval in possible_values(expr.src1)) - consvals.update(ConstrainedValue(consval.constraints.union([src2cond]), - consval.value) - for consval in possible_values(expr.src2)) - # N-ary expression - elif isinstance(expr, m2_expr.ExprOp): - # For details, see ExprCompose - consvals_args = [possible_values(arg) for arg in expr.args] - for consvals_possibility in itertools.product(*consvals_args): - args_value = [consval.value for consval in consvals_possibility] - args_constraint = itertools.chain(*[consval.constraints - for consval in consvals_possibility]) - consvals.add(ConstrainedValue(frozenset(args_constraint), - m2_expr.ExprOp(expr.op, *args_value))) - elif isinstance(expr, m2_expr.ExprCompose): - # Generate each possibility for sub-argument, associated with the start - # and stop bit - consvals_args = [ - list(possible_values(arg)) - for arg in expr.args - ] - for consvals_possibility in itertools.product(*consvals_args): - # Merge constraint of each sub-element - args_constraint = itertools.chain(*[consval.constraints - for consval in consvals_possibility]) - # Gen the corresponding constraints / ExprCompose - args = [consval.value for consval in consvals_possibility] - consvals.add( - ConstrainedValue(frozenset(args_constraint), - m2_expr.ExprCompose(*args))) - else: - raise RuntimeError("Unsupported type for expr: %s" % type(expr)) - - return consvals diff --git 
a/miasm2/expression/expression_reduce.py b/miasm2/expression/expression_reduce.py deleted file mode 100644 index adad552e..00000000 --- a/miasm2/expression/expression_reduce.py +++ /dev/null @@ -1,280 +0,0 @@ -""" -Expression reducer: -Apply reduction rules to an Expression ast -""" - -import logging -from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprOp, \ - ExprSlice, ExprCompose, ExprMem, ExprCond - -log_reduce = logging.getLogger("expr_reduce") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log_reduce.addHandler(console_handler) -log_reduce.setLevel(logging.WARNING) - - - -class ExprNode(object): - """Clone of Expression object with additional information""" - - def __init__(self, expr): - self.expr = expr - - -class ExprNodeInt(ExprNode): - def __init__(self, expr): - assert expr.is_int() - super(ExprNodeInt, self).__init__(expr) - self.arg = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = str(self.expr) - return out - - -class ExprNodeId(ExprNode): - def __init__(self, expr): - assert expr.is_id() - super(ExprNodeId, self).__init__(expr) - self.arg = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = str(self.expr) - return out - - -class ExprNodeLoc(ExprNode): - def __init__(self, expr): - assert expr.is_loc() - super(ExprNodeLoc, self).__init__(expr) - self.arg = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = str(self.expr) - return out - - -class ExprNodeMem(ExprNode): - def __init__(self, expr): - assert expr.is_mem() - super(ExprNodeMem, self).__init__(expr) - self.ptr = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = "@%d[%r]" % (self.expr.size, self.ptr) - return out - - -class ExprNodeOp(ExprNode): - def __init__(self, expr): - assert expr.is_op() - 
super(ExprNodeOp, self).__init__(expr) - self.args = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - if len(self.args) == 1: - out = "(%s(%r))" % (self.expr.op, self.args[0]) - else: - out = "(%s)" % self.expr.op.join(repr(arg) for arg in self.args) - return out - - -class ExprNodeSlice(ExprNode): - def __init__(self, expr): - assert expr.is_slice() - super(ExprNodeSlice, self).__init__(expr) - self.arg = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = "%r[%d:%d]" % (self.arg, self.expr.start, self.expr.stop) - return out - - -class ExprNodeCompose(ExprNode): - def __init__(self, expr): - assert expr.is_compose() - super(ExprNodeCompose, self).__init__(expr) - self.args = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = "{%s}" % ', '.join(repr(arg) for arg in self.args) - return out - - -class ExprNodeCond(ExprNode): - def __init__(self, expr): - assert expr.is_cond() - super(ExprNodeCond, self).__init__(expr) - self.cond = None - self.src1 = None - self.src2 = None - - def __repr__(self): - if self.info is not None: - out = repr(self.info) - else: - out = "(%r?%r:%r)" % (self.cond, self.src1, self.src2) - return out - - -class ExprReducer(object): - """Apply reduction rules to an expr - - reduction_rules: list of ordered reduction rules - - List of function representing reduction rules - Function API: - reduction_xxx(self, node, lvl=0) - with: - * node: the ExprNode to qualify - * lvl: [optional] the recursion level - Returns: - * None if the reduction rule is not applied - * the resulting information to store in the ExprNode.info - - allow_none_result: allow missing reduction rules - """ - - reduction_rules = [] - allow_none_result = False - - def expr2node(self, expr): - """Build ExprNode mirror of @expr - - @expr: Expression to analyze - """ - - if isinstance(expr, ExprId): - node = ExprNodeId(expr) - elif isinstance(expr, 
ExprLoc): - node = ExprNodeLoc(expr) - elif isinstance(expr, ExprInt): - node = ExprNodeInt(expr) - elif isinstance(expr, ExprMem): - son = self.expr2node(expr.ptr) - node = ExprNodeMem(expr) - node.ptr = son - elif isinstance(expr, ExprSlice): - son = self.expr2node(expr.arg) - node = ExprNodeSlice(expr) - node.arg = son - elif isinstance(expr, ExprOp): - sons = [self.expr2node(arg) for arg in expr.args] - node = ExprNodeOp(expr) - node.args = sons - elif isinstance(expr, ExprCompose): - sons = [self.expr2node(arg) for arg in expr.args] - node = ExprNodeCompose(expr) - node.args = sons - elif isinstance(expr, ExprCond): - node = ExprNodeCond(expr) - node.cond = self.expr2node(expr.cond) - node.src1 = self.expr2node(expr.src1) - node.src2 = self.expr2node(expr.src2) - else: - raise TypeError("Unknown Expr Type %r", type(expr)) - return node - - def reduce(self, expr, **kwargs): - """Returns an ExprNode tree mirroring @expr tree. The ExprNode is - computed by applying reduction rules to the expression @expr - - @expr: an Expression - """ - - node = self.expr2node(expr) - return self.categorize(node, lvl=0, **kwargs) - - def categorize(self, node, lvl=0, **kwargs): - """Recursively apply rules to @node - - @node: ExprNode to analyze - @lvl: actual recursion level - """ - - expr = node.expr - log_reduce.debug("\t" * lvl + "Reduce...: %s", node.expr) - if isinstance(expr, ExprId): - node = ExprNodeId(expr) - elif isinstance(expr, ExprInt): - node = ExprNodeInt(expr) - elif isinstance(expr, ExprLoc): - node = ExprNodeLoc(expr) - elif isinstance(expr, ExprMem): - ptr = self.categorize(node.ptr, lvl=lvl + 1, **kwargs) - node = ExprNodeMem(ExprMem(ptr.expr, expr.size)) - node.ptr = ptr - elif isinstance(expr, ExprSlice): - arg = self.categorize(node.arg, lvl=lvl + 1, **kwargs) - node = ExprNodeSlice(ExprSlice(arg.expr, expr.start, expr.stop)) - node.arg = arg - elif isinstance(expr, ExprOp): - new_args = [] - for arg in node.args: - new_a = self.categorize(arg, lvl=lvl + 
1, **kwargs) - assert new_a.expr.size == arg.expr.size - new_args.append(new_a) - node = ExprNodeOp(ExprOp(expr.op, *[x.expr for x in new_args])) - node.args = new_args - expr = node.expr - elif isinstance(expr, ExprCompose): - new_args = [] - new_expr_args = [] - for arg in node.args: - arg = self.categorize(arg, lvl=lvl + 1, **kwargs) - new_args.append(arg) - new_expr_args.append(arg.expr) - new_expr = ExprCompose(*new_expr_args) - node = ExprNodeCompose(new_expr) - node.args = new_args - elif isinstance(expr, ExprCond): - cond = self.categorize(node.cond, lvl=lvl + 1, **kwargs) - src1 = self.categorize(node.src1, lvl=lvl + 1, **kwargs) - src2 = self.categorize(node.src2, lvl=lvl + 1, **kwargs) - node = ExprNodeCond(ExprCond(cond.expr, src1.expr, src2.expr)) - node.cond, node.src1, node.src2 = cond, src1, src2 - else: - raise TypeError("Unknown Expr Type %r", type(expr)) - - node.info = self.apply_rules(node, lvl=lvl, **kwargs) - log_reduce.debug("\t" * lvl + "Reduce result: %s %r", - node.expr, node.info) - return node - - def apply_rules(self, node, lvl=0, **kwargs): - """Find and apply reduction rules to @node - - @node: ExprNode to analyse - @lvl: actuel recursion level - """ - - for rule in self.reduction_rules: - ret = rule(self, node, lvl=lvl, **kwargs) - - if ret is not None: - log_reduce.debug("\t" * lvl + "Rule found: %r", rule) - return ret - if not self.allow_none_result: - raise RuntimeError('Missing reduction rule for %r' % node.expr) diff --git a/miasm2/expression/modint.py b/miasm2/expression/modint.py deleted file mode 100644 index 22d17b9b..00000000 --- a/miasm2/expression/modint.py +++ /dev/null @@ -1,259 +0,0 @@ -#-*- coding:utf-8 -*- - -from builtins import range -from functools import total_ordering - -@total_ordering -class moduint(object): - - def __init__(self, arg): - self.arg = int(arg) % self.__class__.limit - assert(self.arg >= 0 and self.arg < self.__class__.limit) - - def __repr__(self): - return self.__class__.__name__ + '(' + 
hex(self.arg) + ')' - - def __hash__(self): - return hash(self.arg) - - @classmethod - def maxcast(cls, c2): - c2 = c2.__class__ - if cls.size > c2.size: - return cls - else: - return c2 - - def __eq__(self, y): - if isinstance(y, moduint): - return self.arg == y.arg - return self.arg == y - - def __ne__(self, y): - # required Python 2.7.14 - return not self == y - - def __lt__(self, y): - if isinstance(y, moduint): - return self.arg < y.arg - return self.arg < y - - def __add__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg + y.arg) - else: - return self.__class__(self.arg + y) - - def __and__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg & y.arg) - else: - return self.__class__(self.arg & y) - - def __div__(self, y): - # Python: 8 / -7 == -2 (C-like: -1) - # int(float) trick cannot be used, due to information loss - den = int(y) - num = int(self) - result_sign = 1 if (den * num) >= 0 else -1 - cls = self.__class__ - if isinstance(y, moduint): - cls = self.maxcast(y) - return (abs(num) // abs(den)) * result_sign - - def __floordiv__(self, y): - return self.__div__(y) - - def __int__(self): - return int(self.arg) - - def __long__(self): - return int(self.arg) - - def __index__(self): - return int(self.arg) - - def __invert__(self): - return self.__class__(~self.arg) - - def __lshift__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg << y.arg) - else: - return self.__class__(self.arg << y) - - def __mod__(self, y): - # See __div__ for implementation choice - cls = self.__class__ - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg - y * (self // y)) - - def __mul__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg * y.arg) - else: - return self.__class__(self.arg * y) - - def __neg__(self): - return self.__class__(-self.arg) - - def __or__(self, y): - if isinstance(y, moduint): - cls = 
self.maxcast(y) - return cls(self.arg | y.arg) - else: - return self.__class__(self.arg | y) - - def __radd__(self, y): - return self.__add__(y) - - def __rand__(self, y): - return self.__and__(y) - - def __rdiv__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(y.arg // self.arg) - else: - return self.__class__(y // self.arg) - - def __rfloordiv__(self, y): - return self.__rdiv__(y) - - def __rlshift__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(y.arg << self.arg) - else: - return self.__class__(y << self.arg) - - def __rmod__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(y.arg % self.arg) - else: - return self.__class__(y % self.arg) - - def __rmul__(self, y): - return self.__mul__(y) - - def __ror__(self, y): - return self.__or__(y) - - def __rrshift__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(y.arg >> self.arg) - else: - return self.__class__(y >> self.arg) - - def __rshift__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg >> y.arg) - else: - return self.__class__(self.arg >> y) - - def __rsub__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(y.arg - self.arg) - else: - return self.__class__(y - self.arg) - - def __rxor__(self, y): - return self.__xor__(y) - - def __sub__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg - y.arg) - else: - return self.__class__(self.arg - y) - - def __xor__(self, y): - if isinstance(y, moduint): - cls = self.maxcast(y) - return cls(self.arg ^ y.arg) - else: - return self.__class__(self.arg ^ y) - - def __hex__(self): - return hex(self.arg) - - def __abs__(self): - return abs(self.arg) - - def __rpow__(self, v): - return v ** self.arg - - def __pow__(self, v): - return self.__class__(self.arg ** v) - - -class modint(moduint): - - def __init__(self, arg): - if isinstance(arg, moduint): - arg = arg.arg - a = arg 
% self.__class__.limit - if a >= self.__class__.limit // 2: - a -= self.__class__.limit - self.arg = a - assert( - self.arg >= -self.__class__.limit // 2 and - self.arg < self.__class__.limit - ) - - -def is_modint(a): - return isinstance(a, moduint) - - -def size2mask(size): - return (1 << size) - 1 - -mod_size2uint = {} -mod_size2int = {} - -mod_uint2size = {} -mod_int2size = {} - -def define_int(size): - """Build the 'modint' instance corresponding to size @size""" - global mod_size2int, mod_int2size - - name = 'int%d' % size - cls = type(name, (modint,), {"size": size, "limit": 1 << size}) - globals()[name] = cls - mod_size2int[size] = cls - mod_int2size[cls] = size - return cls - -def define_uint(size): - """Build the 'moduint' instance corresponding to size @size""" - global mod_size2uint, mod_uint2size - - name = 'uint%d' % size - cls = type(name, (moduint,), {"size": size, "limit": 1 << size}) - globals()[name] = cls - mod_size2uint[size] = cls - mod_uint2size[cls] = size - return cls - -def define_common_int(): - "Define common int" - common_int = range(1, 257) - - for i in common_int: - define_int(i) - - for i in common_int: - define_uint(i) - -define_common_int() diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py deleted file mode 100644 index 71efc849..00000000 --- a/miasm2/expression/parser.py +++ /dev/null @@ -1,84 +0,0 @@ -import pyparsing -from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprSlice, \ - ExprMem, ExprCond, ExprCompose, ExprOp, ExprAssign, LocKey - -integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: - int(t[0])) -hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) -hex_int = pyparsing.Combine(hex_word).setParseAction(lambda t: - int(t[0], 16)) - -str_int_pos = (hex_int | integer) -str_int_neg = (pyparsing.Suppress('-') + \ - (hex_int | integer)).setParseAction(lambda t: -t[0]) - -str_int = str_int_pos | str_int_neg - -STR_EXPRINT = pyparsing.Suppress("ExprInt") 
-STR_EXPRID = pyparsing.Suppress("ExprId") -STR_EXPRLOC = pyparsing.Suppress("ExprLoc") -STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") -STR_EXPRMEM = pyparsing.Suppress("ExprMem") -STR_EXPRCOND = pyparsing.Suppress("ExprCond") -STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") -STR_EXPROP = pyparsing.Suppress("ExprOp") -STR_EXPRASSIGN = pyparsing.Suppress("ExprAssign") - -LOCKEY = pyparsing.Suppress("LocKey") - -STR_COMMA = pyparsing.Suppress(",") -LPARENTHESIS = pyparsing.Suppress("(") -RPARENTHESIS = pyparsing.Suppress(")") - - -T_INF = pyparsing.Suppress("<") -T_SUP = pyparsing.Suppress(">") - - -string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') -string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') - - -string = string_quote | string_dquote - -expr = pyparsing.Forward() - -expr_int = STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS -expr_id = STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS -expr_loc = STR_EXPRLOC + LPARENTHESIS + T_INF + LOCKEY + str_int + T_SUP + STR_COMMA + str_int + RPARENTHESIS -expr_slice = STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS -expr_mem = STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS -expr_cond = STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS -expr_compose = STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS -expr_op = STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS -expr_aff = STR_EXPRASSIGN + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS - -expr << (expr_int | expr_id | expr_loc | expr_slice | expr_mem | expr_cond | \ - expr_compose | expr_op | expr_aff) - -def parse_loc_key(t): - assert len(t) == 2 - loc_key, size = LocKey(t[0]), t[1] - return ExprLoc(loc_key, size) - -expr_int.setParseAction(lambda t: 
ExprInt(*t)) -expr_id.setParseAction(lambda t: ExprId(*t)) -expr_loc.setParseAction(parse_loc_key) -expr_slice.setParseAction(lambda t: ExprSlice(*t)) -expr_mem.setParseAction(lambda t: ExprMem(*t)) -expr_cond.setParseAction(lambda t: ExprCond(*t)) -expr_compose.setParseAction(lambda t: ExprCompose(*t)) -expr_op.setParseAction(lambda t: ExprOp(*t)) -expr_aff.setParseAction(lambda t: ExprAssign(*t)) - - -def str_to_expr(str_in): - """Parse the @str_in and return the corresponoding Expression - @str_in: repr string of an Expression""" - - try: - value = expr.parseString(str_in) - except: - raise RuntimeError("Cannot parse expression %s" % str_in) - assert len(value) == 1 - return value[0] diff --git a/miasm2/expression/simplifications.py b/miasm2/expression/simplifications.py deleted file mode 100644 index 331018ae..00000000 --- a/miasm2/expression/simplifications.py +++ /dev/null @@ -1,207 +0,0 @@ -# # -# Simplification methods library # -# # - -import logging - -from future.utils import viewitems - -from miasm2.expression import simplifications_common -from miasm2.expression import simplifications_cond -from miasm2.expression import simplifications_explicit -from miasm2.expression.expression_helper import fast_unify -import miasm2.expression.expression as m2_expr - -# Expression Simplifier -# --------------------- - -log_exprsimp = logging.getLogger("exprsimp") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log_exprsimp.addHandler(console_handler) -log_exprsimp.setLevel(logging.WARNING) - - -class ExpressionSimplifier(object): - - """Wrapper on expression simplification passes. - - Instance handle passes lists. 
- - Available passes lists are: - - commons: common passes such as constant folding - - heavy : rare passes (for instance, in case of obfuscation) - """ - - # Common passes - PASS_COMMONS = { - m2_expr.ExprOp: [ - simplifications_common.simp_cst_propagation, - simplifications_common.simp_cond_op_int, - simplifications_common.simp_cond_factor, - simplifications_common.simp_add_multiple, - # CC op - simplifications_common.simp_cc_conds, - simplifications_common.simp_subwc_cf, - simplifications_common.simp_subwc_of, - simplifications_common.simp_sign_subwc_cf, - simplifications_common.simp_double_zeroext, - simplifications_common.simp_double_signext, - simplifications_common.simp_zeroext_eq_cst, - simplifications_common.simp_ext_eq_ext, - - simplifications_common.simp_cmp_int, - simplifications_common.simp_sign_inf_zeroext, - simplifications_common.simp_cmp_int_int, - simplifications_common.simp_ext_cst, - simplifications_common.simp_zeroext_and_cst_eq_cst, - simplifications_common.simp_test_signext_inf, - simplifications_common.simp_test_zeroext_inf, - simplifications_common.simp_cond_inf_eq_unsigned_zero, - - ], - - m2_expr.ExprSlice: [ - simplifications_common.simp_slice, - simplifications_common.simp_slice_of_ext, - simplifications_common.simp_slice_of_op_ext, - ], - m2_expr.ExprCompose: [simplifications_common.simp_compose], - m2_expr.ExprCond: [ - simplifications_common.simp_cond, - simplifications_common.simp_cond_zeroext, - simplifications_common.simp_cond_add, - # CC op - simplifications_common.simp_cond_flag, - simplifications_common.simp_cmp_int_arg, - - simplifications_common.simp_cond_eq_zero, - simplifications_common.simp_x_and_cst_eq_cst, - simplifications_common.simp_cond_logic_ext, - simplifications_common.simp_cond_sign_bit, - simplifications_common.simp_cond_eq_1_0, - ], - m2_expr.ExprMem: [simplifications_common.simp_mem], - - } - - - # Heavy passes - PASS_HEAVY = {} - - # Cond passes - PASS_COND = { - m2_expr.ExprSlice: [ - 
simplifications_cond.expr_simp_inf_signed, - simplifications_cond.expr_simp_inf_unsigned_inversed - ], - m2_expr.ExprOp: [ - simplifications_cond.expr_simp_inverse, - ], - m2_expr.ExprCond: [ - simplifications_cond.expr_simp_equal - ] - } - - - # Available passes lists are: - # - highlevel: transform high level operators to explicit computations - PASS_HIGH_TO_EXPLICIT = { - m2_expr.ExprOp: [ - simplifications_explicit.simp_flags, - simplifications_explicit.simp_ext, - ], - } - - - def __init__(self): - self.expr_simp_cb = {} - self.simplified_exprs = set() - - def enable_passes(self, passes): - """Add passes from @passes - @passes: dict(Expr class : list(callback)) - - Callback signature: Expr callback(ExpressionSimplifier, Expr) - """ - - # Clear cache of simplifiied expressions when adding a new pass - self.simplified_exprs.clear() - - for k, v in viewitems(passes): - self.expr_simp_cb[k] = fast_unify(self.expr_simp_cb.get(k, []) + v) - - def apply_simp(self, expression): - """Apply enabled simplifications on expression - @expression: Expr instance - Return an Expr instance""" - - cls = expression.__class__ - debug_level = log_exprsimp.level >= logging.DEBUG - for simp_func in self.expr_simp_cb.get(cls, []): - # Apply simplifications - before = expression - expression = simp_func(self, expression) - after = expression - - if debug_level and before != after: - log_exprsimp.debug("[%s] %s => %s", simp_func, before, after) - - # If class changes, stop to prevent wrong simplifications - if expression.__class__ is not cls: - break - - return expression - - def expr_simp(self, expression): - """Apply enabled simplifications on expression and find a stable state - @expression: Expr instance - Return an Expr instance""" - - if expression in self.simplified_exprs: - return expression - - # Find a stable state - while True: - # Canonize and simplify - e_new = self.apply_simp(expression.canonize()) - if e_new == expression: - break - - # Launch recursivity - expression = 
self.expr_simp_wrapper(e_new) - self.simplified_exprs.add(expression) - # Mark expression as simplified - self.simplified_exprs.add(e_new) - - return e_new - - def expr_simp_wrapper(self, expression, callback=None): - """Apply enabled simplifications on expression - @expression: Expr instance - @manual_callback: If set, call this function instead of normal one - Return an Expr instance""" - - if expression in self.simplified_exprs: - return expression - - if callback is None: - callback = self.expr_simp - - return expression.visit(callback, lambda e: e not in self.simplified_exprs) - - def __call__(self, expression, callback=None): - "Wrapper on expr_simp_wrapper" - return self.expr_simp_wrapper(expression, callback) - - -# Public ExprSimplificationPass instance with commons passes -expr_simp = ExpressionSimplifier() -expr_simp.enable_passes(ExpressionSimplifier.PASS_COMMONS) - -expr_simp_high_to_explicit = ExpressionSimplifier() -expr_simp_high_to_explicit.enable_passes(ExpressionSimplifier.PASS_HIGH_TO_EXPLICIT) - -expr_simp_explicit = ExpressionSimplifier() -expr_simp_explicit.enable_passes(ExpressionSimplifier.PASS_COMMONS) -expr_simp_explicit.enable_passes(ExpressionSimplifier.PASS_HIGH_TO_EXPLICIT) diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py deleted file mode 100644 index ddcfc668..00000000 --- a/miasm2/expression/simplifications_common.py +++ /dev/null @@ -1,1556 +0,0 @@ -# ----------------------------- # -# Common simplifications passes # -# ----------------------------- # - -from future.utils import viewitems - -from miasm2.expression.modint import mod_size2int, mod_size2uint -from miasm2.expression.expression import ExprInt, ExprSlice, ExprMem, \ - ExprCond, ExprOp, ExprCompose, TOK_INF_SIGNED, TOK_INF_UNSIGNED, \ - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, TOK_EQUAL -from miasm2.expression.expression_helper import parity, op_propag_cst, \ - merge_sliceto_slice - - -def 
simp_cst_propagation(e_s, expr): - """This passe includes: - - Constant folding - - Common logical identities - - Common binary identities - """ - - # merge associatif op - args = list(expr.args) - op_name = expr.op - # simpl integer manip - # int OP int => int - # TODO: <<< >>> << >> are architecture dependent - if op_name in op_propag_cst: - while (len(args) >= 2 and - args[-1].is_int() and - args[-2].is_int()): - int2 = args.pop() - int1 = args.pop() - if op_name == '+': - out = int1.arg + int2.arg - elif op_name == '*': - out = int1.arg * int2.arg - elif op_name == '**': - out =int1.arg ** int2.arg - elif op_name == '^': - out = int1.arg ^ int2.arg - elif op_name == '&': - out = int1.arg & int2.arg - elif op_name == '|': - out = int1.arg | int2.arg - elif op_name == '>>': - if int(int2) > int1.size: - out = 0 - else: - out = int1.arg >> int2.arg - elif op_name == '<<': - if int(int2) > int1.size: - out = 0 - else: - out = int1.arg << int2.arg - elif op_name == 'a>>': - tmp1 = mod_size2int[int1.arg.size](int1.arg) - tmp2 = mod_size2uint[int2.arg.size](int2.arg) - if tmp2 > int1.size: - is_signed = int(int1) & (1 << (int1.size - 1)) - if is_signed: - out = -1 - else: - out = 0 - else: - out = mod_size2uint[int1.arg.size](tmp1 >> tmp2) - elif op_name == '>>>': - shifter = int2.arg % int2.size - out = (int1.arg >> shifter) | (int1.arg << (int2.size - shifter)) - elif op_name == '<<<': - shifter = int2.arg % int2.size - out = (int1.arg << shifter) | (int1.arg >> (int2.size - shifter)) - elif op_name == '/': - out = int1.arg // int2.arg - elif op_name == '%': - out = int1.arg % int2.arg - elif op_name == 'sdiv': - assert int2.arg.arg - tmp1 = mod_size2int[int1.arg.size](int1.arg) - tmp2 = mod_size2int[int2.arg.size](int2.arg) - out = mod_size2uint[int1.arg.size](tmp1 // tmp2) - elif op_name == 'smod': - assert int2.arg.arg - tmp1 = mod_size2int[int1.arg.size](int1.arg) - tmp2 = mod_size2int[int2.arg.size](int2.arg) - out = mod_size2uint[int1.arg.size](tmp1 % tmp2) - 
elif op_name == 'umod': - assert int2.arg.arg - tmp1 = mod_size2uint[int1.arg.size](int1.arg) - tmp2 = mod_size2uint[int2.arg.size](int2.arg) - out = mod_size2uint[int1.arg.size](tmp1 % tmp2) - elif op_name == 'udiv': - assert int2.arg.arg - tmp1 = mod_size2uint[int1.arg.size](int1.arg) - tmp2 = mod_size2uint[int2.arg.size](int2.arg) - out = mod_size2uint[int1.arg.size](tmp1 // tmp2) - - - - args.append(ExprInt(out, int1.size)) - - # cnttrailzeros(int) => int - if op_name == "cnttrailzeros" and args[0].is_int(): - i = 0 - while args[0].arg & (1 << i) == 0 and i < args[0].size: - i += 1 - return ExprInt(i, args[0].size) - - # cntleadzeros(int) => int - if op_name == "cntleadzeros" and args[0].is_int(): - if args[0].arg == 0: - return ExprInt(args[0].size, args[0].size) - i = args[0].size - 1 - while args[0].arg & (1 << i) == 0: - i -= 1 - return ExprInt(expr.size - (i + 1), args[0].size) - - # -(-(A)) => A - if (op_name == '-' and len(args) == 1 and args[0].is_op('-') and - len(args[0].args) == 1): - return args[0].args[0] - - # -(int) => -int - if op_name == '-' and len(args) == 1 and args[0].is_int(): - return ExprInt(-int(args[0]), expr.size) - # A op 0 =>A - if op_name in ['+', '|', "^", "<<", ">>", "<<<", ">>>"] and len(args) > 1: - if args[-1].is_int(0): - args.pop() - # A - 0 =>A - if op_name == '-' and len(args) > 1 and args[-1].is_int(0): - assert len(args) == 2 # Op '-' with more than 2 args: SantityCheckError - return args[0] - - # A * 1 =>A - if op_name == "*" and len(args) > 1 and args[-1].is_int(1): - args.pop() - - # for cannon form - # A * -1 => - A - if op_name == "*" and len(args) > 1 and args[-1] == args[-1].mask: - args.pop() - args[-1] = - args[-1] - - # op A => A - if op_name in ['+', '*', '^', '&', '|', '>>', '<<', - 'a>>', '<<<', '>>>', 'sdiv', 'smod', 'umod', 'udiv'] and len(args) == 1: - return args[0] - - # A-B => A + (-B) - if op_name == '-' and len(args) > 1: - if len(args) > 2: - raise ValueError( - 'sanity check fail on expr -: should 
have one or 2 args ' + - '%r %s' % (expr, expr) - ) - return ExprOp('+', args[0], -args[1]) - - # A op 0 => 0 - if op_name in ['&', "*"] and args[-1].is_int(0): - return ExprInt(0, expr.size) - - # - (A + B +...) => -A + -B + -C - if op_name == '-' and len(args) == 1 and args[0].is_op('+'): - args = [-a for a in args[0].args] - return ExprOp('+', *args) - - # -(a?int1:int2) => (a?-int1:-int2) - if (op_name == '-' and len(args) == 1 and - args[0].is_cond() and - args[0].src1.is_int() and args[0].src2.is_int()): - int1 = args[0].src1 - int2 = args[0].src2 - int1 = ExprInt(-int1.arg, int1.size) - int2 = ExprInt(-int2.arg, int2.size) - return ExprCond(args[0].cond, int1, int2) - - i = 0 - while i < len(args) - 1: - j = i + 1 - while j < len(args): - # A ^ A => 0 - if op_name == '^' and args[i] == args[j]: - args[i] = ExprInt(0, args[i].size) - del args[j] - continue - # A + (- A) => 0 - if op_name == '+' and args[j].is_op("-"): - if len(args[j].args) == 1 and args[i] == args[j].args[0]: - args[i] = ExprInt(0, args[i].size) - del args[j] - continue - # (- A) + A => 0 - if op_name == '+' and args[i].is_op("-"): - if len(args[i].args) == 1 and args[j] == args[i].args[0]: - args[i] = ExprInt(0, args[i].size) - del args[j] - continue - # A | A => A - if op_name == '|' and args[i] == args[j]: - del args[j] - continue - # A & A => A - if op_name == '&' and args[i] == args[j]: - del args[j] - continue - j += 1 - i += 1 - - if op_name in ['|', '&', '%', '/', '**'] and len(args) == 1: - return args[0] - - # A <<< A.size => A - if (op_name in ['<<<', '>>>'] and - args[1].is_int() and - args[1].arg == args[0].size): - return args[0] - - # (A <<< X) <<< Y => A <<< (X+Y) (or <<< >>>) if X + Y does not overflow - if (op_name in ['<<<', '>>>'] and - args[0].is_op() and - args[0].op in ['<<<', '>>>']): - A = args[0].args[0] - X = args[0].args[1] - Y = args[1] - if op_name != args[0].op and e_s(X - Y) == ExprInt(0, X.size): - return args[0].args[0] - elif X.is_int() and Y.is_int(): - 
new_X = int(X) % expr.size - new_Y = int(Y) % expr.size - if op_name == args[0].op: - rot = (new_X + new_Y) % expr.size - op = op_name - else: - rot = new_Y - new_X - op = op_name - if rot < 0: - rot = - rot - op = {">>>": "<<<", "<<<": ">>>"}[op_name] - args = [A, ExprInt(rot, expr.size)] - op_name = op - - else: - # Do not consider this case, too tricky (overflow on addition / - # subtraction) - pass - - # A >> X >> Y => A >> (X+Y) if X + Y does not overflow - # To be sure, only consider the simplification when X.msb and Y.msb are 0 - if (op_name in ['<<', '>>'] and - args[0].is_op(op_name)): - X = args[0].args[1] - Y = args[1] - if (e_s(X.msb()) == ExprInt(0, 1) and - e_s(Y.msb()) == ExprInt(0, 1)): - args = [args[0].args[0], X + Y] - - # ((var >> int1) << int1) => var & mask - # ((var << int1) >> int1) => var & mask - if (op_name in ['<<', '>>'] and - args[0].is_op() and - args[0].op in ['<<', '>>'] and - op_name != args[0]): - var = args[0].args[0] - int1 = args[0].args[1] - int2 = args[1] - if int1 == int2 and int1.is_int() and int(int1) < expr.size: - if op_name == '>>': - mask = ExprInt((1 << (expr.size - int(int1))) - 1, expr.size) - else: - mask = ExprInt( - ((1 << int(int1)) - 1) ^ ((1 << expr.size) - 1), - expr.size - ) - ret = var & mask - return ret - - # ((A & A.mask) - if op_name == "&" and args[-1] == expr.mask: - return ExprOp('&', *args[:-1]) - - # ((A | A.mask) - if op_name == "|" and args[-1] == expr.mask: - return args[-1] - - # ! 
(!X + int) => X - int - # TODO - - # ((A & mask) >> shift) with mask < 2**shift => 0 - if op_name == ">>" and args[1].is_int() and args[0].is_op("&"): - if (args[0].args[1].is_int() and - 2 ** args[1].arg > args[0].args[1].arg): - return ExprInt(0, args[0].size) - - # parity(int) => int - if op_name == 'parity' and args[0].is_int(): - return ExprInt(parity(int(args[0])), 1) - - # (-a) * b * (-c) * (-d) => (-a) * b * c * d - if op_name == "*" and len(args) > 1: - new_args = [] - counter = 0 - for arg in args: - if arg.is_op('-') and len(arg.args) == 1: - new_args.append(arg.args[0]) - counter += 1 - else: - new_args.append(arg) - if counter % 2: - return -ExprOp(op_name, *new_args) - args = new_args - - # -(a * b * int) => a * b * (-int) - if op_name == "-" and args[0].is_op('*') and args[0].args[-1].is_int(): - args = args[0].args - return ExprOp('*', *(list(args[:-1]) + [ExprInt(-int(args[-1]), expr.size)])) - - - # A << int with A ExprCompose => move index - if (op_name == "<<" and args[0].is_compose() and - args[1].is_int() and int(args[1]) != 0): - final_size = args[0].size - shift = int(args[1]) - new_args = [] - # shift indexes - for index, arg in args[0].iter_args(): - new_args.append((arg, index+shift, index+shift+arg.size)) - # filter out expression - filter_args = [] - min_index = final_size - for tmp, start, stop in new_args: - if start >= final_size: - continue - if stop > final_size: - tmp = tmp[:tmp.size - (stop - final_size)] - filter_args.append(tmp) - min_index = min(start, min_index) - # create entry 0 - assert min_index != 0 - tmp = ExprInt(0, min_index) - args = [tmp] + filter_args - return ExprCompose(*args) - - # A >> int with A ExprCompose => move index - if op_name == ">>" and args[0].is_compose() and args[1].is_int(): - final_size = args[0].size - shift = int(args[1]) - new_args = [] - # shift indexes - for index, arg in args[0].iter_args(): - new_args.append((arg, index-shift, index+arg.size-shift)) - # filter out expression - filter_args 
= [] - max_index = 0 - for tmp, start, stop in new_args: - if stop <= 0: - continue - if start < 0: - tmp = tmp[-start:] - filter_args.append(tmp) - max_index = max(stop, max_index) - # create entry 0 - tmp = ExprInt(0, final_size - max_index) - args = filter_args + [tmp] - return ExprCompose(*args) - - - # Compose(a) OP Compose(b) with a/b same bounds => Compose(a OP b) - if op_name in ['|', '&', '^'] and all([arg.is_compose() for arg in args]): - bounds = set() - for arg in args: - bound = tuple([tmp.size for tmp in arg.args]) - bounds.add(bound) - if len(bounds) == 1: - new_args = [[tmp] for tmp in args[0].args] - for sub_arg in args[1:]: - for i, tmp in enumerate(sub_arg.args): - new_args[i].append(tmp) - args = [] - for i, arg in enumerate(new_args): - args.append(ExprOp(op_name, *arg)) - return ExprCompose(*args) - - return ExprOp(op_name, *args) - - -def simp_cond_op_int(_, expr): - "Extract conditions from operations" - - - # x?a:b + x?c:d + e => x?(a+c+e:b+d+e) - if not expr.op in ["+", "|", "^", "&", "*", '<<', '>>', 'a>>']: - return expr - if len(expr.args) < 2: - return expr - conds = set() - for arg in expr.args: - if arg.is_cond(): - conds.add(arg) - if len(conds) != 1: - return expr - cond = list(conds).pop() - - args1, args2 = [], [] - for arg in expr.args: - if arg.is_cond(): - args1.append(arg.src1) - args2.append(arg.src2) - else: - args1.append(arg) - args2.append(arg) - - return ExprCond(cond.cond, - ExprOp(expr.op, *args1), - ExprOp(expr.op, *args2)) - - -def simp_cond_factor(e_s, expr): - "Merge similar conditions" - if not expr.op in ["+", "|", "^", "&", "*", '<<', '>>', 'a>>']: - return expr - if len(expr.args) < 2: - return expr - - if expr.op in ['>>', '<<', 'a>>']: - assert len(expr.args) == 2 - - # Note: the following code is correct for non-commutative operation only if - # there is 2 arguments. 
Otherwise, the order is not conserved - - # Regroup sub-expression by similar conditions - conds = {} - not_conds = [] - multi_cond = False - for arg in expr.args: - if not arg.is_cond(): - not_conds.append(arg) - continue - cond = arg.cond - if not cond in conds: - conds[cond] = [] - else: - multi_cond = True - conds[cond].append(arg) - if not multi_cond: - return expr - - # Rebuild the new expression - c_out = not_conds - for cond, vals in viewitems(conds): - new_src1 = [x.src1 for x in vals] - new_src2 = [x.src2 for x in vals] - src1 = e_s.expr_simp_wrapper(ExprOp(expr.op, *new_src1)) - src2 = e_s.expr_simp_wrapper(ExprOp(expr.op, *new_src2)) - c_out.append(ExprCond(cond, src1, src2)) - - if len(c_out) == 1: - new_e = c_out[0] - else: - new_e = ExprOp(expr.op, *c_out) - return new_e - - -def simp_slice(e_s, expr): - "Slice optimization" - - # slice(A, 0, a.size) => A - if expr.start == 0 and expr.stop == expr.arg.size: - return expr.arg - # Slice(int) => int - if expr.arg.is_int(): - total_bit = expr.stop - expr.start - mask = (1 << (expr.stop - expr.start)) - 1 - return ExprInt(int((expr.arg.arg >> expr.start) & mask), total_bit) - # Slice(Slice(A, x), y) => Slice(A, z) - if expr.arg.is_slice(): - if expr.stop - expr.start > expr.arg.stop - expr.arg.start: - raise ValueError('slice in slice: getting more val', str(expr)) - - return ExprSlice(expr.arg.arg, expr.start + expr.arg.start, - expr.start + expr.arg.start + (expr.stop - expr.start)) - if expr.arg.is_compose(): - # Slice(Compose(A), x) => Slice(A, y) - for index, arg in expr.arg.iter_args(): - if index <= expr.start and index+arg.size >= expr.stop: - return arg[expr.start - index:expr.stop - index] - # Slice(Compose(A, B, C), x) => Compose(A, B, C) with truncated A/B/C - out = [] - for index, arg in expr.arg.iter_args(): - # arg is before slice start - if expr.start >= index + arg.size: - continue - # arg is after slice stop - elif expr.stop <= index: - continue - # arg is fully included in slice - elif 
expr.start <= index and index + arg.size <= expr.stop: - out.append(arg) - continue - # arg is truncated at start - if expr.start > index: - slice_start = expr.start - index - else: - # arg is not truncated at start - slice_start = 0 - # a is truncated at stop - if expr.stop < index + arg.size: - slice_stop = arg.size + expr.stop - (index + arg.size) - slice_start - else: - slice_stop = arg.size - out.append(arg[slice_start:slice_stop]) - - return ExprCompose(*out) - - # ExprMem(x, size)[:A] => ExprMem(x, a) - # XXXX todo hum, is it safe? - if (expr.arg.is_mem() and - expr.start == 0 and - expr.arg.size > expr.stop and expr.stop % 8 == 0): - return ExprMem(expr.arg.ptr, size=expr.stop) - # distributivity of slice and & - # (a & int)[x:y] => 0 if int[x:y] == 0 - if expr.arg.is_op("&") and expr.arg.args[-1].is_int(): - tmp = e_s.expr_simp_wrapper(expr.arg.args[-1][expr.start:expr.stop]) - if tmp.is_int(0): - return tmp - # distributivity of slice and exprcond - # (a?int1:int2)[x:y] => (a?int1[x:y]:int2[x:y]) - # (a?compose1:compose2)[x:y] => (a?compose1[x:y]:compose2[x:y]) - if (expr.arg.is_cond() and - (expr.arg.src1.is_int() or expr.arg.src1.is_compose()) and - (expr.arg.src2.is_int() or expr.arg.src2.is_compose())): - src1 = expr.arg.src1[expr.start:expr.stop] - src2 = expr.arg.src2[expr.start:expr.stop] - return ExprCond(expr.arg.cond, src1, src2) - - # (a * int)[0:y] => (a[0:y] * int[0:y]) - if expr.start == 0 and expr.arg.is_op("*") and expr.arg.args[-1].is_int(): - args = [e_s.expr_simp_wrapper(a[expr.start:expr.stop]) for a in expr.arg.args] - return ExprOp(expr.arg.op, *args) - - # (a >> int)[x:y] => a[x+int:y+int] with int+y <= a.size - # (a << int)[x:y] => a[x-int:y-int] with x-int >= 0 - if (expr.arg.is_op() and expr.arg.op in [">>", "<<"] and - expr.arg.args[1].is_int()): - arg, shift = expr.arg.args - shift = int(shift) - if expr.arg.op == ">>": - if shift + expr.stop <= arg.size: - return arg[expr.start + shift:expr.stop + shift] - elif expr.arg.op == 
"<<": - if expr.start - shift >= 0: - return arg[expr.start - shift:expr.stop - shift] - else: - raise ValueError('Bad case') - - return expr - - -def simp_compose(e_s, expr): - "Commons simplification on ExprCompose" - args = merge_sliceto_slice(expr) - out = [] - # compose of compose - for arg in args: - if arg.is_compose(): - out += arg.args - else: - out.append(arg) - args = out - # Compose(a) with a.size = compose.size => a - if len(args) == 1 and args[0].size == expr.size: - return args[0] - - # {(X[z:], 0, X.size-z), (0, X.size-z, X.size)} => (X >> z) - if len(args) == 2 and args[1].is_int(0): - if (args[0].is_slice() and - args[0].stop == args[0].arg.size and - args[0].size + args[1].size == args[0].arg.size): - new_expr = args[0].arg >> ExprInt(args[0].start, args[0].arg.size) - return new_expr - - # {@X[base + i] 0 X, @Y[base + i + X] X (X + Y)} => @(X+Y)[base + i] - for i, arg in enumerate(args[:-1]): - nxt = args[i + 1] - if arg.is_mem() and nxt.is_mem(): - gap = e_s(nxt.ptr - arg.ptr) - if gap.is_int() and arg.size % 8 == 0 and int(gap) == arg.size // 8: - args = args[:i] + [ExprMem(arg.ptr, - arg.size + nxt.size)] + args[i + 2:] - return ExprCompose(*args) - - # {a, x?b:d, x?c:e, f} => x?{a, b, c, f}:{a, d, e, f} - conds = set(arg.cond for arg in expr.args if arg.is_cond()) - if len(conds) == 1: - cond = list(conds)[0] - args1, args2 = [], [] - for arg in expr.args: - if arg.is_cond(): - args1.append(arg.src1) - args2.append(arg.src2) - else: - args1.append(arg) - args2.append(arg) - arg1 = e_s(ExprCompose(*args1)) - arg2 = e_s(ExprCompose(*args2)) - return ExprCond(cond, arg1, arg2) - return ExprCompose(*args) - - -def simp_cond(_, expr): - """ - Common simplifications on ExprCond. 
- Eval exprcond src1/src2 with satifiable/unsatisfiable condition propagation - """ - if (not expr.cond.is_int()) and expr.cond.size == 1: - src1 = expr.src1.replace_expr({expr.cond: ExprInt(1, 1)}) - src2 = expr.src2.replace_expr({expr.cond: ExprInt(0, 1)}) - if src1 != expr.src1 or src2 != expr.src2: - return ExprCond(expr.cond, src1, src2) - - # -A ? B:C => A ? B:C - if expr.cond.is_op('-') and len(expr.cond.args) == 1: - expr = ExprCond(expr.cond.args[0], expr.src1, expr.src2) - # a?x:x - elif expr.src1 == expr.src2: - expr = expr.src1 - # int ? A:B => A or B - elif expr.cond.is_int(): - if expr.cond.arg == 0: - expr = expr.src2 - else: - expr = expr.src1 - # a?(a?b:c):x => a?b:x - elif expr.src1.is_cond() and expr.cond == expr.src1.cond: - expr = ExprCond(expr.cond, expr.src1.src1, expr.src2) - # a?x:(a?b:c) => a?x:c - elif expr.src2.is_cond() and expr.cond == expr.src2.cond: - expr = ExprCond(expr.cond, expr.src1, expr.src2.src2) - # a|int ? b:c => b with int != 0 - elif (expr.cond.is_op('|') and - expr.cond.args[1].is_int() and - expr.cond.args[1].arg != 0): - return expr.src1 - - # (C?int1:int2)?(A:B) => - elif (expr.cond.is_cond() and - expr.cond.src1.is_int() and - expr.cond.src2.is_int()): - int1 = expr.cond.src1.arg.arg - int2 = expr.cond.src2.arg.arg - if int1 and int2: - expr = expr.src1 - elif int1 == 0 and int2 == 0: - expr = expr.src2 - elif int1 == 0 and int2: - expr = ExprCond(expr.cond.cond, expr.src2, expr.src1) - elif int1 and int2 == 0: - expr = ExprCond(expr.cond.cond, expr.src1, expr.src2) - - elif expr.cond.is_compose(): - # {0, X, 0}?(A:B) => X?(A:B) - args = [arg for arg in expr.cond.args if not arg.is_int(0)] - if len(args) == 1: - arg = args.pop() - return ExprCond(arg, expr.src1, expr.src2) - elif len(args) < len(expr.cond.args): - return ExprCond(ExprCompose(*args), expr.src1, expr.src2) - return expr - - -def simp_mem(_, expr): - """ - Common simplifications on ExprMem: - @32[x?a:b] => x?@32[a]:@32[b] - """ - if expr.ptr.is_cond(): 
- cond = expr.ptr - ret = ExprCond(cond.cond, - ExprMem(cond.src1, expr.size), - ExprMem(cond.src2, expr.size)) - return ret - return expr - - - - -def test_cc_eq_args(expr, *sons_op): - """ - Return True if expression's arguments match the list in sons_op, and their - sub arguments are identical. Ex: - CC_S<=( - FLAG_SIGN_SUB(A, B), - FLAG_SUB_OF(A, B), - FLAG_EQ_CMP(A, B) - ) - """ - if not expr.is_op(): - return False - if len(expr.args) != len(sons_op): - return False - all_args = set() - for i, arg in enumerate(expr.args): - if not arg.is_op(sons_op[i]): - return False - all_args.add(arg.args) - return len(all_args) == 1 - - -def simp_cc_conds(_, expr): - """ - High level simplifications. Example: - CC_U<(FLAG_SUB_CF(A, B) => A =") and - test_cc_eq_args( - expr, - "FLAG_SUB_CF" - )): - expr = ExprCond( - ExprOp(TOK_INF_UNSIGNED, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size)) - - elif (expr.is_op("CC_U<") and - test_cc_eq_args( - expr, - "FLAG_SUB_CF" - )): - expr = ExprOp(TOK_INF_UNSIGNED, *expr.args[0].args) - - elif (expr.is_op("CC_NEG") and - test_cc_eq_args( - expr, - "FLAG_SIGN_SUB" - )): - expr = ExprOp(TOK_INF_SIGNED, *expr.args[0].args) - - elif (expr.is_op("CC_POS") and - test_cc_eq_args( - expr, - "FLAG_SIGN_SUB" - )): - expr = ExprCond( - ExprOp(TOK_INF_SIGNED, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - elif (expr.is_op("CC_EQ") and - test_cc_eq_args( - expr, - "FLAG_EQ" - )): - arg = expr.args[0].args[0] - expr = ExprOp(TOK_EQUAL, arg, ExprInt(0, arg.size)) - - elif (expr.is_op("CC_NE") and - test_cc_eq_args( - expr, - "FLAG_EQ" - )): - arg = expr.args[0].args[0] - expr = ExprCond( - ExprOp(TOK_EQUAL,arg, ExprInt(0, arg.size)), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - elif (expr.is_op("CC_NE") and - test_cc_eq_args( - expr, - "FLAG_EQ_CMP" - )): - expr = ExprCond( - ExprOp(TOK_EQUAL, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - elif 
(expr.is_op("CC_EQ") and - test_cc_eq_args( - expr, - "FLAG_EQ_CMP" - )): - expr = ExprOp(TOK_EQUAL, *expr.args[0].args) - - elif (expr.is_op("CC_NE") and - test_cc_eq_args( - expr, - "FLAG_EQ_AND" - )): - expr = ExprOp("&", *expr.args[0].args) - - elif (expr.is_op("CC_EQ") and - test_cc_eq_args( - expr, - "FLAG_EQ_AND" - )): - expr = ExprCond( - ExprOp("&", *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - elif (expr.is_op("CC_S>") and - test_cc_eq_args( - expr, - "FLAG_SIGN_SUB", - "FLAG_SUB_OF", - "FLAG_EQ_CMP", - )): - expr = ExprCond( - ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - elif (expr.is_op("CC_S>") and - len(expr.args) == 3 and - expr.args[0].is_op("FLAG_SIGN_SUB") and - expr.args[2].is_op("FLAG_EQ_CMP") and - expr.args[0].args == expr.args[2].args and - expr.args[1].is_int(0)): - expr = ExprCond( - ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - - - elif (expr.is_op("CC_S>=") and - test_cc_eq_args( - expr, - "FLAG_SIGN_SUB", - "FLAG_SUB_OF" - )): - expr = ExprCond( - ExprOp(TOK_INF_SIGNED, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - elif (expr.is_op("CC_S<") and - test_cc_eq_args( - expr, - "FLAG_SIGN_SUB", - "FLAG_SUB_OF" - )): - expr = ExprOp(TOK_INF_SIGNED, *expr.args[0].args) - - elif (expr.is_op("CC_S<=") and - test_cc_eq_args( - expr, - "FLAG_SIGN_SUB", - "FLAG_SUB_OF", - "FLAG_EQ_CMP", - )): - expr = ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args) - - elif (expr.is_op("CC_S<=") and - len(expr.args) == 3 and - expr.args[0].is_op("FLAG_SIGN_SUB") and - expr.args[2].is_op("FLAG_EQ_CMP") and - expr.args[0].args == expr.args[2].args and - expr.args[1].is_int(0)): - expr = ExprOp(TOK_INF_EQUAL_SIGNED, *expr.args[0].args) - - elif (expr.is_op("CC_U<=") and - test_cc_eq_args( - expr, - "FLAG_SUB_CF", - "FLAG_EQ_CMP", - )): - expr = ExprOp(TOK_INF_EQUAL_UNSIGNED, 
*expr.args[0].args) - - elif (expr.is_op("CC_U>") and - test_cc_eq_args( - expr, - "FLAG_SUB_CF", - "FLAG_EQ_CMP", - )): - expr = ExprCond( - ExprOp(TOK_INF_EQUAL_UNSIGNED, *expr.args[0].args), - ExprInt(0, expr.size), - ExprInt(1, expr.size) - ) - - elif (expr.is_op("CC_S<") and - test_cc_eq_args( - expr, - "FLAG_SIGN_ADD", - "FLAG_ADD_OF" - )): - arg0, arg1 = expr.args[0].args - expr = ExprOp(TOK_INF_SIGNED, arg0, -arg1) - - return expr - - - -def simp_cond_flag(_, expr): - """FLAG_EQ_CMP(X, Y)?A:B => (X == Y)?A:B""" - cond = expr.cond - if cond.is_op("FLAG_EQ_CMP"): - return ExprCond(ExprOp(TOK_EQUAL, *cond.args), expr.src1, expr.src2) - return expr - - -def simp_cmp_int(expr_simp, expr): - """ - ({X, 0} == int) => X == int[:] - X + int1 == int2 => X == int2-int1 - X ^ int1 == int2 => X == int1^int2 - """ - if (expr.is_op(TOK_EQUAL) and - expr.args[1].is_int() and - expr.args[0].is_compose() and - len(expr.args[0].args) == 2 and - expr.args[0].args[1].is_int(0)): - # ({X, 0} == int) => X == int[:] - src = expr.args[0].args[0] - int_val = int(expr.args[1]) - new_int = ExprInt(int_val, src.size) - expr = expr_simp( - ExprOp(TOK_EQUAL, src, new_int) - ) - elif not expr.is_op(TOK_EQUAL): - return expr - assert len(expr.args) == 2 - - left, right = expr.args - if left.is_int() and not right.is_int(): - left, right = right, left - if not right.is_int(): - return expr - if not (left.is_op() and left.op in ['+', '^']): - return expr - if not left.args[-1].is_int(): - return expr - # X + int1 == int2 => X == int2-int1 - # WARNING: - # X - 0x10 <=u 0x20 gives X in [0x10 0x30] - # which is not equivalet to A <=u 0x10 - - left_orig = left - left, last_int = left.args[:-1], left.args[-1] - - if len(left) == 1: - left = left[0] - else: - left = ExprOp(left.op, *left) - - if left_orig.op == "+": - new_int = expr_simp(right - last_int) - elif left_orig.op == '^': - new_int = expr_simp(right ^ last_int) - else: - raise RuntimeError("Unsupported operator") - - expr = expr_simp( - 
ExprOp(TOK_EQUAL, left, new_int), - ) - return expr - - - -def simp_cmp_int_arg(_, expr): - """ - (0x10 <= R0) ? A:B - => - (R0 < 0x10) ? B:A - """ - cond = expr.cond - if not cond.is_op(): - return expr - op = cond.op - if op not in [ - TOK_EQUAL, - TOK_INF_SIGNED, - TOK_INF_EQUAL_SIGNED, - TOK_INF_UNSIGNED, - TOK_INF_EQUAL_UNSIGNED - ]: - return expr - arg1, arg2 = cond.args - if arg2.is_int(): - return expr - if not arg1.is_int(): - return expr - src1, src2 = expr.src1, expr.src2 - if op == TOK_EQUAL: - return ExprCond(ExprOp(TOK_EQUAL, arg2, arg1), src1, src2) - - arg1, arg2 = arg2, arg1 - src1, src2 = src2, src1 - if op == TOK_INF_SIGNED: - op = TOK_INF_EQUAL_SIGNED - elif op == TOK_INF_EQUAL_SIGNED: - op = TOK_INF_SIGNED - elif op == TOK_INF_UNSIGNED: - op = TOK_INF_EQUAL_UNSIGNED - elif op == TOK_INF_EQUAL_UNSIGNED: - op = TOK_INF_UNSIGNED - return ExprCond(ExprOp(op, arg1, arg2), src1, src2) - - -def simp_subwc_cf(_, expr): - """SUBWC_CF(A, B, SUB_CF(C, D)) => SUB_CF({A, C}, {B, D})""" - if not expr.is_op('FLAG_SUBWC_CF'): - return expr - op3 = expr.args[2] - if not op3.is_op("FLAG_SUB_CF"): - return expr - - op1 = ExprCompose(expr.args[0], op3.args[0]) - op2 = ExprCompose(expr.args[1], op3.args[1]) - - return ExprOp("FLAG_SUB_CF", op1, op2) - - -def simp_subwc_of(_, expr): - """SUBWC_OF(A, B, SUB_CF(C, D)) => SUB_OF({A, C}, {B, D})""" - if not expr.is_op('FLAG_SUBWC_OF'): - return expr - op3 = expr.args[2] - if not op3.is_op("FLAG_SUB_CF"): - return expr - - op1 = ExprCompose(expr.args[0], op3.args[0]) - op2 = ExprCompose(expr.args[1], op3.args[1]) - - return ExprOp("FLAG_SUB_OF", op1, op2) - - -def simp_sign_subwc_cf(_, expr): - """SIGN_SUBWC(A, B, SUB_CF(C, D)) => SIGN_SUB({A, C}, {B, D})""" - if not expr.is_op('FLAG_SIGN_SUBWC'): - return expr - op3 = expr.args[2] - if not op3.is_op("FLAG_SUB_CF"): - return expr - - op1 = ExprCompose(expr.args[0], op3.args[0]) - op2 = ExprCompose(expr.args[1], op3.args[1]) - - return ExprOp("FLAG_SIGN_SUB", op1, op2) - 
-def simp_double_zeroext(_, expr): - """A.zeroExt(X).zeroExt(Y) => A.zeroExt(Y)""" - if not (expr.is_op() and expr.op.startswith("zeroExt")): - return expr - arg1 = expr.args[0] - if not (arg1.is_op() and arg1.op.startswith("zeroExt")): - return expr - arg2 = arg1.args[0] - return ExprOp(expr.op, arg2) - -def simp_double_signext(_, expr): - """A.signExt(X).signExt(Y) => A.signExt(Y)""" - if not (expr.is_op() and expr.op.startswith("signExt")): - return expr - arg1 = expr.args[0] - if not (arg1.is_op() and arg1.op.startswith("signExt")): - return expr - arg2 = arg1.args[0] - return ExprOp(expr.op, arg2) - -def simp_zeroext_eq_cst(_, expr): - """A.zeroExt(X) == int => A == int[:A.size]""" - if not expr.is_op(TOK_EQUAL): - return expr - arg1, arg2 = expr.args - if not arg2.is_int(): - return expr - if not (arg1.is_op() and arg1.op.startswith("zeroExt")): - return expr - src = arg1.args[0] - if int(arg2) > (1 << src.size): - # Always false - return ExprInt(0, expr.size) - return ExprOp(TOK_EQUAL, src, ExprInt(int(arg2), src.size)) - -def simp_cond_zeroext(_, expr): - """ - X.zeroExt()?(A:B) => X ? A:B - X.signExt()?(A:B) => X ? 
A:B - """ - if not ( - expr.cond.is_op() and - ( - expr.cond.op.startswith("zeroExt") or - expr.cond.op.startswith("signExt") - ) - ): - return expr - - ret = ExprCond(expr.cond.args[0], expr.src1, expr.src2) - return ret - -def simp_ext_eq_ext(_, expr): - """ - A.zeroExt(X) == B.zeroExt(X) => A == B - A.signExt(X) == B.signExt(X) => A == B - """ - if not expr.is_op(TOK_EQUAL): - return expr - arg1, arg2 = expr.args - if (not ((arg1.is_op() and arg1.op.startswith("zeroExt") and - arg2.is_op() and arg2.op.startswith("zeroExt")) or - (arg1.is_op() and arg1.op.startswith("signExt") and - arg2.is_op() and arg2.op.startswith("signExt")))): - return expr - if arg1.args[0].size != arg2.args[0].size: - return expr - return ExprOp(TOK_EQUAL, arg1.args[0], arg2.args[0]) - -def simp_cond_eq_zero(_, expr): - """(X == 0)?(A:B) => X?(B:A)""" - cond = expr.cond - if not cond.is_op(TOK_EQUAL): - return expr - arg1, arg2 = cond.args - if not arg2.is_int(0): - return expr - new_expr = ExprCond(arg1, expr.src2, expr.src1) - return new_expr - -def simp_sign_inf_zeroext(expr_s, expr): - """ - /!\ Ensure before: X.zeroExt(X.size) => X - - X.zeroExt() 0 - X.zeroExt() <=s 0 => X == 0 - - X.zeroExt() X.zeroExt() X.zeroExt() <=u cst (cst positive) - - X.zeroExt() 0 (cst negative) - X.zeroExt() <=s cst => 0 (cst negative) - - """ - if not (expr.is_op(TOK_INF_SIGNED) or expr.is_op(TOK_INF_EQUAL_SIGNED)): - return expr - arg1, arg2 = expr.args - if not arg2.is_int(): - return expr - if not (arg1.is_op() and arg1.op.startswith("zeroExt")): - return expr - src = arg1.args[0] - assert src.size < arg1.size - - # If cst is zero - if arg2.is_int(0): - if expr.is_op(TOK_INF_SIGNED): - # X.zeroExt() 0 - return ExprInt(0, expr.size) - else: - # X.zeroExt() <=s 0 => X == 0 - return ExprOp(TOK_EQUAL, src, ExprInt(0, src.size)) - - # cst is not zero - cst = int(arg2) - if cst & (1 << (arg2.size - 1)): - # cst is negative - return ExprInt(0, expr.size) - # cst is positive - if expr.is_op(TOK_INF_SIGNED): - 
# X.zeroExt() X.zeroExt() X.zeroExt() <=u cst (cst positive) - return ExprOp(TOK_INF_EQUAL_UNSIGNED, src, expr_s(arg2[:src.size])) - - -def simp_zeroext_and_cst_eq_cst(expr_s, expr): - """ - A.zeroExt(X) & ... & int == int => A & ... & int[:A.size] == int[:A.size] - """ - if not expr.is_op(TOK_EQUAL): - return expr - arg1, arg2 = expr.args - if not arg2.is_int(): - return expr - if not arg1.is_op('&'): - return expr - is_ok = True - sizes = set() - for arg in arg1.args: - if arg.is_int(): - continue - if (arg.is_op() and - arg.op.startswith("zeroExt")): - sizes.add(arg.args[0].size) - continue - is_ok = False - break - if not is_ok: - return expr - if len(sizes) != 1: - return expr - size = list(sizes)[0] - if int(arg2) > ((1 << size) - 1): - return expr - args = [expr_s(arg[:size]) for arg in arg1.args] - left = ExprOp('&', *args) - right = expr_s(arg2[:size]) - ret = ExprOp(TOK_EQUAL, left, right) - return ret - - -def test_one_bit_set(arg): - """ - Return True if arg has form 1 << X - """ - return arg != 0 and ((arg & (arg - 1)) == 0) - -def simp_x_and_cst_eq_cst(_, expr): - """ - (x & ... & onebitmask == onebitmask) ? A:B => (x & ... & onebitmask) ? 
A:B - """ - cond = expr.cond - if not cond.is_op(TOK_EQUAL): - return expr - arg1, mask2 = cond.args - if not mask2.is_int(): - return expr - if not test_one_bit_set(int(mask2)): - return expr - if not arg1.is_op('&'): - return expr - mask1 = arg1.args[-1] - if mask1 != mask2: - return expr - cond = ExprOp('&', *arg1.args) - return ExprCond(cond, expr.src1, expr.src2) - -def simp_cmp_int_int(_, expr): - """ - IntA int - IntA int - IntA <=s IntB => int - IntA <=u IntB => int - IntA == IntB => int - """ - if expr.op not in [ - TOK_EQUAL, - TOK_INF_SIGNED, TOK_INF_UNSIGNED, - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, - ]: - return expr - if not all(arg.is_int() for arg in expr.args): - return expr - int_a, int_b = expr.args - if expr.is_op(TOK_EQUAL): - if int_a == int_b: - return ExprInt(1, 1) - return ExprInt(0, expr.size) - - if expr.op in [TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED]: - int_a = int(mod_size2int[int_a.size](int(int_a))) - int_b = int(mod_size2int[int_b.size](int(int_b))) - else: - int_a = int(mod_size2uint[int_a.size](int(int_a))) - int_b = int(mod_size2uint[int_b.size](int(int_b))) - - if expr.op in [TOK_INF_SIGNED, TOK_INF_UNSIGNED]: - ret = int_a < int_b - else: - ret = int_a <= int_b - - if ret: - ret = 1 - else: - ret = 0 - return ExprInt(ret, 1) - - -def simp_ext_cst(_, expr): - """ - Int.zeroExt(X) => Int - Int.signExt(X) => Int - """ - if not (expr.op.startswith("zeroExt") or expr.op.startswith("signExt")): - return expr - arg = expr.args[0] - if not arg.is_int(): - return expr - if expr.op.startswith("zeroExt"): - ret = int(arg) - else: - ret = int(mod_size2int[arg.size](int(arg))) - ret = ExprInt(ret, expr.size) - return ret - - -def simp_slice_of_ext(_, expr): - """ - C.zeroExt(X)[A:B] => 0 if A >= size(C) - C.zeroExt(X)[A:B] => C[A:B] if B <= size(C) - A.zeroExt(X)[0:Y] => A.zeroExt(Y) - """ - if not expr.arg.is_op(): - return expr - if not expr.arg.op.startswith("zeroExt"): - return expr - arg = expr.arg.args[0] - - if expr.start >= 
arg.size: - # C.zeroExt(X)[A:B] => 0 if A >= size(C) - return ExprInt(0, expr.size) - if expr.stop <= arg.size: - # C.zeroExt(X)[A:B] => C[A:B] if B <= size(C) - return arg[expr.start:expr.stop] - if expr.start == 0: - # A.zeroExt(X)[0:Y] => A.zeroExt(Y) - return arg.zeroExtend(expr.stop) - return expr - -def simp_slice_of_op_ext(expr_s, expr): - """ - (X.zeroExt() + {Z, } + ... + Int)[0:8] => X + ... + int[:] - (X.zeroExt() | ... | Int)[0:8] => X | ... | int[:] - ... - """ - if expr.start != 0: - return expr - src = expr.arg - if not src.is_op(): - return expr - if src.op not in ['+', '|', '^', '&']: - return expr - is_ok = True - for arg in src.args: - if arg.is_int(): - continue - if (arg.is_op() and - arg.op.startswith("zeroExt") and - arg.args[0].size == expr.stop): - continue - if arg.is_compose(): - continue - is_ok = False - break - if not is_ok: - return expr - args = [expr_s(arg[:expr.stop]) for arg in src.args] - return ExprOp(src.op, *args) - - -def simp_cond_logic_ext(expr_s, expr): - """(X.zeroExt() + ... + Int) ? A:B => X + ... + int[:] ? A:B""" - cond = expr.cond - if not cond.is_op(): - return expr - if cond.op not in ["&", "^", "|"]: - return expr - is_ok = True - sizes = set() - for arg in cond.args: - if arg.is_int(): - continue - if (arg.is_op() and - arg.op.startswith("zeroExt")): - sizes.add(arg.args[0].size) - continue - is_ok = False - break - if not is_ok: - return expr - if len(sizes) != 1: - return expr - size = list(sizes)[0] - args = [expr_s(arg[:size]) for arg in cond.args] - cond = ExprOp(cond.op, *args) - return ExprCond(cond, expr.src1, expr.src2) - - -def simp_cond_sign_bit(_, expr): - """(a & .. & 0x80000000) ? A:B => (a & ...) 
(a == b)?Y:X - (a^b)?X:Y => (a == b)?Y:X - """ - cond = expr.cond - if not cond.is_op(): - return expr - if cond.op not in ['+', '^']: - return expr - if len(cond.args) != 2: - return expr - arg1, arg2 = cond.args - if cond.is_op('+'): - new_cond = ExprOp('==', arg1, expr_s(-arg2)) - elif cond.is_op('^'): - new_cond = ExprOp('==', arg1, arg2) - else: - raise ValueError('Bad case') - return ExprCond(new_cond, expr.src2, expr.src1) - - -def simp_cond_eq_1_0(expr_s, expr): - """ - (a == b)?ExprInt(1, 1):ExprInt(0, 1) => a == b - (a a == b - ... - """ - cond = expr.cond - if not cond.is_op(): - return expr - if cond.op not in [ - TOK_EQUAL, - TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED, - TOK_INF_UNSIGNED, TOK_INF_EQUAL_UNSIGNED - ]: - return expr - if expr.src1 != ExprInt(1, 1) or expr.src2 != ExprInt(0, 1): - return expr - return cond - - -def simp_cond_inf_eq_unsigned_zero(expr_s, expr): - """ - (a <=u 0) => a == 0 - """ - if not expr.is_op(TOK_INF_EQUAL_UNSIGNED): - return expr - if not expr.args[1].is_int(0): - return expr - return ExprOp(TOK_EQUAL, expr.args[0], expr.args[1]) - - -def simp_test_signext_inf(expr_s, expr): - """A.signExt() A = (1 << (base.size - 1)) or - tmp < -(1 << (base.size - 1)) ): - return ExprInt(1, 1) - return expr - - -def simp_test_zeroext_inf(expr_s, expr): - """A.zeroExt() A = (1 << base.size): - return ExprInt(1, 1) - return expr - - -def simp_add_multiple(_, expr): - """ - X + X => 2 * X - X + X * int1 => X * (1 + int1) - X * int1 + (- X) => X * (int1 - 1) - X + (X << int1) => X * (1 + 2 ** int1) - Correct even if addition overflow/underflow - """ - if not expr.is_op('+'): - return expr - - # Extract each argument and its counter - operands = {} - for arg in expr.args: - if arg.is_op('*') and arg.args[1].is_int(): - base_expr, factor = arg.args - operands[base_expr] = operands.get(base_expr, 0) + int(factor) - elif arg.is_op('<<') and arg.args[1].is_int(): - base_expr, factor = arg.args - operands[base_expr] = operands.get(base_expr, 0) + 2 
** int(factor) - elif arg.is_op("-"): - arg = arg.args[0] - if arg.is_op('<<') and arg.args[1].is_int(): - base_expr, factor = arg.args - operands[base_expr] = operands.get(base_expr, 0) - (2 ** int(factor)) - else: - operands[arg] = operands.get(arg, 0) - 1 - else: - operands[arg] = operands.get(arg, 0) + 1 - out = [] - - # Best effort to factor common args: - # (a + b) * 3 + a + b => (a + b) * 4 - # Does not factor: - # (a + b) * 3 + 2 * a + b => (a + b) * 4 + a - modified = True - while modified: - modified = False - for arg, count in list(viewitems(operands)): - if not arg.is_op('+'): - continue - components = arg.args - if not all(component in operands for component in components): - continue - counters = set(operands[component] for component in components) - if len(counters) != 1: - continue - counter = counters.pop() - for component in components: - del operands[component] - operands[arg] += counter - modified = True - break - - for arg, count in viewitems(operands): - if count == 0: - continue - if count == 1: - out.append(arg) - continue - out.append(arg * ExprInt(count, expr.size)) - - if len(out) == len(expr.args): - # No reductions - return expr - if not out: - return ExprInt(0, expr.size) - if len(out) == 1: - return out[0] - return ExprOp('+', *out) diff --git a/miasm2/expression/simplifications_cond.py b/miasm2/expression/simplifications_cond.py deleted file mode 100644 index f1c224b7..00000000 --- a/miasm2/expression/simplifications_cond.py +++ /dev/null @@ -1,178 +0,0 @@ -################################################################################ -# -# By choice, Miasm2 does not handle comparison as a single operation, but with -# operations corresponding to comparison computation. 
-# One may want to detect those comparison; this library is designed to add them -# in Miasm2 engine thanks to : -# - Conditions computation in ExprOp -# - Simplifications to catch known condition forms -# -# Conditions currently supported : -# ="): - op_cf, = args - return ~op_cf - - elif expr.is_op("CC_S<"): - op_nf, op_of = args - return op_nf ^ op_of - - elif expr.is_op("CC_S>"): - op_nf, op_of, op_zf = args - return ~(op_zf | (op_nf ^ op_of)) - - elif expr.is_op("CC_S<="): - op_nf, op_of, op_zf = args - return op_zf | (op_nf ^ op_of) - - elif expr.is_op("CC_S>="): - op_nf, op_of = args - return ~(op_nf ^ op_of) - - elif expr.is_op("CC_U>"): - op_cf, op_zf = args - return ~(op_cf | op_zf) - - elif expr.is_op("CC_U<"): - op_cf, = args - return op_cf - - elif expr.is_op("CC_NEG"): - op_nf, = args - return op_nf - - elif expr.is_op("CC_EQ"): - op_zf, = args - return op_zf - - elif expr.is_op("CC_NE"): - op_zf, = args - return ~op_zf - - elif expr.is_op("CC_POS"): - op_nf, = args - return ~op_nf - - return expr - diff --git a/miasm2/expression/smt2_helper.py b/miasm2/expression/smt2_helper.py deleted file mode 100644 index 53d323e8..00000000 --- a/miasm2/expression/smt2_helper.py +++ /dev/null @@ -1,296 +0,0 @@ -# Helper functions for the generation of SMT2 expressions -# The SMT2 expressions will be returned as a string. -# The expressions are divided as follows -# -# - generic SMT2 operations -# - definitions of SMT2 structures -# - bit vector operations -# - array operations - -# generic SMT2 operations - -def smt2_eq(a, b): - """ - Assignment: a = b - """ - return "(= {} {})".format(a, b) - - -def smt2_implies(a, b): - """ - Implication: a => b - """ - return "(=> {} {})".format(a, b) - - -def smt2_and(*args): - """ - Conjunction: a and b and c ... - """ - # transform args into strings - args = [str(arg) for arg in args] - return "(and {})".format(' '.join(args)) - - -def smt2_or(*args): - """ - Disjunction: a or b or c ... 
- """ - # transform args into strings - args = [str(arg) for arg in args] - return "(or {})".format(' '.join(args)) - - -def smt2_ite(cond, a, b): - """ - If-then-else: cond ? a : b - """ - return "(ite {} {} {})".format(cond, a, b) - - -def smt2_distinct(*args): - """ - Distinction: a != b != c != ... - """ - # transform args into strings - args = [str(arg) for arg in args] - return "(distinct {})".format(' '.join(args)) - - -def smt2_assert(expr): - """ - Assertion that @expr holds - """ - return "(assert {})".format(expr) - - -# definitions - -def declare_bv(bv, size): - """ - Declares an bit vector @bv of size @size - """ - return "(declare-fun {} () {})".format(bv, bit_vec(size)) - - -def declare_array(a, bv1, bv2): - """ - Declares an SMT2 array represented as a map - from a bit vector to another bit vector. - :param a: array name - :param bv1: SMT2 bit vector - :param bv2: SMT2 bit vector - """ - return "(declare-fun {} () (Array {} {}))".format(a, bv1, bv2) - - -def bit_vec_val(v, size): - """ - Declares a bit vector value - :param v: int, value of the bit vector - :param size: size of the bit vector - """ - return "(_ bv{} {})".format(v, size) - - -def bit_vec(size): - """ - Returns a bit vector of size @size - """ - return "(_ BitVec {})".format(size) - - -# bit vector operations - -def bvadd(a, b): - """ - Addition: a + b - """ - return "(bvadd {} {})".format(a, b) - - -def bvsub(a, b): - """ - Subtraction: a - b - """ - return "(bvsub {} {})".format(a, b) - - -def bvmul(a, b): - """ - Multiplication: a * b - """ - return "(bvmul {} {})".format(a, b) - - -def bvand(a, b): - """ - Bitwise AND: a & b - """ - return "(bvand {} {})".format(a, b) - - -def bvor(a, b): - """ - Bitwise OR: a | b - """ - return "(bvor {} {})".format(a, b) - - -def bvxor(a, b): - """ - Bitwise XOR: a ^ b - """ - return "(bvxor {} {})".format(a, b) - - -def bvneg(bv): - """ - Unary minus: - bv - """ - return "(bvneg {})".format(bv) - - -def bvsdiv(a, b): - """ - Signed division: a 
/ b - """ - return "(bvsdiv {} {})".format(a, b) - - -def bvudiv(a, b): - """ - Unsigned division: a / b - """ - return "(bvudiv {} {})".format(a, b) - - -def bvsmod(a, b): - """ - Signed modulo: a mod b - """ - return "(bvsmod {} {})".format(a, b) - - -def bvurem(a, b): - """ - Unsigned modulo: a mod b - """ - return "(bvurem {} {})".format(a, b) - - -def bvshl(a, b): - """ - Shift left: a << b - """ - return "(bvshl {} {})".format(a, b) - - -def bvlshr(a, b): - """ - Logical shift right: a >> b - """ - return "(bvlshr {} {})".format(a, b) - - -def bvashr(a, b): - """ - Arithmetic shift right: a a>> b - """ - return "(bvashr {} {})".format(a, b) - - -def bv_rotate_left(a, b, size): - """ - Rotates bits of a to the left b times: a <<< b - - Since ((_ rotate_left b) a) does not support - symbolic values for b, the implementation is - based on a C implementation. - - Therefore, the rotation will be computed as - a << (b & (size - 1))) | (a >> (size - (b & (size - 1)))) - - :param a: bit vector - :param b: bit vector - :param size: size of a - """ - - # define constant - s = bit_vec_val(size, size) - - # shift = b & (size - 1) - shift = bvand(b, bvsub(s, bit_vec_val(1, size))) - - # (a << shift) | (a >> size - shift) - rotate = bvor(bvshl(a, shift), - bvlshr(a, bvsub(s, shift))) - - return rotate - - -def bv_rotate_right(a, b, size): - """ - Rotates bits of a to the right b times: a >>> b - - Since ((_ rotate_right b) a) does not support - symbolic values for b, the implementation is - based on a C implementation. 
- - Therefore, the rotation will be computed as - a >> (b & (size - 1))) | (a << (size - (b & (size - 1)))) - - :param a: bit vector - :param b: bit vector - :param size: size of a - """ - - # define constant - s = bit_vec_val(size, size) - - # shift = b & (size - 1) - shift = bvand(b, bvsub(s, bit_vec_val(1, size))) - - # (a >> shift) | (a << size - shift) - rotate = bvor(bvlshr(a, shift), - bvshl(a, bvsub(s, shift))) - - return rotate - - -def bv_extract(high, low, bv): - """ - Extracts bits from a bit vector - :param high: end bit - :param low: start bit - :param bv: bit vector - """ - return "((_ extract {} {}) {})".format(high, low, bv) - - -def bv_concat(a, b): - """ - Concatenation of two SMT2 expressions - """ - return "(concat {} {})".format(a, b) - - -# array operations - -def array_select(array, index): - """ - Reads from an SMT2 array at index @index - :param array: SMT2 array - :param index: SMT2 expression, index of the array - """ - return "(select {} {})".format(array, index) - - -def array_store(array, index, value): - """ - Writes an value into an SMT2 array at address @index - :param array: SMT array - :param index: SMT2 expression, index of the array - :param value: SMT2 expression, value to write - """ - return "(store {} {} {})".format(array, index, value) diff --git a/miasm2/ir/__init__.py b/miasm2/ir/__init__.py deleted file mode 100644 index 0627b488..00000000 --- a/miasm2/ir/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Intermediate representation methods" diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py deleted file mode 100644 index 9158aceb..00000000 --- a/miasm2/ir/analysis.py +++ /dev/null @@ -1,113 +0,0 @@ -#-*- coding:utf-8 -*- - -import warnings -import logging - -from miasm2.ir.ir import IntermediateRepresentation, AssignBlock -from miasm2.expression.expression import ExprOp, ExprAssign -from miasm2.analysis.data_flow import dead_simp as new_dead_simp_imp - - -log = logging.getLogger("analysis") -console_handler = 
logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARNING) - - -class ira(IntermediateRepresentation): - """IR Analysis - This class provides higher level manipulations on IR, such as dead - instruction removals. - - This class can be used as a common parent with - `miasm2.ir.ir::IntermediateRepresentation` class. - - For instance: - class ira_x86_16(ir_x86_16, ira) - - """ - ret_reg = None - - def call_effects(self, addr, instr): - """Default modelisation of a function call to @addr. This may be used to: - - * insert dependencies to arguments (stack base, registers, ...) - * add some side effects (stack clean, return value, ...) - - Return a couple: - * list of assignments to add to the current irblock - * list of additional irblocks - - @addr: (Expr) address of the called function - @instr: native instruction which is responsible of the call - """ - - call_assignblk = AssignBlock( - [ - ExprAssign(self.ret_reg, ExprOp('call_func_ret', addr, self.sp)), - ExprAssign(self.sp, ExprOp('call_func_stack', addr, self.sp)) - ], - instr - ) - return [call_assignblk], [] - - def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): - """ - Add the IR effects of an instruction to the current state. - If the instruction is a function call, replace the original IR by a - model of the sub function - - Returns a bool: - * True if the current assignments list must be split - * False in other cases. 
- - @instr: native instruction - @block: native block source - @assignments: current irbloc - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - """ - if instr.is_subcall(): - call_assignblks, extra_irblocks = self.call_effects( - instr.args[0], - instr - ) - assignments += call_assignblks - ir_blocks_all += extra_irblocks - return True - - if gen_pc_updt is not False: - self.gen_pc_update(assignments, instr) - - assignblk, ir_blocks_extra = self.instr2ir(instr) - assignments.append(assignblk) - ir_blocks_all += ir_blocks_extra - if ir_blocks_extra: - return True - return False - - def sizeof_char(self): - "Return the size of a char in bits" - raise NotImplementedError("Abstract method") - - def sizeof_short(self): - "Return the size of a short in bits" - raise NotImplementedError("Abstract method") - - def sizeof_int(self): - "Return the size of an int in bits" - raise NotImplementedError("Abstract method") - - def sizeof_long(self): - "Return the size of a long in bits" - raise NotImplementedError("Abstract method") - - def sizeof_pointer(self): - "Return the size of a void* in bits" - raise NotImplementedError("Abstract method") - - def dead_simp(self, ircfg): - """Deprecated: See miasm2.analysis.data_flow.dead_simp()""" - warnings.warn('DEPRECATION WARNING: Please use miasm2.analysis.data_flow.dead_simp(ira) instead of ira.dead_simp()') - new_dead_simp_imp(self, ircfg) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py deleted file mode 100644 index 82b12dcd..00000000 --- a/miasm2/ir/ir.py +++ /dev/null @@ -1,929 +0,0 @@ -#-*- coding:utf-8 -*- - -# -# Copyright (C) 2013 Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -from builtins import zip -import warnings - -from itertools import chain -from future.utils import viewvalues, viewitems - -import miasm2.expression.expression as m2_expr -from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmBlock, AsmConstraint -from miasm2.core.graph import DiGraph -from functools import reduce - - -def _expr_loc_to_symb(expr, loc_db): - if not expr.is_loc(): - return expr - if loc_db is None: - name = str(expr) - else: - names = loc_db.get_location_names(expr.loc_key) - if not names: - name = loc_db.pretty_str(expr.loc_key) - else: - # Use only one name for readability - name = sorted(names)[0] - return m2_expr.ExprId(name, expr.size) - -class AssignBlock(object): - """Represent parallel IR assignment, such as: - EAX = EBX - EBX = EAX - - -> Exchange between EBX and EAX - - AssignBlock can be seen as a dictionary where keys are the destinations - (ExprId or ExprMem), and values their corresponding sources. - - Also provides common manipulation on this assignments. 
- - """ - __slots__ = ["_assigns", "_instr"] - - def __init__(self, irs=None, instr=None): - """Create a new AssignBlock - @irs: (optional) sequence of ExprAssign, or dictionary dst (Expr) -> src - (Expr) - @instr: (optional) associate an instruction with this AssignBlock - - """ - if irs is None: - irs = [] - self._instr = instr - self._assigns = {} # ExprAssign.dst -> ExprAssign.src - - # Concurrent assignments are handled in _set - if hasattr(irs, "items"): - for dst, src in viewitems(irs): - self._set(dst, src) - else: - for expraff in irs: - self._set(expraff.dst, expraff.src) - - @property - def instr(self): - """Return the associated instruction, if any""" - return self._instr - - def _set(self, dst, src): - """ - Special cases: - * if dst is an ExprSlice, expand it to assign the full Expression - * if dst already known, sources are merged - """ - if dst.size != src.size: - raise RuntimeError( - "sanitycheck: args must have same size! %s" % - ([(str(arg), arg.size) for arg in [dst, src]])) - - if isinstance(dst, m2_expr.ExprSlice): - # Complete the source with missing slice parts - new_dst = dst.arg - rest = [(m2_expr.ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) - for r in dst.slice_rest()] - all_a = [(src, dst.start, dst.stop)] + rest - all_a.sort(key=lambda x: x[1]) - args = [expr for (expr, _, _) in all_a] - new_src = m2_expr.ExprCompose(*args) - else: - new_dst, new_src = dst, src - - if new_dst in self._assigns and isinstance(new_src, m2_expr.ExprCompose): - if not isinstance(self[new_dst], m2_expr.ExprCompose): - # prev_RAX = 0x1122334455667788 - # input_RAX[0:8] = 0x89 - # final_RAX -> ? 
(assignment are in parallel) - raise RuntimeError("Concurrent access on same bit not allowed") - - # Consider slice grouping - expr_list = [(new_dst, new_src), - (new_dst, self[new_dst])] - # Find collision - e_colision = reduce(lambda x, y: x.union(y), - (self.get_modified_slice(dst, src) - for (dst, src) in expr_list), - set()) - - # Sort interval collision - known_intervals = sorted([(x[1], x[2]) for x in e_colision]) - - for i, (_, stop) in enumerate(known_intervals[:-1]): - if stop > known_intervals[i + 1][0]: - raise RuntimeError( - "Concurrent access on same bit not allowed") - - # Fill with missing data - missing_i = get_missing_interval(known_intervals, 0, new_dst.size) - remaining = ((m2_expr.ExprSlice(new_dst, *interval), - interval[0], - interval[1]) - for interval in missing_i) - - # Build the merging expression - args = list(e_colision.union(remaining)) - args.sort(key=lambda x: x[1]) - starts = [start for (_, start, _) in args] - assert len(set(starts)) == len(starts) - args = [expr for (expr, _, _) in args] - new_src = m2_expr.ExprCompose(*args) - - # Sanity check - if not isinstance(new_dst, (m2_expr.ExprId, m2_expr.ExprMem)): - raise TypeError("Destination cannot be a %s" % type(new_dst)) - - self._assigns[new_dst] = new_src - - def __setitem__(self, dst, src): - raise RuntimeError('AssignBlock is immutable') - - def __getitem__(self, key): - return self._assigns[key] - - def __contains__(self, key): - return key in self._assigns - - def iteritems(self): - for dst, src in viewitems(self._assigns): - yield dst, src - - def items(self): - return [(dst, src) for dst, src in viewitems(self._assigns)] - - def itervalues(self): - for src in viewvalues(self._assigns): - yield src - - def keys(self): - return list(self._assigns) - - def values(self): - return list(viewvalues(self._assigns)) - - def __iter__(self): - for dst in self._assigns: - yield dst - - def __delitem__(self, _): - raise RuntimeError('AssignBlock is immutable') - - def update(self, _): 
- raise RuntimeError('AssignBlock is immutable') - - def __eq__(self, other): - if set(self.keys()) != set(other.keys()): - return False - return all(other[dst] == src for dst, src in viewitems(self)) - - def __ne__(self, other): - return not self == other - - def __len__(self): - return len(self._assigns) - - def get(self, key, default): - return self._assigns.get(key, default) - - @staticmethod - def get_modified_slice(dst, src): - """Return an Expr list of extra expressions needed during the - object instantiation""" - if not isinstance(src, m2_expr.ExprCompose): - raise ValueError("Get mod slice not on expraff slice", str(src)) - modified_s = [] - for index, arg in src.iter_args(): - if not (isinstance(arg, m2_expr.ExprSlice) and - arg.arg == dst and - index == arg.start and - index+arg.size == arg.stop): - # If x is not the initial expression - modified_s.append((arg, index, index+arg.size)) - return modified_s - - def get_w(self): - """Return a set of elements written""" - return set(self.keys()) - - def get_rw(self, mem_read=False, cst_read=False): - """Return a dictionary associating written expressions to a set of - their read requirements - @mem_read: (optional) mem_read argument of `get_r` - @cst_read: (optional) cst_read argument of `get_r` - """ - out = {} - for dst, src in viewitems(self): - src_read = src.get_r(mem_read=mem_read, cst_read=cst_read) - if isinstance(dst, m2_expr.ExprMem) and mem_read: - # Read on destination happens only with ExprMem - src_read.update(dst.ptr.get_r(mem_read=mem_read, - cst_read=cst_read)) - out[dst] = src_read - return out - - def get_r(self, mem_read=False, cst_read=False): - """Return a set of elements reads - @mem_read: (optional) mem_read argument of `get_r` - @cst_read: (optional) cst_read argument of `get_r` - """ - return set( - chain.from_iterable( - viewvalues( - self.get_rw( - mem_read=mem_read, - cst_read=cst_read - ) - ) - ) - ) - - def __str__(self): - out = [] - for dst, src in 
sorted(viewitems(self._assigns)): - out.append("%s = %s" % (dst, src)) - return "\n".join(out) - - def dst2ExprAssign(self, dst): - """Return an ExprAssign corresponding to @dst equation - @dst: Expr instance""" - return m2_expr.ExprAssign(dst, self[dst]) - - def simplify(self, simplifier): - """ - Return a new AssignBlock with expression simplified - - @simplifier: ExpressionSimplifier instance - """ - new_assignblk = {} - for dst, src in viewitems(self): - if dst == src: - continue - new_src = simplifier(src) - new_dst = simplifier(dst) - new_assignblk[new_dst] = new_src - return AssignBlock(irs=new_assignblk, instr=self.instr) - - def to_string(self, loc_db=None): - out = [] - for dst, src in viewitems(self): - new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) - new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, loc_db)) - line = "%s = %s" % (new_dst, new_src) - out.append(line) - out.append("") - return "\n".join(out) - -class IRBlock(object): - """Intermediate representation block object. - - Stand for an intermediate representation basic block. 
- """ - - __slots__ = ["_loc_key", "_assignblks", "_dst", "_dst_linenb"] - - def __init__(self, loc_key, assignblks): - """ - @loc_key: LocKey of the IR basic block - @assignblks: list of AssignBlock - """ - - assert isinstance(loc_key, m2_expr.LocKey) - self._loc_key = loc_key - for assignblk in assignblks: - assert isinstance(assignblk, AssignBlock) - self._assignblks = tuple(assignblks) - self._dst = None - self._dst_linenb = None - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - if self.loc_key != other.loc_key: - return False - if len(self.assignblks) != len(other.assignblks): - return False - for assignblk1, assignblk2 in zip(self.assignblks, other.assignblks): - if assignblk1 != assignblk2: - return False - return True - - def __ne__(self, other): - return not self == other - - def get_label(self): - warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') - return self.loc_key - - loc_key = property(lambda self:self._loc_key) - label = property(get_label) - - @property - def assignblks(self): - return self._assignblks - - @property - def irs(self): - warnings.warn('DEPRECATION WARNING: use "irblock.assignblks" instead of "irblock.irs"') - return self._assignblks - - def __iter__(self): - """Iterate on assignblks""" - return self._assignblks.__iter__() - - def __getitem__(self, index): - """Getitem on assignblks""" - return self._assignblks.__getitem__(index) - - def __len__(self): - """Length of assignblks""" - return self._assignblks.__len__() - - def is_dst_set(self): - return self._dst is not None - - def cache_dst(self): - final_dst = None - final_linenb = None - for linenb, assignblk in enumerate(self): - for dst, src in viewitems(assignblk): - if dst.is_id("IRDst"): - if final_dst is not None: - raise ValueError('Multiple destinations!') - final_dst = src - final_linenb = linenb - self._dst = final_dst - self._dst_linenb = final_linenb - return final_dst - - @property - def dst(self): - """Return 
the value of IRDst for the IRBlock""" - if self.is_dst_set(): - return self._dst - return self.cache_dst() - - def set_dst(self, value): - """Generate a new IRBlock with a dst (IRBlock) fixed to @value""" - irs = [] - dst_found = False - for assignblk in self: - new_assignblk = {} - for dst, src in viewitems(assignblk): - if dst.is_id("IRDst"): - assert dst_found is False - dst_found = True - new_assignblk[dst] = value - else: - new_assignblk[dst] = src - irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(self.loc_key, irs) - - @property - def dst_linenb(self): - """Line number of the IRDst setting statement in the current irs""" - if not self.is_dst_set(): - self.cache_dst() - return self._dst_linenb - - def __str__(self): - out = [] - out.append(str(self.loc_key)) - for assignblk in self: - for dst, src in viewitems(assignblk): - out.append('\t%s = %s' % (dst, src)) - out.append("") - return "\n".join(out) - - - def modify_exprs(self, mod_dst=None, mod_src=None): - """ - Generate a new IRBlock with its AssignBlock expressions modified - according to @mod_dst and @mod_src - @mod_dst: function called to modify Expression destination - @mod_src: function called to modify Expression source - """ - - if mod_dst is None: - mod_dst = lambda expr:expr - if mod_src is None: - mod_src = lambda expr:expr - - assignblks = [] - for assignblk in self: - new_assignblk = {} - for dst, src in viewitems(assignblk): - new_assignblk[mod_dst(dst)] = mod_src(src) - assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(self.loc_key, assignblks) - - def to_string(self, loc_db=None): - out = [] - if loc_db is None: - node_name = "%s:" % self.loc_key - else: - names = loc_db.get_location_names(self.loc_key) - if not names: - node_name = "%s:" % loc_db.pretty_str(self.loc_key) - else: - node_name = "".join("%s:\n" % name for name in names) - out.append(node_name) - - for assignblk in self: - out.append(assignblk.to_string(loc_db)) - return 
'\n'.join(out) - - - def simplify(self, simplifier): - """ - Simplify expressions in each assignblock - @simplifier: ExpressionSimplifier instance - """ - modified = False - assignblks = [] - for assignblk in self: - new_assignblk = assignblk.simplify(simplifier) - if assignblk != new_assignblk: - modified = True - assignblks.append(new_assignblk) - return modified, IRBlock(self.loc_key, assignblks) - - -class irbloc(IRBlock): - """ - DEPRECATED object - Use IRBlock instead of irbloc - """ - - def __init__(self, loc_key, irs, lines=None): - warnings.warn('DEPRECATION WARNING: use "IRBlock" instead of "irblock"') - super(irbloc, self).__init__(loc_key, irs) - - -class IRCFG(DiGraph): - - """DiGraph for IR instances""" - - def __init__(self, irdst, loc_db, blocks=None, *args, **kwargs): - """Instantiate a IRCFG - @loc_db: LocationDB instance - @blocks: IR blocks - """ - self.loc_db = loc_db - if blocks is None: - blocks = {} - self._blocks = blocks - self._irdst = irdst - super(IRCFG, self).__init__(*args, **kwargs) - - @property - def IRDst(self): - return self._irdst - - @property - def blocks(self): - return self._blocks - - def add_irblock(self, irblock): - """ - Add the @irblock to the current IRCFG - @irblock: IRBlock instance - """ - self.blocks[irblock.loc_key] = irblock - self.add_node(irblock.loc_key) - - for dst in self.dst_trackback(irblock): - if dst.is_int(): - dst_loc_key = self.loc_db.get_or_create_offset_location(int(dst)) - dst = m2_expr.ExprLoc(dst_loc_key, irblock.dst.size) - if dst.is_loc(): - self.add_uniq_edge(irblock.loc_key, dst.loc_key) - - def node2lines(self, node): - if self.loc_db is None: - node_name = str(node) - else: - node_name = self.loc_db.pretty_str(node) - yield self.DotCellDescription( - text="%s" % node_name, - attr={ - 'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey', - } - ) - if node not in self._blocks: - yield [self.DotCellDescription(text="NOT PRESENT", attr={})] - return - for i, assignblk in 
enumerate(self._blocks[node]): - for dst, src in viewitems(assignblk): - - new_src = src.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) - new_dst = dst.visit(lambda expr:_expr_loc_to_symb(expr, self.loc_db)) - line = "%s = %s" % (new_dst, new_src) - if self._dot_offset: - yield [self.DotCellDescription(text="%-4d" % i, attr={}), - self.DotCellDescription(text=line, attr={})] - else: - yield self.DotCellDescription(text=line, attr={}) - yield self.DotCellDescription(text="", attr={}) - - def edge_attr(self, src, dst): - if src not in self._blocks or dst not in self._blocks: - return {} - src_irdst = self._blocks[src].dst - edge_color = "blue" - if isinstance(src_irdst, m2_expr.ExprCond): - src1, src2 = src_irdst.src1, src_irdst.src2 - if src1.is_loc(dst): - edge_color = "limegreen" - elif src2.is_loc(dst): - edge_color = "red" - return {"color": edge_color} - - def node_attr(self, node): - if node not in self._blocks: - return {'style': 'filled', 'fillcolor': 'red'} - return {} - - def dot(self, offset=False): - """ - @offset: (optional) if set, add the corresponding line number in each - node - """ - self._dot_offset = offset - return super(IRCFG, self).dot() - - def get_loc_key(self, addr): - """Transforms an ExprId/ExprInt/loc_key/int into a loc_key - @addr: an ExprId/ExprInt/loc_key/int""" - - if isinstance(addr, m2_expr.LocKey): - return addr - elif isinstance(addr, m2_expr.ExprLoc): - return addr.loc_key - - try: - addr = int(addr) - except (ValueError, TypeError): - return None - - return self.loc_db.get_offset_location(addr) - - - def get_or_create_loc_key(self, addr): - """Transforms an ExprId/ExprInt/loc_key/int into a loc_key - If the offset @addr is not in the LocationDB, create it - @addr: an ExprId/ExprInt/loc_key/int""" - - loc_key = self.get_loc_key(addr) - if loc_key is not None: - return loc_key - - return self.loc_db.add_location(offset=int(addr)) - - def get_block(self, addr): - """Returns the irbloc associated to an 
ExprId/ExprInt/loc_key/int - @addr: an ExprId/ExprInt/loc_key/int""" - - loc_key = self.get_loc_key(addr) - if loc_key is None: - return None - return self.blocks.get(loc_key, None) - - def getby_offset(self, offset): - """ - Return the set of loc_keys of irblocks containing @offset - @offset: address - """ - out = set() - for irb in viewvalues(self.blocks): - for assignblk in irb: - instr = assignblk.instr - if instr is None: - continue - if instr.offset <= offset < instr.offset + instr.l: - out.add(irb.loc_key) - return out - - - def simplify(self, simplifier): - """ - Simplify expressions in each irblocks - @simplifier: ExpressionSimplifier instance - """ - modified = False - for loc_key, block in list(viewitems(self.blocks)): - assignblks = [] - for assignblk in block: - new_assignblk = assignblk.simplify(simplifier) - if assignblk != new_assignblk: - modified = True - assignblks.append(new_assignblk) - self.blocks[loc_key] = IRBlock(loc_key, assignblks) - return modified - - def get_rw(self, regs_ids=None): - """ - Calls get_rw(irb) for each bloc - @regs_ids : ids of registers used in IR - """ - if regs_ids is None: - regs_ids = [] - for irblock in viewvalues(self.blocks): - irblock.get_rw(regs_ids) - - def _extract_dst(self, todo, done): - """ - Naive extraction of @todo destinations - WARNING: @todo and @done are modified - """ - out = set() - while todo: - dst = todo.pop() - if dst.is_loc(): - done.add(dst) - elif dst.is_mem() or dst.is_int(): - done.add(dst) - elif dst.is_cond(): - todo.add(dst.src1) - todo.add(dst.src2) - elif dst.is_id(): - out.add(dst) - else: - done.add(dst) - return out - - def dst_trackback(self, irb): - """ - Naive backtracking of IRDst - @irb: irbloc instance - """ - todo = set([irb.dst]) - done = set() - - for assignblk in reversed(irb): - if not todo: - break - out = self._extract_dst(todo, done) - found = set() - follow = set() - for dst in out: - if dst in assignblk: - follow.add(assignblk[dst]) - found.add(dst) - - 
follow.update(out.difference(found)) - todo = follow - - return done - - -class DiGraphIR(IRCFG): - """ - DEPRECATED object - Use IRCFG instead of DiGraphIR - """ - - def __init__(self, *args, **kwargs): - warnings.warn('DEPRECATION WARNING: use "IRCFG" instead of "DiGraphIR"') - raise NotImplementedError("Depreceated") - - -class IntermediateRepresentation(object): - """ - Intermediate representation object - - Allow native assembly to intermediate representation traduction - """ - - def __init__(self, arch, attrib, loc_db): - self.pc = arch.getpc(attrib) - self.sp = arch.getsp(attrib) - self.arch = arch - self.attrib = attrib - self.loc_db = loc_db - self.IRDst = None - - def get_ir(self, instr): - raise NotImplementedError("Abstract Method") - - def new_ircfg(self, *args, **kwargs): - """ - Return a new instance of IRCFG - """ - return IRCFG(self.IRDst, self.loc_db, *args, **kwargs) - - def new_ircfg_from_asmcfg(self, asmcfg, *args, **kwargs): - """ - Return a new instance of IRCFG from an @asmcfg - @asmcfg: AsmCFG instance - """ - - ircfg = IRCFG(self.IRDst, self.loc_db, *args, **kwargs) - for block in asmcfg.blocks: - self.add_asmblock_to_ircfg(block, ircfg) - return ircfg - - def instr2ir(self, instr): - ir_bloc_cur, extra_irblocks = self.get_ir(instr) - for index, irb in enumerate(extra_irblocks): - irs = [] - for assignblk in irb: - irs.append(AssignBlock(assignblk, instr)) - extra_irblocks[index] = IRBlock(irb.loc_key, irs) - assignblk = AssignBlock(ir_bloc_cur, instr) - return assignblk, extra_irblocks - - def add_instr_to_ircfg(self, instr, ircfg, loc_key=None, gen_pc_updt=False): - """ - Add the native instruction @instr to the @ircfg - @instr: instruction instance - @ircfg: IRCFG instance - @loc_key: loc_key instance of the instruction destination - @gen_pc_updt: insert PC update effects between instructions - """ - - if loc_key is None: - offset = getattr(instr, "offset", None) - loc_key = self.loc_db.add_location(offset=offset) - block = 
AsmBlock(loc_key) - block.lines = [instr] - self.add_asmblock_to_ircfg(block, ircfg, gen_pc_updt) - return loc_key - - def gen_pc_update(self, assignments, instr): - offset = m2_expr.ExprInt(instr.offset, self.pc.size) - assignments.append(AssignBlock({self.pc:offset}, instr)) - - def add_instr_to_current_state(self, instr, block, assignments, ir_blocks_all, gen_pc_updt): - """ - Add the IR effects of an instruction to the current state. - - Returns a bool: - * True if the current assignments list must be split - * False in other cases. - - @instr: native instruction - @block: native block source - @assignments: list of current AssignBlocks - @ir_blocks_all: list of additional effects - @gen_pc_updt: insert PC update effects between instructions - """ - if gen_pc_updt is not False: - self.gen_pc_update(assignments, instr) - - assignblk, ir_blocks_extra = self.instr2ir(instr) - assignments.append(assignblk) - ir_blocks_all += ir_blocks_extra - if ir_blocks_extra: - return True - return False - - def add_asmblock_to_ircfg(self, block, ircfg, gen_pc_updt=False): - """ - Add a native block to the current IR - @block: native assembly block - @ircfg: IRCFG instance - @gen_pc_updt: insert PC update effects between instructions - """ - - loc_key = block.loc_key - ir_blocks_all = [] - - assignments = [] - for instr in block.lines: - if loc_key is None: - assignments = [] - loc_key = self.get_loc_key_for_instr(instr) - split = self.add_instr_to_current_state( - instr, block, assignments, - ir_blocks_all, gen_pc_updt - ) - if split: - ir_blocks_all.append(IRBlock(loc_key, assignments)) - loc_key = None - assignments = [] - if loc_key is not None: - ir_blocks_all.append(IRBlock(loc_key, assignments)) - - new_ir_blocks_all = self.post_add_asmblock_to_ircfg(block, ircfg, ir_blocks_all) - for irblock in new_ir_blocks_all: - ircfg.add_irblock(irblock) - return new_ir_blocks_all - - def add_block(self, block, gen_pc_updt=False): - """ - DEPRECATED function - Use add_block instead 
of add_block - """ - warnings.warn("""DEPRECATION WARNING - ircfg is now out of IntermediateRepresentation - Use: - ircfg = ir_arch.new_ircfg() - ir_arch.add_asmblock_to_ircfg(block, ircfg) - """) - raise RuntimeError("API Deprecated") - - def add_bloc(self, block, gen_pc_updt=False): - """ - DEPRECATED function - Use add_block instead of add_block - """ - self.add_block(block, gen_pc_updt) - - def get_next_loc_key(self, instr): - loc_key = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) - return loc_key - - def get_loc_key_for_instr(self, instr): - """Returns the loc_key associated to an instruction - @instr: current instruction""" - return self.loc_db.get_or_create_offset_location(instr.offset) - - def gen_loc_key_and_expr(self, size): - """ - Return a loc_key and it's corresponding ExprLoc - @size: size of expression - """ - loc_key = self.loc_db.add_location() - return loc_key, m2_expr.ExprLoc(loc_key, size) - - def expr_fix_regs_for_mode(self, expr, *args, **kwargs): - return expr - - def expraff_fix_regs_for_mode(self, expr, *args, **kwargs): - return expr - - def irbloc_fix_regs_for_mode(self, irblock, *args, **kwargs): - return irblock - - def is_pc_written(self, block): - """Return the first Assignblk of the @blockin which PC is written - @block: IRBlock instance""" - all_pc = viewvalues(self.arch.pc) - for assignblk in block: - if assignblk.dst in all_pc: - return assignblk - return None - - def set_empty_dst_to_next(self, block, ir_blocks): - for index, irblock in enumerate(ir_blocks): - if irblock.dst is not None: - continue - next_loc_key = block.get_next() - if next_loc_key is None: - loc_key = None - if block.lines: - line = block.lines[-1] - if line.offset is not None: - loc_key = self.loc_db.get_or_create_offset_location(line.offset + line.l) - if loc_key is None: - loc_key = self.loc_db.add_location() - block.add_cst(loc_key, AsmConstraint.c_next) - else: - loc_key = next_loc_key - dst = m2_expr.ExprLoc(loc_key, self.pc.size) - 
if irblock.assignblks: - instr = irblock.assignblks[-1].instr - else: - instr = None - assignblk = AssignBlock({self.IRDst: dst}, instr) - ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) - - def post_add_asmblock_to_ircfg(self, block, ircfg, ir_blocks): - self.set_empty_dst_to_next(block, ir_blocks) - - new_irblocks = [] - for irblock in ir_blocks: - new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) - ircfg.add_irblock(new_irblock) - new_irblocks.append(new_irblock) - return new_irblocks - - -class ir(IntermediateRepresentation): - """ - DEPRECATED object - Use IntermediateRepresentation instead of ir - """ - - def __init__(self, loc_key, irs, lines=None): - warnings.warn('DEPRECATION WARNING: use "IntermediateRepresentation" instead of "ir"') - super(ir, self).__init__(loc_key, irs, lines) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py deleted file mode 100644 index b945e85c..00000000 --- a/miasm2/ir/symbexec.py +++ /dev/null @@ -1,1124 +0,0 @@ -from __future__ import print_function -from builtins import range -import logging -from collections import MutableMapping - -from future.utils import viewitems - -from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ - ExprMem, ExprCompose, ExprSlice, ExprCond -from miasm2.expression.simplifications import expr_simp_explicit -from miasm2.ir.ir import AssignBlock - -log = logging.getLogger("symbexec") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.INFO) - - -def get_block(ir_arch, ircfg, mdis, addr): - """Get IRBlock at address @addr""" - loc_key = ircfg.get_or_create_loc_key(addr) - if not loc_key in ircfg.blocks: - offset = mdis.loc_db.get_location_offset(loc_key) - block = mdis.dis_block(offset) - ir_arch.add_asmblock_to_ircfg(block, ircfg) - irblock = ircfg.get_block(loc_key) - if irblock is None: - raise 
LookupError('No block found at that address: %s' % ir_arch.loc_db.pretty_str(loc_key)) - return irblock - - -class StateEngine(object): - """Stores an Engine state""" - - def merge(self, other): - """Generate a new state, representing the merge of self and @other - @other: a StateEngine instance""" - - raise NotImplementedError("Abstract method") - - -class SymbolicState(StateEngine): - """Stores a SymbolicExecutionEngine state""" - - def __init__(self, dct): - self._symbols = frozenset(viewitems(dct)) - - def __hash__(self): - return hash((self.__class__, self._symbols)) - - def __eq__(self, other): - if self is other: - return True - if self.__class__ != other.__class__: - return False - return self.symbols == other.symbols - - def __ne__(self, other): - return not self == other - - def __iter__(self): - for dst, src in self._symbols: - yield dst, src - - def iteritems(self): - """Iterate on stored memory/values""" - return self.__iter__() - - def merge(self, other): - """Merge two symbolic states - Only equal expressions are kept in both states - @other: second symbolic state - """ - - symb_a = self.symbols - symb_b = other.symbols - intersection = set(symb_a).intersection(set(symb_b)) - out = {} - for dst in intersection: - if symb_a[dst] == symb_b[dst]: - out[dst] = symb_a[dst] - return self.__class__(out) - - @property - def symbols(self): - """Return the dictionary of known symbols""" - return dict(self._symbols) - - -INTERNAL_INTBASE_NAME = "__INTERNAL_INTBASE__" - - -def get_expr_base_offset(expr): - """Return a couple representing the symbolic/concrete part of an addition - expression. 
- - If there is no symbolic part, ExprId(INTERNAL_INTBASE_NAME) is used - If there is not concrete part, 0 is used - @expr: Expression instance - - """ - if expr.is_int(): - internal_intbase = ExprId(INTERNAL_INTBASE_NAME, expr.size) - return internal_intbase, int(expr) - - if not expr.is_op('+'): - return expr, 0 - if expr.args[-1].is_int(): - args, offset = expr.args[:-1], int(expr.args[-1]) - if len(args) == 1: - return args[0], offset - return ExprOp('+', *args), offset - return expr, 0 - - -class MemArray(MutableMapping): - """Link between base and its (offset, Expr) - - Given an expression (say *base*), this structure will store every memory - content relatively to an integer offset from *base*. - - The value associated to a given offset is a description of the slice of a - stored expression. The slice size depends on the configutation of the - MemArray. For example, for a slice size of 8 bits, the assignment: - - @32[EAX+0x10] = EBX - - will store for the base EAX: - - 0x10: (EBX, 0) - - 0x11: (EBX, 1) - - 0x12: (EBX, 2) - - 0x13: (EBX, 3) - - If the *base* is EAX+EBX, this structure can store the following contents: - - @32[EAX+EBX] - - @8[EAX+EBX+0x100] - But not: - - @32[EAX+0x10] (which is stored in another MemArray based on EAX) - - @32[EAX+EBX+ECX] (which is stored in another MemArray based on - EAX+EBX+ECX) - - """ - - def __init__(self, base, expr_simp=expr_simp_explicit): - self._base = base - self.expr_simp = expr_simp - self._mask = int(base.mask) - self._offset_to_expr = {} - - @property - def base(self): - """Expression representing the symbolic base address""" - return self._base - - @property - def mask(self): - """Mask offset""" - return self._mask - - def __contains__(self, offset): - return offset in self._offset_to_expr - - def __getitem__(self, offset): - assert 0 <= offset <= self._mask - return self._offset_to_expr.__getitem__(offset) - - def __setitem__(self, offset, value): - raise RuntimeError("Use write api to update keys") - - def 
__delitem__(self, offset): - assert 0 <= offset <= self._mask - return self._offset_to_expr.__delitem__(offset) - - def __iter__(self): - for offset, _ in viewitems(self._offset_to_expr): - yield offset - - def __len__(self): - return len(self._offset_to_expr) - - def __repr__(self): - out = [] - out.append("Base: %s" % self.base) - for offset, (index, value) in sorted(viewitems(self._offset_to_expr)): - out.append("%16X %d %s" % (offset, index, value)) - return '\n'.join(out) - - def copy(self): - """Copy object instance""" - obj = MemArray(self.base, self.expr_simp) - obj._offset_to_expr = self._offset_to_expr.copy() - return obj - - @staticmethod - def offset_to_ptr(base, offset): - """ - Return an expression representing the @base + @offset - @base: symbolic base address - @offset: relative offset integer to the @base address - """ - if base.is_id(INTERNAL_INTBASE_NAME): - ptr = ExprInt(offset, base.size) - elif offset == 0: - ptr = base - else: - ptr = base + ExprInt(offset, base.size) - return ptr.canonize() - - def read(self, offset, size): - """ - Return memory at @offset with @size as an Expr list - @offset: integer (in bytes) - @size: integer (in bits), byte aligned - - Consider the following state: - - 0x10: (EBX, 0) - - 0x11: (EBX, 1) - - 0x12: (EBX, 2) - - 0x13: (EBX, 3) - - A read at 0x10 of 32 bits should return: EBX - """ - - assert size % 8 == 0 - # Parts is (Expr's offset, size, Expr) - parts = [] - for index in range(size // 8): - # Wrap read: - # @32[EAX+0xFFFFFFFF] is ok and will read at 0xFFFFFFFF, 0, 1, 2 - request_offset = (offset + index) & self._mask - if request_offset in self._offset_to_expr: - # Known memory portion - off, data = self._offset_to_expr[request_offset] - parts.append((off, 1, data)) - continue - - # Unknown memory portion - ptr = self.offset_to_ptr(self.base, request_offset) - data = ExprMem(ptr, 8) - parts.append((0, 1, data)) - - # Group similar datas - # XXX TODO: only little endian here - index = 0 - while index + 1 < 
len(parts): - off_a, size_a, data_a = parts[index] - off_b, size_b, data_b = parts[index+1] - if data_a == data_b and off_a + size_a == off_b: - # Read consecutive bytes of a variable - # [(0, 8, x), (1, 8, x)] => (0, 16, x) - parts[index:index+2] = [(off_a, size_a + size_b, data_a)] - continue - if data_a.is_int() and data_b.is_int(): - # Read integer parts - # [(0, 8, 0x11223344), (1, 8, 0x55667788)] => (0, 16, 0x7744) - int1 = self.expr_simp(data_a[off_a*8:(off_a+size_a)*8]) - int2 = self.expr_simp(data_b[off_b*8:(off_b+size_b)*8]) - assert int1.is_int() and int2.is_int() - int1, int2 = int(int1), int(int2) - result = ExprInt((int2 << (size_a * 8)) | int1, (size_a + size_b) * 8) - parts[index:index+2] = [(0, size_a + size_b, result)] - continue - if data_a.is_mem() and data_b.is_mem(): - # Read consecutive bytes of a memory variable - ptr_base_a, ptr_offset_a = get_expr_base_offset(data_a.ptr) - ptr_base_b, ptr_offset_b = get_expr_base_offset(data_b.ptr) - if ptr_base_a != ptr_base_b: - index += 1 - continue - if (ptr_offset_a + off_a + size_a) & self._mask == (ptr_offset_b + off_b) & self._mask: - assert size_a <= data_a.size // 8 - off_a - assert size_b <= data_b.size // 8 - off_b - # Successive comparable symbolic pointers - # [(0, 8, @8[ptr]), (0, 8, @8[ptr+1])] => (0, 16, @16[ptr]) - ptr = self.offset_to_ptr(ptr_base_a, (ptr_offset_a + off_a) & self._mask) - - data = ExprMem(ptr, (size_a + size_b) * 8) - parts[index:index+2] = [(0, size_a + size_b, data)] - - continue - - index += 1 - - # Slice datas - read_mem = [] - for off, bytesize, data in parts: - if data.size // 8 != bytesize: - data = data[off * 8: (off + bytesize) * 8] - read_mem.append(data) - - return read_mem - - def write(self, offset, expr): - """ - Write @expr at @offset - @offset: integer (in bytes) - @expr: Expr instance value - """ - assert expr.size % 8 == 0 - assert offset <= self._mask - for index in range(expr.size // 8): - # Wrap write: - # @32[EAX+0xFFFFFFFF] is ok and will write at 
0xFFFFFFFF, 0, 1, 2 - request_offset = (offset + index) & self._mask - # XXX TODO: only little endian here - self._offset_to_expr[request_offset] = (index, expr) - - tmp = self.expr_simp(expr[index * 8: (index + 1) * 8]) - # Special case: Simplify slice of pointer (simplification is ok - # here, as we won't store the simplified expression) - if tmp.is_slice() and tmp.arg.is_mem() and tmp.start % 8 == 0: - new_ptr = self.expr_simp( - tmp.arg.ptr + ExprInt(tmp.start // 8, tmp.arg.ptr.size) - ) - tmp = ExprMem(new_ptr, tmp.stop - tmp.start) - # Test if write to original value - if tmp.is_mem(): - src_ptr, src_off = get_expr_base_offset(tmp.ptr) - if src_ptr == self.base and src_off == request_offset: - del self._offset_to_expr[request_offset] - - - def _get_variable_parts(self, index, known_offsets, forward=True): - """ - Find consecutive memory parts representing the same variable. The part - starts at offset known_offsets[@index] and search is in offset direction - determined by @forward - Return the number of consecutive parts of the same variable. 
- - @index: index of the memory offset in known_offsets - @known_offsets: sorted offsets - @forward: Search in offset growing direction if True, else in reverse - order - """ - - offset = known_offsets[index] - value_byte_index, value = self._offset_to_expr[offset] - assert value.size % 8 == 0 - - if forward: - start, end, step = value_byte_index + 1, value.size // 8, 1 - else: - start, end, step = value_byte_index - 1, -1, -1 - - partnum = 1 - for value_offset in range(start, end, step): - offset += step - # Check if next part is in known_offsets - next_index = index + step * partnum - if not 0 <= next_index < len(known_offsets): - break - - offset_next = known_offsets[next_index] - if offset_next != offset: - break - - # Check if next part is a part of the searched value - byte_index, value_next = self._offset_to_expr[offset_next] - if byte_index != value_offset: - break - if value != value_next: - break - partnum += 1 - - return partnum - - - def _build_value_at_offset(self, value, offset, start, length): - """ - Return a couple. The first element is the memory Expression representing - the value at @offset, the second is its value. The value is truncated - at byte @start with @length - - @value: Expression to truncate - @offset: offset in bytes of the variable (integer) - @start: value's byte offset (integer) - @length: length in bytes (integer) - """ - - ptr = self.offset_to_ptr(self.base, offset) - size = length * 8 - if start == 0 and size == value.size: - result = value - else: - result = self.expr_simp(value[start * 8: start * 8 + size]) - - return ExprMem(ptr, size), result - - - def memory(self): - """ - Iterate on stored memory/values - - The goal here is to group entities. 
- - Consider the following state: - EAX + 0x10 = (0, EDX) - EAX + 0x11 = (1, EDX) - EAX + 0x12 = (2, EDX) - EAX + 0x13 = (3, EDX) - - The function should return: - @32[EAX + 0x10] = EDX - """ - - if not self._offset_to_expr: - return - known_offsets = sorted(self._offset_to_expr) - index = 0 - # Test if the first element is the continuation of the last byte. If - # yes, merge and output it first. - min_int = 0 - max_int = (1 << self.base.size) - 1 - limit_index = len(known_offsets) - - first_element = None - # Special case where a variable spreads on max_int/min_int - if known_offsets[0] == min_int and known_offsets[-1] == max_int: - min_offset, max_offset = known_offsets[0], known_offsets[-1] - min_byte_index, min_value = self._offset_to_expr[min_offset] - max_byte_index, max_value = self._offset_to_expr[max_offset] - if min_value == max_value and max_byte_index + 1 == min_byte_index: - # Look for current variable start - partnum_before = self._get_variable_parts(len(known_offsets) - 1, known_offsets, False) - # Look for current variable end - partnum_after = self._get_variable_parts(0, known_offsets) - - partnum = partnum_before + partnum_after - offset = known_offsets[-partnum_before] - index_value, value = self._offset_to_expr[offset] - - mem, result = self._build_value_at_offset(value, offset, index_value, partnum) - first_element = mem, result - index = partnum_after - limit_index = len(known_offsets) - partnum_before - - # Special cases are done, walk and merge variables - while index < limit_index: - offset = known_offsets[index] - index_value, value = self._offset_to_expr[offset] - partnum = self._get_variable_parts(index, known_offsets) - mem, result = self._build_value_at_offset(value, offset, index_value, partnum) - yield mem, result - index += partnum - - if first_element is not None: - yield first_element - - def dump(self): - """Display MemArray content""" - for mem, value in self.memory(): - print("%s = %s" % (mem, value)) - - -class 
MemSparse(object): - """Link a symbolic memory pointer to its MemArray. - - For each symbolic memory object, this object will extract the memory pointer - *ptr*. It then splits *ptr* into a symbolic and an integer part. For - example, the memory @[ESP+4] will give ESP+4 for *ptr*. *ptr* is then split - into its base ESP and its offset 4. Each symbolic base address uses a - different MemArray. - - Example: - - @32[EAX+EBX] - - @8[EAX+EBX+0x100] - Will be stored in the same MemArray with a EAX+EBX base - - """ - - def __init__(self, addrsize, expr_simp=expr_simp_explicit): - """ - @addrsize: size (in bits) of the addresses manipulated by the MemSparse - @expr_simp: an ExpressionSimplifier instance - """ - self.addrsize = addrsize - self.expr_simp = expr_simp - self.base_to_memarray = {} - - def __contains__(self, expr): - """ - Return True if the whole @expr is present - For partial check, use 'contains_partial' - """ - if not expr.is_mem(): - return False - ptr = expr.ptr - base, offset = get_expr_base_offset(ptr) - memarray = self.base_to_memarray.get(base, None) - if memarray is None: - return False - for i in range(expr.size // 8): - if offset + i not in memarray: - return False - return True - - def contains_partial(self, expr): - """ - Return True if a part of @expr is present in memory - """ - if not expr.is_mem(): - return False - ptr = expr.ptr - base, offset = get_expr_base_offset(ptr) - memarray = self.base_to_memarray.get(base, None) - if memarray is None: - return False - for i in range(expr.size // 8): - if offset + i in memarray: - return True - return False - - def clear(self): - """Reset the current object content""" - self.base_to_memarray.clear() - - def copy(self): - """Copy the current object instance""" - base_to_memarray = {} - for base, memarray in viewitems(self.base_to_memarray): - base_to_memarray[base] = memarray.copy() - obj = MemSparse(self.addrsize, self.expr_simp) - obj.base_to_memarray = base_to_memarray - return obj - - def 
__delitem__(self, expr): - """ - Delete a value @expr *fully* present in memory - For partial delete, use delete_partial - """ - ptr = expr.ptr - base, offset = get_expr_base_offset(ptr) - memarray = self.base_to_memarray.get(base, None) - if memarray is None: - raise KeyError - # Check if whole entity is in the MemArray before deleting it - for i in range(expr.size // 8): - if (offset + i) & memarray.mask not in memarray: - raise KeyError - for i in range(expr.size // 8): - del memarray[(offset + i) & memarray.mask] - - def delete_partial(self, expr): - """ - Delete @expr from memory. Skip parts of @expr which are not present in - memory. - """ - ptr = expr.ptr - base, offset = get_expr_base_offset(ptr) - memarray = self.base_to_memarray.get(base, None) - if memarray is None: - raise KeyError - # Check if whole entity is in the MemArray before deleting it - for i in range(expr.size // 8): - real_offset = (offset + i) & memarray.mask - if real_offset in memarray: - del memarray[real_offset] - - def read(self, ptr, size): - """ - Return the value associated with the Expr at address @ptr - @ptr: Expr representing the memory address - @size: memory size (in bits), byte aligned - """ - assert size % 8 == 0 - base, offset = get_expr_base_offset(ptr) - memarray = self.base_to_memarray.get(base, None) - if memarray is not None: - mems = memarray.read(offset, size) - ret = ExprCompose(*mems) - else: - ret = ExprMem(ptr, size) - return ret - - def write(self, ptr, expr): - """ - Update the corresponding Expr @expr at address @ptr - @ptr: Expr representing the memory address - @expr: Expr instance - """ - assert ptr.size == self.addrsize - base, offset = get_expr_base_offset(ptr) - memarray = self.base_to_memarray.get(base, None) - if memarray is None: - memarray = MemArray(base, self.expr_simp) - self.base_to_memarray[base] = memarray - memarray.write(offset, expr) - - def iteritems(self): - """Iterate on stored memory variables and their values.""" - for _, memarray in 
viewitems(self.base_to_memarray): - for mem, value in memarray.memory(): - yield mem, value - - def items(self): - """Return stored memory variables and their values.""" - return list(self.iteritems()) - - def dump(self): - """Display MemSparse content""" - for mem, value in viewitems(self): - print("%s = %s" % (mem, value)) - - def __repr__(self): - out = [] - for _, memarray in sorted(viewitems(self.base_to_memarray)): - out.append(repr(memarray)) - return '\n'.join(out) - - -class SymbolMngr(object): - """Symbolic store manager (IDs and MEMs)""" - - def __init__(self, init=None, addrsize=None, expr_simp=expr_simp_explicit): - assert addrsize is not None - if init is None: - init = {} - self.addrsize = addrsize - self.expr_simp = expr_simp - self.symbols_id = {} - self.symbols_mem = MemSparse(addrsize, expr_simp) - self.mask = (1 << addrsize) - 1 - for expr, value in viewitems(init): - self.write(expr, value) - - def __contains__(self, expr): - if expr.is_id(): - return self.symbols_id.__contains__(expr) - if expr.is_mem(): - return self.symbols_mem.__contains__(expr) - return False - - def __getitem__(self, expr): - return self.read(expr) - - def __setitem__(self, expr, value): - self.write(expr, value) - - def __delitem__(self, expr): - if expr.is_id(): - del self.symbols_id[expr] - elif expr.is_mem(): - del self.symbols_mem[expr] - else: - raise TypeError("Bad source expr") - - def copy(self): - """Copy object instance""" - obj = SymbolMngr(self, addrsize=self.addrsize, expr_simp=self.expr_simp) - return obj - - def clear(self): - """Forget every variables values""" - self.symbols_id.clear() - self.symbols_mem.clear() - - def read(self, src): - """ - Return the value corresponding to Expr @src - @src: ExprId or ExprMem instance - """ - if src.is_id(): - return self.symbols_id.get(src, src) - elif src.is_mem(): - # Only byte aligned accesses are supported for now - assert src.size % 8 == 0 - return self.symbols_mem.read(src.ptr, src.size) - else: - raise 
TypeError("Bad source expr") - - def write(self, dst, src): - """ - Update @dst with @src expression - @dst: ExprId or ExprMem instance - @src: Expression instance - """ - assert dst.size == src.size - if dst.is_id(): - if dst == src: - if dst in self.symbols_id: - del self.symbols_id[dst] - else: - self.symbols_id[dst] = src - elif dst.is_mem(): - # Only byte aligned accesses are supported for now - assert dst.size % 8 == 0 - self.symbols_mem.write(dst.ptr, src) - else: - raise TypeError("Bad destination expr") - - def dump(self, ids=True, mems=True): - """Display memory content""" - if ids: - for variable, value in self.ids(): - print('%s = %s' % (variable, value)) - if mems: - for mem, value in self.memory(): - print('%s = %s' % (mem, value)) - - def __repr__(self): - out = [] - for variable, value in viewitems(self): - out.append('%s = %s' % (variable, value)) - return "\n".join(out) - - def iteritems(self): - """ExprId/ExprMem iteritems of the current state""" - for variable, value in self.ids(): - yield variable, value - for variable, value in self.memory(): - yield variable, value - - def items(self): - """Return variables/values of the current state""" - return list(self.iteritems()) - - def __iter__(self): - for expr, _ in self.iteritems(): - yield expr - - def ids(self): - """Iterate on variables and their values.""" - for expr, value in viewitems(self.symbols_id): - yield expr, value - - def memory(self): - """Iterate on memory variables and their values.""" - for mem, value in viewitems(self.symbols_mem): - yield mem, value - - def keys(self): - """Variables of the current state""" - return list(self) - - -def merge_ptr_read(known, ptrs): - """ - Merge common memory parts in a multiple byte memory. 
- @ptrs: memory bytes list - @known: ptrs' associated boolean for present/unpresent memory part in the - store - """ - assert known - out = [] - known.append(None) - ptrs.append(None) - last, value, size = known[0], ptrs[0], 8 - for index, part in enumerate(known[1:], 1): - if part == last: - size += 8 - else: - out.append((last, value, size)) - last, value, size = part, ptrs[index], 8 - return out - - -class SymbolicExecutionEngine(object): - """ - Symbolic execution engine - Allow IR code emulation in symbolic domain - - - Examples: - from miasm2.ir.symbexec import SymbolicExecutionEngine - from miasm2.ir.ir import AssignBlock - - ir_arch = ir_x86_32() - - init_state = { - ir_arch.arch.regs.EAX: ir_arch.arch.regs.EBX, - ExprMem(id_x+ExprInt(0x10, 32), 32): id_a, - } - - sb_exec = SymbolicExecutionEngine(ir_arch, init_state) - - >>> sb_exec.dump() - EAX = a - @32[x + 0x10] = a - >>> sb_exec.dump(mems=False) - EAX = a - - >>> print sb_exec.eval_expr(ir_arch.arch.regs.EAX + ir_arch.arch.regs.ECX) - EBX + ECX - - Inspecting state: - - dump - - modified - State manipulation: - - '.state' (rw) - - Evaluation (read only): - - eval_expr - - eval_assignblk - Evaluation with state update: - - eval_updt_expr - - eval_updt_assignblk - - eval_updt_irblock - - Start a symbolic execution based on provisioned '.ir_arch' blocks: - - run_block_at - - run_at - """ - - StateEngine = SymbolicState - - def __init__(self, ir_arch, state=None, - sb_expr_simp=expr_simp_explicit): - - self.expr_to_visitor = { - ExprInt: self.eval_exprint, - ExprId: self.eval_exprid, - ExprLoc: self.eval_exprloc, - ExprMem: self.eval_exprmem, - ExprSlice: self.eval_exprslice, - ExprCond: self.eval_exprcond, - ExprOp: self.eval_exprop, - ExprCompose: self.eval_exprcompose, - } - - if state is None: - state = {} - - self.symbols = SymbolMngr(addrsize=ir_arch.addrsize, expr_simp=sb_expr_simp) - - for dst, src in viewitems(state): - self.symbols.write(dst, src) - - self.ir_arch = ir_arch - self.expr_simp = 
sb_expr_simp - - def get_state(self): - """Return the current state of the SymbolicEngine""" - state = self.StateEngine(dict(self.symbols)) - return state - - def set_state(self, state): - """Restaure the @state of the engine - @state: StateEngine instance - """ - self.symbols = SymbolMngr(addrsize=self.ir_arch.addrsize, expr_simp=self.expr_simp) - for dst, src in viewitems(dict(state)): - self.symbols[dst] = src - - state = property(get_state, set_state) - - def eval_expr_visitor(self, expr, cache=None): - """ - [DEV]: Override to change the behavior of an Expr evaluation. - This function recursively applies 'eval_expr*' to @expr. - This function uses @cache to speedup re-evaluation of expression. - """ - if cache is None: - cache = {} - - ret = cache.get(expr, None) - if ret is not None: - return ret - - new_expr = self.expr_simp(expr) - ret = cache.get(new_expr, None) - if ret is not None: - return ret - - func = self.expr_to_visitor.get(new_expr.__class__, None) - if func is None: - raise TypeError("Unknown expr type") - - ret = func(new_expr, cache=cache) - ret = self.expr_simp(ret) - assert ret is not None - - cache[expr] = ret - cache[new_expr] = ret - return ret - - def eval_exprint(self, expr, **kwargs): - """[DEV]: Evaluate an ExprInt using the current state""" - return expr - - def eval_exprid(self, expr, **kwargs): - """[DEV]: Evaluate an ExprId using the current state""" - ret = self.symbols.read(expr) - return ret - - def eval_exprloc(self, expr, **kwargs): - """[DEV]: Evaluate an ExprLoc using the current state""" - offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) - if offset is not None: - ret = ExprInt(offset, expr.size) - else: - ret = expr - return ret - - def eval_exprmem(self, expr, **kwargs): - """[DEV]: Evaluate an ExprMem using the current state - This function first evaluate the memory pointer value. 
- Override 'mem_read' to modify the effective memory accesses - """ - ptr = self.eval_expr_visitor(expr.ptr, **kwargs) - mem = ExprMem(ptr, expr.size) - ret = self.mem_read(mem) - return ret - - def eval_exprcond(self, expr, **kwargs): - """[DEV]: Evaluate an ExprCond using the current state""" - cond = self.eval_expr_visitor(expr.cond, **kwargs) - src1 = self.eval_expr_visitor(expr.src1, **kwargs) - src2 = self.eval_expr_visitor(expr.src2, **kwargs) - ret = ExprCond(cond, src1, src2) - return ret - - def eval_exprslice(self, expr, **kwargs): - """[DEV]: Evaluate an ExprSlice using the current state""" - arg = self.eval_expr_visitor(expr.arg, **kwargs) - ret = ExprSlice(arg, expr.start, expr.stop) - return ret - - def eval_exprop(self, expr, **kwargs): - """[DEV]: Evaluate an ExprOp using the current state""" - args = [] - for oarg in expr.args: - arg = self.eval_expr_visitor(oarg, **kwargs) - args.append(arg) - ret = ExprOp(expr.op, *args) - return ret - - def eval_exprcompose(self, expr, **kwargs): - """[DEV]: Evaluate an ExprCompose using the current state""" - args = [] - for arg in expr.args: - args.append(self.eval_expr_visitor(arg, **kwargs)) - ret = ExprCompose(*args) - return ret - - def eval_expr(self, expr, eval_cache=None): - """ - Evaluate @expr - @expr: Expression instance to evaluate - @cache: None or dictionary linking variables to their values - """ - if eval_cache is None: - eval_cache = {} - ret = self.eval_expr_visitor(expr, cache=eval_cache) - assert ret is not None - return ret - - def modified(self, init_state=None, ids=True, mems=True): - """ - Return the modified variables. - @init_state: a base dictionary linking variables to their initial values - to diff. Can be None. 
- @ids: track ids only - @mems: track mems only - """ - if init_state is None: - init_state = {} - if ids: - for variable, value in viewitems(self.symbols.symbols_id): - if variable in init_state and init_state[variable] == value: - continue - yield variable, value - if mems: - for mem, value in self.symbols.memory(): - if mem in init_state and init_state[mem] == value: - continue - yield mem, value - - def dump(self, ids=True, mems=True): - """ - Display modififed variables - @ids: display modified ids - @mems: display modified memory - """ - - for variable, value in self.modified(None, ids, mems): - print("%-18s" % variable, "=", "%s" % value) - - def eval_assignblk(self, assignblk): - """ - Evaluate AssignBlock using the current state - - Returns a dictionary containing modified keys associated to their values - - @assignblk: AssignBlock instance - """ - pool_out = {} - eval_cache = {} - for dst, src in viewitems(assignblk): - src = self.eval_expr(src, eval_cache) - if dst.is_mem(): - ptr = self.eval_expr(dst.ptr, eval_cache) - # Test if mem lookup is known - tmp = ExprMem(ptr, dst.size) - pool_out[tmp] = src - elif dst.is_id(): - pool_out[dst] = src - else: - raise ValueError("Unknown destination type", str(dst)) - - return pool_out - - def apply_change(self, dst, src): - """ - Apply @dst = @src on the current state WITHOUT evaluating both side - @dst: Expr, destination - @src: Expr, source - """ - if dst.is_mem(): - self.mem_write(dst, src) - else: - self.symbols.write(dst, src) - - def eval_updt_assignblk(self, assignblk): - """ - Apply an AssignBlock on the current state - @assignblk: AssignBlock instance - """ - mem_dst = [] - dst_src = self.eval_assignblk(assignblk) - for dst, src in viewitems(dst_src): - self.apply_change(dst, src) - if dst.is_mem(): - mem_dst.append(dst) - return mem_dst - - def eval_updt_irblock(self, irb, step=False): - """ - Symbolic execution of the @irb on the current state - @irb: irbloc instance - @step: display intermediate steps 
- """ - for assignblk in irb: - if step: - print('Instr', assignblk.instr) - print('Assignblk:') - print(assignblk) - print('_' * 80) - self.eval_updt_assignblk(assignblk) - if step: - self.dump(mems=False) - self.dump(ids=False) - print('_' * 80) - dst = self.eval_expr(self.ir_arch.IRDst) - - return dst - - def run_block_at(self, ircfg, addr, step=False): - """ - Symbolic execution of the block at @addr - @addr: address to execute (int or ExprInt or label) - @step: display intermediate steps - """ - irblock = ircfg.get_block(addr) - if irblock is not None: - addr = self.eval_updt_irblock(irblock, step=step) - return addr - - def run_at(self, ircfg, addr, lbl_stop=None, step=False): - """ - Symbolic execution starting at @addr - @addr: address to execute (int or ExprInt or label) - @lbl_stop: LocKey to stop execution on - @step: display intermediate steps - """ - while True: - irblock = ircfg.get_block(addr) - if irblock is None: - break - if irblock.loc_key == lbl_stop: - break - addr = self.eval_updt_irblock(irblock, step=step) - return addr - - def del_mem_above_stack(self, stack_ptr): - """ - Remove all stored memory values with following properties: - * pointer based on initial stack value - * pointer below current stack pointer - """ - stack_ptr = self.eval_expr(stack_ptr) - base, stk_offset = get_expr_base_offset(stack_ptr) - memarray = self.symbols.symbols_mem.base_to_memarray.get(base, None) - if memarray: - to_del = set() - for offset in memarray: - if ((offset - stk_offset) & int(stack_ptr.mask)) >> (stack_ptr.size - 1) != 0: - to_del.add(offset) - - for offset in to_del: - del memarray[offset] - - def eval_updt_expr(self, expr): - """ - Evaluate @expr and apply side effect if needed (ie. if expr is an - assignment). 
Return the evaluated value - """ - - # Update value if needed - if expr.is_aff(): - ret = self.eval_expr(expr.src) - self.eval_updt_assignblk(AssignBlock([expr])) - else: - ret = self.eval_expr(expr) - - return ret - - def mem_read(self, expr): - """ - [DEV]: Override to modify the effective memory reads - - Read symbolic value at ExprMem @expr - @expr: ExprMem - """ - return self.symbols.read(expr) - - def mem_write(self, dst, src): - """ - [DEV]: Override to modify the effective memory writes - - Write symbolic value @src at ExprMem @dst - @dst: destination ExprMem - @src: source Expression - """ - self.symbols.write(dst, src) diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py deleted file mode 100644 index a1a255f8..00000000 --- a/miasm2/ir/symbexec_top.py +++ /dev/null @@ -1,221 +0,0 @@ -from future.utils import viewitems - -from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine -from miasm2.expression.simplifications import expr_simp -from miasm2.expression.expression import ExprId, ExprInt, ExprSlice,\ - ExprMem, ExprCond, ExprCompose, ExprOp - - -TOPSTR = "TOP" - -def exprid_top(expr): - """Return a TOP expression (ExprId("TOP") of size @expr.size - @expr: expression to replace with TOP - """ - return ExprId(TOPSTR, expr.size) - - -class SymbolicStateTop(StateEngine): - - def __init__(self, dct, regstop): - self._symbols = frozenset(viewitems(dct)) - self._regstop = frozenset(regstop) - - def __hash__(self): - return hash((self.__class__, self._symbols, self._regstop)) - - def __str__(self): - out = [] - for dst, src in sorted(self._symbols): - out.append("%s = %s" % (dst, src)) - for dst in self._regstop: - out.append('TOP %s' %dst) - return "\n".join(out) - - def __eq__(self, other): - if self is other: - return True - if self.__class__ != other.__class__: - return False - return (self.symbols == other.symbols and - self.regstop == other.regstop) - - def __ne__(self, other): - return not self.__eq__(other) - - def 
__iter__(self): - for dst, src in self._symbols: - yield dst, src - - def merge(self, other): - """Merge two symbolic states - Only equal expressions are kept in both states - @other: second symbolic state - """ - symb_a = self.symbols - symb_b = other.symbols - intersection = set(symb_a).intersection(symb_b) - diff = set(symb_a).union(symb_b).difference(intersection) - symbols = {} - regstop = set() - for dst in diff: - if dst.is_id(): - regstop.add(dst) - for dst in intersection: - if symb_a[dst] == symb_b[dst]: - symbols[dst] = symb_a[dst] - else: - regstop.add(dst) - return self.__class__(symbols, regstop) - - @property - def symbols(self): - """Return the dictionary of known symbols""" - return dict(self._symbols) - - @property - def regstop(self): - """Return the set of expression with TOP values""" - return self._regstop - -class SymbExecTopNoMem(SymbolicExecutionEngine): - """ - Symbolic execution, include TOP value. - ExprMem are not propagated. - Any computation involving a TOP will generate TOP. 
- """ - - StateEngine = SymbolicStateTop - - def __init__(self, ir_arch, state, regstop, - sb_expr_simp=expr_simp): - known_symbols = dict(state) - super(SymbExecTopNoMem, self).__init__(ir_arch, known_symbols, - sb_expr_simp) - self.regstop = set(regstop) - - def get_state(self): - """Return the current state of the SymbolicEngine""" - return self.StateEngine(self.symbols, self.regstop) - - def eval_expr(self, expr, eval_cache=None): - if expr in self.regstop: - return exprid_top(expr) - if eval_cache is None: - eval_cache = {} - ret = self.apply_expr_on_state_visit_cache(expr, self.symbols, eval_cache) - return ret - - def manage_mem(self, expr, state, cache, level): - ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) - ret = ExprMem(ptr, expr.size) - ret = self.get_mem_state(ret) - if ret.is_mem() and not ret.arg.is_int() and ret.arg == ptr: - ret = exprid_top(expr) - assert expr.size == ret.size - return ret - - - def eval_exprid(self, expr, **kwargs): - """[DEV]: Evaluate an ExprId using the current state""" - if expr in self.regstop: - ret = exprid_top(expr) - else: - ret = self.symbols.read(expr) - return ret - - def eval_exprloc(self, expr, **kwargs): - offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) - if offset is not None: - ret = ExprInt(offset, expr.size) - else: - ret = expr - return ret - - def eval_exprcond(self, expr, **kwargs): - """[DEV]: Evaluate an ExprCond using the current state""" - cond = self.eval_expr_visitor(expr.cond, **kwargs) - src1 = self.eval_expr_visitor(expr.src1, **kwargs) - src2 = self.eval_expr_visitor(expr.src2, **kwargs) - if cond.is_id(TOPSTR) or src1.is_id(TOPSTR) or src2.is_id(TOPSTR): - ret = exprid_top(expr) - else: - ret = ExprCond(cond, src1, src2) - return ret - - def eval_exprslice(self, expr, **kwargs): - """[DEV]: Evaluate an ExprSlice using the current state""" - arg = self.eval_expr_visitor(expr.arg, **kwargs) - if arg.is_id(TOPSTR): - ret = exprid_top(expr) - else: - ret = 
ExprSlice(arg, expr.start, expr.stop) - return ret - - def eval_exprop(self, expr, **kwargs): - """[DEV]: Evaluate an ExprOp using the current state""" - args = [] - for oarg in expr.args: - arg = self.eval_expr_visitor(oarg, **kwargs) - if arg.is_id(TOPSTR): - return exprid_top(expr) - args.append(arg) - ret = ExprOp(expr.op, *args) - return ret - - def eval_exprcompose(self, expr, **kwargs): - """[DEV]: Evaluate an ExprCompose using the current state""" - args = [] - for arg in expr.args: - arg = self.eval_expr_visitor(arg, **kwargs) - if arg.is_id(TOPSTR): - return exprid_top(expr) - args.append(arg) - ret = ExprCompose(*args) - return ret - - def apply_change(self, dst, src): - eval_cache = {} - if dst.is_mem(): - # If Write to TOP, forget all memory information - ret = self.eval_expr(dst.arg, eval_cache) - if ret.is_id(TOPSTR): - to_del = set() - for dst_tmp in self.symbols: - if dst_tmp.is_mem(): - to_del.add(dst_tmp) - for dst_to_del in to_del: - del self.symbols[dst_to_del] - return - src_o = self.expr_simp(src) - - # Force update. Ex: - # EBX += 1 (state: EBX = EBX+1) - # EBX -= 1 (state: EBX = EBX, must be updated) - if dst in self.regstop: - self.regstop.discard(dst) - self.symbols[dst] = src_o - - if dst == src_o: - # Avoid useless X = X information - del self.symbols[dst] - - if src_o.is_id(TOPSTR): - if dst in self.symbols: - del self.symbols[dst] - self.regstop.add(dst) - -class SymbExecTop(SymbExecTopNoMem): - """ - Symbolic execution, include TOP value. - ExprMem are propagated. - Any computation involving a TOP will generate TOP. - WARNING: avoid memory aliases here! 
- """ - - def manage_mem(self, expr, state, cache, level): - ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) - ret = ExprMem(ptr, expr.size) - ret = self.get_mem_state(ret) - assert expr.size == ret.size - return ret diff --git a/miasm2/ir/symbexec_types.py b/miasm2/ir/symbexec_types.py deleted file mode 100644 index 57b7580a..00000000 --- a/miasm2/ir/symbexec_types.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import print_function - -from future.utils import viewitems - -from miasm2.ir.symbexec import SymbolicExecutionEngine, StateEngine -from miasm2.expression.simplifications import expr_simp -from miasm2.expression.expression import ExprId, ExprMem - - -class SymbolicStateCTypes(StateEngine): - """Store C types of symbols""" - - def __init__(self, symbols): - tmp = {} - for expr, types in viewitems(symbols): - tmp[expr] = frozenset(types) - self._symbols = frozenset(viewitems(tmp)) - - def __hash__(self): - return hash((self.__class__, self._symbols)) - - def __str__(self): - out = [] - for dst, src in sorted(self._symbols): - out.append("%s = %s" % (dst, src)) - return "\n".join(out) - - def __eq__(self, other): - if self is other: - return True - if self.__class__ != other.__class__: - return False - return self.symbols == other.symbols - - def __ne__(self, other): - return not self.__eq__(other) - - def __iter__(self): - for dst, src in self._symbols: - yield dst, src - - def merge(self, other): - """Merge two symbolic states - The resulting types are the union of types of both states. 
- @other: second symbolic state - """ - symb_a = self.symbols - symb_b = other.symbols - symbols = {} - for expr in set(symb_a).union(set(symb_b)): - ctypes = symb_a.get(expr, set()).union(symb_b.get(expr, set())) - if ctypes: - symbols[expr] = ctypes - return self.__class__(symbols) - - @property - def symbols(self): - """Return the dictionary of known symbols'types""" - return dict(self._symbols) - - -class SymbExecCType(SymbolicExecutionEngine): - """Engine of C types propagation - WARNING: avoid memory aliases here! - """ - - StateEngine = SymbolicStateCTypes - OBJC_INTERNAL = "___OBJC___" - - def __init__(self, ir_arch, - symbols, - chandler, - sb_expr_simp=expr_simp): - self.chandler = chandler - - super(SymbExecCType, self).__init__(ir_arch, - {}, - sb_expr_simp) - self.symbols = dict(symbols) - - def get_state(self): - """Return the current state of the SymbolicEngine""" - return self.StateEngine(self.symbols) - - def eval_assignblk(self, assignblk): - """ - Evaluate AssignBlock on the current state - @assignblk: AssignBlock instance - """ - pool_out = {} - for dst, src in viewitems(assignblk): - objcs = self.chandler.expr_to_types(src, self.symbols) - if isinstance(dst, ExprMem): - continue - elif isinstance(dst, ExprId): - pool_out[dst] = frozenset(objcs) - else: - raise ValueError("Unsupported assignment", str(dst)) - return pool_out - - def eval_expr(self, expr, eval_cache=None): - return frozenset(self.chandler.expr_to_types(expr, self.symbols)) - - def apply_change(self, dst, src): - if src is None: - if dst in self.symbols: - del self.symbols[dst] - else: - self.symbols[dst] = src - - def del_mem_above_stack(self, stack_ptr): - """No stack deletion""" - return - - def dump_id(self): - """ - Dump modififed registers symbols only - """ - for expr, expr_types in sorted(viewitems(self.symbols)): - if not expr.is_mem(): - print(expr) - for expr_type in expr_types: - print('\t', expr_type) - - def dump_mem(self): - """ - Dump modififed memory symbols - """ 
- for expr, value in sorted(viewitems(self.symbols)): - if expr.is_mem(): - print(expr, value) diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py deleted file mode 100644 index e44e859f..00000000 --- a/miasm2/ir/translators/C.py +++ /dev/null @@ -1,528 +0,0 @@ -from miasm2.ir.translators.translator import Translator -from miasm2.expression.modint import size2mask -from miasm2.expression.expression import ExprInt, ExprCond, ExprCompose, \ - TOK_EQUAL, \ - TOK_INF_SIGNED, TOK_INF_UNSIGNED, \ - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED - -def int_size_to_bn(value, size): - if size < 32: - int_str = "%.8x" % value - size_nibble = 8 - else: - # size must be multiple of 4 - size = ((size + 31) // 32) * 32 - size_nibble = size // 4 - fmt_str = "%%.%dx" % size_nibble - int_str = fmt_str % value - assert len(int_str) == size_nibble - return int_str, size_nibble - - -TOK_CMP_TO_NATIVE_C = { - TOK_EQUAL: "==", - TOK_INF_SIGNED: "<", - TOK_INF_UNSIGNED: "<", - TOK_INF_EQUAL_SIGNED: "<=", - TOK_INF_EQUAL_UNSIGNED: "<=", -} - -TOK_CMP_TO_BIGNUM_C = { - TOK_EQUAL: "equal", - TOK_INF_SIGNED: "inf_signed", - TOK_INF_UNSIGNED: "inf_unsigned", - TOK_INF_EQUAL_SIGNED: "inf_equal_signed", - TOK_INF_EQUAL_UNSIGNED: "inf_equal_unsigned", -} - - -class TranslatorC(Translator): - "Translate a Miasm expression to an equivalent C code" - - # Implemented language - __LANG__ = "C" - - # Operations translation - dct_shift = {'a>>': "right_arith", - '>>': "right_logic", - '<<': "left_logic", - } - dct_rot = {'<<<': 'rot_left', - '>>>': 'rot_right', - } - - NATIVE_INT_MAX_SIZE = 64 - - def __init__(self, loc_db=None, **kwargs): - """Instance a C translator - @loc_db: LocationDB instance - """ - super(TranslatorC, self).__init__(**kwargs) - # symbol pool - self.loc_db = loc_db - - def _size2mask(self, size): - """Return a C string corresponding to the size2mask operation, with support for - @size <= 64""" - assert size <= 64 - mask = size2mask(size) - return "0x%x" % mask - - 
def from_ExprId(self, expr): - return str(expr) - - def from_ExprInt(self, expr): - if expr.size <= self.NATIVE_INT_MAX_SIZE: - assert expr.size <= 64 - out = "0x%x" % int(expr) - if expr.size == 64: - out += "ULL" - return out - value, int_size = int_size_to_bn(int(expr), expr.size) - return 'bignum_from_string("%s", %d)' % (value, int_size) - - def from_ExprLoc(self, expr): - loc_key = expr.loc_key - if self.loc_db is None: - return str(loc_key) - offset = self.loc_db.get_location_offset(loc_key) - if offset is None: - return str(loc_key) - - if expr.size <= self.NATIVE_INT_MAX_SIZE: - return "0x%x" % offset - - value, int_size = int_size_to_bn(offset, 64) - return 'bignum_from_string("%s", %d)' % (value, int_size) - - def from_ExprAssign(self, expr): - new_dst = self.from_expr(expr.dst) - new_src = self.from_expr(expr.src) - return "%s = %s" % (new_dst, new_src) - - def from_ExprCond(self, expr): - cond = self.from_expr(expr.cond) - src1 = self.from_expr(expr.src1) - src2 = self.from_expr(expr.src2) - if not expr.cond.size <= self.NATIVE_INT_MAX_SIZE: - cond = "(!bignum_is_zero(%s))" % cond - out = "(%s?%s:%s)" % (cond, src1, src2) - return out - - def from_ExprMem(self, expr): - ptr = expr.ptr - if ptr.size <= self.NATIVE_INT_MAX_SIZE: - new_ptr = self.from_expr(ptr) - if expr.size <= self.NATIVE_INT_MAX_SIZE: - # Native ptr, Native Mem - return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, new_ptr) - else: - # Native ptr, BN mem - return "MEM_LOOKUP_INT_BN(jitcpu, %d, %s)" % (expr.size, new_ptr) - # BN ptr - new_ptr = self.from_expr(ptr) - - if expr.size <= self.NATIVE_INT_MAX_SIZE: - # BN ptr, Native Mem - return "MEM_LOOKUP_BN_INT(jitcpu, %d, %s)" % (expr.size, new_ptr) - else: - # BN ptr, BN mem - return "MEM_LOOKUP_BN_BN(jitcpu, %d, %s)" % (expr.size, new_ptr) - - def from_ExprOp(self, expr): - if len(expr.args) == 1: - if expr.op == 'parity': - arg = expr.args[0] - out = self.from_expr(arg) - if arg.size <= self.NATIVE_INT_MAX_SIZE: - out = "(%s&%s)" % 
(out, self._size2mask(arg.size)) - else: - out = 'bignum_mask(%s, 8)' % (out, 8) - out = 'bignum_to_uint64(%s)' % out - out = 'parity(%s)' % out - return out - - elif expr.op.startswith("zeroExt_"): - arg = expr.args[0] - if expr.size == arg.size: - return arg - return self.from_expr(ExprCompose(arg, ExprInt(0, expr.size - arg.size))) - - elif expr.op.startswith("signExt_"): - arg = expr.args[0] - if expr.size == arg.size: - return arg - add_size = expr.size - arg.size - new_expr = ExprCompose( - arg, - ExprCond( - arg.msb(), - ExprInt(size2mask(add_size), add_size), - ExprInt(0, add_size) - ) - ) - return self.from_expr(new_expr) - - - elif expr.op in ['cntleadzeros', 'cnttrailzeros']: - arg = expr.args[0] - out = self.from_expr(arg) - if arg.size <= self.NATIVE_INT_MAX_SIZE: - out = "%s(0x%x, %s)" % (expr.op, expr.args[0].size, out) - else: - out = "bignum_%s(%s, %d)" % (expr.op, out, arg.size) - return out - - elif expr.op == '!': - arg = expr.args[0] - out = self.from_expr(arg) - if expr.size <= self.NATIVE_INT_MAX_SIZE: - out = "(~ %s)&%s" % (out, self._size2mask(arg.size)) - else: - out = "bignum_not(%s)" % out - out = "bignum_mask(%s, expr.size)" % out - return out - - elif expr.op in [ - "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", - "fchs", - ]: - return "fpu_%s%d(%s)" % ( - expr.op, - expr.size, - self.from_expr(expr.args[0]), - ) - elif (expr.op.startswith("access_") or - expr.op.startswith("load_") or - expr.op.startswith("fxam_c")): - arg = expr.args[0] - out = self.from_expr(arg) - out = "%s(%s)" % (expr.op, out) - return out - - elif expr.op == "-": - arg = expr.args[0] - out = self.from_expr(arg) - if arg.size <= self.NATIVE_INT_MAX_SIZE: - out = "(%s(%s))" % (expr.op, out) - out = "(%s&%s)" % (out, self._size2mask(arg.size)) - else: - out = "bignum_sub(bignum_from_uint64(0), %s)" % out - out = "bignum_mask(%s, %d)"% (out, expr.size) - return out - - elif expr.op.startswith("fpround_"): - return "%s_fp%d(%s)" % ( - expr.op, - 
expr.size, - self.from_expr(expr.args[0]), - ) - elif expr.op == "sint_to_fp": - size = expr.size - arg = expr.args[0] - if size not in [32, 64]: - raise RuntimeError( - "Unsupported size for sint_to_fp: %r" % size - ) - return "%s_%d(%s)" % (expr.op, size, self.from_expr(arg)) - elif expr.op.startswith("fp_to_sint"): - dest_size = expr.size - arg_size = expr.args[0].size - if (arg_size, dest_size) in [ - (32, 32), (64, 64), (64, 32), - ]: - func = "fp%d_to_sint%d" % (arg_size, dest_size) - else: - raise RuntimeError( - "Unsupported size for fp_to_sint: %r to %r" % ( - arg_size, - dest_size - )) - return "%s(%s)" % (func, self.from_expr(expr.args[0])) - elif expr.op.startswith("fpconvert_fp"): - dest_size = expr.size - arg_size = expr.args[0].size - if (arg_size, dest_size) in [ - (32, 64), (64, 32) - ]: - func = "fp%d_to_fp%d" % (arg_size, dest_size) - else: - raise RuntimeError( - "Unsupported size for fpconvert: %r to %r" % (arg_size, - dest_size) - ) - return "%s(%s)" % (func, self.from_expr(expr.args[0])) - else: - raise NotImplementedError('Unknown op: %r' % expr.op) - - elif len(expr.args) == 2: - if expr.op in self.dct_shift: - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - if expr.size <= self.NATIVE_INT_MAX_SIZE: - out = 'SHIFT_%s(%d, %s, %s)' % ( - self.dct_shift[expr.op].upper(), - expr.args[0].size, - arg0, - arg1 - ) - else: - op = { - "<<": "lshift", - ">>": "rshift", - "a>>": "a_rshift" - } - out = "bignum_%s(%s, bignum_to_uint64(%s))" % ( - op[expr.op], arg0, arg1 - ) - out = "bignum_mask(%s, %d)"% (out, expr.size) - return out - - elif expr.is_associative(): - args = [self.from_expr(arg) - for arg in expr.args] - if expr.size <= self.NATIVE_INT_MAX_SIZE: - out = (" %s " % expr.op).join(args) - out = "((%s)&%s)" % (out, self._size2mask(expr.size)) - else: - op_to_bn_func = { - "+": "add", - "*": "mul", - "|": "or", - "^": "xor", - "&": "and", - } - args = list(expr.args) - out = self.from_expr(args.pop()) - while args: 
- out = 'bignum_mask(bignum_%s(%s, %s), %d)' % ( - op_to_bn_func[expr.op], - out, - self.from_expr(args.pop()), - expr.size - ) - return out - - elif expr.op in ['-']: - return '(((%s&%s) %s (%s&%s))&%s)' % ( - self.from_expr(expr.args[0]), - self._size2mask(expr.args[0].size), - str(expr.op), - self.from_expr(expr.args[1]), - self._size2mask(expr.args[1].size), - self._size2mask(expr.args[0].size) - ) - elif expr.op in self.dct_rot: - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - if expr.size <= self.NATIVE_INT_MAX_SIZE: - out = '(%s(%s, %s, %s) &%s)' % ( - self.dct_rot[expr.op], - expr.args[0].size, - arg0, - arg1, - self._size2mask(expr.args[0].size), - ) - else: - op = { - ">>>": "ror", - "<<<": "rol" - } - out = "bignum_%s(%s, %d, bignum_to_uint64(%s))" % ( - op[expr.op], arg0, expr.size, arg1 - ) - out = "bignum_mask(%s, %d)"% (out, expr.size) - return out - - elif expr.op == 'x86_cpuid': - return "%s(%s, %s)" % (expr.op, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) - elif expr.op.startswith("fcom"): - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - if not expr.args[0].size <= self.NATIVE_INT_MAX_SIZE: - raise ValueError("Bad semantic: fpu do operations do not support such size") - out = "fpu_%s(%s, %s)" % (expr.op, arg0, arg1) - return out - - elif expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", - "fprem", "fyl2x", "fpatan"]: - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - if not expr.args[0].size <= self.NATIVE_INT_MAX_SIZE: - raise ValueError("Bad semantic: fpu do operations do not support such size") - out = "fpu_%s%d(%s, %s)" % (expr.op, expr.size, arg0, arg1) - return out - - elif expr.op == "segm": - return "segm2addr(jitcpu, %s, %s)" % ( - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1]) - ) - - elif expr.op in ['udiv', 'umod']: - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - - if expr.size <= 
self.NATIVE_INT_MAX_SIZE: - out = '%s%d(%s, %s)' % ( - expr.op, - expr.args[0].size, - arg0, - arg1 - ) - else: - out = "bignum_%s(%s, %s)" % ( - expr.op, - arg0, - arg1 - ) - out = "bignum_mask(%s, %d)"% (out, expr.size) - return out - - - - elif expr.op in ['sdiv', 'smod']: - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - - if expr.size <= self.NATIVE_INT_MAX_SIZE: - out = '%s%d(%s, %s)' % ( - expr.op, - expr.args[0].size, - arg0, - arg1 - ) - else: - out = "bignum_%s(%s, %s, %d)" % ( - expr.op, - arg0, - arg1, - expr.size - ) - out = "bignum_mask(%s, %d)"% (out, expr.size) - return out - - elif expr.op in ["bcdadd", "bcdadd_cf"]: - return "%s_%d(%s, %s)" % ( - expr.op, expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1]) - ) - - - elif expr.op in [ - TOK_EQUAL, - TOK_INF_SIGNED, - TOK_INF_UNSIGNED, - TOK_INF_EQUAL_SIGNED, - TOK_INF_EQUAL_UNSIGNED, - ]: - arg0 = self.from_expr(expr.args[0]) - arg1 = self.from_expr(expr.args[1]) - - if expr.size <= self.NATIVE_INT_MAX_SIZE: - op = TOK_CMP_TO_NATIVE_C[expr.op] - if expr.op in [TOK_INF_SIGNED, TOK_INF_EQUAL_SIGNED]: - cast = "(int%d_t)" % expr.args[0].size - else: - cast = "(uint%d_t)" % expr.args[0].size - out = '((%s%s %s %s%s)?1:0)' % ( - cast, - arg0, - op, - cast, - arg1 - ) - else: - op = TOK_CMP_TO_BIGNUM_C[expr.op] - out = "bignum_is_%s(%s, %s)" % ( - op, - arg0, - arg1 - ) - out = "bignum_mask(%s, %d)"% (out, expr.size) - return out - - - else: - raise NotImplementedError('Unknown op: %r' % expr.op) - - elif len(expr.args) >= 3 and expr.is_associative(): # ????? 
- oper = ['(%s&%s)' % ( - self.from_expr(arg), - self._size2mask(arg.size), - ) - for arg in expr.args] - oper = str(expr.op).join(oper) - return "((%s)&%s)" % ( - oper, - self._size2mask(expr.args[0].size) - ) - else: - raise NotImplementedError('Unknown op: %s' % expr.op) - - def from_ExprSlice(self, expr): - out = self.from_expr(expr.arg) - if expr.arg.size <= self.NATIVE_INT_MAX_SIZE: - # XXX check mask for 64 bit & 32 bit compat - out = "((%s>>%d) &%s)" % ( - out, expr.start, - self._size2mask(expr.stop - expr.start) - ) - else: - out = "bignum_rshift(%s, %d)" % (out, expr.start) - out = "bignum_mask(%s, %d)" % (out, expr.stop - expr.start) - - if expr.size <= self.NATIVE_INT_MAX_SIZE: - # Convert bignum to int - out = "bignum_to_uint64(%s)" % out - return out - - def from_ExprCompose(self, expr): - if expr.size <= self.NATIVE_INT_MAX_SIZE: - - out = [] - # XXX check mask for 64 bit & 32 bit compat - if expr.size in [8, 16, 32, 64, 128]: - size = expr.size - else: - # Uncommon expression size, use at least uint8 - size = max(expr.size, 8) - next_power = 1 - while next_power <= size: - next_power <<= 1 - size = next_power - - dst_cast = "uint%d_t" % size - for index, arg in expr.iter_args(): - out.append("(((%s)(%s & %s)) << %d)" % ( - dst_cast, - self.from_expr(arg), - self._size2mask(arg.size), - index) - ) - out = ' | '.join(out) - return '(' + out + ')' - else: - # Convert all parts to bignum - args = [] - for index, arg in expr.iter_args(): - arg_str = self.from_expr(arg) - if arg.size <= self.NATIVE_INT_MAX_SIZE: - arg_str = '((%s) & %s)' % (arg_str, self._size2mask(arg.size)) - arg_str = 'bignum_from_uint64(%s)' % arg_str - else: - arg_str = 'bignum_mask(%s, %d)' % (arg_str, arg.size) - arg_str = 'bignum_lshift(%s, %d)' % (arg_str, index) - args.append(arg_str) - out = args.pop() - while args: - arg = args.pop() - out = "bignum_or(%s, %s)" % (out, arg) - return out - - -# Register the class -Translator.register(TranslatorC) diff --git 
a/miasm2/ir/translators/__init__.py b/miasm2/ir/translators/__init__.py deleted file mode 100644 index d3678ffc..00000000 --- a/miasm2/ir/translators/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""IR Translators""" -from miasm2.ir.translators.translator import Translator -import miasm2.ir.translators.C -import miasm2.ir.translators.python -import miasm2.ir.translators.miasm -import miasm2.ir.translators.smt2 -try: - import miasm2.ir.translators.z3_ir -except ImportError: - # Nothing to do, z3 not available - pass - -__all__ = ["Translator"] diff --git a/miasm2/ir/translators/miasm.py b/miasm2/ir/translators/miasm.py deleted file mode 100644 index e93e9499..00000000 --- a/miasm2/ir/translators/miasm.py +++ /dev/null @@ -1,45 +0,0 @@ -from builtins import map -from miasm2.ir.translators.translator import Translator - - -class TranslatorMiasm(Translator): - "Translate a Miasm expression to its Python building form" - - __LANG__ = "Miasm" - - def from_ExprId(self, expr): - return "ExprId(%s, size=%d)" % (repr(expr.name), expr.size) - - def from_ExprInt(self, expr): - return "ExprInt(0x%x, %d)" % (int(expr), expr.size) - - def from_ExprCond(self, expr): - return "ExprCond(%s, %s, %s)" % (self.from_expr(expr.cond), - self.from_expr(expr.src1), - self.from_expr(expr.src2)) - - def from_ExprSlice(self, expr): - return "ExprSlice(%s, %d, %d)" % (self.from_expr(expr.arg), - expr.start, - expr.stop) - - def from_ExprOp(self, expr): - return "ExprOp(%s, %s)" % ( - repr(expr.op), - ", ".join(map(self.from_expr, expr.args)) - ) - - def from_ExprCompose(self, expr): - args = ["%s" % self.from_expr(arg) for arg in expr.args] - return "ExprCompose(%s)" % ", ".join(args) - - def from_ExprAssign(self, expr): - return "ExprAssign(%s, %s)" % (self.from_expr(expr.dst), - self.from_expr(expr.src)) - - def from_ExprMem(self, expr): - return "ExprMem(%s, size=%d)" % (self.from_expr(expr.ptr), expr.size) - - -# Register the class -Translator.register(TranslatorMiasm) diff --git 
a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py deleted file mode 100644 index 4b1b4b52..00000000 --- a/miasm2/ir/translators/python.py +++ /dev/null @@ -1,98 +0,0 @@ -from builtins import map -from miasm2.expression.expression import ExprInt -from miasm2.ir.translators.translator import Translator - - -class TranslatorPython(Translator): - """Translate a Miasm expression to an equivalent Python code - - Memory is abstracted using the unimplemented function: - int memory(int address, int size) - """ - - # Implemented language - __LANG__ = "Python" - # Operations translation - op_no_translate = ["+", "-", "/", "%", ">>", "<<", "&", "^", "|", "*"] - - def from_ExprInt(self, expr): - return str(expr) - - def from_ExprId(self, expr): - return str(expr) - - def from_ExprLoc(self, expr): - return str(expr) - - def from_ExprMem(self, expr): - return "memory(%s, 0x%x)" % ( - self.from_expr(expr.ptr), - expr.size // 8 - ) - - def from_ExprSlice(self, expr): - out = self.from_expr(expr.arg) - if expr.start != 0: - out = "(%s >> %d)" % (out, expr.start) - return "(%s & 0x%x)" % (out, (1 << (expr.stop - expr.start)) - 1) - - def from_ExprCompose(self, expr): - out = [] - for index, arg in expr.iter_args(): - out.append( - "((%s & 0x%x) << %d)" % ( - self.from_expr(arg), - (1 << arg.size) - 1, - index - ) - ) - return "(%s)" % ' | '.join(out) - - def from_ExprCond(self, expr): - return "(%s if (%s) else %s)" % ( - self.from_expr(expr.src1), - self.from_expr(expr.cond), - self.from_expr(expr.src2) - ) - - def from_ExprOp(self, expr): - if expr.op in self.op_no_translate: - args = list(map(self.from_expr, expr.args)) - if len(expr.args) == 1: - return "((%s %s) & 0x%x)" % ( - expr.op, - args[0], - (1 << expr.size) - 1 - ) - else: - return "((%s) & 0x%x)" % ( - (" %s " % expr.op).join(args), - (1 << expr.size) - 1 - ) - elif expr.op == "parity": - return "(%s & 0x1)" % self.from_expr(expr.args[0]) - - elif expr.op in ["<<<", ">>>"]: - amount_raw = expr.args[1] 
- amount = expr.args[1] % ExprInt(amount_raw.size, expr.size) - amount_inv = ExprInt(expr.size, expr.size) - amount - if expr.op == "<<<": - amount, amount_inv = amount_inv, amount - part1 = "(%s >> %s)"% (self.from_expr(expr.args[0]), - self.from_expr(amount)) - part2 = "(%s << %s)"% (self.from_expr(expr.args[0]), - self.from_expr(amount_inv)) - - return "((%s | %s) &0x%x)" % (part1, part2, int(expr.mask)) - - raise NotImplementedError("Unknown operator: %s" % expr.op) - - def from_ExprAssign(self, expr): - return "%s = %s" % ( - self.from_expr(expr.dst), - self.from_expr(expr.src) - ) - - -# Register the class -Translator.register(TranslatorPython) diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py deleted file mode 100644 index 7b619457..00000000 --- a/miasm2/ir/translators/smt2.py +++ /dev/null @@ -1,326 +0,0 @@ -from builtins import map -from builtins import range -import logging - -from miasm2.ir.translators.translator import Translator -from miasm2.expression.smt2_helper import * - -log = logging.getLogger("translator_smt2") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARNING) - -class SMT2Mem(object): - """ - Memory abstraction for TranslatorSMT2. Memory elements are only accessed, - never written. To give a concrete value for a given memory cell in a solver, - add "mem32.get(address, size) == " constraints to your equation. - The endianness of memory accesses is handled accordingly to the "endianness" - attribute. - Note: Will have one memory space for each addressing size used. - For example, if memory is accessed via 32 bits values and 16 bits values, - these access will not occur in the same address space. - - Adapted from Z3Mem - """ - - def __init__(self, endianness="<", name="mem"): - """Initializes an SMT2Mem object with a given @name and @endianness. 
- @endianness: Endianness of memory representation. '<' for little endian, - '>' for big endian. - @name: name of memory Arrays generated. They will be named - name+str(address size) (for example mem32, mem16...). - """ - if endianness not in ['<', '>']: - raise ValueError("Endianness should be '>' (big) or '<' (little)") - self.endianness = endianness - self.mems = {} # Address size -> SMT2 memory array - self.name = name - # initialise address size - self.addr_size = 0 - - def get_mem_array(self, size): - """Returns an SMT Array used internally to represent memory for addresses - of size @size. - @size: integer, size in bit of addresses in the memory to get. - Return an string with the name of the SMT array.. - """ - try: - mem = self.mems[size] - except KeyError: - # Lazy instantiation - self.mems[size] = self.name + str(size) - mem = self.mems[size] - return mem - - def __getitem__(self, addr): - """One byte memory access. Different address sizes with the same value - will result in different memory accesses. - @addr: an SMT2 expression, the address to read. - Return an SMT2 expression of size 8 bits representing a memory access. - """ - size = self.addr_size - mem = self.get_mem_array(size) - return array_select(mem, addr) - - def get(self, addr, size, addr_size): - """ Memory access at address @addr of size @size with - address size @addr_size. - @addr: an SMT2 expression, the address to read. - @size: int, size of the read in bits. - @addr_size: int, size of the address - Return a SMT2 expression representing a memory access. 
- """ - # set address size per read access - self.addr_size = addr_size - - original_size = size - if original_size % 8 != 0: - # Size not aligned on 8bits -> read more than size and extract after - size = ((original_size // 8) + 1) * 8 - res = self[addr] - if self.is_little_endian(): - for i in range(1, size // 8): - index = bvadd(addr, bit_vec_val(i, addr_size)) - res = bv_concat(self[index], res) - else: - for i in range(1, size // 8): - res = bv_concat(res, self[index]) - if size == original_size: - return res - else: - # Size not aligned, extract right sized result - return bv_extract(original_size-1, 0, res) - - def is_little_endian(self): - """True if this memory is little endian.""" - return self.endianness == "<" - - def is_big_endian(self): - """True if this memory is big endian.""" - return not self.is_little_endian() - - -class TranslatorSMT2(Translator): - """Translate a Miasm expression into an equivalent SMT2 - expression. Memory is abstracted via SMT2Mem. - The result of from_expr will be an SMT2 expression. - - If you want to interact with the memory abstraction after the translation, - you can instantiate your own SMT2Mem that will be equivalent to the one - used by TranslatorSMT2. - - TranslatorSMT2 provides the creation of a valid SMT2 file. For this, - it keeps track of the translated bit vectors. 
- - Adapted from TranslatorZ3 - """ - - # Implemented language - __LANG__ = "smt2" - - def __init__(self, endianness="<", loc_db=None, **kwargs): - """Instance a SMT2 translator - @endianness: (optional) memory endianness - """ - super(TranslatorSMT2, self).__init__(**kwargs) - # memory abstraction - self._mem = SMT2Mem(endianness) - # map of translated bit vectors - self._bitvectors = dict() - # symbol pool - self.loc_db = loc_db - - def from_ExprInt(self, expr): - return bit_vec_val(expr.arg.arg, expr.size) - - def from_ExprId(self, expr): - if str(expr) not in self._bitvectors: - self._bitvectors[str(expr)] = expr.size - return str(expr) - - def from_ExprLoc(self, expr): - loc_key = expr.loc_key - if self.loc_db is None or self.loc_db.get_location_offset(loc_key) is None: - if str(loc_key) not in self._bitvectors: - self._bitvectors[str(loc_key)] = expr.size - return str(loc_key) - - offset = self.loc_db.get_location_offset(loc_key) - return bit_vec_val(str(offset), expr.size) - - def from_ExprMem(self, expr): - addr = self.from_expr(expr.ptr) - # size to read from memory - size = expr.size - # size of memory address - addr_size = expr.ptr.size - return self._mem.get(addr, size, addr_size) - - def from_ExprSlice(self, expr): - res = self.from_expr(expr.arg) - res = bv_extract(expr.stop-1, expr.start, res) - return res - - def from_ExprCompose(self, expr): - res = None - for arg in expr.args: - e = bv_extract(arg.size-1, 0, self.from_expr(arg)) - if res: - res = bv_concat(e, res) - else: - res = e - return res - - def from_ExprCond(self, expr): - cond = self.from_expr(expr.cond) - src1 = self.from_expr(expr.src1) - src2 = self.from_expr(expr.src2) - - # (and (distinct cond (_ bv0 )) true) - zero = bit_vec_val(0, expr.cond.size) - distinct = smt2_distinct(cond, zero) - distinct_and = smt2_and(distinct, "true") - - # (ite ((and (distinct cond (_ bv0 )) true) src1 src2)) - return smt2_ite(distinct_and, src1, src2) - - def from_ExprOp(self, expr): - args = 
list(map(self.from_expr, expr.args)) - res = args[0] - - if len(args) > 1: - for arg in args[1:]: - if expr.op == "+": - res = bvadd(res, arg) - elif expr.op == "-": - res = bvsub(res, arg) - elif expr.op == "*": - res = bvmul(res, arg) - elif expr.op == "/": - res = bvsdiv(res, arg) - elif expr.op == "sdiv": - res = bvsdiv(res, arg) - elif expr.op == "udiv": - res = bvudiv(res, arg) - elif expr.op == "%": - res = bvsmod(res, arg) - elif expr.op == "smod": - res = bvsmod(res, arg) - elif expr.op == "umod": - res = bvurem(res, arg) - elif expr.op == "&": - res = bvand(res, arg) - elif expr.op == "^": - res = bvxor(res, arg) - elif expr.op == "|": - res = bvor(res, arg) - elif expr.op == "<<": - res = bvshl(res, arg) - elif expr.op == ">>": - res = bvlshr(res, arg) - elif expr.op == "a>>": - res = bvashr(res, arg) - elif expr.op == "<<<": - res = bv_rotate_left(res, arg, expr.size) - elif expr.op == ">>>": - res = bv_rotate_right(res, arg, expr.size) - else: - raise NotImplementedError("Unsupported OP yet: %s" % expr.op) - elif expr.op == 'parity': - arg = bv_extract(7, 0, res) - res = bit_vec_val(1, 1) - for i in range(8): - res = bvxor(res, bv_extract(i, i, arg)) - elif expr.op == '-': - res = bvneg(res) - elif expr.op == "cnttrailzeros": - src = res - size = expr.size - size_smt2 = bit_vec_val(size, size) - one_smt2 = bit_vec_val(1, size) - zero_smt2 = bit_vec_val(0, size) - # src & (1 << (size - 1)) - op = bvand(src, bvshl(one_smt2, bvsub(size_smt2, one_smt2))) - # op != 0 - cond = smt2_distinct(op, zero_smt2) - # ite(cond, size - 1, src) - res = smt2_ite(cond, bvsub(size_smt2, one_smt2), src) - for i in range(size - 2, -1, -1): - # smt2 expression of i - i_smt2 = bit_vec_val(i, size) - # src & (1 << i) - op = bvand(src, bvshl(one_smt2, i_smt2)) - # op != 0 - cond = smt2_distinct(op, zero_smt2) - # ite(cond, i, res) - res = smt2_ite(cond, i_smt2, res) - elif expr.op == "cntleadzeros": - src = res - size = expr.size - one_smt2 = bit_vec_val(1, size) - zero_smt2 = 
bit_vec_val(0, size) - # (src & 1) != 0 - cond = smt2_distinct(bvand(src, one_smt2), zero_smt2) - # ite(cond, 0, src) - res= smt2_ite(cond, zero_smt2, src) - for i in range(size - 1, 0, -1): - index = - i % size - index_smt2 = bit_vec_val(index, size) - # src & (1 << index) - op = bvand(src, bvshl(one_smt2, index_smt2)) - # op != 0 - cond = smt2_distinct(op, zero_smt2) - # ite(cond, index, res) - value_smt2 = bit_vec_val(size - (index + 1), size) - res = smt2_ite(cond, value_smt2, res) - else: - raise NotImplementedError("Unsupported OP yet: %s" % expr.op) - - return res - - def from_ExprAssign(self, expr): - src = self.from_expr(expr.src) - dst = self.from_expr(expr.dst) - return smt2_assert(smt2_eq(src, dst)) - - def to_smt2(self, exprs, logic="QF_ABV", model=False): - """ - Converts a valid SMT2 file for a given list of - SMT2 expressions. - - :param exprs: list of SMT2 expressions - :param logic: SMT2 logic - :param model: model generation flag - :return: String of the SMT2 file - """ - ret = "" - ret += "(set-logic {})\n".format(logic) - - # define bit vectors - for bv in self._bitvectors: - size = self._bitvectors[bv] - ret += "{}\n".format(declare_bv(bv, size)) - - # define memory arrays - for size in self._mem.mems: - mem = self._mem.mems[size] - ret += "{}\n".format(declare_array(mem, bit_vec(size), bit_vec(8))) - - # merge SMT2 expressions - for expr in exprs: - ret += expr + "\n" - - # define action - ret += "(check-sat)\n" - - # enable model generation - if model: - ret += "(get-model)\n" - - return ret - - -# Register the class -Translator.register(TranslatorSMT2) diff --git a/miasm2/ir/translators/translator.py b/miasm2/ir/translators/translator.py deleted file mode 100644 index 65875072..00000000 --- a/miasm2/ir/translators/translator.py +++ /dev/null @@ -1,127 +0,0 @@ -from future.utils import viewitems - -import miasm2.expression.expression as m2_expr -from miasm2.core.utils import BoundedDict - - -class Translator(object): - "Abstract parent class 
for translators." - - # Registered translators - available_translators = [] - # Implemented language - __LANG__ = "" - - @classmethod - def register(cls, translator): - """Register a translator - @translator: Translator sub-class - """ - cls.available_translators.append(translator) - - @classmethod - def to_language(cls, target_lang, *args, **kwargs): - """Return the corresponding translator instance - @target_lang: str (case insensitive) wanted language - Raise a NotImplementedError in case of unmatched language - """ - target_lang = target_lang.lower() - for translator in cls.available_translators: - if translator.__LANG__.lower() == target_lang: - return translator(*args, **kwargs) - - raise NotImplementedError("Unknown target language: %s" % target_lang) - - @classmethod - def available_languages(cls): - "Return the list of registered languages" - return [translator.__LANG__ for translator in cls.available_translators] - - def __init__(self, cache_size=1000): - """Instance a translator - @cache_size: (optional) Expr cache size - """ - self._cache = BoundedDict(cache_size) - - def from_ExprInt(self, expr): - """Translate an ExprInt - @expr: ExprInt to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprId(self, expr): - """Translate an ExprId - @expr: ExprId to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprLoc(self, expr): - """Translate an ExprLoc - @expr: ExprLoc to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprCompose(self, expr): - """Translate an ExprCompose - @expr: ExprCompose to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprSlice(self, expr): - """Translate an ExprSlice - @expr: ExprSlice to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprOp(self, expr): - """Translate an ExprOp - @expr: ExprOp to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprMem(self, expr): - 
"""Translate an ExprMem - @expr: ExprMem to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprAssign(self, expr): - """Translate an ExprAssign - @expr: ExprAssign to translate - """ - raise NotImplementedError("Abstract method") - - def from_ExprCond(self, expr): - """Translate an ExprCond - @expr: ExprCond to translate - """ - raise NotImplementedError("Abstract method") - - def from_expr(self, expr): - """Translate an expression according to its type - @expr: expression to translate - """ - # Use cache - if expr in self._cache: - return self._cache[expr] - - # Handle Expr type - handlers = { - m2_expr.ExprInt: self.from_ExprInt, - m2_expr.ExprId: self.from_ExprId, - m2_expr.ExprLoc: self.from_ExprLoc, - m2_expr.ExprCompose: self.from_ExprCompose, - m2_expr.ExprSlice: self.from_ExprSlice, - m2_expr.ExprOp: self.from_ExprOp, - m2_expr.ExprMem: self.from_ExprMem, - m2_expr.ExprAssign: self.from_ExprAssign, - m2_expr.ExprCond: self.from_ExprCond - } - for target, handler in viewitems(handlers): - if isinstance(expr, target): - ## Compute value and update the internal cache - ret = handler(expr) - self._cache[expr] = ret - return ret - raise ValueError("Unhandled type for %s" % expr) - diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py deleted file mode 100644 index 902e72bd..00000000 --- a/miasm2/ir/translators/z3_ir.py +++ /dev/null @@ -1,281 +0,0 @@ -from builtins import map -from builtins import range -import imp -import logging - -# Raise an ImportError if z3 is not available WITHOUT actually importing it -imp.find_module("z3") - -from miasm2.ir.translators.translator import Translator - -log = logging.getLogger("translator_z3") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARNING) - -class Z3Mem(object): - """Memory abstration for TranslatorZ3. 
Memory elements are only accessed, - never written. To give a concrete value for a given memory cell in a solver, - add "mem32.get(address, size) == " constraints to your equation. - The endianness of memory accesses is handled accordingly to the "endianness" - attribute. - - Note: Will have one memory space for each addressing size used. - For example, if memory is accessed via 32 bits values and 16 bits values, - these access will not occur in the same address space. - """ - - def __init__(self, endianness="<", name="mem"): - """Initializes a Z3Mem object with a given @name and @endianness. - @endianness: Endianness of memory representation. '<' for little endian, - '>' for big endian. - @name: name of memory Arrays generated. They will be named - name+str(address size) (for example mem32, mem16...). - """ - # Import z3 only on demand - global z3 - import z3 - - if endianness not in ['<', '>']: - raise ValueError("Endianness should be '>' (big) or '<' (little)") - self.endianness = endianness - self.mems = {} # Address size -> memory z3.Array - self.name = name - - def get_mem_array(self, size): - """Returns a z3 Array used internally to represent memory for addresses - of size @size. - @size: integer, size in bit of addresses in the memory to get. - Return a z3 Array: BitVecSort(size) -> BitVecSort(8). - """ - try: - mem = self.mems[size] - except KeyError: - # Lazy instantiation - self.mems[size] = z3.Array(self.name + str(size), - z3.BitVecSort(size), - z3.BitVecSort(8)) - mem = self.mems[size] - return mem - - def __getitem__(self, addr): - """One byte memory access. Different address sizes with the same value - will result in different memory accesses. - @addr: a z3 BitVec, the address to read. - Return a z3 BitVec of size 8 bits representing a memory access. - """ - size = addr.size() - mem = self.get_mem_array(size) - return mem[addr] - - def get(self, addr, size): - """ Memory access at address @addr of size @size. 
- @addr: a z3 BitVec, the address to read. - @size: int, size of the read in bits. - Return a z3 BitVec of size @size representing a memory access. - """ - original_size = size - if original_size % 8 != 0: - # Size not aligned on 8bits -> read more than size and extract after - size = ((original_size // 8) + 1) * 8 - res = self[addr] - if self.is_little_endian(): - for i in range(1, size // 8): - res = z3.Concat(self[addr+i], res) - else: - for i in range(1, size //8): - res = z3.Concat(res, self[addr+i]) - if size == original_size: - return res - else: - # Size not aligned, extract right sized result - return z3.Extract(original_size-1, 0, res) - - def is_little_endian(self): - """True if this memory is little endian.""" - return self.endianness == "<" - - def is_big_endian(self): - """True if this memory is big endian.""" - return not self.is_little_endian() - - -class TranslatorZ3(Translator): - """Translate a Miasm expression to an equivalent z3 python binding - expression. Memory is abstracted via z3.Array (see Z3Mem). - The result of from_expr will be a z3 Expr. - - If you want to interact with the memory abstraction after the translation, - you can instantiate your own Z3Mem, that will be equivalent to the one - used by TranslatorZ3. 
- """ - - # Implemented language - __LANG__ = "z3" - # Operations translation - trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - - def __init__(self, endianness="<", loc_db=None, **kwargs): - """Instance a Z3 translator - @endianness: (optional) memory endianness - """ - # Import z3 only on demand - global z3 - import z3 - - super(TranslatorZ3, self).__init__(**kwargs) - self._mem = Z3Mem(endianness) - self.loc_db = loc_db - - def from_ExprInt(self, expr): - return z3.BitVecVal(expr.arg.arg, expr.size) - - def from_ExprId(self, expr): - return z3.BitVec(str(expr), expr.size) - - def from_ExprLoc(self, expr): - if self.loc_db is None: - # No loc_db, fallback to default name - return z3.BitVec(str(expr), expr.size) - loc_key = expr.loc_key - offset = self.loc_db.get_location_offset(loc_key) - if offset is not None: - return z3.BitVecVal(offset, expr.size) - # fallback to default name - return z3.BitVec(str(loc_key), expr.size) - - def from_ExprMem(self, expr): - addr = self.from_expr(expr.ptr) - return self._mem.get(addr, expr.size) - - def from_ExprSlice(self, expr): - res = self.from_expr(expr.arg) - res = z3.Extract(expr.stop-1, expr.start, res) - return res - - def from_ExprCompose(self, expr): - res = None - for arg in expr.args: - e = z3.Extract(arg.size-1, 0, self.from_expr(arg)) - if res != None: - res = z3.Concat(e, res) - else: - res = e - return res - - def from_ExprCond(self, expr): - cond = self.from_expr(expr.cond) - src1 = self.from_expr(expr.src1) - src2 = self.from_expr(expr.src2) - return z3.If(cond != 0, src1, src2) - - def _abs(self, z3_value): - return z3.If(z3_value >= 0,z3_value,-z3_value) - - def _sdivC(self, num, den): - """Divide (signed) @num by @den (z3 values) as C would - See modint.__div__ for implementation choice - """ - result_sign = z3.If(num * den >= 0, - z3.BitVecVal(1, num.size()), - z3.BitVecVal(-1, num.size()), - ) - return z3.UDiv(self._abs(num), self._abs(den)) * result_sign - - def from_ExprOp(self, expr): - 
args = list(map(self.from_expr, expr.args)) - res = args[0] - - if len(args) > 1: - for arg in args[1:]: - if expr.op in self.trivial_ops: - res = eval("res %s arg" % expr.op) - elif expr.op == ">>": - res = z3.LShR(res, arg) - elif expr.op == "a>>": - res = res >> arg - elif expr.op == "<<<": - res = z3.RotateLeft(res, arg) - elif expr.op == ">>>": - res = z3.RotateRight(res, arg) - elif expr.op == "sdiv": - res = self._sdivC(res, arg) - elif expr.op == "udiv": - res = z3.UDiv(res, arg) - elif expr.op == "smod": - res = res - (arg * (self._sdivC(res, arg))) - elif expr.op == "umod": - res = z3.URem(res, arg) - elif expr.op == "==": - res = z3.If( - args[0] == args[1], - z3.BitVecVal(1, 1), - z3.BitVecVal(0, 1) - ) - elif expr.op == " -#include "structmember.h" -#include -#include -#include "compat_py23.h" -#include "queue.h" -#include "vm_mngr.h" -#include "vm_mngr_py.h" -#include "bn.h" -#include "JitCore.h" - - -void JitCpu_dealloc(JitCpu* self) -{ - Py_TYPE(self)->tp_free((PyObject*)self); -} - - -PyObject * JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - JitCpu *self; - - self = (JitCpu *)type->tp_alloc(type, 0); - return (PyObject *)self; -} - -PyObject * JitCpu_get_vmmngr(JitCpu *self, void *closure) -{ - if (self->pyvm) { - Py_INCREF(self->pyvm); - return (PyObject*)self->pyvm; - } - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * JitCpu_set_vmmngr(JitCpu *self, PyObject *value, void *closure) -{ - self->pyvm = (VmMngr*)value; - return 0; -} - -PyObject * JitCpu_get_jitter(JitCpu *self, void *closure) -{ - if (self->jitter) { - Py_INCREF(self->jitter); - return self->jitter; - } - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * JitCpu_set_jitter(JitCpu *self, PyObject *value, void *closure) -{ - self->jitter = value; - return 0; -} - -uint8_t MEM_LOOKUP_08(JitCpu* jitcpu, uint64_t addr) -{ - return vm_MEM_LOOKUP_08(&(jitcpu->pyvm->vm_mngr), addr); -} - -uint16_t MEM_LOOKUP_16(JitCpu* jitcpu, uint64_t addr) -{ - return 
vm_MEM_LOOKUP_16(&(jitcpu->pyvm->vm_mngr), addr); -} - -uint32_t MEM_LOOKUP_32(JitCpu* jitcpu, uint64_t addr) -{ - return vm_MEM_LOOKUP_32(&(jitcpu->pyvm->vm_mngr), addr); -} - -uint64_t MEM_LOOKUP_64(JitCpu* jitcpu, uint64_t addr) -{ - return vm_MEM_LOOKUP_64(&(jitcpu->pyvm->vm_mngr), addr); -} - -bn_t MEM_LOOKUP_BN_BN(JitCpu* jitcpu, int size, bn_t addr) -{ - uint64_t ptr; - int i; - uint8_t tmp; - bn_t val = bignum_from_int(0); - - ptr = bignum_to_uint64(addr); - - - for (i=0; i < size; i += 8) { - tmp = vm_MEM_LOOKUP_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr); - ptr += 1; - val = bignum_or(val, bignum_lshift(bignum_from_int(tmp), i)); - } - - return val; -} - - -uint64_t MEM_LOOKUP_BN_INT(JitCpu* jitcpu, int size, bn_t addr) -{ - uint64_t ptr; - uint64_t val = 0; - - ptr = bignum_to_uint64(addr); - - switch (size) { - case 8: - val = vm_MEM_LOOKUP_08(&(jitcpu->pyvm->vm_mngr), ptr); - break; - case 16: - val = vm_MEM_LOOKUP_16(&(jitcpu->pyvm->vm_mngr), ptr); - break; - case 32: - val = vm_MEM_LOOKUP_32(&(jitcpu->pyvm->vm_mngr), ptr); - break; - case 64: - val = vm_MEM_LOOKUP_64(&(jitcpu->pyvm->vm_mngr), ptr); - break; - default: - fprintf(stderr, "Error: bad READ size %d\n", size); - exit(-1); - break; - } - - return val; -} - - - -bn_t MEM_LOOKUP_INT_BN(JitCpu* jitcpu, int size, uint64_t addr) -{ - int i; - uint8_t tmp; - bn_t val = bignum_from_int(0); - - for (i=0; i < size; i += 8) { - tmp = vm_MEM_LOOKUP_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr); - addr += 1; - val = bignum_or(val, bignum_lshift(bignum_from_int(tmp), i)); - } - - return val; -} - - -void MEM_LOOKUP_INT_BN_TO_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr) -{ - bn_t ret; - - if (size % 8) { - fprintf(stderr, "Bad size %d\n", size); - exit(-1); - } - - ret = MEM_LOOKUP_INT_BN(jitcpu, size, addr); - memcpy(ptr, (char*)&ret, size / 8); -} - - -void MEM_WRITE_BN_BN(JitCpu* jitcpu, int size, bn_t addr, bn_t src) -{ - uint64_t ptr; - int val; - int i; - - ptr = bignum_to_uint64(addr); - 
for (i=0; i < size; i += 8) { - val = bignum_to_uint64(src) & 0xFF; - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, val); - ptr += 1; - src = bignum_rshift(src, 8); - } -} - - -void MEM_WRITE_BN_INT(JitCpu* jitcpu, int size, bn_t addr, uint64_t src) -{ - uint64_t ptr; - ptr = bignum_to_uint64(addr); - - switch (size) { - case 8: - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, (unsigned char)src); - break; - case 16: - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, (unsigned short)src); - break; - case 32: - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, (unsigned int)src); - break; - case 64: - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, ptr, src); - break; - default: - fprintf(stderr, "Error: bad write size %d\n", size); - exit(-1); - break; - } -} - -void MEM_WRITE_INT_BN(JitCpu* jitcpu, int size, uint64_t addr, bn_t src) -{ - int val; - int i; - - for (i=0; i < size; i += 8) { - val = bignum_to_uint64(src) & 0xFF; - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, val); - addr += 1; - src = bignum_rshift(src, 8); - } -} - - -void MEM_WRITE_INT_BN_FROM_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr) -{ - bn_t val; - - if (size % 8) { - fprintf(stderr, "Bad size %d\n", size); - exit(-1); - } - - val = bignum_from_int(0); - memcpy(&val, ptr, size / 8); - MEM_WRITE_INT_BN(jitcpu, size, addr, val); -} - - - -PyObject* vm_get_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_len; - - uint64_t addr; - uint64_t size; - PyObject *obj_out; - char * buf_out; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_len)) - return NULL; - - PyGetInt(py_addr, addr); - PyGetInt(py_len, size); - - ret = vm_read_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, &buf_out, size); - if (ret < 0) { - PyErr_SetString(PyExc_RuntimeError, "cannot find address"); - return NULL; - } - - obj_out = PyBytes_FromStringAndSize(buf_out, size); - free(buf_out); - return obj_out; -} diff --git 
a/miasm2/jitter/JitCore.h b/miasm2/jitter/JitCore.h deleted file mode 100644 index 15efc7d2..00000000 --- a/miasm2/jitter/JitCore.h +++ /dev/null @@ -1,306 +0,0 @@ -#ifndef JITCORE_H -#define JITCORE_H - -#if _WIN32 -#define _MIASM_EXPORT __declspec(dllexport) -#else -#define _MIASM_EXPORT -#endif - -#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} -#define RAISE_ret0(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return 0;} - - -#if PY_MAJOR_VERSION >= 3 -#define getset_reg_bn(regname, size) \ - static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ - { \ - bn_t bn; \ - int j; \ - PyObject* py_long; \ - PyObject* py_long_new; \ - PyObject* py_tmp; \ - PyObject* cst_32; \ - uint64_t tmp; \ - py_long = PyLong_FromLong(0); \ - cst_32 = PyLong_FromLong(32); \ - bn = ((vm_cpu_t*)(self->cpu))-> regname; \ - bn = bignum_mask(bn, (size)); \ - for (j = BN_BYTE_SIZE - 4; j >= 0 ; j -= 4) { \ - tmp = bignum_to_uint64(bignum_mask(bignum_rshift(bn, 8 * j), 32)); \ - py_tmp = PyLong_FromUnsignedLong(tmp); \ - py_long_new = PyObject_CallMethod(py_long, "__lshift__", "O", cst_32); \ - Py_DECREF(py_long); \ - py_long = PyObject_CallMethod(py_long_new, "__add__", "O", py_tmp); \ - Py_DECREF(py_long_new); \ - Py_DECREF(py_tmp); \ - } \ - Py_DECREF(cst_32); \ - return py_long; \ - } \ - \ - static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - bn_t bn; \ - int j; \ - PyObject* py_long = value; \ - PyObject* py_long_new; \ - PyObject* py_tmp; \ - PyObject* cst_32; \ - PyObject* cst_ffffffff; \ - uint64_t tmp; \ - if (PyLong_Check(py_long)){ \ - Py_INCREF(py_long); \ - } else { \ - RAISE(PyExc_TypeError,"arg must be int"); \ - } \ - \ - cst_ffffffff = PyLong_FromLong(0xffffffff); \ - cst_32 = PyLong_FromLong(32); \ - bn = bignum_from_int(0); \ - \ - for (j = 0; j < BN_BYTE_SIZE; j += 4) { \ - py_tmp = PyObject_CallMethod(py_long, "__and__", "O", cst_ffffffff); \ - py_long_new = 
PyObject_CallMethod(py_long, "__rshift__", "O", cst_32); \ - Py_DECREF(py_long); \ - py_long = py_long_new; \ - tmp = PyLong_AsUnsignedLongMask(py_tmp); \ - Py_DECREF(py_tmp); \ - bn = bignum_or(bn, bignum_lshift(bignum_from_uint64(tmp), 8 * j)); \ - } \ - \ - ((vm_cpu_t*)(self->cpu))-> regname = bignum_mask(bn, (size)); \ - Py_DECREF(py_long); \ - Py_DECREF(cst_32); \ - Py_DECREF(cst_ffffffff); \ - return 0; \ - } - - -#else -#define getset_reg_bn(regname, size) \ - static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ - { \ - bn_t bn; \ - int j; \ - PyObject* py_long; \ - PyObject* py_long_new; \ - PyObject* py_tmp; \ - PyObject* cst_32; \ - uint64_t tmp; \ - py_long = PyLong_FromLong(0); \ - cst_32 = PyLong_FromLong(32); \ - bn = ((vm_cpu_t*)(self->cpu))-> regname; \ - bn = bignum_mask(bn, (size)); \ - for (j = BN_BYTE_SIZE - 4; j >= 0 ; j -= 4) { \ - tmp = bignum_to_uint64(bignum_mask(bignum_rshift(bn, 8 * j), 32)); \ - py_tmp = PyLong_FromUnsignedLong(tmp); \ - py_long_new = PyObject_CallMethod(py_long, "__lshift__", "O", cst_32); \ - Py_DECREF(py_long); \ - py_long = PyObject_CallMethod(py_long_new, "__add__", "O", py_tmp); \ - Py_DECREF(py_long_new); \ - Py_DECREF(py_tmp); \ - } \ - Py_DECREF(cst_32); \ - return py_long; \ - } \ - \ - static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - bn_t bn; \ - int j; \ - PyObject* py_long = value; \ - PyObject* py_long_new; \ - PyObject* py_tmp; \ - PyObject* cst_32; \ - PyObject* cst_ffffffff; \ - uint64_t tmp; \ - \ - if (PyInt_Check(py_long)){ \ - tmp = (uint64_t)PyInt_AsLong(py_long); \ - py_long = PyLong_FromLong((long)tmp); \ - } else if (PyLong_Check(py_long)){ \ - Py_INCREF(py_long); \ - } \ - else{ \ - RAISE(PyExc_TypeError,"arg must be int"); \ - } \ - \ - cst_ffffffff = PyLong_FromLong(0xffffffff); \ - cst_32 = PyLong_FromLong(32); \ - bn = bignum_from_int(0); \ - \ - for (j = 0; j < BN_BYTE_SIZE; j += 4) { \ - py_tmp = PyObject_CallMethod(py_long, 
"__and__", "O", cst_ffffffff); \ - py_long_new = PyObject_CallMethod(py_long, "__rshift__", "O", cst_32); \ - Py_DECREF(py_long); \ - py_long = py_long_new; \ - tmp = PyLong_AsUnsignedLongMask(py_tmp); \ - Py_DECREF(py_tmp); \ - bn = bignum_or(bn, bignum_lshift(bignum_from_uint64(tmp), 8 * j)); \ - } \ - \ - ((vm_cpu_t*)(self->cpu))-> regname = bignum_mask(bn, (size)); \ - Py_DECREF(py_long); \ - Py_DECREF(cst_32); \ - Py_DECREF(cst_ffffffff); \ - return 0; \ - } -#endif - - - - - - - - - - - -#define getset_reg_u64(regname) \ - static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ - { \ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)(self->cpu))-> regname )); \ - } \ - static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - uint64_t val; \ - PyGetInt_retneg(value, val); \ - ((vm_cpu_t*)(self->cpu))-> regname = val; \ - return 0; \ - } - -#define getset_reg_u32(regname) \ - static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ - { \ - return PyLong_FromUnsignedLongLong((uint32_t)(((vm_cpu_t*)(self->cpu))-> regname )); \ - } \ - static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - uint32_t val; \ - PyGetInt_retneg(value, val); \ - ((vm_cpu_t*)(self->cpu))-> regname = val; \ - return 0; \ - } - - -#define getset_reg_u16(regname) \ - static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ - { \ - return PyLong_FromUnsignedLongLong((uint16_t)(((vm_cpu_t*)(self->cpu))-> regname )); \ - } \ - static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - uint16_t val; \ - PyGetInt_retneg(value, val); \ - ((vm_cpu_t*)(self->cpu))-> regname = val; \ - return 0; \ - } - - -#define get_reg(reg) do { \ - o = PyLong_FromUnsignedLongLong((uint64_t)((vm_cpu_t*)(self->cpu))->reg); \ - PyDict_SetItemString(dict, #reg, o); \ - Py_DECREF(o); \ - } while(0); - - -#define get_reg_bn(reg, size) do { \ - bn_t bn; \ - int j; 
\ - PyObject* py_long; \ - PyObject* py_long_new; \ - PyObject* py_tmp; \ - PyObject* cst_32; \ - uint64_t tmp; \ - py_long = PyLong_FromLong(0); \ - cst_32 = PyLong_FromLong(32); \ - bn = ((vm_cpu_t*)(self->cpu))-> reg; \ - bn = bignum_mask(bn, size); \ - for (j = BN_BYTE_SIZE - 4; j >= 0 ; j -= 4) { \ - tmp = bignum_to_uint64(bignum_mask(bignum_rshift(bn, 8 * j), 32)); \ - py_tmp = PyLong_FromUnsignedLong(tmp); \ - py_long_new = PyObject_CallMethod(py_long, "__lshift__", "O", cst_32); \ - Py_DECREF(py_long); \ - py_long = PyObject_CallMethod(py_long_new, "__add__", "O", py_tmp); \ - Py_DECREF(py_long_new); \ - Py_DECREF(py_tmp); \ - } \ - PyDict_SetItemString(dict, #reg, py_long); \ - Py_DECREF(py_long); \ - Py_DECREF(cst_32); \ - } while(0); - - -#define get_reg_off(reg) do { \ - o = PyLong_FromUnsignedLongLong((uint64_t)offsetof(vm_cpu_t, reg)); \ - PyDict_SetItemString(dict, #reg, o); \ - Py_DECREF(o); \ - } while(0); - - - - -typedef struct { - uint8_t is_local; - uint64_t address; -} block_id; - -typedef struct { - PyObject_HEAD - VmMngr *pyvm; - PyObject *jitter; - void* cpu; -} JitCpu; - - -typedef struct _reg_dict{ - char* name; - size_t offset; - size_t size; -} reg_dict; - - - -void JitCpu_dealloc(JitCpu* self); -PyObject * JitCpu_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -PyObject * JitCpu_get_vmmngr(JitCpu *self, void *closure); -PyObject * JitCpu_set_vmmngr(JitCpu *self, PyObject *value, void *closure); -PyObject * JitCpu_get_jitter(JitCpu *self, void *closure); -PyObject * JitCpu_set_jitter(JitCpu *self, PyObject *value, void *closure); -void Resolve_dst(block_id* BlockDst, uint64_t addr, uint64_t is_local); - -#define Resolve_dst(b, arg_addr, arg_is_local) do {(b)->address = (arg_addr); (b)->is_local = (arg_is_local);} while(0) - - - -_MIASM_EXPORT uint8_t MEM_LOOKUP_08(JitCpu* jitcpu, uint64_t addr); -_MIASM_EXPORT uint16_t MEM_LOOKUP_16(JitCpu* jitcpu, uint64_t addr); -_MIASM_EXPORT uint32_t MEM_LOOKUP_32(JitCpu* jitcpu, uint64_t 
addr); -_MIASM_EXPORT uint64_t MEM_LOOKUP_64(JitCpu* jitcpu, uint64_t addr); - -_MIASM_EXPORT bn_t MEM_LOOKUP_BN_BN(JitCpu* jitcpu, int size, bn_t addr); -_MIASM_EXPORT bn_t MEM_LOOKUP_INT_BN(JitCpu* jitcpu, int size, uint64_t addr); - -_MIASM_EXPORT uint64_t MEM_LOOKUP_BN_INT(JitCpu* jitcpu, int size, bn_t addr); - -_MIASM_EXPORT void MEM_WRITE_BN_BN(JitCpu* jitcpu, int size, bn_t addr, bn_t src); -_MIASM_EXPORT void MEM_WRITE_BN_INT(JitCpu* jitcpu, int size, bn_t addr, uint64_t src); -_MIASM_EXPORT void MEM_WRITE_INT_BN(JitCpu* jitcpu, int size, uint64_t addr, bn_t src); - - -PyObject* vm_get_mem(JitCpu *self, PyObject* args); - -_MIASM_EXPORT void MEM_LOOKUP_INT_BN_TO_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr); -_MIASM_EXPORT void MEM_WRITE_INT_BN_FROM_PTR(JitCpu* jitcpu, int size, uint64_t addr, char* ptr); - - - -#define VM_exception_flag (jitcpu->pyvm->vm_mngr.exception_flags) -#define CPU_exception_flag (((vm_cpu_t*)jitcpu->cpu)->exception_flags) -#define CPU_exception_flag_at_instr ((CPU_exception_flag) && ((CPU_exception_flag) > EXCEPT_NUM_UPDT_EIP)) -#define JIT_RET_EXCEPTION 1 -#define JIT_RET_NO_EXCEPTION 0 - -#endif diff --git a/miasm2/jitter/Jitgcc.c b/miasm2/jitter/Jitgcc.c deleted file mode 100644 index 0a39c998..00000000 --- a/miasm2/jitter/Jitgcc.c +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include -#include "compat_py23.h" - -typedef struct { - uint8_t is_local; - uint64_t address; -} block_id; - -typedef int (*jitted_func)(block_id*, PyObject*); - - -PyObject* gcc_exec_block(PyObject* self, PyObject* args) -{ - jitted_func func; - PyObject* jitcpu; - PyObject* func_py; - PyObject* lbl2ptr; - PyObject* stop_offsets; - PyObject* retaddr = NULL; - int status; - block_id BlockDst; - uint64_t max_exec_per_call = 0; - uint64_t cpt; - int do_cpt; - - - if (!PyArg_ParseTuple(args, "OOOO|K", - &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, - &max_exec_per_call)) - return NULL; - - /* The loop will decref retaddr always once */ - 
Py_INCREF(retaddr); - - if (max_exec_per_call == 0) { - do_cpt = 0; - cpt = 1; - } else { - do_cpt = 1; - cpt = max_exec_per_call; - } - - - - for (;;) { - if (cpt == 0) - return retaddr; - if (do_cpt) - cpt --; - // Init - BlockDst.is_local = 0; - BlockDst.address = 0; - - // Get the expected jitted function address - func_py = PyDict_GetItem(lbl2ptr, retaddr); - if (func_py) - func = (jitted_func) PyLong_AsVoidPtr((PyObject*) func_py); - else { - if (BlockDst.is_local == 1) { - fprintf(stderr, "return on local label!\n"); - exit(EXIT_FAILURE); - } - // retaddr is not jitted yet - return retaddr; - } - // Execute it - status = func(&BlockDst, jitcpu); - Py_DECREF(retaddr); - retaddr = PyLong_FromUnsignedLongLong(BlockDst.address); - - // Check exception - if (status) - return retaddr; - - // Check stop offsets - if (PySet_Contains(stop_offsets, retaddr)) - return retaddr; - } -} - - - -static PyObject *GccError; - - -static PyMethodDef GccMethods[] = { - {"gcc_exec_block", gcc_exec_block, METH_VARARGS, - "gcc exec block"}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - - - -MOD_INIT(Jitgcc) -{ - PyObject *module; - - MOD_DEF(module, "Jitgcc", "gcc module", GccMethods); - - if (module == NULL) - return NULL; - - return module; -} diff --git a/miasm2/jitter/Jitllvm.c b/miasm2/jitter/Jitllvm.c deleted file mode 100644 index efe5250f..00000000 --- a/miasm2/jitter/Jitllvm.c +++ /dev/null @@ -1,99 +0,0 @@ -#include - -#include - -#include -#include "compat_py23.h" -#include "queue.h" -#include "vm_mngr.h" -#include "vm_mngr_py.h" -#include "bn.h" -#include "JitCore.h" -// Needed to get the JitCpu.cpu offset, arch independent -#include "arch/JitCore_x86.h" - -PyObject* llvm_exec_block(PyObject* self, PyObject* args) -{ - uint64_t (*func)(void*, void*, void*, uint8_t*); - vm_cpu_t* cpu; - vm_mngr_t* vm; - uint64_t ret; - JitCpu* jitcpu; - uint8_t status; - PyObject* func_py; - PyObject* lbl2ptr; - PyObject* stop_offsets; - PyObject* retaddr = NULL; - uint64_t 
max_exec_per_call = 0; - uint64_t cpt; - int do_cpt; - - if (!PyArg_ParseTuple(args, "OOOO|K", - &retaddr, &jitcpu, &lbl2ptr, &stop_offsets, - &max_exec_per_call)) - return NULL; - - cpu = jitcpu->cpu; - vm = &(jitcpu->pyvm->vm_mngr); - /* The loop will decref retaddr always once */ - Py_INCREF(retaddr); - - if (max_exec_per_call == 0) { - do_cpt = 0; - cpt = 1; - } else { - do_cpt = 1; - cpt = max_exec_per_call; - } - - for (;;) { - // Handle cpt - if (cpt == 0) - return retaddr; - if (do_cpt) - cpt --; - - // Get the expected jitted function address - func_py = PyDict_GetItem(lbl2ptr, retaddr); - if (func_py) - func = PyLong_AsVoidPtr((PyObject*) func_py); - else - // retaddr is not jitted yet - return retaddr; - - // Execute it - ret = func((void*) jitcpu, (void*)(intptr_t) cpu, (void*)(intptr_t) vm, &status); - Py_DECREF(retaddr); - retaddr = PyLong_FromUnsignedLongLong(ret); - - // Check exception - if (status) - return retaddr; - - // Check stop offsets - if (PySet_Contains(stop_offsets, retaddr)) - return retaddr; - } -} - - -static PyMethodDef LLVMMethods[] = { - {"llvm_exec_block", llvm_exec_block, METH_VARARGS, - "llvm exec block"}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - - - - -MOD_INIT(Jitllvm) -{ - PyObject *module; - - MOD_DEF(module, "Jitllvm", "llvm module", LLVMMethods); - - if (module == NULL) - return NULL; - - return module; -} diff --git a/miasm2/jitter/__init__.py b/miasm2/jitter/__init__.py deleted file mode 100644 index 460e327d..00000000 --- a/miasm2/jitter/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"JustInTime compilation feature" diff --git a/miasm2/jitter/arch/JitCore_aarch64.c b/miasm2/jitter/arch/JitCore_aarch64.c deleted file mode 100644 index 9e1a870e..00000000 --- a/miasm2/jitter/arch/JitCore_aarch64.c +++ /dev/null @@ -1,562 +0,0 @@ -#include -#include "structmember.h" -#include -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include 
"../JitCore.h" -#include "../op_semantics.h" -#include "JitCore_aarch64.h" - - - -reg_dict gpreg_dict[] = { - {.name = "X0", .offset = offsetof(vm_cpu_t, X0), .size = 64}, - {.name = "X1", .offset = offsetof(vm_cpu_t, X1), .size = 64}, - {.name = "X2", .offset = offsetof(vm_cpu_t, X2), .size = 64}, - {.name = "X3", .offset = offsetof(vm_cpu_t, X3), .size = 64}, - {.name = "X4", .offset = offsetof(vm_cpu_t, X4), .size = 64}, - {.name = "X5", .offset = offsetof(vm_cpu_t, X5), .size = 64}, - {.name = "X6", .offset = offsetof(vm_cpu_t, X6), .size = 64}, - {.name = "X7", .offset = offsetof(vm_cpu_t, X7), .size = 64}, - {.name = "X8", .offset = offsetof(vm_cpu_t, X8), .size = 64}, - {.name = "X9", .offset = offsetof(vm_cpu_t, X9), .size = 64}, - {.name = "X10", .offset = offsetof(vm_cpu_t, X10), .size = 64}, - {.name = "X11", .offset = offsetof(vm_cpu_t, X11), .size = 64}, - {.name = "X12", .offset = offsetof(vm_cpu_t, X12), .size = 64}, - {.name = "X13", .offset = offsetof(vm_cpu_t, X13), .size = 64}, - {.name = "X14", .offset = offsetof(vm_cpu_t, X14), .size = 64}, - {.name = "X15", .offset = offsetof(vm_cpu_t, X15), .size = 64}, - {.name = "X16", .offset = offsetof(vm_cpu_t, X16), .size = 64}, - {.name = "X17", .offset = offsetof(vm_cpu_t, X17), .size = 64}, - {.name = "X18", .offset = offsetof(vm_cpu_t, X18), .size = 64}, - {.name = "X19", .offset = offsetof(vm_cpu_t, X19), .size = 64}, - {.name = "X20", .offset = offsetof(vm_cpu_t, X20), .size = 64}, - {.name = "X21", .offset = offsetof(vm_cpu_t, X21), .size = 64}, - {.name = "X22", .offset = offsetof(vm_cpu_t, X22), .size = 64}, - {.name = "X23", .offset = offsetof(vm_cpu_t, X23), .size = 64}, - {.name = "X24", .offset = offsetof(vm_cpu_t, X24), .size = 64}, - {.name = "X25", .offset = offsetof(vm_cpu_t, X25), .size = 64}, - {.name = "X26", .offset = offsetof(vm_cpu_t, X26), .size = 64}, - {.name = "X27", .offset = offsetof(vm_cpu_t, X27), .size = 64}, - {.name = "X28", .offset = offsetof(vm_cpu_t, X28), .size = 
64}, - {.name = "X29", .offset = offsetof(vm_cpu_t, X29), .size = 64}, - {.name = "LR", .offset = offsetof(vm_cpu_t, LR), .size = 64}, - - {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 64}, - {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 64}, - - {.name = "zf", .offset = offsetof(vm_cpu_t, zf), .size = 8}, - {.name = "nf", .offset = offsetof(vm_cpu_t, nf), .size = 8}, - {.name = "of", .offset = offsetof(vm_cpu_t, of), .size = 8}, - {.name = "cf", .offset = offsetof(vm_cpu_t, cf), .size = 8}, - - {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, - {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, - -}; - -/************************** JitCpu object **************************/ - - - - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg(X0); - get_reg(X1); - get_reg(X2); - get_reg(X3); - get_reg(X4); - get_reg(X5); - get_reg(X6); - get_reg(X7); - get_reg(X8); - get_reg(X9); - get_reg(X10); - get_reg(X11); - get_reg(X12); - get_reg(X13); - get_reg(X14); - get_reg(X15); - get_reg(X16); - get_reg(X17); - get_reg(X18); - get_reg(X19); - get_reg(X20); - get_reg(X21); - get_reg(X22); - get_reg(X23); - get_reg(X24); - get_reg(X25); - get_reg(X26); - get_reg(X27); - get_reg(X28); - get_reg(X29); - get_reg(LR); - get_reg(SP); - get_reg(PC); - - get_reg(zf); - get_reg(nf); - get_reg(of); - get_reg(cf); - - return dict; -} - - - -PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) -{ - PyObject* dict; - PyObject *d_key, *d_value = NULL; - Py_ssize_t pos = 0; - char* d_key_name; - uint64_t val; - unsigned int i, found; - - if (!PyArg_ParseTuple(args, "O", &dict)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - while(PyDict_Next(dict, &pos, &d_key, &d_value)){ - PyGetStr(d_key_name, d_key); - PyGetInt(d_value, val); - - found = 0; - for (i=0; i < 
sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - found = 1; - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key_name); - RAISE(PyExc_ValueError, "unknown reg"); - } - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject * cpu_init_regs(JitCpu* self) -{ - memset(self->cpu, 0, sizeof(vm_cpu_t)); - - Py_INCREF(Py_None); - return Py_None; -} - -void dump_gpregs(vm_cpu_t* vmcpu) -{ - printf("X0 %.16"PRIX64" X1 %.16"PRIX64" X2 %.16"PRIX64" X3 %.16"PRIX64" "\ - "X4 %.16"PRIX64" X5 %.16"PRIX64" X6 %.16"PRIX64" X7 %.16"PRIX64"\n", - vmcpu->X0, vmcpu->X1, vmcpu->X2, vmcpu->X3, vmcpu->X4, vmcpu->X5, vmcpu->X6, vmcpu->X7); - printf("X8 %.16"PRIX64" X9 %.16"PRIX64" X10 %.16"PRIX64" X11 %.16"PRIX64" "\ - "X12 %.16"PRIX64" X13 %.16"PRIX64" X14 %.16"PRIX64" X15 %.16"PRIX64"\n", - vmcpu->X8, vmcpu->X9, vmcpu->X10, vmcpu->X11, - vmcpu->X12, vmcpu->X13, vmcpu->X14, vmcpu->X15); - printf("X16 %.16"PRIX64" X17 %.16"PRIX64" X18 %.16"PRIX64" X19 %.16"PRIX64" "\ - "X20 %.16"PRIX64" X21 %.16"PRIX64" X22 %.16"PRIX64" X23 %.16"PRIX64"\n", - vmcpu->X16, vmcpu->X17, vmcpu->X18, vmcpu->X19, - vmcpu->X20, vmcpu->X21, vmcpu->X22, vmcpu->X23); - printf("X24 %.16"PRIX64" X25 %.16"PRIX64" X26 %.16"PRIX64" X27 %.16"PRIX64" "\ - "X28 %.16"PRIX64" X29 %.16"PRIX64" LR %.16"PRIX64"\n", - vmcpu->X24, vmcpu->X25, vmcpu->X26, vmcpu->X27, - vmcpu->X28, vmcpu->X29, vmcpu->LR); - - - printf("SP %.16"PRIX64" PC %.16"PRIX64" "\ - "zf %"PRIX32" nf %"PRIX32" of %"PRIX32" cf %"PRIX32"\n", - vmcpu->SP, vmcpu->PC, - vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); -} - - -PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) -{ - vm_cpu_t* vmcpu; - - vmcpu = self->cpu; - dump_gpregs(vmcpu); - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) -{ - return cpu_dump_gpregs(self, args); -} - - -PyObject* 
cpu_set_exception(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_exception(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); -} - - - - - -void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) -{ - PyObject *result; - - if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) - return; - result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); - Py_DECREF(result); - -} - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 8); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 16); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 32); -} - -void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 64); -} - - -PyObject* vm_set_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = 
vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError,"arg must be str"); - check_automod(self, addr, size*8); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, - "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, - "X"}, - {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, - "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, - "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, - "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, - "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, - "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, - "X"}, - {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, - "X"}, - {NULL} /* Sentinel */ -}; - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) -{ - self->cpu = malloc(sizeof(vm_cpu_t)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc vm_cpu_t\n"); - exit(EXIT_FAILURE); - } - return 0; -} - - - -getset_reg_u64(X0); -getset_reg_u64(X1); -getset_reg_u64(X2); -getset_reg_u64(X3); -getset_reg_u64(X4); -getset_reg_u64(X5); -getset_reg_u64(X6); -getset_reg_u64(X7); -getset_reg_u64(X8); -getset_reg_u64(X9); -getset_reg_u64(X10); -getset_reg_u64(X11); -getset_reg_u64(X12); -getset_reg_u64(X13); -getset_reg_u64(X14); -getset_reg_u64(X15); -getset_reg_u64(X16); -getset_reg_u64(X17); -getset_reg_u64(X18); -getset_reg_u64(X19); -getset_reg_u64(X20); -getset_reg_u64(X21); -getset_reg_u64(X22); -getset_reg_u64(X23); -getset_reg_u64(X24); -getset_reg_u64(X25); -getset_reg_u64(X26); -getset_reg_u64(X27); -getset_reg_u64(X28); -getset_reg_u64(X29); -getset_reg_u64(LR); -getset_reg_u64(SP); -getset_reg_u64(PC); - -getset_reg_u32(zf); -getset_reg_u32(nf); -getset_reg_u32(of); 
-getset_reg_u32(cf); - - -getset_reg_u32(exception_flags); -getset_reg_u32(interrupt_num); - - -PyObject* get_gpreg_offset_all(void) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg_off(exception_flags); - - get_reg_off(X0); - get_reg_off(X1); - get_reg_off(X2); - get_reg_off(X3); - get_reg_off(X4); - get_reg_off(X5); - get_reg_off(X6); - get_reg_off(X7); - get_reg_off(X8); - get_reg_off(X9); - get_reg_off(X10); - get_reg_off(X11); - get_reg_off(X12); - get_reg_off(X13); - get_reg_off(X14); - get_reg_off(X15); - get_reg_off(X16); - get_reg_off(X17); - get_reg_off(X18); - get_reg_off(X19); - get_reg_off(X20); - get_reg_off(X21); - get_reg_off(X22); - get_reg_off(X23); - get_reg_off(X24); - get_reg_off(X25); - get_reg_off(X26); - get_reg_off(X27); - get_reg_off(X28); - get_reg_off(X29); - get_reg_off(LR); - get_reg_off(SP); - get_reg_off(PC); - - /* eflag */ - get_reg_off(zf); - get_reg_off(nf); - get_reg_off(of); - get_reg_off(cf); - - return dict; -} - - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - - - - {"X0" , (getter)JitCpu_get_X0 , (setter)JitCpu_set_X0 , "X0" , NULL}, - {"X1" , (getter)JitCpu_get_X1 , (setter)JitCpu_set_X1 , "X1" , NULL}, - {"X2" , (getter)JitCpu_get_X2 , (setter)JitCpu_set_X2 , "X2" , NULL}, - {"X3" , (getter)JitCpu_get_X3 , (setter)JitCpu_set_X3 , "X3" , NULL}, - {"X4" , (getter)JitCpu_get_X4 , (setter)JitCpu_set_X4 , "X4" , NULL}, - {"X5" , (getter)JitCpu_get_X5 , (setter)JitCpu_set_X5 , "X5" , NULL}, - {"X6" , (getter)JitCpu_get_X6 , (setter)JitCpu_set_X6 , "X6" , NULL}, - {"X7" , (getter)JitCpu_get_X7 , (setter)JitCpu_set_X7 , "X7" , NULL}, - {"X8" , (getter)JitCpu_get_X8 , (setter)JitCpu_set_X8 , "X8" , NULL}, - {"X9" , (getter)JitCpu_get_X9 , (setter)JitCpu_set_X9 , "X9" , NULL}, - - {"X10" , (getter)JitCpu_get_X10 , (setter)JitCpu_set_X10 , 
"X10" , NULL}, - {"X11" , (getter)JitCpu_get_X11 , (setter)JitCpu_set_X11 , "X11" , NULL}, - {"X12" , (getter)JitCpu_get_X12 , (setter)JitCpu_set_X12 , "X12" , NULL}, - {"X13" , (getter)JitCpu_get_X13 , (setter)JitCpu_set_X13 , "X13" , NULL}, - {"X14" , (getter)JitCpu_get_X14 , (setter)JitCpu_set_X14 , "X14" , NULL}, - {"X15" , (getter)JitCpu_get_X15 , (setter)JitCpu_set_X15 , "X15" , NULL}, - {"X16" , (getter)JitCpu_get_X16 , (setter)JitCpu_set_X16 , "X16" , NULL}, - {"X17" , (getter)JitCpu_get_X17 , (setter)JitCpu_set_X17 , "X17" , NULL}, - {"X18" , (getter)JitCpu_get_X18 , (setter)JitCpu_set_X18 , "X18" , NULL}, - {"X19" , (getter)JitCpu_get_X19 , (setter)JitCpu_set_X19 , "X19" , NULL}, - - {"X20" , (getter)JitCpu_get_X20 , (setter)JitCpu_set_X20 , "X20" , NULL}, - {"X21" , (getter)JitCpu_get_X21 , (setter)JitCpu_set_X21 , "X21" , NULL}, - {"X22" , (getter)JitCpu_get_X22 , (setter)JitCpu_set_X22 , "X22" , NULL}, - {"X23" , (getter)JitCpu_get_X23 , (setter)JitCpu_set_X23 , "X23" , NULL}, - {"X24" , (getter)JitCpu_get_X24 , (setter)JitCpu_set_X24 , "X24" , NULL}, - {"X25" , (getter)JitCpu_get_X25 , (setter)JitCpu_set_X25 , "X25" , NULL}, - {"X26" , (getter)JitCpu_get_X26 , (setter)JitCpu_set_X26 , "X26" , NULL}, - {"X27" , (getter)JitCpu_get_X27 , (setter)JitCpu_set_X27 , "X27" , NULL}, - {"X28" , (getter)JitCpu_get_X28 , (setter)JitCpu_set_X28 , "X28" , NULL}, - {"X29" , (getter)JitCpu_get_X29 , (setter)JitCpu_set_X29 , "X29" , NULL}, - - {"LR" , (getter)JitCpu_get_LR , (setter)JitCpu_set_LR , "LR" , NULL}, - - - - {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, - {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, - - {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, - {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, - {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, - {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, - - {"exception_flags", 
(getter)JitCpu_get_exception_flags, (setter)JitCpu_set_exception_flags, "exception_flags", NULL}, - {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, - - {NULL} /* Sentinel */ -}; - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_aarch64.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef JitCore_aarch64_Methods[] = { - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - - - -MOD_INIT(JitCore_aarch64) -{ - PyObject *module; - - MOD_DEF(module, "JitCore_aarch64", "JitCore_aarch64 module", JitCore_aarch64_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} - diff --git a/miasm2/jitter/arch/JitCore_aarch64.h b/miasm2/jitter/arch/JitCore_aarch64.h deleted file mode 100644 index fa958244..00000000 --- 
a/miasm2/jitter/arch/JitCore_aarch64.h +++ /dev/null @@ -1,57 +0,0 @@ - -typedef struct { - uint32_t exception_flags; - uint32_t interrupt_num; - - /* gpregs */ - - uint64_t X0; - uint64_t X1; - uint64_t X2; - uint64_t X3; - uint64_t X4; - uint64_t X5; - uint64_t X6; - uint64_t X7; - uint64_t X8; - uint64_t X9; - uint64_t X10; - uint64_t X11; - uint64_t X12; - uint64_t X13; - uint64_t X14; - uint64_t X15; - uint64_t X16; - uint64_t X17; - uint64_t X18; - uint64_t X19; - uint64_t X20; - uint64_t X21; - uint64_t X22; - uint64_t X23; - uint64_t X24; - uint64_t X25; - uint64_t X26; - uint64_t X27; - uint64_t X28; - uint64_t X29; - uint64_t LR; - uint64_t SP; - - uint64_t PC; - - /* eflag */ - uint32_t zf; - uint32_t nf; - uint32_t of; - uint32_t cf; -}vm_cpu_t; - -_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); -_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); - -#define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/arch/JitCore_arm.c b/miasm2/jitter/arch/JitCore_arm.c deleted file mode 100644 index 64f30cf4..00000000 --- a/miasm2/jitter/arch/JitCore_arm.c +++ /dev/null @@ -1,507 +0,0 @@ -#include -#include "structmember.h" -#include -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include "../JitCore.h" -#include "../op_semantics.h" -#include "JitCore_arm.h" - - - -reg_dict gpreg_dict[] = { - {.name = "R0", .offset = offsetof(vm_cpu_t, R0), .size = 32}, - {.name = "R1", .offset = offsetof(vm_cpu_t, R1), .size = 32}, - {.name = "R2", .offset = offsetof(vm_cpu_t, R2), .size = 32}, - {.name = "R3", .offset = offsetof(vm_cpu_t, R3), .size = 32}, - {.name = "R4", .offset = offsetof(vm_cpu_t, R4), .size = 32}, - 
{.name = "R5", .offset = offsetof(vm_cpu_t, R5), .size = 32}, - {.name = "R6", .offset = offsetof(vm_cpu_t, R6), .size = 32}, - {.name = "R7", .offset = offsetof(vm_cpu_t, R7), .size = 32}, - {.name = "R8", .offset = offsetof(vm_cpu_t, R8), .size = 32}, - {.name = "R9", .offset = offsetof(vm_cpu_t, R9), .size = 32}, - {.name = "R10", .offset = offsetof(vm_cpu_t, R10), .size = 32}, - {.name = "R11", .offset = offsetof(vm_cpu_t, R11), .size = 32}, - {.name = "R12", .offset = offsetof(vm_cpu_t, R12), .size = 32}, - {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 32}, - {.name = "LR", .offset = offsetof(vm_cpu_t, LR), .size = 32}, - {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 32}, - - {.name = "zf", .offset = offsetof(vm_cpu_t, zf), .size = 8}, - {.name = "nf", .offset = offsetof(vm_cpu_t, nf), .size = 8}, - {.name = "of", .offset = offsetof(vm_cpu_t, of), .size = 8}, - {.name = "cf", .offset = offsetof(vm_cpu_t, cf), .size = 8}, - - {.name = "ge0", .offset = offsetof(vm_cpu_t, ge0), .size = 8}, - {.name = "ge1", .offset = offsetof(vm_cpu_t, ge1), .size = 8}, - {.name = "ge2", .offset = offsetof(vm_cpu_t, ge2), .size = 8}, - {.name = "ge3", .offset = offsetof(vm_cpu_t, ge3), .size = 8}, - - {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, - {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, -}; - -/************************** JitCpu object **************************/ - - - - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg(R0); - get_reg(R1); - get_reg(R2); - get_reg(R3); - get_reg(R4); - get_reg(R5); - get_reg(R6); - get_reg(R7); - get_reg(R8); - get_reg(R9); - get_reg(R10); - get_reg(R11); - get_reg(R12); - get_reg(SP); - get_reg(LR); - get_reg(PC); - - get_reg(zf); - get_reg(nf); - get_reg(of); - get_reg(cf); - - get_reg(ge0); - get_reg(ge1); - get_reg(ge2); - get_reg(ge3); - - return dict; -} - - - -PyObject* 
cpu_set_gpreg(JitCpu* self, PyObject *args) -{ - PyObject* dict; - PyObject *d_key, *d_value = NULL; - Py_ssize_t pos = 0; - char* d_key_name; - uint64_t val; - unsigned int i, found; - - if (!PyArg_ParseTuple(args, "O", &dict)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - while(PyDict_Next(dict, &pos, &d_key, &d_value)){ - PyGetStr(d_key_name, d_key); - PyGetInt(d_value, val); - - found = 0; - for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - found = 1; - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key); - RAISE(PyExc_ValueError, "unknown reg"); - } - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject * cpu_init_regs(JitCpu* self) -{ - memset(self->cpu, 0, sizeof(vm_cpu_t)); - - Py_INCREF(Py_None); - return Py_None; -} - -void dump_gpregs(vm_cpu_t* vmcpu) -{ - printf("R0 %.8"PRIX32" R1 %.8"PRIX32" R2 %.8"PRIX32" R3 %.8"PRIX32" ", - vmcpu->R0, vmcpu->R1, vmcpu->R2, vmcpu->R3); - printf("R4 %.8"PRIX32" R5 %.8"PRIX32" R6 %.8"PRIX32" R7 %.8"PRIX32"\n", - vmcpu->R4, vmcpu->R5, vmcpu->R6, vmcpu->R7); - printf("R8 %.8"PRIX32" R9 %.8"PRIX32" R10 %.8"PRIX32" R11 %.8"PRIX32" ", - vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); - printf("R12 %.8"PRIX32" SP %.8"PRIX32" LR %.8"PRIX32" PC %.8"PRIX32" ", - vmcpu->R12, vmcpu->SP, vmcpu->LR, vmcpu->PC); - printf("zf %"PRIX32" nf %"PRIX32" of %"PRIX32" cf %"PRIX32"\n", - vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); -} - - -PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) -{ - vm_cpu_t* vmcpu; - - vmcpu = self->cpu; - dump_gpregs(vmcpu); - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) -{ - return cpu_dump_gpregs(self, args); -} - - - -PyObject* cpu_set_exception(JitCpu* self, PyObject* args) -{ - PyObject 
*item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_exception(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); -} - - - - - -void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) -{ - PyObject *result; - - if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) - return; - result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); - Py_DECREF(result); - -} - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 8); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 16); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 32); -} - -void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 64); -} - -PyObject* vm_set_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - 
RAISE(PyExc_TypeError,"arg must be str"); - check_automod(self, addr, size*8); - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_set_interrupt_num(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->interrupt_num = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_interrupt_num(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->interrupt_num)); -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, - "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, - "X"}, - {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, - "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, - "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, - "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, - "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, - "X"}, - {"get_interrupt_num", (PyCFunction)cpu_get_interrupt_num, METH_VARARGS, - "X"}, - {"set_interrupt_num", (PyCFunction)cpu_set_interrupt_num, METH_VARARGS, - "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, - "X"}, - {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, - "X"}, - {NULL} /* Sentinel */ -}; - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) -{ - self->cpu = malloc(sizeof(vm_cpu_t)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc vm_cpu_t\n"); - exit(EXIT_FAILURE); - } - return 0; -} - -getset_reg_u32(R0); -getset_reg_u32(R1); -getset_reg_u32(R2); -getset_reg_u32(R3); -getset_reg_u32(R4); -getset_reg_u32(R5); -getset_reg_u32(R6); -getset_reg_u32(R7); -getset_reg_u32(R8); -getset_reg_u32(R9); -getset_reg_u32(R10); 
-getset_reg_u32(R11); -getset_reg_u32(R12); -getset_reg_u32(SP); -getset_reg_u32(LR); -getset_reg_u32(PC); - -getset_reg_u32(zf); -getset_reg_u32(nf); -getset_reg_u32(of); -getset_reg_u32(cf); - -getset_reg_u32(ge0); -getset_reg_u32(ge1); -getset_reg_u32(ge2); -getset_reg_u32(ge3); - -getset_reg_u32(exception_flags); -getset_reg_u32(interrupt_num); - -PyObject* get_gpreg_offset_all(void) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg_off(exception_flags); - get_reg_off(interrupt_num); - - get_reg_off(R0); - get_reg_off(R1); - get_reg_off(R2); - get_reg_off(R3); - get_reg_off(R4); - get_reg_off(R5); - get_reg_off(R6); - get_reg_off(R7); - get_reg_off(R8); - get_reg_off(R9); - get_reg_off(R10); - get_reg_off(R11); - get_reg_off(R12); - get_reg_off(SP); - get_reg_off(LR); - get_reg_off(PC); - - /* eflag */ - get_reg_off(zf); - get_reg_off(nf); - get_reg_off(of); - get_reg_off(cf); - - get_reg_off(ge0); - get_reg_off(ge1); - get_reg_off(ge2); - get_reg_off(ge3); - - return dict; -} - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - - - - {"R0" , (getter)JitCpu_get_R0 , (setter)JitCpu_set_R0 , "R0" , NULL}, - {"R1" , (getter)JitCpu_get_R1 , (setter)JitCpu_set_R1 , "R1" , NULL}, - {"R2" , (getter)JitCpu_get_R2 , (setter)JitCpu_set_R2 , "R2" , NULL}, - {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, - {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, - {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, - {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, - {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, - {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, - {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, - {"R10", (getter)JitCpu_get_R10, 
(setter)JitCpu_set_R10, "R10", NULL}, - {"R11", (getter)JitCpu_get_R11, (setter)JitCpu_set_R11, "R11", NULL}, - {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL}, - {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, - {"LR" , (getter)JitCpu_get_LR , (setter)JitCpu_set_LR , "LR" , NULL}, - {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, - - {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, - {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, - {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, - {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, - - {"ge0", (getter)JitCpu_get_ge0, (setter)JitCpu_set_ge0, "ge0", NULL}, - {"ge1", (getter)JitCpu_get_ge1, (setter)JitCpu_set_ge1, "ge1", NULL}, - {"ge2", (getter)JitCpu_get_ge2, (setter)JitCpu_set_ge2, "ge2", NULL}, - {"ge3", (getter)JitCpu_get_ge3, (setter)JitCpu_set_ge3, "ge3", NULL}, - - {"exception_flags", (getter)JitCpu_get_exception_flags, (setter)JitCpu_set_exception_flags, "exception_flags", NULL}, - {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, - - {NULL} /* Sentinel */ -}; - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_arm.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* 
tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef JitCore_arm_Methods[] = { - - /* - - */ - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - - - -MOD_INIT(JitCore_arm) -{ - PyObject *module; - - MOD_DEF(module, "JitCore_arm", "JitCore_arm module", JitCore_arm_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} - diff --git a/miasm2/jitter/arch/JitCore_arm.h b/miasm2/jitter/arch/JitCore_arm.h deleted file mode 100644 index 67a1096a..00000000 --- a/miasm2/jitter/arch/JitCore_arm.h +++ /dev/null @@ -1,47 +0,0 @@ - -typedef struct { - uint32_t exception_flags; - uint32_t interrupt_num; - - /* gpregs */ - uint32_t R0; - uint32_t R1; - uint32_t R2; - uint32_t R3; - uint32_t R4; - uint32_t R5; - uint32_t R6; - uint32_t R7; - uint32_t R8; - uint32_t R9; - uint32_t R10; - uint32_t R11; - uint32_t R12; - uint32_t SP; - uint32_t LR; - uint32_t PC; - - /* eflag */ - uint32_t zf; - uint32_t nf; - uint32_t of; - uint32_t cf; - - /* ge */ - uint32_t ge0; - uint32_t ge1; - uint32_t ge2; - uint32_t ge3; - - uint32_t bp_num; -}vm_cpu_t; - - -_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); -_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); - -#define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/arch/JitCore_mep.c 
b/miasm2/jitter/arch/JitCore_mep.c deleted file mode 100644 index 6e7f1767..00000000 --- a/miasm2/jitter/arch/JitCore_mep.c +++ /dev/null @@ -1,617 +0,0 @@ -// Inspired from JitCore_mep.c - -#include -#include "structmember.h" -#include - -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include "../JitCore.h" -#include "JitCore_mep.h" - - -reg_dict gpreg_dict[] = { - {.name = "R0", .offset = offsetof(vm_cpu_t, R0), .size = 32}, - {.name = "R1", .offset = offsetof(vm_cpu_t, R1), .size = 32}, - {.name = "R2", .offset = offsetof(vm_cpu_t, R2), .size = 32}, - {.name = "R3", .offset = offsetof(vm_cpu_t, R3), .size = 32}, - {.name = "R4", .offset = offsetof(vm_cpu_t, R4), .size = 32}, - {.name = "R5", .offset = offsetof(vm_cpu_t, R5), .size = 32}, - {.name = "R6", .offset = offsetof(vm_cpu_t, R6), .size = 32}, - {.name = "R7", .offset = offsetof(vm_cpu_t, R7), .size = 32}, - {.name = "R8", .offset = offsetof(vm_cpu_t, R8), .size = 32}, - {.name = "R9", .offset = offsetof(vm_cpu_t, R9), .size = 32}, - {.name = "R10", .offset = offsetof(vm_cpu_t, R10), .size = 32}, - {.name = "R11", .offset = offsetof(vm_cpu_t, R11), .size = 32}, - {.name = "R12", .offset = offsetof(vm_cpu_t, R12), .size = 32}, - {.name = "TP", .offset = offsetof(vm_cpu_t, TP), .size = 32}, - {.name = "GP", .offset = offsetof(vm_cpu_t, GP), .size = 32}, - {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 32}, - - {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 32}, - {.name = "LP", .offset = offsetof(vm_cpu_t, LP), .size = 32}, - {.name = "SAR", .offset = offsetof(vm_cpu_t, SAR), .size = 32}, - {.name = "S3", .offset = offsetof(vm_cpu_t, S3), .size = 32}, - {.name = "RPB", .offset = offsetof(vm_cpu_t, RPB), .size = 32}, - {.name = "RPE", .offset = offsetof(vm_cpu_t, RPE), .size = 32}, - {.name = "RPC", .offset = offsetof(vm_cpu_t, RPC), .size = 32}, - {.name = "HI", .offset = offsetof(vm_cpu_t, HI), 
.size = 32}, - {.name = "LO", .offset = offsetof(vm_cpu_t, LO), .size = 32}, - {.name = "S9", .offset = offsetof(vm_cpu_t, S9), .size = 32}, - {.name = "S10", .offset = offsetof(vm_cpu_t, S10), .size = 32}, - {.name = "S11", .offset = offsetof(vm_cpu_t, S11), .size = 32}, - {.name = "MB0", .offset = offsetof(vm_cpu_t, MB0), .size = 32}, - {.name = "ME0", .offset = offsetof(vm_cpu_t, ME0), .size = 32}, - {.name = "MB1", .offset = offsetof(vm_cpu_t, MB1), .size = 32}, - {.name = "ME1", .offset = offsetof(vm_cpu_t, ME1), .size = 32}, - {.name = "PSW", .offset = offsetof(vm_cpu_t, PSW), .size = 32}, - {.name = "ID", .offset = offsetof(vm_cpu_t, ID), .size = 32}, - {.name = "TMP", .offset = offsetof(vm_cpu_t, TMP), .size = 32}, - {.name = "EPC", .offset = offsetof(vm_cpu_t, EPC), .size = 32}, - {.name = "EXC", .offset = offsetof(vm_cpu_t, EXC), .size = 32}, - {.name = "CFG", .offset = offsetof(vm_cpu_t, CFG), .size = 32}, - {.name = "S22", .offset = offsetof(vm_cpu_t, S22), .size = 32}, - {.name = "NPC", .offset = offsetof(vm_cpu_t, NPC), .size = 32}, - {.name = "DBG", .offset = offsetof(vm_cpu_t, DBG), .size = 32}, - {.name = "DEPC", .offset = offsetof(vm_cpu_t, DEPC), .size = 32}, - {.name = "OPT", .offset = offsetof(vm_cpu_t, OPT), .size = 32}, - {.name = "RCFG", .offset = offsetof(vm_cpu_t, RCFG), .size = 32}, - {.name = "CCFG", .offset = offsetof(vm_cpu_t, CCFG), .size = 32}, - {.name = "S29", .offset = offsetof(vm_cpu_t, S29), .size = 32}, - {.name = "S30", .offset = offsetof(vm_cpu_t, S30), .size = 32}, - {.name = "S31", .offset = offsetof(vm_cpu_t, S31), .size = 32}, - {.name = "S32", .offset = offsetof(vm_cpu_t, S32), .size = 32}, - {.name = "take_jmp", .offset = offsetof(vm_cpu_t, take_jmp), .size = 32}, - {.name = "last_addr", .offset = offsetof(vm_cpu_t, last_addr), .size = 32}, - {.name = "is_repeat_end", .offset = offsetof(vm_cpu_t, is_repeat_end), .size = 32}, - - {.name = "PC_end", .offset = offsetof(vm_cpu_t, PC_end), .size = 32}, - {.name = 
"RPE_instr_count", .offset = offsetof(vm_cpu_t, RPE_instr_count), .size = 32}, - {.name = "RPC_current", .offset = offsetof(vm_cpu_t, RPC_current), .size = 32}, - -}; - -/************************** JitCpu object **************************/ - - - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg(R0); - get_reg(R1); - get_reg(R2); - get_reg(R3); - get_reg(R4); - get_reg(R5); - get_reg(R6); - get_reg(R7); - get_reg(R8); - get_reg(R9); - get_reg(R10); - get_reg(R11); - get_reg(R12); - get_reg(TP); - get_reg(GP); - get_reg(SP); - - get_reg(PC); - get_reg(LP); - get_reg(SAR); - get_reg(S3); - get_reg(RPB); - get_reg(RPE); - get_reg(RPC); - get_reg(HI); - get_reg(LO); - get_reg(S9); - get_reg(S10); - get_reg(S11); - get_reg(MB0); - get_reg(ME0); - get_reg(MB1); - get_reg(ME1); - get_reg(PSW); - get_reg(ID); - get_reg(TMP); - get_reg(EPC); - get_reg(EXC); - get_reg(CFG); - get_reg(S22); - get_reg(NPC); - get_reg(DBG); - get_reg(DEPC); - get_reg(OPT); - get_reg(RCFG); - get_reg(CCFG); - get_reg(S29); - get_reg(S30); - get_reg(S31); - get_reg(S32); - - get_reg(PC_end); - get_reg(RPE_instr_count); - get_reg(RPC_current); - - - return dict; -} - - -PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) -{ - PyObject* dict; - PyObject *d_key, *d_value = NULL; - Py_ssize_t pos = 0; - char* d_key_name; - uint64_t val; - unsigned int i, found; - - if (!PyArg_ParseTuple(args, "O", &dict)) - return NULL; - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - while(PyDict_Next(dict, &pos, &d_key, &d_value)){ - PyGetStr(d_key_name, d_key); - PyGetInt(d_value, val); - - found = 0; - for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - found = 1; - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key_name); - RAISE(PyExc_ValueError, "unknown reg"); - } - 
Py_INCREF(Py_None); - return Py_None; -} - - - - -PyObject * cpu_init_regs(JitCpu* self) -{ - memset(self->cpu, 0, sizeof(vm_cpu_t)); - - Py_INCREF(Py_None); - return Py_None; - -} - -void dump_gpregs(vm_cpu_t* vmcpu) -{ - printf("R0 %.4"PRIX32" ", vmcpu->R0); - printf("R1 %.4"PRIX32" ", vmcpu->R1); - printf("R2 %.4"PRIX32" ", vmcpu->R2); - printf("R3 %.4"PRIX32" ", vmcpu->R3); - printf("R4 %.4"PRIX32" ", vmcpu->R4); - printf("R5 %.4"PRIX32" ", vmcpu->R5); - printf("R6 %.4"PRIX32" ", vmcpu->R6); - printf("R7 %.4"PRIX32" ", vmcpu->R7); - printf("R8 %.4"PRIX32" ", vmcpu->R8); - printf("R9 %.4"PRIX32" ", vmcpu->R9); - printf("R10 %.4"PRIX32" ", vmcpu->R10); - printf("R11 %.4"PRIX32" ", vmcpu->R11); - printf("R12 %.4"PRIX32" ", vmcpu->R12); - printf("TP %.4"PRIX32" ", vmcpu->TP); - printf("GP %.4"PRIX32" ", vmcpu->GP); - printf("SP %.4"PRIX32" ", vmcpu->SP); - printf("\n"); -} - - -PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) -{ - vm_cpu_t* vmcpu; - - vmcpu = self->cpu; - dump_gpregs(vmcpu); - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) -{ - return cpu_dump_gpregs(self, args); -} - -PyObject* cpu_set_exception(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - return NULL; - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_exception(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); -} - -void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) -{ - PyObject *result; - - if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) - return; - result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); - Py_DECREF(result); - -} - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - 
vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 8); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 16); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 32); -} - -void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 64); -} - - -PyObject* vm_set_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret = 0x1337; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - return NULL; - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError,"arg must be str"); - check_automod(self, addr, size*8); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, "X"}, - {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, "X"}, - {"get_mem", 
(PyCFunction)vm_get_mem, METH_VARARGS, "X"}, - {NULL} /* Sentinel */ -}; - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) -{ - self->cpu = malloc(sizeof(vm_cpu_t)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc vm_cpu_t\n"); - exit(0); - } - return 0; -} - -getset_reg_u32(R0); -getset_reg_u32(R1); -getset_reg_u32(R2); -getset_reg_u32(R3); -getset_reg_u32(R4); -getset_reg_u32(R5); -getset_reg_u32(R6); -getset_reg_u32(R7); -getset_reg_u32(R8); -getset_reg_u32(R9); -getset_reg_u32(R10); -getset_reg_u32(R11); -getset_reg_u32(R12); -getset_reg_u32(TP); -getset_reg_u32(GP); -getset_reg_u32(SP); - -getset_reg_u32(PC); -getset_reg_u32(LP); -getset_reg_u32(SAR); -getset_reg_u32(S3); -getset_reg_u32(RPB); -getset_reg_u32(RPE); -getset_reg_u32(RPC); -getset_reg_u32(HI); -getset_reg_u32(LO); -getset_reg_u32(S9); -getset_reg_u32(S10); -getset_reg_u32(S11); -getset_reg_u32(MB0); -getset_reg_u32(ME0); -getset_reg_u32(MB1); -getset_reg_u32(ME1); -getset_reg_u32(PSW); -getset_reg_u32(ID); -getset_reg_u32(TMP); -getset_reg_u32(EPC); -getset_reg_u32(EXC); -getset_reg_u32(CFG); -getset_reg_u32(S22); -getset_reg_u32(NPC); -getset_reg_u32(DBG); -getset_reg_u32(DEPC); -getset_reg_u32(OPT); -getset_reg_u32(RCFG); -getset_reg_u32(CCFG); -getset_reg_u32(S29); -getset_reg_u32(S30); -getset_reg_u32(S31); -getset_reg_u32(S32); - -getset_reg_u32(PC_end); -getset_reg_u32(RPE_instr_count); -getset_reg_u32(RPC_current); - - - -PyObject* get_gpreg_offset_all(void) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - get_reg_off(exception_flags); - - get_reg_off(R0); - get_reg_off(R1); - get_reg_off(R2); - get_reg_off(R3); - get_reg_off(R4); - get_reg_off(R5); - get_reg_off(R6); - get_reg_off(R7); - get_reg_off(R8); - get_reg_off(R9); - get_reg_off(R10); - get_reg_off(R11); - get_reg_off(R12); - get_reg_off(TP); - get_reg_off(GP); - get_reg_off(SP); - - get_reg_off(PC); - get_reg_off(LP); - get_reg_off(SAR); - get_reg_off(S3); - get_reg_off(RPB); - 
get_reg_off(RPE); - get_reg_off(RPC); - get_reg_off(HI); - get_reg_off(LO); - get_reg_off(S9); - get_reg_off(S10); - get_reg_off(S11); - get_reg_off(MB0); - get_reg_off(ME0); - get_reg_off(MB1); - get_reg_off(ME1); - get_reg_off(PSW); - get_reg_off(ID); - get_reg_off(TMP); - get_reg_off(EPC); - get_reg_off(EXC); - get_reg_off(CFG); - get_reg_off(S22); - get_reg_off(NPC); - get_reg_off(DBG); - get_reg_off(DEPC); - get_reg_off(OPT); - get_reg_off(RCFG); - get_reg_off(CCFG); - get_reg_off(S29); - get_reg_off(S30); - get_reg_off(S31); - get_reg_off(S32); - - get_reg_off(PC_end); - get_reg_off(RPE_instr_count); - get_reg_off(RPC_current); - - - return dict; -} - - - - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - - - {"R0" , (getter)JitCpu_get_R0 , (setter)JitCpu_set_R0 , "R0" , NULL}, - {"R1" , (getter)JitCpu_get_R1 , (setter)JitCpu_set_R1 , "R1" , NULL}, - {"R2" , (getter)JitCpu_get_R2 , (setter)JitCpu_set_R2 , "R2" , NULL}, - {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, - {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, - {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, - {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, - {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, - {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, - {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, - {"R10" , (getter)JitCpu_get_R10 , (setter)JitCpu_set_R10 , "R10" , NULL}, - {"R11" , (getter)JitCpu_get_R11 , (setter)JitCpu_set_R11 , "R11" , NULL}, - {"R12" , (getter)JitCpu_get_R12 , (setter)JitCpu_set_R12 , "R12" , NULL}, - {"TP" , (getter)JitCpu_get_TP , (setter)JitCpu_set_TP , "TP" , NULL}, - {"GP" , (getter)JitCpu_get_GP , (setter)JitCpu_set_GP , "GP" , NULL}, - 
{"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, - - {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, - {"LP" , (getter)JitCpu_get_LP , (setter)JitCpu_set_LP , "LP" , NULL}, - {"SAR" , (getter)JitCpu_get_SAR , (setter)JitCpu_set_SAR , "SAR" , NULL}, - {"S3" , (getter)JitCpu_get_S3 , (setter)JitCpu_set_S3 , "S3" , NULL}, - {"RPB" , (getter)JitCpu_get_RPB , (setter)JitCpu_set_RPB , "RPB" , NULL}, - {"RPE" , (getter)JitCpu_get_RPE , (setter)JitCpu_set_RPE , "RPE" , NULL}, - {"RPC" , (getter)JitCpu_get_RPC , (setter)JitCpu_set_RPC , "RPC" , NULL}, - {"HI" , (getter)JitCpu_get_HI , (setter)JitCpu_set_HI , "HI" , NULL}, - {"LO" , (getter)JitCpu_get_LO , (setter)JitCpu_set_LO , "LO" , NULL}, - {"S9" , (getter)JitCpu_get_S9 , (setter)JitCpu_set_S9 , "S9" , NULL}, - {"S10" , (getter)JitCpu_get_S10 , (setter)JitCpu_set_S10 , "S10" , NULL}, - {"S11" , (getter)JitCpu_get_S11 , (setter)JitCpu_set_S11 , "S11" , NULL}, - {"MB0" , (getter)JitCpu_get_MB0 , (setter)JitCpu_set_MB0 , "MB0" , NULL}, - {"ME0" , (getter)JitCpu_get_ME0 , (setter)JitCpu_set_ME0 , "ME0" , NULL}, - {"MB1" , (getter)JitCpu_get_MB1 , (setter)JitCpu_set_MB1 , "MB1" , NULL}, - {"ME1" , (getter)JitCpu_get_ME1 , (setter)JitCpu_set_ME1 , "ME1" , NULL}, - {"PSW" , (getter)JitCpu_get_PSW , (setter)JitCpu_set_PSW , "PSW" , NULL}, - {"ID" , (getter)JitCpu_get_ID , (setter)JitCpu_set_ID , "ID" , NULL}, - {"TMP" , (getter)JitCpu_get_TMP , (setter)JitCpu_set_TMP , "TMP" , NULL}, - {"EPC" , (getter)JitCpu_get_EPC , (setter)JitCpu_set_EPC , "EPC" , NULL}, - {"EXC" , (getter)JitCpu_get_EXC , (setter)JitCpu_set_EXC , "EXC" , NULL}, - {"CFG" , (getter)JitCpu_get_CFG , (setter)JitCpu_set_CFG , "CFG" , NULL}, - {"S22" , (getter)JitCpu_get_S22 , (setter)JitCpu_set_S22 , "S22" , NULL}, - {"NPC" , (getter)JitCpu_get_NPC , (setter)JitCpu_set_NPC , "NPC" , NULL}, - {"DBG" , (getter)JitCpu_get_DBG , (setter)JitCpu_set_DBG , "DBG" , NULL}, - {"DEPC" , (getter)JitCpu_get_DEPC , 
(setter)JitCpu_set_DEPC , "DEPC" , NULL}, - {"OPT" , (getter)JitCpu_get_OPT , (setter)JitCpu_set_OPT , "OPT" , NULL}, - {"RCFG" , (getter)JitCpu_get_RCFG , (setter)JitCpu_set_RCFG , "RCFG" , NULL}, - {"CCFG" , (getter)JitCpu_get_CCFG , (setter)JitCpu_set_CCFG , "CCFG" , NULL}, - {"S29" , (getter)JitCpu_get_S29 , (setter)JitCpu_set_S29 , "S29" , NULL}, - {"S30" , (getter)JitCpu_get_S30 , (setter)JitCpu_set_S30 , "S30" , NULL}, - {"S31" , (getter)JitCpu_get_S31 , (setter)JitCpu_set_S31 , "S31" , NULL}, - {"S32" , (getter)JitCpu_get_S32 , (setter)JitCpu_set_S32 , "S32" , NULL}, - - {"PC_end" , (getter)JitCpu_get_PC_end , (setter)JitCpu_set_PC_end , "PC_end" , NULL}, - {"RPE_instr_count" , (getter)JitCpu_get_RPE_instr_count , (setter)JitCpu_set_RPE_instr_count , "RPE_instr_count" , NULL}, - {"RPC_current" , (getter)JitCpu_get_RPC_current , (setter)JitCpu_set_RPC_current , "RPC_current" , NULL}, - - - - {NULL} /* Sentinel */ -}; - - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_mep.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef 
JitCore_mep_Methods[] = { - - /* - - */ - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - - - -MOD_INIT(JitCore_mep) -{ - PyObject *module; - - MOD_DEF(module, "JitCore_mep", "JitCore_mep module", JitCore_mep_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} diff --git a/miasm2/jitter/arch/JitCore_mep.h b/miasm2/jitter/arch/JitCore_mep.h deleted file mode 100644 index 0148cd13..00000000 --- a/miasm2/jitter/arch/JitCore_mep.h +++ /dev/null @@ -1,82 +0,0 @@ -// Inspired from JitCore_msp430.h - -typedef struct { - /* miasm2 flags */ - uint32_t exception_flags; - - /* gpregs */ - uint32_t R0; - uint32_t R1; - uint32_t R2; - uint32_t R3; - uint32_t R4; - uint32_t R5; - uint32_t R6; - uint32_t R7; - uint32_t R8; - uint32_t R9; - uint32_t R10; - uint32_t R11; - uint32_t R12; - uint32_t TP; - uint32_t GP; - uint32_t SP; - - /* csregs */ - uint32_t PC; - uint32_t LP; - uint32_t SAR; - uint32_t S3; - uint32_t RPB; - uint32_t RPE; - uint32_t RPC; - uint32_t HI; - uint32_t LO; - uint32_t S9; - uint32_t S10; - uint32_t S11; - uint32_t MB0; - uint32_t ME0; - uint32_t MB1; - uint32_t ME1; - uint32_t PSW; - uint32_t ID; - uint32_t TMP; - uint32_t EPC; - uint32_t EXC; - uint32_t CFG; - uint32_t S22; - uint32_t NPC; - uint32_t DBG; - uint32_t DEPC; - uint32_t OPT; - uint32_t RCFG; - uint32_t CCFG; - uint32_t S29; - uint32_t S30; - uint32_t S31; - uint32_t S32; - - /* miasm2 specific regs */ - uint32_t PC_end; - uint32_t RPE_instr_count; - uint32_t RPC_current; - - - uint32_t take_jmp; - uint32_t last_addr; - uint32_t is_repeat_end; - uint32_t in_erepeat; - - /* flags */ - -} vm_cpu_t; - -_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); 
-_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); - -#define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/arch/JitCore_mips32.c b/miasm2/jitter/arch/JitCore_mips32.c deleted file mode 100644 index 1455fec9..00000000 --- a/miasm2/jitter/arch/JitCore_mips32.c +++ /dev/null @@ -1,531 +0,0 @@ -#include -#include "structmember.h" -#include -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include "../JitCore.h" -#include "../op_semantics.h" -#include "JitCore_mips32.h" - - - -reg_dict gpreg_dict[] = { {.name = "ZERO", .offset = offsetof(vm_cpu_t, ZERO), .size = 32}, - {.name = "AT", .offset = offsetof(vm_cpu_t, AT), .size = 32}, - {.name = "V0", .offset = offsetof(vm_cpu_t, V0), .size = 32}, - {.name = "V1", .offset = offsetof(vm_cpu_t, V1), .size = 32}, - {.name = "A0", .offset = offsetof(vm_cpu_t, A0), .size = 32}, - {.name = "A1", .offset = offsetof(vm_cpu_t, A1), .size = 32}, - {.name = "A2", .offset = offsetof(vm_cpu_t, A2), .size = 32}, - {.name = "A3", .offset = offsetof(vm_cpu_t, A3), .size = 32}, - {.name = "T0", .offset = offsetof(vm_cpu_t, T0), .size = 32}, - {.name = "T1", .offset = offsetof(vm_cpu_t, T1), .size = 32}, - {.name = "T2", .offset = offsetof(vm_cpu_t, T2), .size = 32}, - {.name = "T3", .offset = offsetof(vm_cpu_t, T3), .size = 32}, - {.name = "T4", .offset = offsetof(vm_cpu_t, T4), .size = 32}, - {.name = "T5", .offset = offsetof(vm_cpu_t, T5), .size = 32}, - {.name = "T6", .offset = offsetof(vm_cpu_t, T6), .size = 32}, - {.name = "T7", .offset = offsetof(vm_cpu_t, T7), .size = 32}, - {.name = "S0", .offset = offsetof(vm_cpu_t, S0), .size = 32}, - {.name = "S1", .offset = offsetof(vm_cpu_t, S1), .size = 32}, - {.name = "S2", .offset = offsetof(vm_cpu_t, S2), .size = 
32}, - {.name = "S3", .offset = offsetof(vm_cpu_t, S3), .size = 32}, - {.name = "S4", .offset = offsetof(vm_cpu_t, S4), .size = 32}, - {.name = "S5", .offset = offsetof(vm_cpu_t, S5), .size = 32}, - {.name = "S6", .offset = offsetof(vm_cpu_t, S6), .size = 32}, - {.name = "S7", .offset = offsetof(vm_cpu_t, S7), .size = 32}, - {.name = "T8", .offset = offsetof(vm_cpu_t, T8), .size = 32}, - {.name = "T9", .offset = offsetof(vm_cpu_t, T9), .size = 32}, - {.name = "K0", .offset = offsetof(vm_cpu_t, K0), .size = 32}, - {.name = "K1", .offset = offsetof(vm_cpu_t, K1), .size = 32}, - {.name = "GP", .offset = offsetof(vm_cpu_t, GP), .size = 32}, - {.name = "SP", .offset = offsetof(vm_cpu_t, SP), .size = 32}, - {.name = "FP", .offset = offsetof(vm_cpu_t, FP), .size = 32}, - {.name = "RA", .offset = offsetof(vm_cpu_t, RA), .size = 32}, - {.name = "PC", .offset = offsetof(vm_cpu_t, PC), .size = 32}, - {.name = "PC_FETCH", .offset = offsetof(vm_cpu_t, PC_FETCH), .size = 32}, - {.name = "R_LO", .offset = offsetof(vm_cpu_t, R_LO), .size = 32}, - {.name = "R_HI", .offset = offsetof(vm_cpu_t, R_HI), .size = 32}, -}; - -/************************** JitCpu object **************************/ - - - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg(ZERO); - get_reg(AT); - get_reg(V0); - get_reg(V1); - get_reg(A0); - get_reg(A1); - get_reg(A2); - get_reg(A3); - get_reg(T0); - get_reg(T1); - get_reg(T2); - get_reg(T3); - get_reg(T4); - get_reg(T5); - get_reg(T6); - get_reg(T7); - get_reg(S0); - get_reg(S1); - get_reg(S2); - get_reg(S3); - get_reg(S4); - get_reg(S5); - get_reg(S6); - get_reg(S7); - get_reg(T8); - get_reg(T9); - get_reg(K0); - get_reg(K1); - get_reg(GP); - get_reg(SP); - get_reg(FP); - get_reg(RA); - get_reg(PC); - get_reg(PC_FETCH); - get_reg(R_LO); - get_reg(R_HI); - - return dict; -} - - - - -PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) -{ - PyObject* dict; - PyObject *d_key, *d_value = NULL; - Py_ssize_t pos = 
0; - char* d_key_name; - uint64_t val; - unsigned int i, found; - - if (!PyArg_ParseTuple(args, "O", &dict)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - while(PyDict_Next(dict, &pos, &d_key, &d_value)){ - PyGetStr(d_key_name, d_key); - PyGetInt(d_value, val); - - found = 0; - for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - found = 1; - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key_name); - RAISE(PyExc_ValueError, "unknown reg"); - } - Py_INCREF(Py_None); - return Py_None; -} - - - - -PyObject * cpu_init_regs(JitCpu* self) -{ - memset(self->cpu, 0, sizeof(vm_cpu_t)); - - Py_INCREF(Py_None); - return Py_None; - -} - - -void dump_gpregs(vm_cpu_t* vmcpu) -{ - - printf("ZR %.8"PRIX32" AT %.8"PRIX32" V0 %.8"PRIX32" V1 %.8"PRIX32" ", - vmcpu->ZERO, vmcpu->AT, vmcpu->V0, vmcpu->V1); - printf("A0 %.8"PRIX32" A1 %.8"PRIX32" A2 %.8"PRIX32" A3 %.8"PRIX32" ", - vmcpu->A0, vmcpu->A1, vmcpu->A2, vmcpu->A3); - printf("T0 %.8"PRIX32" T1 %.8"PRIX32" T2 %.8"PRIX32" T3 %.8"PRIX32" ", - vmcpu->T0, vmcpu->T1, vmcpu->T2, vmcpu->T3); - printf("T4 %.8"PRIX32" T5 %.8"PRIX32" T6 %.8"PRIX32" T7 %.8"PRIX32"\n", - vmcpu->T4, vmcpu->T5, vmcpu->T6, vmcpu->T7); - printf("S0 %.8"PRIX32" S1 %.8"PRIX32" S2 %.8"PRIX32" S3 %.8"PRIX32" ", - vmcpu->S0, vmcpu->S1, vmcpu->S2, vmcpu->S3); - printf("S4 %.8"PRIX32" S5 %.8"PRIX32" S6 %.8"PRIX32" S7 %.8"PRIX32" ", - vmcpu->S4, vmcpu->S5, vmcpu->S6, vmcpu->S7); - printf("T8 %.8"PRIX32" T9 %.8"PRIX32" K0 %.8"PRIX32" K1 %.8"PRIX32" ", - vmcpu->T8, vmcpu->T9, vmcpu->K0, vmcpu->K1); - printf("GP %.8"PRIX32" SP %.8"PRIX32" FP %.8"PRIX32" RA %.8"PRIX32"\n", - vmcpu->GP, vmcpu->SP, vmcpu->FP, vmcpu->RA); - printf("PC %.8"PRIX32"\n", - vmcpu->PC); -} - - -PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) -{ - 
vm_cpu_t* vmcpu; - - vmcpu = self->cpu; - dump_gpregs(vmcpu); - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject* cpu_set_exception(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_exception(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); -} - - - - - - -void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) -{ - PyObject *result; - - if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) - return; - result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); - Py_DECREF(result); - -} - - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 8); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 16); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 32); -} - -void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 64); -} - - -PyObject* vm_set_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = 
PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError,"arg must be str"); - check_automod(self, addr, size*8); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, - "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, - "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, - "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, - "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, - "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, - "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, - "X"}, - {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, - "X"}, - {NULL} /* Sentinel */ -}; - - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) -{ - self->cpu = malloc(sizeof(vm_cpu_t)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc vm_cpu_t\n"); - exit(EXIT_FAILURE); - } - return 0; -} - -getset_reg_u32(ZERO); -getset_reg_u32(AT); -getset_reg_u32(V0); -getset_reg_u32(V1); -getset_reg_u32(A0); -getset_reg_u32(A1); -getset_reg_u32(A2); -getset_reg_u32(A3); -getset_reg_u32(T0); -getset_reg_u32(T1); -getset_reg_u32(T2); -getset_reg_u32(T3); -getset_reg_u32(T4); -getset_reg_u32(T5); -getset_reg_u32(T6); -getset_reg_u32(T7); -getset_reg_u32(S0); -getset_reg_u32(S1); -getset_reg_u32(S2); -getset_reg_u32(S3); -getset_reg_u32(S4); -getset_reg_u32(S5); -getset_reg_u32(S6); -getset_reg_u32(S7); -getset_reg_u32(T8); -getset_reg_u32(T9); -getset_reg_u32(K0); -getset_reg_u32(K1); -getset_reg_u32(GP); -getset_reg_u32(SP); -getset_reg_u32(FP); -getset_reg_u32(RA); -getset_reg_u32(PC); -getset_reg_u32(PC_FETCH); -getset_reg_u32(R_LO); -getset_reg_u32(R_HI); - - -PyObject* 
get_gpreg_offset_all(void) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg_off(exception_flags); - - - get_reg_off(ZERO); - get_reg_off(AT); - get_reg_off(V0); - get_reg_off(V1); - get_reg_off(A0); - get_reg_off(A1); - get_reg_off(A2); - get_reg_off(A3); - get_reg_off(T0); - get_reg_off(T1); - get_reg_off(T2); - get_reg_off(T3); - get_reg_off(T4); - get_reg_off(T5); - get_reg_off(T6); - get_reg_off(T7); - get_reg_off(S0); - get_reg_off(S1); - get_reg_off(S2); - get_reg_off(S3); - get_reg_off(S4); - get_reg_off(S5); - get_reg_off(S6); - get_reg_off(S7); - get_reg_off(T8); - get_reg_off(T9); - get_reg_off(K0); - get_reg_off(K1); - get_reg_off(GP); - get_reg_off(SP); - get_reg_off(FP); - get_reg_off(RA); - get_reg_off(PC); - get_reg_off(PC_FETCH); - get_reg_off(R_LO); - get_reg_off(R_HI); - - return dict; -} - - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - - {"ZERO" , (getter)JitCpu_get_ZERO , (setter)JitCpu_set_ZERO , "ZERO" , NULL}, - {"AT" , (getter)JitCpu_get_AT , (setter)JitCpu_set_AT , "AT" , NULL}, - {"V0" , (getter)JitCpu_get_V0 , (setter)JitCpu_set_V0 , "V0" , NULL}, - {"V1" , (getter)JitCpu_get_V1 , (setter)JitCpu_set_V1 , "V1" , NULL}, - {"A0" , (getter)JitCpu_get_A0 , (setter)JitCpu_set_A0 , "A0" , NULL}, - {"A1" , (getter)JitCpu_get_A1 , (setter)JitCpu_set_A1 , "A1" , NULL}, - {"A2" , (getter)JitCpu_get_A2 , (setter)JitCpu_set_A2 , "A2" , NULL}, - {"A3" , (getter)JitCpu_get_A3 , (setter)JitCpu_set_A3 , "A3" , NULL}, - {"T0" , (getter)JitCpu_get_T0 , (setter)JitCpu_set_T0 , "T0" , NULL}, - {"T1" , (getter)JitCpu_get_T1 , (setter)JitCpu_set_T1 , "T1" , NULL}, - {"T2" , (getter)JitCpu_get_T2 , (setter)JitCpu_set_T2 , "T2" , NULL}, - {"T3" , (getter)JitCpu_get_T3 , (setter)JitCpu_set_T3 , "T3" , NULL}, - {"T4" , (getter)JitCpu_get_T4 , (setter)JitCpu_set_T4 , "T4" , 
NULL}, - {"T5" , (getter)JitCpu_get_T5 , (setter)JitCpu_set_T5 , "T5" , NULL}, - {"T6" , (getter)JitCpu_get_T6 , (setter)JitCpu_set_T6 , "T6" , NULL}, - {"T7" , (getter)JitCpu_get_T7 , (setter)JitCpu_set_T7 , "T7" , NULL}, - {"S0" , (getter)JitCpu_get_S0 , (setter)JitCpu_set_S0 , "S0" , NULL}, - {"S1" , (getter)JitCpu_get_S1 , (setter)JitCpu_set_S1 , "S1" , NULL}, - {"S2" , (getter)JitCpu_get_S2 , (setter)JitCpu_set_S2 , "S2" , NULL}, - {"S3" , (getter)JitCpu_get_S3 , (setter)JitCpu_set_S3 , "S3" , NULL}, - {"S4" , (getter)JitCpu_get_S4 , (setter)JitCpu_set_S4 , "S4" , NULL}, - {"S5" , (getter)JitCpu_get_S5 , (setter)JitCpu_set_S5 , "S5" , NULL}, - {"S6" , (getter)JitCpu_get_S6 , (setter)JitCpu_set_S6 , "S6" , NULL}, - {"S7" , (getter)JitCpu_get_S7 , (setter)JitCpu_set_S7 , "S7" , NULL}, - {"T8" , (getter)JitCpu_get_T8 , (setter)JitCpu_set_T8 , "T8" , NULL}, - {"T9" , (getter)JitCpu_get_T9 , (setter)JitCpu_set_T9 , "T9" , NULL}, - {"K0" , (getter)JitCpu_get_K0 , (setter)JitCpu_set_K0 , "K0" , NULL}, - {"K1" , (getter)JitCpu_get_K1 , (setter)JitCpu_set_K1 , "K1" , NULL}, - {"GP" , (getter)JitCpu_get_GP , (setter)JitCpu_set_GP , "GP" , NULL}, - {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, - {"FP" , (getter)JitCpu_get_FP , (setter)JitCpu_set_FP , "FP" , NULL}, - {"RA" , (getter)JitCpu_get_RA , (setter)JitCpu_set_RA , "RA" , NULL}, - {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, - {"PC_FETCH" , (getter)JitCpu_get_PC_FETCH , (setter)JitCpu_set_PC_FETCH , "PC_FETCH" , NULL}, - {"R_LO" , (getter)JitCpu_get_R_LO , (setter)JitCpu_set_R_LO , "R_LO" , NULL}, - {"R_HI" , (getter)JitCpu_get_R_HI , (setter)JitCpu_set_R_HI , "R_HI" , NULL}, - - {NULL} /* Sentinel */ -}; - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_mips32.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, 
/*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef JitCore_mips32_Methods[] = { - - /* - - */ - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - - - - - -MOD_INIT(JitCore_mips32) -{ - PyObject *module; - - MOD_DEF(module, "JitCore_mips32", "JitCore_mips32 module", JitCore_mips32_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} diff --git a/miasm2/jitter/arch/JitCore_mips32.h b/miasm2/jitter/arch/JitCore_mips32.h deleted file mode 100644 index e20d5133..00000000 --- a/miasm2/jitter/arch/JitCore_mips32.h +++ /dev/null @@ -1,343 +0,0 @@ - -typedef struct { - uint32_t exception_flags; - - /* gpregs */ - - uint32_t ZERO; - uint32_t AT; - uint32_t V0; - uint32_t V1; - uint32_t A0; - uint32_t A1; - uint32_t A2; - uint32_t A3; - uint32_t T0; - uint32_t T1; - uint32_t T2; - uint32_t T3; - uint32_t T4; - uint32_t T5; - uint32_t T6; - uint32_t T7; - uint32_t S0; - uint32_t S1; - uint32_t S2; - uint32_t S3; - uint32_t S4; - uint32_t S5; - uint32_t S6; - uint32_t S7; - uint32_t T8; - 
uint32_t T9; - uint32_t K0; - uint32_t K1; - uint32_t GP; - uint32_t SP; - uint32_t FP; - uint32_t RA; - uint32_t PC; - uint32_t PC_FETCH; - uint32_t R_LO; - uint32_t R_HI; - - - double F0; - double F1; - double F2; - double F3; - double F4; - double F5; - double F6; - double F7; - double F8; - double F9; - double F10; - double F11; - double F12; - double F13; - double F14; - double F15; - double F16; - double F17; - double F18; - double F19; - double F20; - double F21; - double F22; - double F23; - double F24; - double F25; - double F26; - double F27; - double F28; - double F29; - double F30; - double F31; - - uint32_t INDEX; - uint32_t CPR0_1; - uint32_t CPR0_2; - uint32_t CPR0_3; - uint32_t CPR0_4; - uint32_t CPR0_5; - uint32_t CPR0_6; - uint32_t CPR0_7; - uint32_t CPR0_8; - uint32_t CPR0_9; - uint32_t CPR0_10; - uint32_t CPR0_11; - uint32_t CPR0_12; - uint32_t CPR0_13; - uint32_t CPR0_14; - uint32_t CPR0_15; - uint32_t ENTRYLO0; - uint32_t CPR0_17; - uint32_t CPR0_18; - uint32_t CPR0_19; - uint32_t CPR0_20; - uint32_t CPR0_21; - uint32_t CPR0_22; - uint32_t CPR0_23; - uint32_t ENTRYLO1; - uint32_t CPR0_25; - uint32_t CPR0_26; - uint32_t CPR0_27; - uint32_t CPR0_28; - uint32_t CPR0_29; - uint32_t CPR0_30; - uint32_t CPR0_31; - uint32_t CPR0_32; - uint32_t CPR0_33; - uint32_t CPR0_34; - uint32_t CPR0_35; - uint32_t CPR0_36; - uint32_t CPR0_37; - uint32_t CPR0_38; - uint32_t CPR0_39; - uint32_t PAGEMASK; - uint32_t CPR0_41; - uint32_t CPR0_42; - uint32_t CPR0_43; - uint32_t CPR0_44; - uint32_t CPR0_45; - uint32_t CPR0_46; - uint32_t CPR0_47; - uint32_t CPR0_48; - uint32_t CPR0_49; - uint32_t CPR0_50; - uint32_t CPR0_51; - uint32_t CPR0_52; - uint32_t CPR0_53; - uint32_t CPR0_54; - uint32_t CPR0_55; - uint32_t CPR0_56; - uint32_t CPR0_57; - uint32_t CPR0_58; - uint32_t CPR0_59; - uint32_t CPR0_60; - uint32_t CPR0_61; - uint32_t CPR0_62; - uint32_t CPR0_63; - uint32_t CPR0_64; - uint32_t CPR0_65; - uint32_t CPR0_66; - uint32_t CPR0_67; - uint32_t CPR0_68; - uint32_t 
CPR0_69; - uint32_t CPR0_70; - uint32_t CPR0_71; - uint32_t COUNT; - uint32_t CPR0_73; - uint32_t CPR0_74; - uint32_t CPR0_75; - uint32_t CPR0_76; - uint32_t CPR0_77; - uint32_t CPR0_78; - uint32_t CPR0_79; - uint32_t ENTRYHI; - uint32_t CPR0_81; - uint32_t CPR0_82; - uint32_t CPR0_83; - uint32_t CPR0_84; - uint32_t CPR0_85; - uint32_t CPR0_86; - uint32_t CPR0_87; - uint32_t CPR0_88; - uint32_t CPR0_89; - uint32_t CPR0_90; - uint32_t CPR0_91; - uint32_t CPR0_92; - uint32_t CPR0_93; - uint32_t CPR0_94; - uint32_t CPR0_95; - uint32_t CPR0_96; - uint32_t CPR0_97; - uint32_t CPR0_98; - uint32_t CPR0_99; - uint32_t CPR0_100; - uint32_t CPR0_101; - uint32_t CPR0_102; - uint32_t CPR0_103; - uint32_t CAUSE; - uint32_t CPR0_105; - uint32_t CPR0_106; - uint32_t CPR0_107; - uint32_t CPR0_108; - uint32_t CPR0_109; - uint32_t CPR0_110; - uint32_t CPR0_111; - uint32_t EPC; - uint32_t CPR0_113; - uint32_t CPR0_114; - uint32_t CPR0_115; - uint32_t CPR0_116; - uint32_t CPR0_117; - uint32_t CPR0_118; - uint32_t CPR0_119; - uint32_t CPR0_120; - uint32_t CPR0_121; - uint32_t CPR0_122; - uint32_t CPR0_123; - uint32_t CPR0_124; - uint32_t CPR0_125; - uint32_t CPR0_126; - uint32_t CPR0_127; - uint32_t CONFIG; - uint32_t CPR0_129; - uint32_t CPR0_130; - uint32_t CPR0_131; - uint32_t CPR0_132; - uint32_t CPR0_133; - uint32_t CPR0_134; - uint32_t CPR0_135; - uint32_t CPR0_136; - uint32_t CPR0_137; - uint32_t CPR0_138; - uint32_t CPR0_139; - uint32_t CPR0_140; - uint32_t CPR0_141; - uint32_t CPR0_142; - uint32_t CPR0_143; - uint32_t CPR0_144; - uint32_t CPR0_145; - uint32_t CPR0_146; - uint32_t CPR0_147; - uint32_t CPR0_148; - uint32_t CPR0_149; - uint32_t CPR0_150; - uint32_t CPR0_151; - uint32_t WATCHHI; - uint32_t CPR0_153; - uint32_t CPR0_154; - uint32_t CPR0_155; - uint32_t CPR0_156; - uint32_t CPR0_157; - uint32_t CPR0_158; - uint32_t CPR0_159; - uint32_t CPR0_160; - uint32_t CPR0_161; - uint32_t CPR0_162; - uint32_t CPR0_163; - uint32_t CPR0_164; - uint32_t CPR0_165; - uint32_t 
CPR0_166; - uint32_t CPR0_167; - uint32_t CPR0_168; - uint32_t CPR0_169; - uint32_t CPR0_170; - uint32_t CPR0_171; - uint32_t CPR0_172; - uint32_t CPR0_173; - uint32_t CPR0_174; - uint32_t CPR0_175; - uint32_t CPR0_176; - uint32_t CPR0_177; - uint32_t CPR0_178; - uint32_t CPR0_179; - uint32_t CPR0_180; - uint32_t CPR0_181; - uint32_t CPR0_182; - uint32_t CPR0_183; - uint32_t CPR0_184; - uint32_t CPR0_185; - uint32_t CPR0_186; - uint32_t CPR0_187; - uint32_t CPR0_188; - uint32_t CPR0_189; - uint32_t CPR0_190; - uint32_t CPR0_191; - uint32_t CPR0_192; - uint32_t CPR0_193; - uint32_t CPR0_194; - uint32_t CPR0_195; - uint32_t CPR0_196; - uint32_t CPR0_197; - uint32_t CPR0_198; - uint32_t CPR0_199; - uint32_t CPR0_200; - uint32_t CPR0_201; - uint32_t CPR0_202; - uint32_t CPR0_203; - uint32_t CPR0_204; - uint32_t CPR0_205; - uint32_t CPR0_206; - uint32_t CPR0_207; - uint32_t CPR0_208; - uint32_t CPR0_209; - uint32_t CPR0_210; - uint32_t CPR0_211; - uint32_t CPR0_212; - uint32_t CPR0_213; - uint32_t CPR0_214; - uint32_t CPR0_215; - uint32_t CPR0_216; - uint32_t CPR0_217; - uint32_t CPR0_218; - uint32_t CPR0_219; - uint32_t CPR0_220; - uint32_t CPR0_221; - uint32_t CPR0_222; - uint32_t CPR0_223; - uint32_t CPR0_224; - uint32_t CPR0_225; - uint32_t CPR0_226; - uint32_t CPR0_227; - uint32_t CPR0_228; - uint32_t CPR0_229; - uint32_t CPR0_230; - uint32_t CPR0_231; - uint32_t CPR0_232; - uint32_t CPR0_233; - uint32_t CPR0_234; - uint32_t CPR0_235; - uint32_t CPR0_236; - uint32_t CPR0_237; - uint32_t CPR0_238; - uint32_t CPR0_239; - uint32_t CPR0_240; - uint32_t CPR0_241; - uint32_t CPR0_242; - uint32_t CPR0_243; - uint32_t CPR0_244; - uint32_t CPR0_245; - uint32_t CPR0_246; - uint32_t CPR0_247; - uint32_t CPR0_248; - uint32_t CPR0_249; - uint32_t CPR0_250; - uint32_t CPR0_251; - uint32_t CPR0_252; - uint32_t CPR0_253; - uint32_t CPR0_254; - uint32_t CPR0_255; -}vm_cpu_t; - -_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, 
uint64_t addr, uint8_t src); -_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); - -#define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/arch/JitCore_msp430.c b/miasm2/jitter/arch/JitCore_msp430.c deleted file mode 100644 index c21296c7..00000000 --- a/miasm2/jitter/arch/JitCore_msp430.c +++ /dev/null @@ -1,477 +0,0 @@ -#include -#include "structmember.h" -#include -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include "../JitCore.h" -#include "JitCore_msp430.h" - - -reg_dict gpreg_dict[] = { {.name = "PC", .offset = offsetof(vm_cpu_t, PC)}, - {.name = "SP", .offset = offsetof(vm_cpu_t, SP)}, - //{.name = "SR", .offset = offsetof(vm_cpu_t, SR)}, - {.name = "R3", .offset = offsetof(vm_cpu_t, R3)}, - {.name = "R4", .offset = offsetof(vm_cpu_t, R4)}, - {.name = "R5", .offset = offsetof(vm_cpu_t, R5)}, - {.name = "R6", .offset = offsetof(vm_cpu_t, R6)}, - {.name = "R7", .offset = offsetof(vm_cpu_t, R7)}, - {.name = "R8", .offset = offsetof(vm_cpu_t, R8)}, - {.name = "R9", .offset = offsetof(vm_cpu_t, R9)}, - {.name = "R10", .offset = offsetof(vm_cpu_t, R10)}, - {.name = "R11", .offset = offsetof(vm_cpu_t, R11)}, - {.name = "R12", .offset = offsetof(vm_cpu_t, R12)}, - {.name = "R13", .offset = offsetof(vm_cpu_t, R13)}, - {.name = "R14", .offset = offsetof(vm_cpu_t, R14)}, - {.name = "R15", .offset = offsetof(vm_cpu_t, R15)}, - - {.name = "zf", .offset = offsetof(vm_cpu_t, zf)}, - {.name = "nf", .offset = offsetof(vm_cpu_t, nf)}, - {.name = "of", .offset = offsetof(vm_cpu_t, of)}, - {.name = "cf", .offset = offsetof(vm_cpu_t, cf)}, - - {.name = "cpuoff", .offset = offsetof(vm_cpu_t, zf)}, - {.name = "gie", .offset = offsetof(vm_cpu_t, zf)}, - {.name = "osc", .offset = offsetof(vm_cpu_t, zf)}, 
- {.name = "scg0", .offset = offsetof(vm_cpu_t, zf)}, - {.name = "scg1", .offset = offsetof(vm_cpu_t, zf)}, - {.name = "res", .offset = offsetof(vm_cpu_t, zf)}, - -}; - -/************************** JitCpu object **************************/ - - - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg(PC); - get_reg(SP); - //get_reg(SR); - get_reg(R3); - get_reg(R4); - get_reg(R5); - get_reg(R6); - get_reg(R7); - get_reg(R8); - get_reg(R9); - get_reg(R10); - get_reg(R11); - get_reg(R12); - get_reg(R13); - get_reg(R14); - get_reg(R15); - - get_reg(zf); - get_reg(nf); - get_reg(of); - get_reg(cf); - - get_reg(cpuoff); - get_reg(gie); - get_reg(osc); - get_reg(scg0); - get_reg(scg1); - get_reg(res); - - - return dict; -} - - -PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) -{ - PyObject* dict; - PyObject *d_key, *d_value = NULL; - Py_ssize_t pos = 0; - char* d_key_name; - uint64_t val; - unsigned int i, found; - - if (!PyArg_ParseTuple(args, "O", &dict)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - while(PyDict_Next(dict, &pos, &d_key, &d_value)){ - PyGetStr(d_key_name, d_key); - PyGetInt(d_value, val); - found = 0; - for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - found = 1; - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key_name); - RAISE(PyExc_ValueError, "unknown reg"); - } - Py_INCREF(Py_None); - return Py_None; -} - - - - -PyObject * cpu_init_regs(JitCpu* self) -{ - memset(self->cpu, 0, sizeof(vm_cpu_t)); - - Py_INCREF(Py_None); - return Py_None; - -} - -void dump_gpregs(vm_cpu_t* vmcpu) -{ - - printf("PC %.4"PRIX32" SP %.4"PRIX32" R3 %.4"PRIX32" ", - vmcpu->PC, vmcpu->SP, vmcpu->R3); - printf("R4 %.4"PRIX32" R5 %.4"PRIX32" R6 %.4"PRIX32" R7 %.4"PRIX32"\n", - 
vmcpu->R4, vmcpu->R5, vmcpu->R6, vmcpu->R7); - printf("R8 %.4"PRIX32" R9 %.4"PRIX32" R10 %.4"PRIX32" R11 %.4"PRIX32" ", - vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); - printf("R12 %.4"PRIX32" R13 %.4"PRIX32" R14 %.4"PRIX32" R15 %.4"PRIX32"\n", - vmcpu->R12, vmcpu->R13, vmcpu->R14, vmcpu->R15); - printf("zf %"PRIX32" nf %"PRIX32" of %"PRIX32" cf %"PRIX32"\n", - vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); -} - - -PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) -{ - vm_cpu_t* vmcpu; - - vmcpu = self->cpu; - dump_gpregs(vmcpu); - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) -{ - return cpu_dump_gpregs(self, args); -} - - -PyObject* cpu_set_exception(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_exception(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); -} - - - - - -void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) -{ - PyObject *result; - - if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) - return; - result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); - Py_DECREF(result); - -} - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 8); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 16); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 32); -} - -void 
MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 64); -} - - -PyObject* vm_set_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError,"arg must be str"); - check_automod(self, addr, size*8); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, - "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, - "X"}, - {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, - "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, - "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, - "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, - "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, - "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, - "X"}, - {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, - "X"}, - {NULL} /* Sentinel */ -}; - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) -{ - self->cpu = malloc(sizeof(vm_cpu_t)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc vm_cpu_t\n"); - exit(EXIT_FAILURE); - } - return 0; -} - -getset_reg_u16(PC); -getset_reg_u16(SP); -getset_reg_u16(R3); -getset_reg_u16(R4); -getset_reg_u16(R5); 
-getset_reg_u16(R6); -getset_reg_u16(R7); -getset_reg_u16(R8); -getset_reg_u16(R9); -getset_reg_u16(R10); -getset_reg_u16(R11); -getset_reg_u16(R12); -getset_reg_u16(R13); -getset_reg_u16(R14); -getset_reg_u16(R15); -getset_reg_u16(zf); -getset_reg_u16(nf); -getset_reg_u16(of); -getset_reg_u16(cf); -getset_reg_u16(cpuoff); -getset_reg_u16(gie); -getset_reg_u16(osc); -getset_reg_u16(scg0); -getset_reg_u16(scg1); -getset_reg_u16(res); - - - -PyObject* get_gpreg_offset_all(void) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - get_reg_off(exception_flags); - - get_reg_off(PC); - get_reg_off(SP); - get_reg_off(R3); - get_reg_off(R4); - get_reg_off(R5); - get_reg_off(R6); - get_reg_off(R7); - get_reg_off(R8); - get_reg_off(R9); - get_reg_off(R10); - get_reg_off(R11); - get_reg_off(R12); - get_reg_off(R13); - get_reg_off(R14); - get_reg_off(R15); - - get_reg_off(zf); - get_reg_off(nf); - get_reg_off(of); - get_reg_off(cf); - get_reg_off(cpuoff); - get_reg_off(gie); - get_reg_off(osc); - get_reg_off(scg0); - get_reg_off(scg1); - get_reg_off(res); - - return dict; -} - - - - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - - - {"PC" , (getter)JitCpu_get_PC , (setter)JitCpu_set_PC , "PC" , NULL}, - {"SP" , (getter)JitCpu_get_SP , (setter)JitCpu_set_SP , "SP" , NULL}, - {"R3" , (getter)JitCpu_get_R3 , (setter)JitCpu_set_R3 , "R3" , NULL}, - {"R4" , (getter)JitCpu_get_R4 , (setter)JitCpu_set_R4 , "R4" , NULL}, - {"R5" , (getter)JitCpu_get_R5 , (setter)JitCpu_set_R5 , "R5" , NULL}, - {"R6" , (getter)JitCpu_get_R6 , (setter)JitCpu_set_R6 , "R6" , NULL}, - {"R7" , (getter)JitCpu_get_R7 , (setter)JitCpu_set_R7 , "R7" , NULL}, - {"R8" , (getter)JitCpu_get_R8 , (setter)JitCpu_set_R8 , "R8" , NULL}, - {"R9" , (getter)JitCpu_get_R9 , (setter)JitCpu_set_R9 , "R9" , NULL}, - {"R10" , (getter)JitCpu_get_R10 
, (setter)JitCpu_set_R10 , "R10" , NULL}, - {"R11" , (getter)JitCpu_get_R11 , (setter)JitCpu_set_R11 , "R11" , NULL}, - {"R12" , (getter)JitCpu_get_R12 , (setter)JitCpu_set_R12 , "R12" , NULL}, - {"R13" , (getter)JitCpu_get_R13 , (setter)JitCpu_set_R13 , "R13" , NULL}, - {"R14" , (getter)JitCpu_get_R14 , (setter)JitCpu_set_R14 , "R14" , NULL}, - {"R15" , (getter)JitCpu_get_R15 , (setter)JitCpu_set_R15 , "R15" , NULL}, - {"zf" , (getter)JitCpu_get_zf , (setter)JitCpu_set_zf , "zf" , NULL}, - {"nf" , (getter)JitCpu_get_nf , (setter)JitCpu_set_nf , "nf" , NULL}, - {"of" , (getter)JitCpu_get_of , (setter)JitCpu_set_of , "of" , NULL}, - {"cf" , (getter)JitCpu_get_cf , (setter)JitCpu_set_cf , "cf" , NULL}, - {"cpuoff" , (getter)JitCpu_get_cpuoff , (setter)JitCpu_set_cpuoff , "cpuoff" , NULL}, - {"gie" , (getter)JitCpu_get_gie , (setter)JitCpu_set_gie , "gie" , NULL}, - {"osc" , (getter)JitCpu_get_osc , (setter)JitCpu_set_osc , "osc" , NULL}, - {"scg0" , (getter)JitCpu_get_scg0 , (setter)JitCpu_set_scg0 , "scg0" , NULL}, - {"scg1" , (getter)JitCpu_get_scg1 , (setter)JitCpu_set_scg1 , "scg1" , NULL}, - {"res" , (getter)JitCpu_get_res , (setter)JitCpu_set_res , "res" , NULL}, - - {NULL} /* Sentinel */ -}; - - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_msp430.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* 
tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef JitCore_msp430_Methods[] = { - - /* - - */ - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - - - - -MOD_INIT(JitCore_msp430) -{ - PyObject *module; - - MOD_DEF(module, "JitCore_msp430", "JitCore_msp430 module", JitCore_msp430_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} diff --git a/miasm2/jitter/arch/JitCore_msp430.h b/miasm2/jitter/arch/JitCore_msp430.h deleted file mode 100644 index 1c802e9e..00000000 --- a/miasm2/jitter/arch/JitCore_msp430.h +++ /dev/null @@ -1,44 +0,0 @@ - -typedef struct { - uint32_t exception_flags; - - /* gpregs */ - uint32_t PC; - uint32_t SP; - uint32_t R3; - uint32_t R4; - uint32_t R5; - uint32_t R6; - uint32_t R7; - uint32_t R8; - uint32_t R9; - uint32_t R10; - uint32_t R11; - uint32_t R12; - uint32_t R13; - uint32_t R14; - uint32_t R15; - - /* eflag */ - uint32_t zf; - uint32_t nf; - uint32_t of; - uint32_t cf; - - uint32_t cpuoff; - uint32_t gie; - uint32_t osc; - uint32_t scg0; - uint32_t scg1; - uint32_t res; - -}vm_cpu_t; - -#define RETURN_PC return BlockDst; - -_MIASM_EXPORT void dump_gpregs(vm_cpu_t* vmcpu); - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); -_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); diff --git a/miasm2/jitter/arch/JitCore_ppc32.c 
b/miasm2/jitter/arch/JitCore_ppc32.c deleted file mode 100644 index 8a1bb79e..00000000 --- a/miasm2/jitter/arch/JitCore_ppc32.c +++ /dev/null @@ -1,344 +0,0 @@ -#include -#include "structmember.h" -#include -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include "../JitCore.h" -#include "JitCore_ppc32.h" - -reg_dict gpreg_dict[] = { -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ - { .name = #_name, .offset = offsetof(struct vm_cpu, _name), .size = _size }, -#include "JitCore_ppc32_regs.h" -#undef JITCORE_PPC_REG_EXPAND -}; - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ - get_reg(_name); -#include "JitCore_ppc32_regs.h" -#undef JITCORE_PPC_REG_EXPAND - - return dict; -} - - - -PyObject * -cpu_set_gpreg(JitCpu *self, PyObject *args) { - PyObject *dict; - PyObject *d_key, *d_value = NULL; - Py_ssize_t pos = 0; - char* d_key_name; - uint64_t val; - unsigned int i; - - if (!PyArg_ParseTuple(args, "O", &dict)) - return NULL; - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - - while(PyDict_Next(dict, &pos, &d_key, &d_value)) { - int found = 0; - PyGetStr(d_key_name, d_key); - PyGetInt(d_value, val); - - for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - found = 1; - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key_name); - RAISE(PyExc_ValueError, "unknown reg"); - } - - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject * -cpu_init_regs(JitCpu *self) { - memset(self->cpu, 0, sizeof(struct vm_cpu)); - - Py_INCREF(Py_None); - return Py_None; -} - -static void -dump_gpreg(const char *name, uint32_t val, int *n) { - printf("%6s %.8" PRIX32"%c", name, val, (*n + 1) % 4 == 0? 
'\n':' '); - *n = (*n + 1) % 4; -} - -void -dump_gpregs(struct vm_cpu *vmcpu) { - int reg_num = 0; - -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ - dump_gpreg(#_name, vmcpu->_name, ®_num); -#include "JitCore_ppc32_regs.h" -#undef JITCORE_PPC_REG_EXPAND - - if ((reg_num % 4) != 0) - putchar('\n'); -} - - -PyObject * -cpu_dump_gpregs(JitCpu *self, PyObject *args) { - - dump_gpregs(self->cpu); - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * -cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) -{ - return cpu_dump_gpregs(self, args); -} - -PyObject * -cpu_set_exception(JitCpu *self, PyObject *args) { - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - return NULL; - - PyGetInt(item1, i); - - ((struct vm_cpu *)self->cpu)->exception_flags = i; - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * -cpu_get_exception(JitCpu *self, PyObject *args) { - return PyLong_FromUnsignedLongLong(((struct vm_cpu *)self->cpu)->exception_flags); -} - -static PyObject * -cpu_get_spr_access(JitCpu *self, PyObject *args) { - return PyLong_FromUnsignedLongLong(((struct vm_cpu *) self->cpu)->spr_access); -} - -void -check_automod(JitCpu *jitcpu, uint64_t addr, uint64_t size) { - PyObject *result; - - if (!(((VmMngr*)jitcpu->pyvm)->vm_mngr.exception_flags & EXCEPT_CODE_AUTOMOD)) - return; - result = PyObject_CallMethod(jitcpu->jitter, "automod_cb", "LL", addr, size); - Py_DECREF(result); -} - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 8); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 16); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 32); -} - -void MEM_WRITE_64(JitCpu* jitcpu, uint64_t 
addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); - check_automod(jitcpu, addr, 64); -} - - - -PyObject * -vm_set_mem(JitCpu *self, PyObject *args) { - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char *buffer; - uint64_t size; - uint64_t addr; - int ret = 0x1337; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - return NULL; - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError,"arg must be str"); - check_automod(self, addr, size*8); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, "X"}, - {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, "X"}, - {"get_spr_access", (PyCFunction)cpu_get_spr_access, METH_VARARGS, "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, "X"}, - {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, "X"}, - {NULL} /* Sentinel */ -}; - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) { - self->cpu = malloc(sizeof(struct vm_cpu)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc struct vm_cpu\n"); - exit(1); - } - return 0; -} - - -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ -getset_reg_u32(_name); -#include "JitCore_ppc32_regs.h" -#undef 
JITCORE_PPC_REG_EXPAND - -PyObject * -get_gpreg_offset_all(void) { - PyObject *dict = PyDict_New(); - PyObject *o; - -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ - get_reg_off(_name); -#include "JitCore_ppc32_regs.h" -#undef JITCORE_PPC_REG_EXPAND - - return dict; -} - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ - { #_name, (getter) JitCpu_get_ ## _name , \ - (setter) JitCpu_set_ ## _name , #_name , NULL}, -#include "JitCore_ppc32_regs.h" -#undef JITCORE_PPC_REG_EXPAND - - {NULL} /* Sentinel */ -}; - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_ppc.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef JitCore_ppc32_Methods[] = { - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ -}; - - - -MOD_INIT(JitCore_ppc32) -{ - PyObject *module; - - 
MOD_DEF(module, "JitCore_ppc32", "JitCore_ppc32 module", JitCore_ppc32_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} diff --git a/miasm2/jitter/arch/JitCore_ppc32.h b/miasm2/jitter/arch/JitCore_ppc32.h deleted file mode 100644 index f2a5200e..00000000 --- a/miasm2/jitter/arch/JitCore_ppc32.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * _size can't be used yet because all register accesses are homogeneously - * 32-bit - */ -struct vm_cpu { -#define JITCORE_PPC_REG_EXPAND(_name, _size) \ - uint32_t _name; -#include "JitCore_ppc32_regs.h" -#undef JITCORE_PPC_REG_EXPAND - - uint64_t exception_flags; - uint32_t spr_access; - uint32_t reserve; - uint32_t reserve_address; -}; - -_MIASM_EXPORT void dump_gpregs(struct vm_cpu *); - -typedef struct vm_cpu vm_cpu_t; - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); -_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); diff --git a/miasm2/jitter/arch/JitCore_ppc32_regs.h b/miasm2/jitter/arch/JitCore_ppc32_regs.h deleted file mode 100644 index d15b5e51..00000000 --- a/miasm2/jitter/arch/JitCore_ppc32_regs.h +++ /dev/null @@ -1,89 +0,0 @@ -JITCORE_PPC_REG_EXPAND(R0, 32) -JITCORE_PPC_REG_EXPAND(R1, 32) -JITCORE_PPC_REG_EXPAND(R2, 32) -JITCORE_PPC_REG_EXPAND(R3, 32) -JITCORE_PPC_REG_EXPAND(R4, 32) -JITCORE_PPC_REG_EXPAND(R5, 32) -JITCORE_PPC_REG_EXPAND(R6, 32) -JITCORE_PPC_REG_EXPAND(R7, 32) -JITCORE_PPC_REG_EXPAND(R8, 32) -JITCORE_PPC_REG_EXPAND(R9, 32) -JITCORE_PPC_REG_EXPAND(R10, 32) -JITCORE_PPC_REG_EXPAND(R11, 32) -JITCORE_PPC_REG_EXPAND(R12, 32) -JITCORE_PPC_REG_EXPAND(R13, 32) -JITCORE_PPC_REG_EXPAND(R14, 32) 
-JITCORE_PPC_REG_EXPAND(R15, 32) -JITCORE_PPC_REG_EXPAND(R16, 32) -JITCORE_PPC_REG_EXPAND(R17, 32) -JITCORE_PPC_REG_EXPAND(R18, 32) -JITCORE_PPC_REG_EXPAND(R19, 32) -JITCORE_PPC_REG_EXPAND(R20, 32) -JITCORE_PPC_REG_EXPAND(R21, 32) -JITCORE_PPC_REG_EXPAND(R22, 32) -JITCORE_PPC_REG_EXPAND(R23, 32) -JITCORE_PPC_REG_EXPAND(R24, 32) -JITCORE_PPC_REG_EXPAND(R25, 32) -JITCORE_PPC_REG_EXPAND(R26, 32) -JITCORE_PPC_REG_EXPAND(R27, 32) -JITCORE_PPC_REG_EXPAND(R28, 32) -JITCORE_PPC_REG_EXPAND(R29, 32) -JITCORE_PPC_REG_EXPAND(R30, 32) -JITCORE_PPC_REG_EXPAND(R31, 32) - -JITCORE_PPC_REG_EXPAND(PC, 32) -JITCORE_PPC_REG_EXPAND(LR, 32) -JITCORE_PPC_REG_EXPAND(CTR, 32) -JITCORE_PPC_REG_EXPAND(MSR, 32) - -JITCORE_PPC_REG_EXPAND(XER_SO, 32) -JITCORE_PPC_REG_EXPAND(XER_OV, 32) -JITCORE_PPC_REG_EXPAND(XER_CA, 32) -JITCORE_PPC_REG_EXPAND(XER_BC, 32) - -JITCORE_PPC_REG_EXPAND(CR0_LT, 8) -JITCORE_PPC_REG_EXPAND(CR0_GT, 8) -JITCORE_PPC_REG_EXPAND(CR0_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR0_SO, 8) -JITCORE_PPC_REG_EXPAND(CR1_LT, 8) -JITCORE_PPC_REG_EXPAND(CR1_GT, 8) -JITCORE_PPC_REG_EXPAND(CR1_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR1_SO, 8) -JITCORE_PPC_REG_EXPAND(CR2_LT, 8) -JITCORE_PPC_REG_EXPAND(CR2_GT, 8) -JITCORE_PPC_REG_EXPAND(CR2_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR2_SO, 8) -JITCORE_PPC_REG_EXPAND(CR3_LT, 8) -JITCORE_PPC_REG_EXPAND(CR3_GT, 8) -JITCORE_PPC_REG_EXPAND(CR3_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR3_SO, 8) -JITCORE_PPC_REG_EXPAND(CR4_LT, 8) -JITCORE_PPC_REG_EXPAND(CR4_GT, 8) -JITCORE_PPC_REG_EXPAND(CR4_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR4_SO, 8) -JITCORE_PPC_REG_EXPAND(CR5_LT, 8) -JITCORE_PPC_REG_EXPAND(CR5_GT, 8) -JITCORE_PPC_REG_EXPAND(CR5_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR5_SO, 8) -JITCORE_PPC_REG_EXPAND(CR6_LT, 8) -JITCORE_PPC_REG_EXPAND(CR6_GT, 8) -JITCORE_PPC_REG_EXPAND(CR6_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR6_SO, 8) -JITCORE_PPC_REG_EXPAND(CR7_LT, 8) -JITCORE_PPC_REG_EXPAND(CR7_GT, 8) -JITCORE_PPC_REG_EXPAND(CR7_EQ, 8) -JITCORE_PPC_REG_EXPAND(CR7_SO, 8) - -JITCORE_PPC_REG_EXPAND(SPRG0, 32) 
-JITCORE_PPC_REG_EXPAND(SPRG1, 32) -JITCORE_PPC_REG_EXPAND(SPRG2, 32) -JITCORE_PPC_REG_EXPAND(SPRG3, 32) -JITCORE_PPC_REG_EXPAND(SRR0, 32) -JITCORE_PPC_REG_EXPAND(SRR1, 32) -JITCORE_PPC_REG_EXPAND(DAR, 32) -JITCORE_PPC_REG_EXPAND(DSISR, 32) -JITCORE_PPC_REG_EXPAND(PIR, 32) -JITCORE_PPC_REG_EXPAND(PVR, 32) -JITCORE_PPC_REG_EXPAND(DEC, 32) -JITCORE_PPC_REG_EXPAND(TBL, 32) -JITCORE_PPC_REG_EXPAND(TBU, 32) diff --git a/miasm2/jitter/arch/JitCore_x86.c b/miasm2/jitter/arch/JitCore_x86.c deleted file mode 100644 index 50ce6bd5..00000000 --- a/miasm2/jitter/arch/JitCore_x86.c +++ /dev/null @@ -1,946 +0,0 @@ -#include -#include "structmember.h" -#include -#include -#include "../compat_py23.h" -#include "../queue.h" -#include "../vm_mngr.h" -#include "../vm_mngr_py.h" -#include "../bn.h" -#include "../JitCore.h" -#include "../op_semantics.h" -#include "JitCore_x86.h" - - -vm_cpu_t ref_arch_regs; - -reg_dict gpreg_dict[] = { - {.name = "RAX", .offset = offsetof(vm_cpu_t, RAX), .size = 64}, - {.name = "RBX", .offset = offsetof(vm_cpu_t, RBX), .size = 64}, - {.name = "RCX", .offset = offsetof(vm_cpu_t, RCX), .size = 64}, - {.name = "RDX", .offset = offsetof(vm_cpu_t, RDX), .size = 64}, - {.name = "RSI", .offset = offsetof(vm_cpu_t, RSI), .size = 64}, - {.name = "RDI", .offset = offsetof(vm_cpu_t, RDI), .size = 64}, - {.name = "RSP", .offset = offsetof(vm_cpu_t, RSP), .size = 64}, - {.name = "RBP", .offset = offsetof(vm_cpu_t, RBP), .size = 64}, - - {.name = "R8", .offset = offsetof(vm_cpu_t, R8), .size = 64}, - {.name = "R9", .offset = offsetof(vm_cpu_t, R9), .size = 64}, - {.name = "R10", .offset = offsetof(vm_cpu_t, R10), .size = 64}, - {.name = "R11", .offset = offsetof(vm_cpu_t, R11), .size = 64}, - {.name = "R12", .offset = offsetof(vm_cpu_t, R12), .size = 64}, - {.name = "R13", .offset = offsetof(vm_cpu_t, R13), .size = 64}, - {.name = "R14", .offset = offsetof(vm_cpu_t, R14), .size = 64}, - {.name = "R15", .offset = offsetof(vm_cpu_t, R15), .size = 64}, - - {.name = 
"RIP", .offset = offsetof(vm_cpu_t, RIP), .size = 64}, - - {.name = "zf", .offset = offsetof(vm_cpu_t, zf), .size = 8}, - {.name = "nf", .offset = offsetof(vm_cpu_t, nf), .size = 8}, - {.name = "pf", .offset = offsetof(vm_cpu_t, pf), .size = 8}, - {.name = "of", .offset = offsetof(vm_cpu_t, of), .size = 8}, - {.name = "cf", .offset = offsetof(vm_cpu_t, cf), .size = 8}, - {.name = "af", .offset = offsetof(vm_cpu_t, af), .size = 8}, - {.name = "df", .offset = offsetof(vm_cpu_t, df), .size = 8}, - - {.name = "ES", .offset = offsetof(vm_cpu_t, ES), .size = 16}, - {.name = "CS", .offset = offsetof(vm_cpu_t, CS), .size = 16}, - {.name = "SS", .offset = offsetof(vm_cpu_t, SS), .size = 16}, - {.name = "DS", .offset = offsetof(vm_cpu_t, DS), .size = 16}, - {.name = "FS", .offset = offsetof(vm_cpu_t, FS), .size = 16}, - {.name = "GS", .offset = offsetof(vm_cpu_t, GS), .size = 16}, - - {.name = "MM0", .offset = offsetof(vm_cpu_t, MM0), .size = 64}, - {.name = "MM1", .offset = offsetof(vm_cpu_t, MM1), .size = 64}, - {.name = "MM2", .offset = offsetof(vm_cpu_t, MM2), .size = 64}, - {.name = "MM3", .offset = offsetof(vm_cpu_t, MM3), .size = 64}, - {.name = "MM4", .offset = offsetof(vm_cpu_t, MM4), .size = 64}, - {.name = "MM5", .offset = offsetof(vm_cpu_t, MM5), .size = 64}, - {.name = "MM6", .offset = offsetof(vm_cpu_t, MM6), .size = 64}, - {.name = "MM7", .offset = offsetof(vm_cpu_t, MM7), .size = 64}, - - {.name = "XMM0", .offset = offsetof(vm_cpu_t, XMM0), .size = 128}, - {.name = "XMM1", .offset = offsetof(vm_cpu_t, XMM1), .size = 128}, - {.name = "XMM2", .offset = offsetof(vm_cpu_t, XMM2), .size = 128}, - {.name = "XMM3", .offset = offsetof(vm_cpu_t, XMM3), .size = 128}, - {.name = "XMM4", .offset = offsetof(vm_cpu_t, XMM4), .size = 128}, - {.name = "XMM5", .offset = offsetof(vm_cpu_t, XMM5), .size = 128}, - {.name = "XMM6", .offset = offsetof(vm_cpu_t, XMM6), .size = 128}, - {.name = "XMM7", .offset = offsetof(vm_cpu_t, XMM7), .size = 128}, - {.name = "XMM8", .offset = 
offsetof(vm_cpu_t, XMM8), .size = 128}, - {.name = "XMM9", .offset = offsetof(vm_cpu_t, XMM9), .size = 128}, - {.name = "XMM10", .offset = offsetof(vm_cpu_t, XMM10), .size = 128}, - {.name = "XMM11", .offset = offsetof(vm_cpu_t, XMM11), .size = 128}, - {.name = "XMM12", .offset = offsetof(vm_cpu_t, XMM12), .size = 128}, - {.name = "XMM13", .offset = offsetof(vm_cpu_t, XMM13), .size = 128}, - {.name = "XMM14", .offset = offsetof(vm_cpu_t, XMM14), .size = 128}, - {.name = "XMM15", .offset = offsetof(vm_cpu_t, XMM15), .size = 128}, - - {.name = "tsc", .offset = offsetof(vm_cpu_t, tsc), .size = 64}, - - {.name = "exception_flags", .offset = offsetof(vm_cpu_t, exception_flags), .size = 32}, - {.name = "interrupt_num", .offset = offsetof(vm_cpu_t, interrupt_num), .size = 32}, -}; - - - -/************************** JitCpu object **************************/ - - - - - -PyObject* cpu_get_gpreg(JitCpu* self) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - - get_reg(RAX); - get_reg(RBX); - get_reg(RCX); - get_reg(RDX); - get_reg(RSI); - get_reg(RDI); - get_reg(RSP); - get_reg(RBP); - - get_reg(R8); - get_reg(R9); - get_reg(R10); - get_reg(R11); - get_reg(R12); - get_reg(R13); - get_reg(R14); - get_reg(R15); - - get_reg(RIP); - - get_reg(zf); - get_reg(nf); - get_reg(pf); - get_reg(of); - get_reg(cf); - get_reg(af); - get_reg(df); - - - get_reg(ES); - get_reg(CS); - get_reg(SS); - get_reg(DS); - get_reg(FS); - get_reg(GS); - - get_reg(MM0); - get_reg(MM1); - get_reg(MM2); - get_reg(MM3); - get_reg(MM4); - get_reg(MM5); - get_reg(MM6); - get_reg(MM7); - - get_reg_bn(XMM0, 128); - get_reg_bn(XMM1, 128); - get_reg_bn(XMM2, 128); - get_reg_bn(XMM3, 128); - get_reg_bn(XMM4, 128); - get_reg_bn(XMM5, 128); - get_reg_bn(XMM6, 128); - get_reg_bn(XMM7, 128); - get_reg_bn(XMM8, 128); - get_reg_bn(XMM9, 128); - get_reg_bn(XMM10, 128); - get_reg_bn(XMM11, 128); - get_reg_bn(XMM12, 128); - get_reg_bn(XMM13, 128); - get_reg_bn(XMM14, 128); - get_reg_bn(XMM15, 128); - - get_reg(tsc); - - 
return dict; -} - - -PyObject* cpu_set_gpreg(JitCpu* self, PyObject *args) -{ - PyObject* dict; - PyObject *d_key, *d_value = NULL; - char* d_key_name; - Py_ssize_t pos = 0; - uint64_t val; - unsigned int i, found; - - if (!PyArg_ParseTuple(args, "O", &dict)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - if(!PyDict_Check(dict)) - RAISE(PyExc_TypeError, "arg must be dict"); - while(PyDict_Next(dict, &pos, &d_key, &d_value)){ - PyGetStr(d_key_name, d_key); - found = 0; - for (i=0; i < sizeof(gpreg_dict)/sizeof(reg_dict); i++){ - if (strcmp(d_key_name, gpreg_dict[i].name)) - continue; - found = 1; - switch (gpreg_dict[i].size) { - case 8: - PyGetInt(d_value, val); - *((uint8_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - break; - case 16: - PyGetInt(d_value, val); - *((uint16_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - break; - case 32: - PyGetInt(d_value, val); - *((uint32_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - break; - case 64: - PyGetInt(d_value, val); - *((uint64_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset)) = val; - break; - case 128: - { - bn_t bn; - int j; - PyObject* py_long = d_value; - PyObject* py_long_new; - PyObject* py_tmp; - PyObject* cst_32; - PyObject* cst_ffffffff; - uint64_t tmp; - - -#if PY_MAJOR_VERSION >= 3 - if (PyLong_Check(py_long)){ - /* Already PyLong */ - /* Increment ref as we will decement it next */ - Py_INCREF(py_long); - } else { - RAISE(PyExc_TypeError,"arg must be int"); - } -#else - if (PyInt_Check(py_long)){ - tmp = (uint64_t)PyInt_AsLong(py_long); - py_long = PyLong_FromLong((long)tmp); - } else if (PyLong_Check(py_long)){ - /* Already PyLong */ - /* Increment ref as we will decement it next */ - Py_INCREF(py_long); - } - else{ - RAISE(PyExc_TypeError,"arg must be int"); - } -#endif - - - cst_ffffffff = PyLong_FromLong(0xffffffff); - cst_32 = PyLong_FromLong(32); - bn = bignum_from_int(0); - - for (j = 0; j < BN_BYTE_SIZE; j += 4) { - py_tmp = PyObject_CallMethod(py_long, 
"__and__", "O", cst_ffffffff); - tmp = PyLong_AsUnsignedLongMask(py_tmp); - Py_DECREF(py_tmp); - bn = bignum_lshift(bn, 32); - bn = bignum_or(bn, bignum_from_uint64(tmp)); - - py_long_new = PyObject_CallMethod(py_long, "__rshift__", "O", cst_32); - Py_DECREF(py_long); - py_long = py_long_new; - } - Py_DECREF(py_long); - Py_DECREF(cst_32); - Py_DECREF(cst_ffffffff); - *(bn_t*)(((char*)(self->cpu)) + gpreg_dict[i].offset) = bignum_mask(bn, 128); - } - break; - } - break; - } - - if (found) - continue; - fprintf(stderr, "unknown key: %s\n", d_key_name); - RAISE(PyExc_ValueError, "unknown reg"); - } - Py_INCREF(Py_None); - return Py_None; -} - -PyObject * cpu_init_regs(JitCpu* self) -{ - memset(self->cpu, 0, sizeof(vm_cpu_t)); - ((vm_cpu_t*)self->cpu)->tsc = 0x1122334455667788ULL; - ((vm_cpu_t*)self->cpu)->i_f = 1; - Py_INCREF(Py_None); - return Py_None; - -} - -void dump_gpregs_16(vm_cpu_t* vmcpu) -{ - printf("EAX %.8"PRIX32" EBX %.8"PRIX32" ECX %.8"PRIX32" EDX %.8"PRIX32" ", - (uint32_t)(vmcpu->RAX & 0xFFFFFFFF), - (uint32_t)(vmcpu->RBX & 0xFFFFFFFF), - (uint32_t)(vmcpu->RCX & 0xFFFFFFFF), - (uint32_t)(vmcpu->RDX & 0xFFFFFFFF)); - printf("ESI %.8"PRIX32" EDI %.8"PRIX32" ESP %.8"PRIX32" EBP %.8"PRIX32" ", - (uint32_t)(vmcpu->RSI & 0xFFFFFFFF), - (uint32_t)(vmcpu->RDI & 0xFFFFFFFF), - (uint32_t)(vmcpu->RSP & 0xFFFFFFFF), - (uint32_t)(vmcpu->RBP & 0xFFFFFFFF)); - printf("EIP %.8"PRIX32" ", - (uint32_t)(vmcpu->RIP & 0xFFFFFFFF)); - printf("zf %.1d nf %.1d of %.1d cf %.1d\n", - (uint32_t)(vmcpu->zf & 0x1), - (uint32_t)(vmcpu->nf & 0x1), - (uint32_t)(vmcpu->of & 0x1), - (uint32_t)(vmcpu->cf & 0x1)); -} - -void dump_gpregs_32(vm_cpu_t* vmcpu) -{ - - printf("EAX %.8"PRIX32" EBX %.8"PRIX32" ECX %.8"PRIX32" EDX %.8"PRIX32" ", - (uint32_t)(vmcpu->RAX & 0xFFFFFFFF), - (uint32_t)(vmcpu->RBX & 0xFFFFFFFF), - (uint32_t)(vmcpu->RCX & 0xFFFFFFFF), - (uint32_t)(vmcpu->RDX & 0xFFFFFFFF)); - printf("ESI %.8"PRIX32" EDI %.8"PRIX32" ESP %.8"PRIX32" EBP %.8"PRIX32" ", - 
(uint32_t)(vmcpu->RSI & 0xFFFFFFFF), - (uint32_t)(vmcpu->RDI & 0xFFFFFFFF), - (uint32_t)(vmcpu->RSP & 0xFFFFFFFF), - (uint32_t)(vmcpu->RBP & 0xFFFFFFFF)); - printf("EIP %.8"PRIX32" ", - (uint32_t)(vmcpu->RIP & 0xFFFFFFFF)); - printf("zf %.1d nf %.1d of %.1d cf %.1d\n", - (uint32_t)(vmcpu->zf & 0x1), - (uint32_t)(vmcpu->nf & 0x1), - (uint32_t)(vmcpu->of & 0x1), - (uint32_t)(vmcpu->cf & 0x1)); - -} - -void dump_gpregs_64(vm_cpu_t* vmcpu) -{ - - printf("RAX %.16"PRIX64" RBX %.16"PRIX64" RCX %.16"PRIX64" RDX %.16"PRIX64" ", - vmcpu->RAX, vmcpu->RBX, vmcpu->RCX, vmcpu->RDX); - printf("RSI %.16"PRIX64" RDI %.16"PRIX64" RSP %.16"PRIX64" RBP %.16"PRIX64" ", - vmcpu->RSI, vmcpu->RDI, vmcpu->RSP, vmcpu->RBP); - printf("RIP %.16"PRIX64"\n", - vmcpu->RIP); - printf("R8 %.16"PRIX64" R9 %.16"PRIX64" R10 %.16"PRIX64" R11 %.16"PRIX64" ", - vmcpu->R8, vmcpu->R9, vmcpu->R10, vmcpu->R11); - printf("R12 %.16"PRIX64" R13 %.16"PRIX64" R14 %.16"PRIX64" R15 %.16"PRIX64" ", - vmcpu->R12, vmcpu->R13, vmcpu->R14, vmcpu->R15); - - - printf("zf %.1d nf %.1d of %.1d cf %.1d\n", - vmcpu->zf, vmcpu->nf, vmcpu->of, vmcpu->cf); - -} - -PyObject * cpu_dump_gpregs(JitCpu* self, PyObject* args) -{ - vm_cpu_t* vmcpu; - - vmcpu = self->cpu; - dump_gpregs_64(vmcpu); - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject * cpu_dump_gpregs_with_attrib(JitCpu* self, PyObject* args) -{ - vm_cpu_t* vmcpu; - PyObject *item1; - uint64_t attrib; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, attrib); - - vmcpu = self->cpu; - if (attrib == 16 || attrib == 32) - dump_gpregs_32(vmcpu); - else if (attrib == 64) - dump_gpregs_64(vmcpu); - else { - RAISE(PyExc_TypeError,"Bad attrib"); - } - - Py_INCREF(Py_None); - return Py_None; -} - - - -PyObject* cpu_set_exception(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, 
i); - - ((vm_cpu_t*)self->cpu)->exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_exception(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->exception_flags)); -} - -PyObject* cpu_set_interrupt_num(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - ((vm_cpu_t*)self->cpu)->interrupt_num = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_interrupt_num(JitCpu* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(((vm_cpu_t*)self->cpu)->interrupt_num)); -} - -PyObject* cpu_set_segm_base(JitCpu* self, PyObject* args) -{ - PyObject *item1, *item2; - uint64_t segm_num, segm_base; - - if (!PyArg_ParseTuple(args, "OO", &item1, &item2)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, segm_num); - PyGetInt(item2, segm_base); - ((vm_cpu_t*)self->cpu)->segm_base[segm_num] = segm_base; - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* cpu_get_segm_base(JitCpu* self, PyObject* args) -{ - PyObject *item1; - uint64_t segm_num; - PyObject* v; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - PyGetInt(item1, segm_num); - v = PyLong_FromLong((long)(((vm_cpu_t*)self->cpu)->segm_base[segm_num])); - return v; -} - -uint64_t segm2addr(JitCpu* jitcpu, uint64_t segm, uint64_t addr) -{ - return addr + ((vm_cpu_t*)jitcpu->cpu)->segm_base[segm]; -} - -void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) -{ - vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); -} - -void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src) -{ - vm_MEM_WRITE_16(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); -} - -void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src) -{ - vm_MEM_WRITE_32(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); -} 
- -void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src) -{ - vm_MEM_WRITE_64(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); -} - - - -PyObject* vm_set_mem(JitCpu *self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - if(!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&(((VmMngr*)self->pyvm)->vm_mngr), addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError,"arg must be str"); - - Py_INCREF(Py_None); - return Py_None; -} - -static PyMemberDef JitCpu_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef JitCpu_methods[] = { - {"init_regs", (PyCFunction)cpu_init_regs, METH_NOARGS, - "X"}, - {"dump_gpregs", (PyCFunction)cpu_dump_gpregs, METH_NOARGS, - "X"}, - {"dump_gpregs_with_attrib", (PyCFunction)cpu_dump_gpregs_with_attrib, METH_VARARGS, - "X"}, - {"get_gpreg", (PyCFunction)cpu_get_gpreg, METH_NOARGS, - "X"}, - {"set_gpreg", (PyCFunction)cpu_set_gpreg, METH_VARARGS, - "X"}, - {"get_segm_base", (PyCFunction)cpu_get_segm_base, METH_VARARGS, - "X"}, - {"set_segm_base", (PyCFunction)cpu_set_segm_base, METH_VARARGS, - "X"}, - {"get_exception", (PyCFunction)cpu_get_exception, METH_VARARGS, - "X"}, - {"set_exception", (PyCFunction)cpu_set_exception, METH_VARARGS, - "X"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, - "X"}, - {"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, - "X"}, - {"get_interrupt_num", (PyCFunction)cpu_get_interrupt_num, METH_VARARGS, - "X"}, - {"set_interrupt_num", (PyCFunction)cpu_set_interrupt_num, METH_VARARGS, - "X"}, - {NULL} /* Sentinel */ -}; - -static int -JitCpu_init(JitCpu *self, PyObject *args, PyObject *kwds) -{ - self->cpu 
= malloc(sizeof(vm_cpu_t)); - if (self->cpu == NULL) { - fprintf(stderr, "cannot alloc vm_cpu_t\n"); - exit(EXIT_FAILURE); - } - return 0; -} - -#define getset_reg_E_u32(regname) \ - static PyObject *JitCpu_get_E ## regname (JitCpu *self, void *closure) \ - { \ - return PyLong_FromUnsignedLongLong((uint32_t)(((vm_cpu_t*)(self->cpu))->R ## regname & 0xFFFFFFFF )); \ - } \ - static int JitCpu_set_E ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - uint64_t val; \ - PyGetInt_retneg(value, val); \ - val &= 0xFFFFFFFF; \ - val |= ((vm_cpu_t*)(self->cpu))->R ##regname & 0xFFFFFFFF00000000ULL; \ - ((vm_cpu_t*)(self->cpu))->R ## regname = val; \ - return 0; \ - } - - - -#define getset_reg_R_u16(regname) \ - static PyObject *JitCpu_get_ ## regname (JitCpu *self, void *closure) \ - { \ - return PyLong_FromUnsignedLongLong((uint16_t)(((vm_cpu_t*)(self->cpu))->R ## regname & 0xFFFF )); \ - } \ - static int JitCpu_set_ ## regname (JitCpu *self, PyObject *value, void *closure) \ - { \ - uint64_t val; \ - PyGetInt_retneg(value, val); \ - val &= 0xFFFF; \ - val |= ((vm_cpu_t*)(self->cpu))->R ##regname & 0xFFFFFFFFFFFF0000ULL; \ - ((vm_cpu_t*)(self->cpu))->R ## regname = val; \ - return 0; \ - } - - -getset_reg_u64(RAX); -getset_reg_u64(RBX); -getset_reg_u64(RCX); -getset_reg_u64(RDX); -getset_reg_u64(RSI); -getset_reg_u64(RDI); -getset_reg_u64(RSP); -getset_reg_u64(RBP); - -getset_reg_u64(R8); -getset_reg_u64(R9); -getset_reg_u64(R10); -getset_reg_u64(R11); -getset_reg_u64(R12); -getset_reg_u64(R13); -getset_reg_u64(R14); -getset_reg_u64(R15); - -getset_reg_u64(RIP); - -getset_reg_u64(zf); -getset_reg_u64(nf); -getset_reg_u64(pf); -getset_reg_u64(of); -getset_reg_u64(cf); -getset_reg_u64(af); -getset_reg_u64(df); - - -getset_reg_u16(ES); -getset_reg_u16(CS); -getset_reg_u16(SS); -getset_reg_u16(DS); -getset_reg_u16(FS); -getset_reg_u16(GS); - -getset_reg_E_u32(AX); -getset_reg_E_u32(BX); -getset_reg_E_u32(CX); -getset_reg_E_u32(DX); -getset_reg_E_u32(SI); 
-getset_reg_E_u32(DI); -getset_reg_E_u32(SP); -getset_reg_E_u32(BP); -getset_reg_E_u32(IP); - -getset_reg_R_u16(AX); -getset_reg_R_u16(BX); -getset_reg_R_u16(CX); -getset_reg_R_u16(DX); -getset_reg_R_u16(SI); -getset_reg_R_u16(DI); -getset_reg_R_u16(SP); -getset_reg_R_u16(BP); - -getset_reg_R_u16(IP); - -getset_reg_u64(MM0); -getset_reg_u64(MM1); -getset_reg_u64(MM2); -getset_reg_u64(MM3); -getset_reg_u64(MM4); -getset_reg_u64(MM5); -getset_reg_u64(MM6); -getset_reg_u64(MM7); - -getset_reg_bn(XMM0, 128); -getset_reg_bn(XMM1, 128); -getset_reg_bn(XMM2, 128); -getset_reg_bn(XMM3, 128); -getset_reg_bn(XMM4, 128); -getset_reg_bn(XMM5, 128); -getset_reg_bn(XMM6, 128); -getset_reg_bn(XMM7, 128); -getset_reg_bn(XMM8, 128); -getset_reg_bn(XMM9, 128); -getset_reg_bn(XMM10, 128); -getset_reg_bn(XMM11, 128); -getset_reg_bn(XMM12, 128); -getset_reg_bn(XMM13, 128); -getset_reg_bn(XMM14, 128); -getset_reg_bn(XMM15, 128); - -getset_reg_u64(tsc); - -getset_reg_u32(exception_flags); -getset_reg_u32(interrupt_num); - - -PyObject* get_gpreg_offset_all(void) -{ - PyObject *dict = PyDict_New(); - PyObject *o; - get_reg_off(exception_flags); - - get_reg_off(RAX); - get_reg_off(RBX); - get_reg_off(RCX); - get_reg_off(RDX); - get_reg_off(RSI); - get_reg_off(RDI); - get_reg_off(RSP); - get_reg_off(RBP); - get_reg_off(R8); - get_reg_off(R9); - get_reg_off(R10); - get_reg_off(R11); - get_reg_off(R12); - get_reg_off(R13); - get_reg_off(R14); - get_reg_off(R15); - get_reg_off(RIP); - get_reg_off(zf); - get_reg_off(nf); - get_reg_off(pf); - get_reg_off(of); - get_reg_off(cf); - get_reg_off(af); - get_reg_off(df); - get_reg_off(tf); - get_reg_off(i_f); - get_reg_off(iopl_f); - get_reg_off(nt); - get_reg_off(rf); - get_reg_off(vm); - get_reg_off(ac); - get_reg_off(vif); - get_reg_off(vip); - get_reg_off(i_d); - get_reg_off(my_tick); - get_reg_off(cond); - - get_reg_off(float_st0); - get_reg_off(float_st1); - get_reg_off(float_st2); - get_reg_off(float_st3); - get_reg_off(float_st4); - 
get_reg_off(float_st5); - get_reg_off(float_st6); - get_reg_off(float_st7); - - get_reg_off(ES); - get_reg_off(CS); - get_reg_off(SS); - get_reg_off(DS); - get_reg_off(FS); - get_reg_off(GS); - - get_reg_off(MM0); - get_reg_off(MM1); - get_reg_off(MM2); - get_reg_off(MM3); - get_reg_off(MM4); - get_reg_off(MM5); - get_reg_off(MM6); - get_reg_off(MM7); - - get_reg_off(XMM0); - get_reg_off(XMM1); - get_reg_off(XMM2); - get_reg_off(XMM3); - get_reg_off(XMM4); - get_reg_off(XMM5); - get_reg_off(XMM6); - get_reg_off(XMM7); - get_reg_off(XMM8); - get_reg_off(XMM9); - get_reg_off(XMM10); - get_reg_off(XMM11); - get_reg_off(XMM12); - get_reg_off(XMM13); - get_reg_off(XMM14); - get_reg_off(XMM15); - - get_reg_off(tsc); - - get_reg_off(interrupt_num); - get_reg_off(exception_flags); - - get_reg_off(float_stack_ptr); - get_reg_off(reg_float_cs); - get_reg_off(reg_float_eip); - get_reg_off(reg_float_control); - - return dict; -} - - -static PyGetSetDef JitCpu_getseters[] = { - {"vmmngr", - (getter)JitCpu_get_vmmngr, (setter)JitCpu_set_vmmngr, - "vmmngr", - NULL}, - - {"jitter", - (getter)JitCpu_get_jitter, (setter)JitCpu_set_jitter, - "jitter", - NULL}, - - - {"RAX", (getter)JitCpu_get_RAX, (setter)JitCpu_set_RAX, "RAX", NULL}, - {"RBX", (getter)JitCpu_get_RBX, (setter)JitCpu_set_RBX, "RBX", NULL}, - {"RCX", (getter)JitCpu_get_RCX, (setter)JitCpu_set_RCX, "RCX", NULL}, - {"RDX", (getter)JitCpu_get_RDX, (setter)JitCpu_set_RDX, "RDX", NULL}, - {"RSI", (getter)JitCpu_get_RSI, (setter)JitCpu_set_RSI, "RSI", NULL}, - {"RDI", (getter)JitCpu_get_RDI, (setter)JitCpu_set_RDI, "RDI", NULL}, - {"RSP", (getter)JitCpu_get_RSP, (setter)JitCpu_set_RSP, "RSP", NULL}, - {"RBP", (getter)JitCpu_get_RBP, (setter)JitCpu_set_RBP, "RBP", NULL}, - {"R8", (getter)JitCpu_get_R8, (setter)JitCpu_set_R8, "R8", NULL}, - {"R9", (getter)JitCpu_get_R9, (setter)JitCpu_set_R9, "R9", NULL}, - {"R10", (getter)JitCpu_get_R10, (setter)JitCpu_set_R10, "R10", NULL}, - {"R11", (getter)JitCpu_get_R11, 
(setter)JitCpu_set_R11, "R11", NULL}, - {"R12", (getter)JitCpu_get_R12, (setter)JitCpu_set_R12, "R12", NULL}, - {"R13", (getter)JitCpu_get_R13, (setter)JitCpu_set_R13, "R13", NULL}, - {"R14", (getter)JitCpu_get_R14, (setter)JitCpu_set_R14, "R14", NULL}, - {"R15", (getter)JitCpu_get_R15, (setter)JitCpu_set_R15, "R15", NULL}, - {"RIP", (getter)JitCpu_get_RIP, (setter)JitCpu_set_RIP, "RIP", NULL}, - {"zf", (getter)JitCpu_get_zf, (setter)JitCpu_set_zf, "zf", NULL}, - {"nf", (getter)JitCpu_get_nf, (setter)JitCpu_set_nf, "nf", NULL}, - {"pf", (getter)JitCpu_get_pf, (setter)JitCpu_set_pf, "pf", NULL}, - {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, - {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, - {"af", (getter)JitCpu_get_af, (setter)JitCpu_set_af, "af", NULL}, - {"df", (getter)JitCpu_get_df, (setter)JitCpu_set_df, "df", NULL}, - {"ES", (getter)JitCpu_get_ES, (setter)JitCpu_set_ES, "ES", NULL}, - {"CS", (getter)JitCpu_get_CS, (setter)JitCpu_set_CS, "CS", NULL}, - {"SS", (getter)JitCpu_get_SS, (setter)JitCpu_set_SS, "SS", NULL}, - {"DS", (getter)JitCpu_get_DS, (setter)JitCpu_set_DS, "DS", NULL}, - {"FS", (getter)JitCpu_get_FS, (setter)JitCpu_set_FS, "FS", NULL}, - {"GS", (getter)JitCpu_get_GS, (setter)JitCpu_set_GS, "GS", NULL}, - - {"EAX", (getter)JitCpu_get_EAX, (setter)JitCpu_set_EAX, "EAX", NULL}, - {"EBX", (getter)JitCpu_get_EBX, (setter)JitCpu_set_EBX, "EBX", NULL}, - {"ECX", (getter)JitCpu_get_ECX, (setter)JitCpu_set_ECX, "ECX", NULL}, - {"EDX", (getter)JitCpu_get_EDX, (setter)JitCpu_set_EDX, "EDX", NULL}, - {"ESI", (getter)JitCpu_get_ESI, (setter)JitCpu_set_ESI, "ESI", NULL}, - {"EDI", (getter)JitCpu_get_EDI, (setter)JitCpu_set_EDI, "EDI", NULL}, - {"ESP", (getter)JitCpu_get_ESP, (setter)JitCpu_set_ESP, "ESP", NULL}, - {"EBP", (getter)JitCpu_get_EBP, (setter)JitCpu_set_EBP, "EBP", NULL}, - {"EIP", (getter)JitCpu_get_EIP, (setter)JitCpu_set_EIP, "EIP", NULL}, - - {"AX", (getter)JitCpu_get_AX, (setter)JitCpu_set_AX, "AX", 
NULL}, - {"BX", (getter)JitCpu_get_BX, (setter)JitCpu_set_BX, "BX", NULL}, - {"CX", (getter)JitCpu_get_CX, (setter)JitCpu_set_CX, "CX", NULL}, - {"DX", (getter)JitCpu_get_DX, (setter)JitCpu_set_DX, "DX", NULL}, - {"SI", (getter)JitCpu_get_SI, (setter)JitCpu_set_SI, "SI", NULL}, - {"DI", (getter)JitCpu_get_DI, (setter)JitCpu_set_DI, "DI", NULL}, - {"SP", (getter)JitCpu_get_SP, (setter)JitCpu_set_SP, "SP", NULL}, - {"BP", (getter)JitCpu_get_BP, (setter)JitCpu_set_BP, "BP", NULL}, - - {"IP", (getter)JitCpu_get_IP, (setter)JitCpu_set_IP, "IP", NULL}, - - {"MM0", (getter)JitCpu_get_MM0, (setter)JitCpu_set_MM0, "MM0", NULL}, - {"MM1", (getter)JitCpu_get_MM1, (setter)JitCpu_set_MM1, "MM1", NULL}, - {"MM2", (getter)JitCpu_get_MM2, (setter)JitCpu_set_MM2, "MM2", NULL}, - {"MM3", (getter)JitCpu_get_MM3, (setter)JitCpu_set_MM3, "MM3", NULL}, - {"MM4", (getter)JitCpu_get_MM4, (setter)JitCpu_set_MM4, "MM4", NULL}, - {"MM5", (getter)JitCpu_get_MM5, (setter)JitCpu_set_MM5, "MM5", NULL}, - {"MM6", (getter)JitCpu_get_MM6, (setter)JitCpu_set_MM6, "MM6", NULL}, - {"MM7", (getter)JitCpu_get_MM7, (setter)JitCpu_set_MM7, "MM7", NULL}, - - {"XMM0", (getter)JitCpu_get_XMM0, (setter)JitCpu_set_XMM0, "XMM0", NULL}, - {"XMM1", (getter)JitCpu_get_XMM1, (setter)JitCpu_set_XMM1, "XMM1", NULL}, - {"XMM2", (getter)JitCpu_get_XMM2, (setter)JitCpu_set_XMM2, "XMM2", NULL}, - {"XMM3", (getter)JitCpu_get_XMM3, (setter)JitCpu_set_XMM3, "XMM3", NULL}, - {"XMM4", (getter)JitCpu_get_XMM4, (setter)JitCpu_set_XMM4, "XMM4", NULL}, - {"XMM5", (getter)JitCpu_get_XMM5, (setter)JitCpu_set_XMM5, "XMM5", NULL}, - {"XMM6", (getter)JitCpu_get_XMM6, (setter)JitCpu_set_XMM6, "XMM6", NULL}, - {"XMM7", (getter)JitCpu_get_XMM7, (setter)JitCpu_set_XMM7, "XMM7", NULL}, - {"XMM8", (getter)JitCpu_get_XMM8, (setter)JitCpu_set_XMM8, "XMM8", NULL}, - {"XMM9", (getter)JitCpu_get_XMM9, (setter)JitCpu_set_XMM9, "XMM9", NULL}, - {"XMM10", (getter)JitCpu_get_XMM10, (setter)JitCpu_set_XMM10, "XMM10", NULL}, - {"XMM11", 
(getter)JitCpu_get_XMM11, (setter)JitCpu_set_XMM11, "XMM11", NULL}, - {"XMM12", (getter)JitCpu_get_XMM12, (setter)JitCpu_set_XMM12, "XMM12", NULL}, - {"XMM13", (getter)JitCpu_get_XMM13, (setter)JitCpu_set_XMM13, "XMM13", NULL}, - {"XMM14", (getter)JitCpu_get_XMM14, (setter)JitCpu_set_XMM14, "XMM14", NULL}, - {"XMM15", (getter)JitCpu_get_XMM15, (setter)JitCpu_set_XMM15, "XMM15", NULL}, - - {"tsc", (getter)JitCpu_get_tsc, (setter)JitCpu_set_tsc, "tsc", NULL}, - - {"exception_flags", (getter)JitCpu_get_exception_flags, (setter)JitCpu_set_exception_flags, "exception_flags", NULL}, - {"interrupt_num", (getter)JitCpu_get_interrupt_num, (setter)JitCpu_set_interrupt_num, "interrupt_num", NULL}, - - - {NULL} /* Sentinel */ -}; - - -static PyTypeObject JitCpuType = { - PyVarObject_HEAD_INIT(NULL, 0) - "JitCore_x86.JitCpu", /*tp_name*/ - sizeof(JitCpu), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)JitCpu_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - 0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "JitCpu objects", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - JitCpu_methods, /* tp_methods */ - JitCpu_members, /* tp_members */ - JitCpu_getseters, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)JitCpu_init, /* tp_init */ - 0, /* tp_alloc */ - JitCpu_new, /* tp_new */ -}; - - - -static PyMethodDef JitCore_x86_Methods[] = { - - /* - - */ - {"get_gpreg_offset_all", (PyCFunction)get_gpreg_offset_all, METH_NOARGS}, - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - - -MOD_INIT(JitCore_x86) -{ - PyObject *module; - - 
MOD_DEF(module, "JitCore_x86", "JitCore_x86 module", JitCore_x86_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&JitCpuType) < 0) - return NULL; - - Py_INCREF(&JitCpuType); - if (PyModule_AddObject(module, "JitCpu", (PyObject *)&JitCpuType) < 0) - return NULL; - - return module; -} diff --git a/miasm2/jitter/arch/JitCore_x86.h b/miasm2/jitter/arch/JitCore_x86.h deleted file mode 100644 index 27d94d7c..00000000 --- a/miasm2/jitter/arch/JitCore_x86.h +++ /dev/null @@ -1,136 +0,0 @@ -#include "../bn.h" - -#if _WIN32 -#define _MIASM_EXPORT __declspec(dllexport) -#else -#define _MIASM_EXPORT -#endif - -typedef struct { - uint32_t exception_flags; - uint32_t interrupt_num; - - - /* gpregs */ - uint64_t RAX; - uint64_t RBX; - uint64_t RCX; - uint64_t RDX; - uint64_t RSI; - uint64_t RDI; - uint64_t RSP; - uint64_t RBP; - uint64_t R8; - uint64_t R9; - uint64_t R10; - uint64_t R11; - uint64_t R12; - uint64_t R13; - uint64_t R14; - uint64_t R15; - - uint64_t RIP; - - /* eflag */ - uint8_t zf; - uint8_t nf; - uint8_t pf; - uint8_t of; - uint8_t cf; - uint8_t af; - uint8_t df; - - uint8_t tf; - uint8_t i_f; - uint8_t iopl_f; - uint8_t nt; - uint8_t rf; - uint8_t vm; - uint8_t ac; - uint8_t vif; - uint8_t vip; - uint8_t i_d; - - bn_t my_tick; - - bn_t cond; - - uint64_t float_st0; - uint64_t float_st1; - uint64_t float_st2; - uint64_t float_st3; - uint64_t float_st4; - uint64_t float_st5; - uint64_t float_st6; - uint64_t float_st7; - - unsigned int float_c0; - unsigned int float_c1; - unsigned int float_c2; - unsigned int float_c3; - - - unsigned int float_stack_ptr; - - unsigned int reg_float_control; - - unsigned int reg_float_eip; - unsigned int reg_float_cs; - unsigned int reg_float_address; - unsigned int reg_float_ds; - - - uint64_t tsc; - - - uint16_t ES; - uint16_t CS; - uint16_t SS; - uint16_t DS; - uint16_t FS; - uint16_t GS; - - unsigned int cr0; - unsigned int cr3; - - uint64_t MM0; - uint64_t MM1; - uint64_t MM2; - uint64_t MM3; - uint64_t 
MM4; - uint64_t MM5; - uint64_t MM6; - uint64_t MM7; - - /* SSE */ - bn_t XMM0; - bn_t XMM1; - bn_t XMM2; - bn_t XMM3; - bn_t XMM4; - bn_t XMM5; - bn_t XMM6; - bn_t XMM7; - bn_t XMM8; - bn_t XMM9; - bn_t XMM10; - bn_t XMM11; - bn_t XMM12; - bn_t XMM13; - bn_t XMM14; - bn_t XMM15; - - uint32_t segm_base[0x10000]; - -}vm_cpu_t; - -_MIASM_EXPORT void dump_gpregs_32(vm_cpu_t* vmcpu); -_MIASM_EXPORT void dump_gpregs_64(vm_cpu_t* vmcpu); -_MIASM_EXPORT uint64_t segm2addr(JitCpu* jitcpu, uint64_t segm, uint64_t addr); - -_MIASM_EXPORT void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src); -_MIASM_EXPORT void MEM_WRITE_16(JitCpu* jitcpu, uint64_t addr, uint16_t src); -_MIASM_EXPORT void MEM_WRITE_32(JitCpu* jitcpu, uint64_t addr, uint32_t src); -_MIASM_EXPORT void MEM_WRITE_64(JitCpu* jitcpu, uint64_t addr, uint64_t src); - -#define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/arch/__init__.py b/miasm2/jitter/arch/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/miasm2/jitter/bn.c b/miasm2/jitter/bn.c deleted file mode 100644 index dd4f34ef..00000000 --- a/miasm2/jitter/bn.c +++ /dev/null @@ -1,933 +0,0 @@ -/* - -Big number library - arithmetic on multiple-precision unsigned integers. - -This library is an implementation of arithmetic on arbitrarily large integers. - -The difference between this and other implementations, is that the data structure -has optimal memory utilization (i.e. a 1024 bit integer takes up 128 bytes RAM), -and all memory is allocated statically: no dynamic allocation for better or worse. - -Primary goals are correctness, clarity of code and clean, portable implementation. -Secondary goal is a memory footprint small enough to make it suitable for use in -embedded applications. - - -The current state is correct functionality and adequate performance. -There may well be room for performance-optimizations and improvements. 
- -Source: https://github.com/kokke/tiny-bignum-c - -Code slightly modified to support ast generation calculus style from Expr. - -*/ - -#include -#include -#include -#include -#include "bn.h" - -/* Functions for shifting number in-place. */ -static bn_t _lshift_one_bit(bn_t a); -static bn_t _rshift_one_bit(bn_t a); -static bn_t _lshift_word(bn_t a, int nwords); -static bn_t _rshift_word(bn_t a, int nwords); - - - - -/* Public / Exported functions. */ -bn_t bignum_init(void) -{ - int i; - bn_t n; - - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - n.array[i] = 0; - } - - return n; -} - - -bn_t bignum_from_int(DTYPE_TMP i) -{ - bn_t n; - - n = bignum_init(); - /* Endianness issue if machine is not little-endian? */ -#ifdef WORD_SIZE - #if (WORD_SIZE == 1) - n.array[0] = (i & 0x000000ff); - n.array[1] = (i & 0x0000ff00) >> 8; - n.array[2] = (i & 0x00ff0000) >> 16; - n.array[3] = (i & 0xff000000) >> 24; - #elif (WORD_SIZE == 2) - n.array[0] = (i & 0x0000ffff); - n.array[1] = (i & 0xffff0000) >> 16; - #elif (WORD_SIZE == 4) - n.array[0] = i; - DTYPE_TMP num_32 = 32; - DTYPE_TMP tmp = i >> num_32; /* bit-shift with U64 operands to force 64-bit results */ - n.array[1] = tmp; - #endif -#endif - - return n; -} - - - -bn_t bignum_from_uint64(uint64_t i) -{ - bn_t n; - n = bignum_init(); - /* Endianness issue if machine is not little-endian? */ -#ifdef WORD_SIZE - #if (WORD_SIZE == 1) - n.array[0] = (i & 0x000000ff); - n.array[1] = (i & 0x0000ff00) >> 8; - n.array[2] = (i & 0x00ff0000) >> 16; - n.array[3] = (i & 0xff000000) >> 24; - #elif (WORD_SIZE == 2) - n.array[0] = (i & 0x0000ffff); - n.array[1] = (i & 0xffff0000) >> 16; - #elif (WORD_SIZE == 4) - n.array[0] = i; - DTYPE_TMP num_32 = 32; - DTYPE_TMP tmp = i >> num_32; /* bit-shift with U64 operands to force 64-bit results */ - n.array[1] = tmp; - #endif -#endif - - return n; -} - - - - - -int bignum_to_int(bn_t n) -{ - - int ret = 0; - - /* Endianness issue if machine is not little-endian? 
*/ -#if (WORD_SIZE == 1) - ret += n.array[0]; - ret += n.array[1] << 8; - ret += n.array[2] << 16; - ret += n.array[3] << 24; -#elif (WORD_SIZE == 2) - ret += n.array[0]; - ret += n.array[1] << 16; -#elif (WORD_SIZE == 4) - ret += n.array[0]; -#endif - - - return ret; -} - - -uint64_t bignum_to_uint64(bn_t n) -{ - - uint64_t ret = 0; - - /* Endianness issue if machine is not little-endian? */ -#if (WORD_SIZE == 1) - ret += (uint64_t)(n.array[0]); - ret += (uint64_t)(n.array[1]) << 8; - ret += (uint64_t)(n.array[2]) << 16; - ret += (uint64_t)(n.array[3]) << 24; - - ret += (uint64_t)(n.array[4]) << 32; - ret += (uint64_t)(n.array[5]) << 40; - ret += (uint64_t)(n.array[6]) << 48; - ret += (uint64_t)(n.array[7]) << 56; - - -#elif (WORD_SIZE == 2) - ret += (uint64_t)(n.array[0]); - ret += (uint64_t)(n.array[1]) << 16; - ret += (uint64_t)(n.array[2]) << 32; - ret += (uint64_t)(n.array[3]) << 48; -#elif (WORD_SIZE == 4) - ret += n.array[0]; - ret += (uint64_t)(n.array[1]) << 32; -#endif - - return ret; -} - - - - -bn_t bignum_from_string(char* str, int nbytes) -{ - - require(str, "str is null"); - require(nbytes > 0, "nbytes must be positive"); - require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes"); - - bn_t n; - - n = bignum_init(); - - DTYPE tmp; /* DTYPE is defined in bn.h - uint{8,16,32,64}_t */ - int i = nbytes - (2 * WORD_SIZE); /* index into string */ - int j = 0; /* index into array */ - - /* reading last hex-byte "MSB" from string first -> big endian */ - /* MSB ~= most significant byte / block ? :) */ - while (i >= 0) { - tmp = 0; - sscanf(&str[i], SSCANF_FORMAT_STR, &tmp); - n.array[j] = tmp; - i -= (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) back in the string. */ - j += 1; /* step one element forward in the array. 
*/ - } - - return n; -} - -void bignum_to_string(bn_t n, char* str, int nbytes) -{ - require(str, "str is null"); - require(nbytes > 0, "nbytes must be positive"); - require((nbytes & 1) == 0, "string format must be in hex -> equal number of bytes"); - - int j = BN_ARRAY_SIZE - 1; /* index into array - reading "MSB" first -> big-endian */ - int i = 0; /* index into string representation. */ - - /* reading last array-element "MSB" first -> big endian */ - while ((j >= 0) && (nbytes > (i + 1))) { - sprintf(&str[i], SPRINTF_FORMAT_STR, n.array[j]); - i += (2 * WORD_SIZE); /* step WORD_SIZE hex-byte(s) forward in the string. */ - j -= 1; /* step one element back in the array. */ - } - - /* Zero-terminate string */ - str[i] = 0; -} - - - -bn_t bignum_dec(bn_t n) -{ - //require(n, "n is null"); - - DTYPE tmp; /* copy of n */ - DTYPE res; - - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - tmp = n.array[i]; - res = tmp - 1; - n.array[i] = res; - - if (!(res > tmp)) { - break; - } - } - - return n; -} - - -bn_t bignum_inc(bn_t n) -{ - //require(n, "n is null"); - - DTYPE res; - DTYPE_TMP tmp; /* copy of n */ - - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - tmp = n.array[i]; - res = tmp + 1; - n.array[i] = res; - - if (res > tmp) { - break; - } - } - - return n; -} - - - -bn_t bignum_add(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - bn_t c; - - DTYPE_TMP tmp; - int carry = 0; - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - tmp = (DTYPE_TMP)a.array[i] + b.array[i] + carry; - carry = (tmp > MAX_VAL); - c.array[i] = (tmp & MAX_VAL); - } - - return c; -} - - -bn_t bignum_sub(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - bn_t c; - - DTYPE_TMP res; - DTYPE_TMP tmp1; - DTYPE_TMP tmp2; - int borrow = 0; - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - tmp1 = (DTYPE_TMP)a.array[i] + (MAX_VAL + 1); /* + number_base */ - tmp2 = (DTYPE_TMP)b.array[i] + 
borrow;; - res = (tmp1 - tmp2); - c.array[i] = (DTYPE)(res & MAX_VAL); /* "modulo number_base" == "% (number_base - 1)" if number_base is 2^N */ - borrow = (res <= MAX_VAL); - } - - return c; -} - - - - -bn_t bignum_mul(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - - bn_t c; - bn_t row; - bn_t tmp; - int i, j; - - c = bignum_init(); - - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - row = bignum_init(); - - for (j = 0; j < BN_ARRAY_SIZE; ++j) { - if (i + j < BN_ARRAY_SIZE) { - tmp = bignum_init(); - DTYPE_TMP intermediate = ((DTYPE_TMP)a.array[i] * (DTYPE_TMP)b.array[j]); - tmp = bignum_from_int(intermediate); - tmp = _lshift_word(tmp, i + j); - row = bignum_add(tmp, row); - } - } - c = bignum_add(c, row); - } - - return c; -} - - -bn_t bignum_udiv(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - - bn_t c; - bn_t current; - bn_t denom; - bn_t tmp; - - current = bignum_from_int(1); // int current = 1; - denom = bignum_assign(b); // denom = b - tmp = bignum_assign(a); // tmp = a - - const DTYPE_TMP half_max = 1 + (DTYPE_TMP)(MAX_VAL / 2); - bool overflow = false; - - while (bignum_cmp(denom, a) != LARGER) { // while (denom <= a) { - if (denom.array[BN_ARRAY_SIZE - 1] >= half_max) { - overflow = true; - break; - } - current = _lshift_one_bit(current); // current <<= 1; - denom = _lshift_one_bit(denom); // denom <<= 1; - } - if (!overflow) { - denom = _rshift_one_bit(denom); // denom >>= 1; - current = _rshift_one_bit(current); // current >>= 1; - } - c = bignum_init(); // int answer = 0; - - while (!bignum_is_zero(current)) { // while (current != 0) - if (bignum_cmp(tmp, denom) != SMALLER) { // if (dividend >= denom) - tmp = bignum_sub(tmp, denom); // dividend -= denom; - c = bignum_or(c, current); // answer |= current; - } - current = _rshift_one_bit(current); // current >>= 1; - denom = _rshift_one_bit(denom); // denom >>= 1; - } // return answer; - - 
return c; -} - - - -bn_t bignum_lshift(bn_t a, int nbits) -{ - //require(a, "a is null"); - //require(b, "b is null"); - require(nbits >= 0, "no negative shifts"); - - bn_t b; - - b = bignum_assign(a); - /* Handle shift in multiples of word-size */ - const int nbits_pr_word = (WORD_SIZE * 8); - int nwords = nbits / nbits_pr_word; - if (nwords != 0) { - b = _lshift_word(b, nwords); - nbits -= (nwords * nbits_pr_word); - } - - if (nbits != 0) { - int i; - for (i = (BN_ARRAY_SIZE - 1); i > 0; --i) { - b.array[i] = (b.array[i] << nbits) | (b.array[i - 1] >> ((8 * WORD_SIZE) - nbits)); - } - b.array[i] <<= nbits; - } - - return b; -} - - -bn_t bignum_rshift(bn_t a, int nbits) -{ - //require(a, "a is null"); - //require(b, "b is null"); - require(nbits >= 0, "no negative shifts"); - - bn_t b; - - b = bignum_assign(a); - /* Handle shift in multiples of word-size */ - const int nbits_pr_word = (WORD_SIZE * 8); - int nwords = nbits / nbits_pr_word; - - if (nwords != 0) { - b = _rshift_word(b, nwords); - nbits -= (nwords * nbits_pr_word); - } - if (nbits != 0) { - int i; - for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i) { - b.array[i] = (b.array[i] >> nbits) | (b.array[i + 1] << ((8 * WORD_SIZE) - nbits)); - } - b.array[i] >>= nbits; - } - - return b; -} - - - -bn_t bignum_a_rshift(bn_t a, int size, int nbits) -{ - //require(a, "a is null"); - //require(b, "b is null"); - require(nbits >= 0, "no negative shifts"); - require(size > 0, "no negative shifts"); - - bn_t b; - bn_t tmp, mask; - - b = bignum_rshift(a, nbits); - - /* get sign bit */ - tmp = bignum_rshift(a, size - 1); - tmp = bignum_mask(tmp, 1); - - if (!bignum_is_zero(tmp)) { - /* generate sign propag */ - tmp = bignum_from_int(1); - tmp = bignum_lshift(tmp, size); - tmp = bignum_dec(tmp); - - mask = bignum_from_int(1); - mask = bignum_lshift(mask, size - nbits); - mask = bignum_dec(mask); - - tmp = bignum_xor(tmp, mask); - b = bignum_or(b, tmp); - } - - return b; -} - -bn_t bignum_not(bn_t a) -{ - int i; - bn_t b; - - 
for (i = 0; i < BN_ARRAY_SIZE; ++i) { - b.array[i] = ~a.array[i]; - } - - return b; -} - - - -bn_t bignum_umod(bn_t a, bn_t b) -{ - /* - Take divmod and throw away div part - */ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - - bn_t c, d; - bn_t tmp; - - /* c = (a / b) */ - c = bignum_udiv(a, b); - /* tmp = (c * b) */ - tmp = bignum_mul(c, b); - /* c = a - tmp */ - d = bignum_sub(a, tmp); - return d; -} - - -bn_t bignum_and(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - bn_t c; - - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - c.array[i] = (a.array[i] & b.array[i]); - } - - return c; -} - - -bn_t bignum_or(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - bn_t c; - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - c.array[i] = (a.array[i] | b.array[i]); - } - - return c; -} - - -bn_t bignum_xor(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - //require(c, "c is null"); - - bn_t c; - int i; - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - c.array[i] = (a.array[i] ^ b.array[i]); - } - return c; -} - - -int bignum_cmp(bn_t a, bn_t b) -{ - //require(a, "a is null"); - //require(b, "b is null"); - - int i = BN_ARRAY_SIZE; - do { - i -= 1; /* Decrement first, to start with last array element */ - if (a.array[i] > b.array[i]) { - return LARGER; - } - else if (a.array[i] < b.array[i]) { - return SMALLER; - } - } - while (i != 0); - - return EQUAL; -} - - -/* Signed compare bn */ -int bignum_cmp_signed(bn_t a, bn_t b) -{ - int i = BN_ARRAY_SIZE; - do { - i -= 1; /* Decrement first, to start with last array element */ - if ((DTYPE_SIGNED)a.array[i] > (DTYPE_SIGNED)b.array[i]) { - return LARGER; - } - else if ((DTYPE_SIGNED)a.array[i] < (DTYPE_SIGNED)b.array[i]) { - return SMALLER; - } - } - while (i != 0); - - return EQUAL; -} - - -/* Unsigned compare bn */ -int bignum_cmp_unsigned(bn_t a, 
bn_t b) -{ - return bignum_cmp(a, b); -} - - -/* Return 1 if a == b else 0 */ -int bignum_is_equal(bn_t a, bn_t b) -{ - int ret; - ret = bignum_cmp_unsigned(a, b); - if (ret == EQUAL) - return 1; - else - return 0; -} - - -/* Return 1 if a = 0, "no negative shifts"); - - if (nwords >= BN_ARRAY_SIZE) { - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - a.array[i] = 0; - } - return a; - } - - for (i = 0; i < BN_ARRAY_SIZE - nwords; ++i) { - a.array[i] = a.array[i + nwords]; - } - - for (; i < BN_ARRAY_SIZE; ++i) { - a.array[i] = 0; - } - - return a; -} - - -static bn_t _lshift_word(bn_t a, int nwords) -{ - //require(a, "a is null"); - require(nwords >= 0, "no negative shifts"); - - int i; - - if (nwords >= BN_ARRAY_SIZE) { - for (i = 0; i < BN_ARRAY_SIZE; ++i) { - a.array[i] = 0; - } - return a; - } - - /* Shift whole words */ - for (i = (BN_ARRAY_SIZE - 1); i >= nwords; --i) { - a.array[i] = a.array[i - nwords]; - } - /* Zero pad shifted words. */ - for (; i >= 0; --i) { - a.array[i] = 0; - } - - return a; -} - - -static bn_t _lshift_one_bit(bn_t a) -{ - //require(a, "a is null"); - - int i; - for (i = (BN_ARRAY_SIZE - 1); i > 0; --i) { - a.array[i] = (a.array[i] << 1) | (a.array[i - 1] >> ((8 * WORD_SIZE) - 1)); - } - a.array[0] <<= 1; - - return a; -} - - -static bn_t _rshift_one_bit(bn_t a) -{ - //require(a, "a is null"); - - int i; - for (i = 0; i < (BN_ARRAY_SIZE - 1); ++i) { - a.array[i] = (a.array[i] >> 1) | (a.array[i + 1] << ((8 * WORD_SIZE) - 1)); - } - a.array[BN_ARRAY_SIZE - 1] >>= 1; - - return a; -} - - -bn_t bignum_rol(bn_t a, int size, int nbits) -{ - bn_t c; - - c = bignum_or( - bignum_lshift(a, nbits), - bignum_rshift(a, size - nbits) - ); - c = bignum_mask(c, size); - return c; -} - - -bn_t bignum_ror(bn_t a, int size, int nbits) -{ - bn_t c; - - c = bignum_or( - bignum_rshift(a, nbits), - bignum_lshift(a, size - nbits) - ); - c = bignum_mask(c, size); - return c; -} - - -int bignum_getbit(bn_t a, int pos) -{ - int d_pos, bit_pos; - - require(pos < 
BN_BIT_SIZE, "size must be below bignum max size"); - - d_pos = pos / (sizeof(DTYPE) * 8); - bit_pos = pos % (sizeof(DTYPE) * 8); - return !!(a.array[d_pos] & (1 << bit_pos)); - -} - - - -/* - * Count leading zeros - count the number of zero starting at the most - * significant bit - * - * Example: - * - cntleadzeros(size=32, src=2): 30 - * - cntleadzeros(size=32, src=0): 32 - */ -int bignum_cntleadzeros(bn_t n, int size) -{ - int i; - - require(size, "size must be greater than 0"); - require(size <= BN_BIT_SIZE, "size must be below bignum max size"); - - for (i = 0; i < size; i++) { - if (bignum_getbit(n, size - i - 1)) - break; - } - - return i; -} - - - -/* - * Count trailing zeros - count the number of zero starting at the least - * significant bit - * - * Example: - * - cnttrailzeros(size=32, src=2): 1 - * - cnttrailzeros(size=32, src=0): 32 - */ -int bignum_cnttrailzeros(bn_t n, int size) -{ - int i; - - require(size, "size must be greater than 0"); - require(size <= BN_BIT_SIZE, "size must be below bignum max size"); - - for (i = 0; i < size; i++) { - if (bignum_getbit(n, i)) - break; - } - - return i; -} - - - - -bn_t bignum_sdiv(bn_t a, bn_t b, int size) -{ - require(size, "size must be greater than 0"); - require(size <= BN_BIT_SIZE, "size must be below bignum max size"); - - int a_sign, b_sign; - bn_t c; - - a_sign = bignum_getbit(a, size - 1); - b_sign = bignum_getbit(b, size - 1); - - if (a_sign) { - /* neg a */ - printf("a neg\n"); - a = bignum_sub(bignum_from_int(0), a); - a = bignum_mask(a, size - 1); - } - - if (b_sign) { - /* neg b */ - printf("b neg\n"); - b = bignum_sub(bignum_from_int(0), b); - b = bignum_mask(b, size - 1); - } - - c = bignum_udiv(a, b); - if (a_sign ^ b_sign) { - c = bignum_sub(bignum_from_int(0), c); - } - - c = bignum_mask(c, size); - return c; -} - - - -bn_t bignum_smod(bn_t a, bn_t b, int size) -{ - require(size, "size must be greater than 0"); - require(size <= BN_BIT_SIZE, "size must be below bignum max size"); - - bn_t 
c; - - c = bignum_sdiv(a, b, size); - c = bignum_mul(c, b); - c = bignum_sub(a, c); - c = bignum_mask(c, size); - return c; -} diff --git a/miasm2/jitter/bn.h b/miasm2/jitter/bn.h deleted file mode 100644 index 1aa6b432..00000000 --- a/miasm2/jitter/bn.h +++ /dev/null @@ -1,163 +0,0 @@ -#ifndef __BIGNUM_H__ -#define __BIGNUM_H__ - -#if _WIN32 -#define _MIASM_EXPORT __declspec(dllexport) -#else -#define _MIASM_EXPORT -#endif - -/* - -Big number library - arithmetic on multiple-precision unsigned integers. - -This library is an implementation of arithmetic on arbitrarily large integers. - -The difference between this and other implementations, is that the data structure -has optimal memory utilization (i.e. a 1024 bit integer takes up 128 bytes RAM), -and all memory is allocated statically: no dynamic allocation for better or worse. - -Primary goals are correctness, clarity of code and clean, portable implementation. -Secondary goal is a memory footprint small enough to make it suitable for use in -embedded applications. - - -The current state is correct functionality and adequate performance. -There may well be room for performance-optimizations and improvements. - -Source: https://github.com/kokke/tiny-bignum-c - -Code slightly modified to support ast generation calculus style from Expr. - -*/ - -#include -#include - - -/* This macro defines the word size in bytes of the array that constitues the big-number data structure. */ -#ifndef WORD_SIZE - #define WORD_SIZE 4 -#endif - -#define BN_BYTE_SIZE 32 - -#define BN_BIT_SIZE ((BN_BYTE_SIZE) * 8) - -/* Size of big-numbers in bytes */ -//#define BN_ARRAY_SIZE (128 / WORD_SIZE) -#define BN_ARRAY_SIZE (BN_BYTE_SIZE / WORD_SIZE) - - -/* Here comes the compile-time specialization for how large the underlying array size should be. */ -/* The choices are 1, 2 and 4 bytes in size with uint32, uint64 for WORD_SIZE==4, as temporary. 
*/ -#ifndef WORD_SIZE - #error Must define WORD_SIZE to be 1, 2, 4 -#elif (WORD_SIZE == 1) - /* Data type of array in structure */ - #define DTYPE uint8_t - #define DTYPE_SIGNED int8_t - /* bitmask for getting MSB */ - #define DTYPE_MSB ((DTYPE_TMP)(0x80)) - /* Data-type larger than DTYPE, for holding intermediate results of calculations */ - #define DTYPE_TMP uint32_t - /* sprintf format string */ - #define SPRINTF_FORMAT_STR "%.02x" - #define SSCANF_FORMAT_STR "%2hhx" - /* Max value of integer type */ - #define MAX_VAL ((DTYPE_TMP)0xFF) -#elif (WORD_SIZE == 2) - #define DTYPE uint16_t - #define DTYPE_SIGNED int16_t - #define DTYPE_TMP uint32_t - #define DTYPE_MSB ((DTYPE_TMP)(0x8000)) - #define SPRINTF_FORMAT_STR "%.04x" - #define SSCANF_FORMAT_STR "%4hx" - #define MAX_VAL ((DTYPE_TMP)0xFFFF) -#elif (WORD_SIZE == 4) - #define DTYPE uint32_t - #define DTYPE_SIGNED int32_t - #define DTYPE_TMP uint64_t - #define DTYPE_MSB ((DTYPE_TMP)(0x80000000)) - #define SPRINTF_FORMAT_STR "%.08x" - #define SSCANF_FORMAT_STR "%8x" - #define MAX_VAL ((DTYPE_TMP)0xFFFFFFFF) -#endif -#ifndef DTYPE - #error DTYPE must be defined to uint8_t, uint16_t uint32_t or whatever -#endif - - -/* Custom assert macro - easy to disable */ -#define require(p, msg) assert(p && #msg) - - -/* Data-holding structure: array of DTYPEs */ -typedef struct bn -{ - DTYPE array[BN_ARRAY_SIZE]; -} bn_t; - - - -/* Tokens returned by bignum_cmp() for value comparison */ -enum { SMALLER = -1, EQUAL = 0, LARGER = 1 }; - -/* Initialization functions: */ -_MIASM_EXPORT bn_t bignum_init(void); -_MIASM_EXPORT bn_t bignum_from_int(DTYPE_TMP i); -_MIASM_EXPORT bn_t bignum_from_uint64(uint64_t i); -_MIASM_EXPORT int bignum_to_int(bn_t n); -_MIASM_EXPORT uint64_t bignum_to_uint64(bn_t n); -_MIASM_EXPORT bn_t bignum_from_string(char* str, int nbytes); -_MIASM_EXPORT void bignum_to_string(bn_t n, char* str, int maxsize); - - -/* Basic arithmetic operations: */ -_MIASM_EXPORT bn_t bignum_add(bn_t a, bn_t b); /* c = a + b */ 
-_MIASM_EXPORT bn_t bignum_sub(bn_t a, bn_t b); /* c = a - b */ -_MIASM_EXPORT bn_t bignum_mul(bn_t a, bn_t b); /* c = a * b */ -_MIASM_EXPORT bn_t bignum_udiv(bn_t a, bn_t b); /* c = a / b */ -_MIASM_EXPORT bn_t bignum_umod(bn_t a, bn_t b); /* c = a % b */ -_MIASM_EXPORT bn_t bignum_sdiv(bn_t a, bn_t b, int size); -_MIASM_EXPORT bn_t bignum_smod(bn_t a, bn_t b, int size); -//void bignum_udivmod(struct bn* a, struct bn* b, struct bn* c, struct bn* d); /* c = a/b, d = a%b */ - - - -/* Bitwise operations: */ -_MIASM_EXPORT bn_t bignum_and(bn_t a, bn_t b); /* c = a & b */ -_MIASM_EXPORT bn_t bignum_or(bn_t a, bn_t b); /* c = a | b */ -_MIASM_EXPORT bn_t bignum_xor(bn_t a, bn_t b); /* c = a ^ b */ -_MIASM_EXPORT bn_t bignum_lshift(bn_t a, int nbits); /* b = a << nbits */ -_MIASM_EXPORT bn_t bignum_rshift(bn_t a, int nbits); /* b = a >> nbits */ -_MIASM_EXPORT bn_t bignum_a_rshift(bn_t a, int size, int nbits); /* b = a a>> nbits */ -_MIASM_EXPORT bn_t bignum_not(bn_t a); /* c = ~a */ - -/* Special operators and comparison */ -_MIASM_EXPORT int bignum_cmp(bn_t a, bn_t b); /* Compare: returns LARGER, EQUAL or SMALLER */ -_MIASM_EXPORT int bignum_is_equal(bn_t a, bn_t b); /* Return 1 if a == b else 0 */ -_MIASM_EXPORT int bignum_is_inf_unsigned(bn_t a, bn_t b); /* Return 1 if a 1024 */ -//bn_t bignum_isqrt(bn_t a, bn_t b); /* Integer square root -- e.g. 
isqrt(5) => 2*/ -_MIASM_EXPORT int bignum_cntleadzeros(bn_t n, int size); -_MIASM_EXPORT int bignum_cnttrailzeros(bn_t n, int size); -_MIASM_EXPORT bn_t bignum_assign(bn_t src); /* Copy src into dst -- dst := src */ -_MIASM_EXPORT bn_t bignum_mask(bn_t src, int bits); /* c = src & ((1<address = %s; - return JIT_RET_EXCEPTION; - } - """ - - CODE_EXCEPTION_AT_INSTR = r""" - if (CPU_exception_flag_at_instr) { - %s = %s; - BlockDst->address = %s; - return JIT_RET_EXCEPTION; - } - """ - - CODE_RETURN_EXCEPTION = r""" - return JIT_RET_EXCEPTION; - """ - - CODE_RETURN_NO_EXCEPTION = r""" - %s: - %s = %s; - BlockDst->address = %s; - return JIT_RET_NO_EXCEPTION; - """ - - CODE_CPU_EXCEPTION_POST_INSTR = r""" - if (CPU_exception_flag) { - %s = DST_value; - BlockDst->address = DST_value; - return JIT_RET_EXCEPTION; - } - """ - - CODE_VM_EXCEPTION_POST_INSTR = r""" - check_memory_breakpoint(&(jitcpu->pyvm->vm_mngr)); - check_invalid_code_blocs(&(jitcpu->pyvm->vm_mngr)); - if (VM_exception_flag) { - %s = DST_value; - BlockDst->address = DST_value; - return JIT_RET_EXCEPTION; - } - """ - - CODE_INIT = r""" - int DST_case; - uint64_t DST_value; - vm_cpu_t* mycpu = (vm_cpu_t*)jitcpu->cpu; - - goto %s; - """ - - CODE_BAD_BLOCK = r""" - // Unknown mnemonic - CPU_exception_flag = EXCEPT_UNK_MNEMO; - """ + CODE_RETURN_EXCEPTION - - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = self.ir_arch.pc - self.translator = TranslatorC(self.ir_arch.loc_db) - self.init_arch_C() - - def init_arch_C(self): - """Iinitialize jitter internals""" - self.id_to_c_id = {} - for reg in self.ir_arch.arch.regs.all_regs_ids: - self.id_to_c_id[reg] = ExprId('mycpu->%s' % reg, reg.size) - - self.C_PC = self.id_to_c(self.PC) - - def dst_to_c(self, src): - """Translate Expr @src into C code""" - if not isinstance(src, Expr): - src = ExprInt(src, self.PC.size) - return self.id_to_c(src) - - def patch_c_id(self, expr): - """Replace ExprId in @expr with corresponding C variables""" - return 
expr.replace_expr(self.id_to_c_id) - - def id_to_c(self, expr): - """Translate Expr @expr into corresponding C code""" - return self.translator.from_expr(self.patch_c_id(expr)) - - def add_label_index(self, dst2index, loc_key): - """Insert @lbl to the dictionary @dst2index with a uniq value - @dst2index: LocKey -> uniq value - @loc_key: LocKey instance""" - - if loc_key not in dst2index: - dst2index[loc_key] = len(dst2index) - - def assignblk_to_irbloc(self, instr, assignblk): - """ - Ensure IRDst is always set in the head @assignblk of the @instr - @instr: an instruction instance - @assignblk: Assignblk instance - """ - new_assignblk = dict(assignblk) - if self.ir_arch.IRDst not in assignblk: - offset = instr.offset + instr.l - loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) - dst = ExprLoc(loc_key, self.ir_arch.IRDst.size) - new_assignblk[self.ir_arch.IRDst] = dst - irs = [AssignBlock(new_assignblk, instr)] - return IRBlock(self.ir_arch.get_loc_key_for_instr(instr), irs) - - def block2assignblks(self, block): - """ - Return the list of irblocks for a native @block - @block: AsmBlock - """ - irblocks_list = [] - for instr in block.lines: - assignblk_head, assignblks_extra = self.ir_arch.instr2ir(instr) - # Keep result in ordered list as first element is the assignblk head - # The remainings order is not really important - irblock_head = self.assignblk_to_irbloc(instr, assignblk_head) - irblocks = [irblock_head] + assignblks_extra - - # Simplify high level operators - out = [] - for irblock in irblocks: - new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) - new_irblock = new_irblock.simplify(expr_simp_high_to_explicit)[1] - out.append(new_irblock) - irblocks = out - - for irblock in irblocks: - assert irblock.dst is not None - irblocks_list.append(irblocks) - - return irblocks_list - - def add_local_var(self, dst_var, dst_index, expr): - """ - Add local variable used to store temporay result - @dst_var: dictionary 
of Expr -> local_var_expr - @dst_index : dictionary of size -> local var count - @expr: Expression source - """ - size = expr.size - if size < 8: - size = 8 - if size not in dst_index: - raise RuntimeError("Unsupported operand size %s", size) - var_num = dst_index[size] - dst = ExprId("var_%.2d_%.2d" % (size, var_num), size) - dst_index[size] += 1 - dst_var[expr] = dst - return dst - - def get_mem_prefetch(self, assignblk): - """ - Generate temporary variables used to fetch memory used in the @assignblk - Return a dictionary: ExprMem -> temporary variable - @assignblk: AssignBlock instance - """ - mem_index = {8: 0, 16: 0, 32: 0, 64: 0, 128:0} - mem_var = {} - - # Prefetch memory read - for expr in assignblk.get_r(mem_read=True): - if not isinstance(expr, ExprMem): - continue - var_num = mem_index[expr.size] - mem_index[expr.size] += 1 - var = ExprId( - "prefetch_%.2d_%.2d" % (expr.size, var_num), expr.size - ) - mem_var[expr] = var - - # Generate memory prefetch - return mem_var - - def gen_c_assignments(self, assignblk): - """ - Return C information used to generate the C code of the @assignblk - @assignblk: an AssignBlock instance - """ - c_var = [] - c_main = [] - c_mem = [] - c_updt = [] - c_prefetch = [] - - dst_index = {8: 0, 16: 0, 32: 0, 64: 0, 128:0} - dst_var = {} - - prefetchers = self.get_mem_prefetch(assignblk) - - for expr, prefetcher in viewitems(prefetchers): - str_src = self.id_to_c(expr) - str_dst = self.id_to_c(prefetcher) - c_prefetch.append('%s = %s;' % (str_dst, str_src)) - - for var in viewvalues(prefetchers): - if var.size <= self.translator.NATIVE_INT_MAX_SIZE: - c_var.append("uint%d_t %s;" % (var.size, var)) - else: - c_var.append("bn_t %s; // %d" % (var, var.size)) - - for dst, src in viewitems(assignblk): - src = src.replace_expr(prefetchers) - if dst == self.ir_arch.IRDst: - pass - elif isinstance(dst, ExprId): - new_dst = self.add_local_var(dst_var, dst_index, dst) - if dst in self.ir_arch.arch.regs.regs_flt_expr: - # Don't mask float 
assignment - c_main.append( - '%s = (%s);' % (self.id_to_c(new_dst), self.id_to_c(src))) - elif new_dst.size <= self.translator.NATIVE_INT_MAX_SIZE: - c_main.append( - '%s = (%s)&%s;' % (self.id_to_c(new_dst), - self.id_to_c(src), - SIZE_TO_MASK[src.size])) - else: - c_main.append( - '%s = bignum_mask(%s, %d);' % ( - self.id_to_c(new_dst), - self.id_to_c(src), - src.size - ) - ) - elif isinstance(dst, ExprMem): - ptr = dst.ptr.replace_expr(prefetchers) - if ptr.size <= self.translator.NATIVE_INT_MAX_SIZE: - new_dst = ExprMem(ptr, dst.size) - str_dst = self.id_to_c(new_dst).replace('MEM_LOOKUP', 'MEM_WRITE') - c_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src))) - else: - ptr_str = self.id_to_c(ptr) - if ptr.size <= self.translator.NATIVE_INT_MAX_SIZE: - c_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src))) - else: - if src.size <= self.translator.NATIVE_INT_MAX_SIZE: - c_mem.append('MEM_WRITE_BN_INT(jitcpu, %d, %s, %s);' % ( - src.size, ptr_str, self.id_to_c(src)) - ) - else: - c_mem.append('MEM_WRITE_BN_BN(jitcpu, %d, %s, %s);' % ( - src.size, ptr_str, self.id_to_c(src)) - ) - else: - raise ValueError("Unknown dst") - - for dst, new_dst in viewitems(dst_var): - if dst == self.ir_arch.IRDst: - continue - - c_updt.append('%s = %s;' % (self.id_to_c(dst), self.id_to_c(new_dst))) - if dst.size <= self.translator.NATIVE_INT_MAX_SIZE: - c_var.append("uint%d_t %s;" % (new_dst.size, new_dst)) - else: - c_var.append("bn_t %s; // %d" % (new_dst, new_dst.size)) - - return c_prefetch, c_var, c_main, c_mem, c_updt - - def gen_check_memory_exception(self, address): - """Generate C code to check memory exceptions - @address: address of the faulty instruction""" - dst = self.dst_to_c(address) - return (self.CODE_EXCEPTION_MEM_AT_INSTR % (self.C_PC, dst, dst)).split('\n') - - def gen_check_cpu_exception(self, address): - """Generate C code to check cpu exceptions - @address: address of the faulty instruction""" - dst = self.dst_to_c(address) - return 
(self.CODE_EXCEPTION_AT_INSTR % (self.C_PC, dst, dst)).split('\n') - - def traverse_expr_dst(self, expr, dst2index): - """ - Generate the index of the destination label for the @expr - @dst2index: dictionary to link label to its index - """ - - if isinstance(expr, ExprCond): - src1, src1b = self.traverse_expr_dst(expr.src1, dst2index) - src2, src2b = self.traverse_expr_dst(expr.src2, dst2index) - cond = self.id_to_c(expr.cond) - if not expr.cond.size <= self.translator.NATIVE_INT_MAX_SIZE: - cond = "(!bignum_is_zero(%s))" % cond - - return ("((%s)?(%s):(%s))" % (cond, src1, src2), - "((%s)?(%s):(%s))" % (cond, src1b, src2b)) - if isinstance(expr, ExprInt): - offset = int(expr) - loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) - self.add_label_index(dst2index, loc_key) - out = hex(offset) - return ("%s" % dst2index[loc_key], out) - if expr.is_loc(): - loc_key = expr.loc_key - offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) - if offset is not None: - self.add_label_index(dst2index, loc_key) - out = hex(offset) - return ("%s" % dst2index[loc_key], out) - self.add_label_index(dst2index, loc_key) - out = hex(0) - return ("%s" % dst2index[loc_key], out) - dst2index[expr] = -1 - return ("-1", self.id_to_c(expr)) - - def gen_assignblk_dst(self, dst): - """Generate C code to handle instruction destination - @dst: instruction destination Expr""" - dst2index = {} - (ret, retb) = self.traverse_expr_dst(dst, dst2index) - ret = "DST_case = %s;" % ret - retb = 'DST_value = %s;' % retb - return ['// %s' % dst2index, - '%s' % ret, - '%s' % retb], dst2index - - def gen_post_instr_checks(self, attrib): - """Generate C code for handling potential exceptions - @attrib: Attributes instance""" - out = [] - if attrib.mem_read | attrib.mem_write: - out += (self.CODE_VM_EXCEPTION_POST_INSTR % (self.C_PC)).split('\n') - if attrib.set_exception: - out += (self.CODE_CPU_EXCEPTION_POST_INSTR % (self.C_PC)).split('\n') - - if attrib.mem_read | attrib.mem_write: 
- out.append("reset_memory_access(&(jitcpu->pyvm->vm_mngr));") - - return out - - def gen_pre_code(self, instr_attrib): - """Callback to generate code BEFORE the instruction execution - @instr_attrib: Attributes instance""" - - out = [] - - if instr_attrib.log_mn: - out.append( - 'printf("%.8X %s\\n");' % ( - instr_attrib.instr.offset, - instr_attrib.instr.to_string(self.ir_arch.loc_db) - ) - ) - return out - - def gen_post_code(self, attrib, pc_value): - """Callback to generate code AFTER the instruction execution - @attrib: Attributes instance""" - out = [] - if attrib.log_regs: - # Update PC for dump_gpregs - out.append("%s = %s;" % (self.C_PC, pc_value)) - out.append('dump_gpregs(jitcpu->cpu);') - return out - - def gen_goto_code(self, attrib, instr_offsets, dst): - """Generate C code for a potential destination @dst - @attrib: instruction Attributes - @instr_offsets: instructions offsets list - @dst: potential instruction destination""" - - out = [] - if isinstance(dst, Expr): - out += self.gen_post_code(attrib, "DST_value") - out.append('BlockDst->address = DST_value;') - out += self.gen_post_instr_checks(attrib) - out.append('\t\treturn JIT_RET_NO_EXCEPTION;') - return out - - assert isinstance(dst, LocKey) - offset = self.ir_arch.loc_db.get_location_offset(dst) - if offset is None: - # Generate goto for local labels - return ['goto %s;' % dst] - if (offset > attrib.instr.offset and - offset in instr_offsets): - # Only generate goto for next instructions. - # (consecutive instructions) - out += self.gen_post_code(attrib, "0x%x" % offset) - out += self.gen_post_instr_checks(attrib) - out.append('goto %s;' % dst) - else: - out += self.gen_post_code(attrib, "0x%x" % offset) - out.append('BlockDst->address = DST_value;') - out += self.gen_post_instr_checks(attrib) - out.append('\t\treturn JIT_RET_NO_EXCEPTION;') - return out - - def gen_dst_goto(self, attrib, instr_offsets, dst2index): - """ - Generate code for possible @dst2index. 
- - @attrib: an Attributes instance - @instr_offsets: list of instructions offsets - @dst2index: link from destination to index - """ - - if not dst2index: - return [] - out = [] - out.append('switch(DST_case) {') - - stopcase = False - for dst, index in sorted(viewitems(dst2index), key=lambda lblindex: lblindex[1]): - if index == -1: - # Handle '-1' case only once - if not stopcase: - stopcase = True - else: - continue - - out.append('\tcase %d:' % index) - - out += self.gen_goto_code(attrib, instr_offsets, dst) - out.append('\t\tbreak;') - out.append('};') - return out - - def gen_c_code(self, attrib, c_dst, c_assignmnts): - """ - Generate the C code for assignblk. - @attrib: Attributes instance - @c_dst: irdst C code - """ - - c_prefetch, c_var, c_main, c_mem, c_updt = c_assignmnts - out = [] - out.append("{") - out.append("// var") - out += c_var - out.append("// Prefetch") - out += c_prefetch - out.append("// Dst") - out += c_dst - out.append("// Main") - out += c_main - - out.append("// Check op/mem exceptions") - - # Check memory access if assignblk has memory read - if c_prefetch: - out += self.gen_check_memory_exception(attrib.instr.offset) - - out.append("// Mem updt") - out += c_mem - - out.append("// Check exception Mem write") - # Check memory write exceptions - if attrib.mem_write: - out += self.gen_check_memory_exception(attrib.instr.offset) - - out.append("// Updt") - out += c_updt - - out.append("// Checks exception") - - # Check post assignblk exception flags - if attrib.set_exception: - out += self.gen_check_cpu_exception(attrib.instr.offset) - - out.append("}") - - return out - - def get_caracteristics(self, assignblk, attrib): - """ - Set the carateristics in @attrib according to the @assignblk - @assignblk: an AssignBlock instance - @attrib: an Attributes instance - """ - - # Check explicit exception raising - attrib.set_exception = self.ir_arch.arch.regs.exception_flags in assignblk - - element_read = assignblk.get_r(mem_read=True) - # Check 
mem read - attrib.mem_read = any(isinstance(expr, ExprMem) - for expr in element_read) - # Check mem write - attrib.mem_write = any(isinstance(dst, ExprMem) - for dst in assignblk) - - def get_attributes(self, instr, irblocks, log_mn=False, log_regs=False): - """ - Get the carateristics of each @irblocks. Returns the corresponding - attributes object. - @irblock: a list of irbloc instance - @log_mn: generate code to log instructions - @log_regs: generate code to log registers states - """ - - instr_attrib = Attributes(log_mn, log_regs) - instr_attrib.instr = instr - irblocks_attributes = [] - - for irblock in irblocks: - attributes = [] - irblocks_attributes.append(attributes) - for assignblk in irblock: - attrib = Attributes(log_mn, log_regs) - attributes.append(attrib) - self.get_caracteristics(assignblk, attrib) - attrib.instr = instr - instr_attrib.mem_read |= attrib.mem_read - instr_attrib.mem_write |= attrib.mem_write - instr_attrib.set_exception |= attrib.set_exception - - return instr_attrib, irblocks_attributes - - def gen_bad_block(self): - """ - Generate the C code for a bad_block instance - """ - return self.CODE_BAD_BLOCK.split("\n") - - def get_block_post_label(self, block): - """Get label next to the @block - @block: AsmBlock instance""" - - last_instr = block.lines[-1] - offset = last_instr.offset + last_instr.l - return self.ir_arch.loc_db.get_or_create_offset_location(offset) - - def gen_init(self, block): - """ - Generate the init C code for a @block - @block: an asm_bloc instance - """ - - instr_offsets = [line.offset for line in block.lines] - post_label = self.get_block_post_label(block) - post_offset = self.ir_arch.loc_db.get_location_offset(post_label) - instr_offsets.append(post_offset) - lbl_start = block.loc_key - return (self.CODE_INIT % lbl_start).split("\n"), instr_offsets - - def gen_irblock(self, instr_attrib, attributes, instr_offsets, irblock): - """ - Generate the C code for an @irblock - @irblock: an irbloc instance - 
@attributes: an Attributes instance list - """ - - out = [] - dst2index = None - for index, assignblk in enumerate(irblock): - if index == irblock.dst_linenb: - c_dst, dst2index = self.gen_assignblk_dst(irblock.dst) - else: - c_dst = [] - - c_assignmnts = self.gen_c_assignments(assignblk) - out += self.gen_c_code(attributes[index], c_dst, c_assignmnts) - - if dst2index: - out.append("// Set irdst") - # Gen goto on irdst set - out += self.gen_dst_goto(instr_attrib, instr_offsets, dst2index) - - return out - - def gen_finalize(self, block): - """ - Generate the C code for the final block instruction - """ - - loc_key = self.get_block_post_label(block) - offset = self.ir_arch.loc_db.get_location_offset(loc_key) - dst = self.dst_to_c(offset) - code = self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, dst, dst) - return code.split('\n') - - def gen_c(self, block, log_mn=False, log_regs=False): - """ - Generate the C code for the @block and return it as a list of lines - @log_mn: log mnemonics - @log_regs: log registers - """ - - if isinstance(block, AsmBlockBad): - return self.gen_bad_block() - irblocks_list = self.block2assignblks(block) - out, instr_offsets = self.gen_init(block) - assert len(block.lines) == len(irblocks_list) - for instr, irblocks in zip(block.lines, irblocks_list): - instr_attrib, irblocks_attributes = self.get_attributes(instr, irblocks, log_mn, log_regs) - for index, irblock in enumerate(irblocks): - label = str(irblock.loc_key) - out.append("%-40s // %.16X %s" % - (label + ":", instr.offset, instr)) - if index == 0: - out += self.gen_pre_code(instr_attrib) - out += self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, irblock) - - out += self.gen_finalize(block) - - return ['\t' + line for line in out] diff --git a/miasm2/jitter/compat_py23.h b/miasm2/jitter/compat_py23.h deleted file mode 100644 index bc66d80b..00000000 --- a/miasm2/jitter/compat_py23.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef __COMPAT_PY23_H__ -#define 
__COMPAT_PY23_H__ - - - -#if PY_MAJOR_VERSION >= 3 -#define PyGetInt(item, value) \ - if (PyLong_Check(item)){ \ - value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ - } \ - else{ \ - RAISE(PyExc_TypeError,"arg must be int"); \ - } - - -#define PyGetInt_retneg(item, value) \ - if (PyLong_Check(item)){ \ - value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ - } \ - else{ \ - PyErr_SetString(PyExc_TypeError, "Arg must be int"); \ - return -1; \ - } - -#define PyGetStr(dest, name) \ - if (!PyUnicode_Check((name))) \ - RAISE(PyExc_TypeError,"Page name must be bytes"); \ - (dest) = PyUnicode_AsUTF8((name)) - - - -#else -#define PyGetInt(item, value) \ - if (PyInt_Check(item)){ \ - value = (uint64_t)PyInt_AsLong(item); \ - } \ - else if (PyLong_Check(item)){ \ - value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ - } \ - else{ \ - RAISE(PyExc_TypeError,"arg must be int"); \ - } - - -#define PyGetInt_retneg(item, value) \ - if (PyInt_Check(item)){ \ - value = (uint64_t)PyLong_AsLong(item); \ - } \ - else if (PyLong_Check(item)){ \ - value = (uint64_t)PyLong_AsUnsignedLongLong(item); \ - } \ - else{ \ - PyErr_SetString(PyExc_TypeError, "Arg must be int"); \ - return -1; \ - } \ - - -#define PyGetStr(dest, name) \ - if (!PyString_Check((name))) \ - RAISE(PyExc_TypeError,"Page name must be bytes"); \ - (dest) = PyString_AsString((name)) - -#endif - - - -#if PY_MAJOR_VERSION >= 3 - -#define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) - -#define MOD_DEF(ob, name, doc, methods) \ - static struct PyModuleDef moduledef = { \ - PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \ - ob = PyModule_Create(&moduledef); -#else - -#define MOD_INIT(name) PyMODINIT_FUNC init##name(void) - -#define MOD_DEF(ob, name, doc, methods) \ - ob = Py_InitModule3(name, methods, doc); -#endif - - - - - -#endif diff --git a/miasm2/jitter/csts.py b/miasm2/jitter/csts.py deleted file mode 100644 index 9c9919fc..00000000 --- a/miasm2/jitter/csts.py +++ /dev/null @@ -1,30 +0,0 @@ -#-*- 
coding:utf-8 -*- - - -# VM Mngr Exceptions -EXCEPT_DO_NOT_UPDATE_PC = 1 << 25 -EXCEPT_NUM_UPDT_EIP = (1<<11) - -EXCEPT_CODE_AUTOMOD = (1 << 0) -EXCEPT_SOFT_BP = (1 << 1) -EXCEPT_INT_XX = (1 << 2) -EXCEPT_SPR_ACCESS = (1 << 3) -EXCEPT_BREAKPOINT_MEMORY = (1 << 10) -# Deprecated -EXCEPT_BREAKPOINT_INTERN = EXCEPT_BREAKPOINT_MEMORY - -EXCEPT_ACCESS_VIOL = ((1 << 14) | EXCEPT_DO_NOT_UPDATE_PC) -EXCEPT_DIV_BY_ZERO = ((1 << 16) | EXCEPT_DO_NOT_UPDATE_PC) -EXCEPT_PRIV_INSN = ((1 << 17) | EXCEPT_DO_NOT_UPDATE_PC) -EXCEPT_ILLEGAL_INSN = ((1 << 18) | EXCEPT_DO_NOT_UPDATE_PC) -EXCEPT_UNK_MNEMO = ((1 << 19) | EXCEPT_DO_NOT_UPDATE_PC) - -# VM Mngr constants - -PAGE_READ = 1 -PAGE_WRITE = 2 -PAGE_EXEC = 4 - -BREAKPOINT_READ = 1 -BREAKPOINT_WRITE = 2 - diff --git a/miasm2/jitter/emulatedsymbexec.py b/miasm2/jitter/emulatedsymbexec.py deleted file mode 100644 index 3ccce522..00000000 --- a/miasm2/jitter/emulatedsymbexec.py +++ /dev/null @@ -1,140 +0,0 @@ -from miasm2.core.utils import decode_hex, encode_hex -import miasm2.expression.expression as m2_expr -from miasm2.ir.symbexec import SymbolicExecutionEngine - - -class EmulatedSymbExec(SymbolicExecutionEngine): - """Symbolic exec instance linked with a jitter""" - - x86_cpuid = { - 0: { - 0: 0xa, - 1: 0x756E6547, - 2: 0x6C65746E, - 3: 0x49656E69, - }, - 1: { - 0: 0x00020652, - 1: 0x00000800, - 2: 0x00000209, - 3: 0x078bf9ff - }, - } - - def __init__(self, cpu, vm, *args, **kwargs): - """Instantiate an EmulatedSymbExec, associated to CPU @cpu and bind - memory accesses. - @cpu: JitCpu instance - """ - super(EmulatedSymbExec, self).__init__(*args, **kwargs) - self.cpu = cpu - self.vm = vm - - def reset_regs(self): - """Set registers value to 0. 
Ignore register aliases""" - for reg in self.ir_arch.arch.regs.all_regs_ids_no_alias: - self.symbols.symbols_id[reg] = m2_expr.ExprInt(0, size=reg.size) - - # Memory management - def mem_read(self, expr_mem): - """Memory read wrapper for symbolic execution - @expr_mem: ExprMem""" - - addr = expr_mem.ptr - if not addr.is_int(): - return super(EmulatedSymbExec, self).mem_read(expr_mem) - addr = int(addr) - size = expr_mem.size // 8 - value = self.cpu.get_mem(addr, size) - if self.vm.is_little_endian(): - value = value[::-1] - self.vm.add_mem_read(addr, size) - - return m2_expr.ExprInt( - int(encode_hex(value), 16), - expr_mem.size - ) - - def mem_write(self, dest, data): - """Memory read wrapper for symbolic execution - @dest: ExprMem instance - @data: Expr instance""" - - # Get the content to write - data = self.expr_simp(data) - if not isinstance(data, m2_expr.ExprInt): - raise RuntimeError("A simplification is missing: %s" % data) - to_write = data.arg.arg - - # Format information - addr = dest.ptr.arg.arg - size = data.size // 8 - content = hex(to_write).replace("0x", "").replace("L", "") - content = "0" * (size * 2 - len(content)) + content - content = decode_hex(content) - - if self.vm.is_little_endian(): - content = content[::-1] - - # Write in VmMngr context - self.cpu.set_mem(addr, content) - self.vm.add_mem_write(addr, len(content)) - - # Interaction symbexec <-> jitter - def update_cpu_from_engine(self): - """Updates @cpu instance according to new CPU values""" - - for symbol in self.symbols: - if isinstance(symbol, m2_expr.ExprId): - if hasattr(self.cpu, symbol.name): - value = self.symbols.symbols_id[symbol] - if not isinstance(value, m2_expr.ExprInt): - raise ValueError("A simplification is missing: %s" % value) - - setattr(self.cpu, symbol.name, value.arg.arg) - else: - raise NotImplementedError("Type not handled: %s" % symbol) - - - def update_engine_from_cpu(self): - """Updates CPU values according to @cpu instance""" - - for symbol in self.symbols: 
- if isinstance(symbol, m2_expr.ExprId): - if hasattr(self.cpu, symbol.name): - value = m2_expr.ExprInt(getattr(self.cpu, symbol.name), - symbol.size) - self.symbols.symbols_id[symbol] = value - else: - raise NotImplementedError("Type not handled: %s" % symbol) - - # CPU specific simplifications - def _simp_handle_segm(self, e_s, expr): - """Handle 'segm' operation""" - if not expr.is_op_segm(): - return expr - if not expr.args[0].is_int(): - return expr - segm_nb = int(expr.args[0]) - segmaddr = self.cpu.get_segm_base(segm_nb) - return e_s(m2_expr.ExprInt(segmaddr, expr.size) + expr.args[1]) - - def _simp_handle_x86_cpuid(self, e_s, expr): - """From miasm2/jitter/op_semantics.h: x86_cpuid""" - if expr.op != "x86_cpuid": - return expr - - if any(not arg.is_int() for arg in expr.args): - return expr - a, reg_num = (int(arg) for arg in expr.args) - - # Not found error is keeped on purpose - return m2_expr.ExprInt(self.x86_cpuid[a][reg_num], expr.size) - - def enable_emulated_simplifications(self): - """Enable simplifications needing a CPU instance on associated - ExpressionSimplifier - """ - self.expr_simp.enable_passes({ - m2_expr.ExprOp: [self._simp_handle_segm, self._simp_handle_x86_cpuid], - }) diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py deleted file mode 100644 index 33efdfd9..00000000 --- a/miasm2/jitter/jitcore.py +++ /dev/null @@ -1,309 +0,0 @@ -from __future__ import print_function -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -from hashlib import md5 -import warnings - -from future.utils import viewvalues - -from miasm2.core.asmblock import disasmEngine, AsmBlockBad -from miasm2.core.interval import interval -from miasm2.core.utils import BoundedDict -from miasm2.expression.expression import LocKey -from miasm2.jitter.csts import * - -class JitCore(object): - - "JiT management. This is an abstract class" - - # Jitted function's name - FUNCNAME = "block_entry" - - jitted_block_delete_cb = None - jitted_block_max_size = 10000 - - def __init__(self, ir_arch, bin_stream): - """Initialise a JitCore instance. - @ir_arch: ir instance for current architecture - @bin_stream: bin_stream instance - """ - # Arch related - self.ir_arch = ir_arch - self.ircfg = self.ir_arch.new_ircfg() - self.arch_name = "%s%s" % (self.ir_arch.arch.name, self.ir_arch.attrib) - - # Structures for block tracking - self.offset_to_jitted_func = BoundedDict(self.jitted_block_max_size, - delete_cb=self.jitted_block_delete_cb) - self.loc_key_to_block = {} - self.blocks_mem_interval = interval() - - # Logging & options - self.log_mn = False - self.log_regs = False - self.log_newbloc = False - self.options = {"jit_maxline": 50, # Maximum number of line jitted - "max_exec_per_call": 0 # 0 means no limit - } - - # Disassembly Engine - self.split_dis = set() - self.mdis = disasmEngine( - ir_arch.arch, ir_arch.attrib, bin_stream, - lines_wd=self.options["jit_maxline"], - loc_db=ir_arch.loc_db, - follow_call=False, - dontdis_retcall=False, - split_dis=self.split_dis, - ) - - - def set_options(self, **kwargs): - "Set options relative to the backend" - self.options.update(kwargs) - - def clear_jitted_blocks(self): - "Reset all jitted blocks" - self.offset_to_jitted_func.clear() - self.loc_key_to_block.clear() - 
self.blocks_mem_interval = interval() - - def add_disassembly_splits(self, *args): - """The disassembly engine will stop on address in args if they - are not at the block beginning""" - self.split_dis.update(set(args)) - - def remove_disassembly_splits(self, *args): - """The disassembly engine will no longer stop on address in args""" - self.split_dis.difference_update(set(args)) - - def load(self): - "Initialise the Jitter" - raise NotImplementedError("Abstract class") - - def set_block_min_max(self, cur_block): - "Update cur_block to set min/max address" - - if cur_block.lines: - cur_block.ad_min = cur_block.lines[0].offset - cur_block.ad_max = cur_block.lines[-1].offset + cur_block.lines[-1].l - else: - # 1 byte block for unknown mnemonic - offset = ir_arch.loc_db.get_location_offset(cur_block.loc_key) - cur_block.ad_min = offset - cur_block.ad_max = offset+1 - - - def add_block_to_mem_interval(self, vm, block): - "Update vm to include block addresses in its memory range" - self.blocks_mem_interval += interval([(block.ad_min, block.ad_max - 1)]) - - vm.reset_code_bloc_pool() - for a, b in self.blocks_mem_interval: - vm.add_code_bloc(a, b + 1) - - def add_block(self, block): - """Add a block to JiT and JiT it. 
- @block: asm_bloc to add - """ - raise NotImplementedError("Abstract class") - - def disasm_and_jit_block(self, addr, vm): - """Disassemble a new block and JiT it - @addr: address of the block to disassemble (LocKey or int) - @vm: VmMngr instance - """ - - # Get the block - if isinstance(addr, LocKey): - addr = self.ir_arch.loc_db.get_location_offset(addr) - if addr is None: - raise RuntimeError("Unknown offset for LocKey") - - # Prepare disassembler - self.mdis.lines_wd = self.options["jit_maxline"] - - # Disassemble it - cur_block = self.mdis.dis_block(addr) - if isinstance(cur_block, AsmBlockBad): - return cur_block - # Logging - if self.log_newbloc: - print(cur_block.to_string(self.mdis.loc_db)) - - # Update label -> block - self.loc_key_to_block[cur_block.loc_key] = cur_block - - # Store min/max block address needed in jit automod code - self.set_block_min_max(cur_block) - - # JiT it - self.add_block(cur_block) - - # Update jitcode mem range - self.add_block_to_mem_interval(vm, cur_block) - return cur_block - - def run_at(self, cpu, offset, stop_offsets): - """Run from the starting address @offset. 
- Execution will stop if: - - max_exec_per_call option is reached - - a new, yet unknown, block is reached after the execution of block at - address @offset - - an address in @stop_offsets is reached - @cpu: JitCpu instance - @offset: starting address (int) - @stop_offsets: set of address on which the jitter must stop - """ - - if offset is None: - offset = getattr(cpu, self.ir_arch.pc.name) - - if offset not in self.offset_to_jitted_func: - # Need to JiT the block - cur_block = self.disasm_and_jit_block(offset, cpu.vmmngr) - if isinstance(cur_block, AsmBlockBad): - errno = cur_block.errno - if errno == AsmBlockBad.ERROR_IO: - cpu.vmmngr.set_exception(EXCEPT_ACCESS_VIOL) - elif errno == AsmBlockBad.ERROR_CANNOT_DISASM: - cpu.set_exception(EXCEPT_UNK_MNEMO) - else: - raise RuntimeError("Unhandled disasm result %r" % errno) - return offset - - # Run the block and update cpu/vmmngr state - return self.exec_wrapper(offset, cpu, self.offset_to_jitted_func.data, - stop_offsets, - self.options["max_exec_per_call"]) - - def blocks_to_memrange(self, blocks): - """Return an interval instance standing for blocks addresses - @blocks: list of AsmBlock instances - """ - - mem_range = interval() - - for block in blocks: - mem_range += interval([(block.ad_min, block.ad_max - 1)]) - - return mem_range - - def __updt_jitcode_mem_range(self, vm): - """Rebuild the VM blocks address memory range - @vm: VmMngr instance - """ - - # Reset the current pool - vm.reset_code_bloc_pool() - - # Add blocks in the pool - for start, stop in self.blocks_mem_interval: - vm.add_code_bloc(start, stop + 1) - - def del_block_in_range(self, ad1, ad2): - """Find and remove jitted block in range [ad1, ad2]. - Return the list of block removed. 
- @ad1: First address - @ad2: Last address - """ - - # Find concerned blocks - modified_blocks = set() - for block in viewvalues(self.loc_key_to_block): - if not block.lines: - continue - if block.ad_max <= ad1 or block.ad_min >= ad2: - # Block not modified - pass - else: - # Modified blocks - modified_blocks.add(block) - - # Generate interval to delete - del_interval = self.blocks_to_memrange(modified_blocks) - - # Remove interval from monitored interval list - self.blocks_mem_interval -= del_interval - - # Remove modified blocks - for block in modified_blocks: - try: - for irblock in block.blocks: - # Remove offset -> jitted block link - offset = self.ir_arch.loc_db.get_location_offset(irblock.loc_key) - if offset in self.offset_to_jitted_func: - del(self.offset_to_jitted_func[offset]) - - except AttributeError: - # The block has never been translated in IR - offset = self.ir_arch.loc_db.get_location_offset(block.loc_key) - if offset in self.offset_to_jitted_func: - del(self.offset_to_jitted_func[offset]) - - # Remove label -> block link - del(self.loc_key_to_block[block.loc_key]) - - return modified_blocks - - def updt_automod_code_range(self, vm, mem_range): - """Remove jitted code in range @mem_range - @vm: VmMngr instance - @mem_range: list of start/stop addresses - """ - for addr_start, addr_stop in mem_range: - self.del_block_in_range(addr_start, addr_stop) - self.__updt_jitcode_mem_range(vm) - vm.reset_memory_access() - - def updt_automod_code(self, vm): - """Remove jitted code updated by memory write - @vm: VmMngr instance - """ - mem_range = [] - for addr_start, addr_stop in vm.get_memory_write(): - mem_range.append((addr_start, addr_stop)) - self.updt_automod_code_range(vm, mem_range) - - def hash_block(self, block): - """ - Build a hash of the block @block - @block: asmblock - """ - block_raw = b"".join(line.b for line in block.lines) - offset = self.ir_arch.loc_db.get_location_offset(block.loc_key) - block_hash = md5( - b"%X_%s_%s_%s_%s" % ( - offset, 
- self.arch_name.encode(), - b'\x01' if self.log_mn else b'\x00', - b'\x01' if self.log_regs else b'\x00', - block_raw - ) - ).hexdigest() - return block_hash - - @property - def disasm_cb(self): - warnings.warn("Deprecated API: use .mdis.dis_block_callback") - return self.mdis.dis_block_callback - - @disasm_cb.setter - def disasm_cb(self, value): - warnings.warn("Deprecated API: use .mdis.dis_block_callback") - self.mdis.dis_block_callback = value diff --git a/miasm2/jitter/jitcore_cc_base.py b/miasm2/jitter/jitcore_cc_base.py deleted file mode 100644 index 997d6330..00000000 --- a/miasm2/jitter/jitcore_cc_base.py +++ /dev/null @@ -1,121 +0,0 @@ -#-*- coding:utf-8 -*- - -import os -import tempfile -import platform -import sysconfig -from distutils.sysconfig import get_python_inc - -from miasm2.jitter.jitcore import JitCore -from miasm2.core.utils import keydefaultdict - -is_win = platform.system() == "Windows" - -def gen_core(arch, attrib): - lib_dir = os.path.dirname(os.path.realpath(__file__)) - - txt = "" - txt += '#include "%s/queue.h"\n' % lib_dir - txt += '#include "%s/op_semantics.h"\n' % lib_dir - txt += '#include "%s/vm_mngr.h"\n' % lib_dir - txt += '#include "%s/vm_mngr_py.h"\n' % lib_dir - txt += '#include "%s/bn.h"\n' % lib_dir - txt += '#include "%s/JitCore.h"\n' % lib_dir - txt += '#include "%s/arch/JitCore_%s.h"\n' % (lib_dir, arch.name) - - txt += r''' -#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} -''' - return txt - - -class myresolver(object): - - def __init__(self, offset): - self.offset = offset - - def ret(self): - return "return PyLong_FromUnsignedLongLong(0x%X);" % self.offset - - -class resolver(object): - - def __init__(self): - self.resolvers = keydefaultdict(myresolver) - - def get_resolver(self, offset): - return self.resolvers[offset] - - -class JitCore_Cc_Base(JitCore): - "JiT management, abstract class using a C compiler as backend" - - def __init__(self, ir_arch, bin_stream): - 
self.jitted_block_delete_cb = self.deleteCB - super(JitCore_Cc_Base, self).__init__(ir_arch, bin_stream) - self.resolver = resolver() - self.ir_arch = ir_arch - self.states = {} - self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_cache") - try: - os.mkdir(self.tempdir, 0o755) - except OSError: - pass - if not os.access(self.tempdir, os.R_OK | os.W_OK): - raise RuntimeError( - 'Cannot access cache directory %s ' % self.tempdir) - self.exec_wrapper = None - self.libs = None - self.include_files = None - - def deleteCB(self, offset): - raise NotImplementedError() - - def load(self): - lib_dir = os.path.dirname(os.path.realpath(__file__)) - ext = sysconfig.get_config_var('EXT_SUFFIX') - if ext is None: - ext = ".so" if not is_win else ".lib" - - libs = [ - os.path.join(lib_dir, "VmMngr" + ext), - os.path.join( - lib_dir, - "arch", - "JitCore_%s%s" % (self.ir_arch.arch.name, ext) - ) - ] - - include_files = [ - os.path.dirname(__file__), - get_python_inc() - ] - self.include_files = include_files - self.libs = libs - - def init_codegen(self, codegen): - """ - Get the code generator @codegen - @codegen: an CGen instance - """ - self.codegen = codegen - - def gen_c_code(self, block): - """ - Return the C code corresponding to the @irblocks - @irblocks: list of irblocks - """ - f_declaration = '_MIASM_EXPORT int %s(block_id * BlockDst, JitCpu* jitcpu)' % self.FUNCNAME - out = self.codegen.gen_c( - block, - log_mn=self.log_mn, - log_regs=self.log_regs - ) - out = [f_declaration + '{'] + out + ['}\n'] - c_code = out - - return self.gen_C_source(self.ir_arch, c_code) - - @staticmethod - def gen_C_source(ir_arch, func_code): - raise NotImplementedError() diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py deleted file mode 100644 index 5fd54c3d..00000000 --- a/miasm2/jitter/jitcore_gcc.py +++ /dev/null @@ -1,141 +0,0 @@ -#-*- coding:utf-8 -*- - -import os -import tempfile -import ctypes -import _ctypes -import platform -import sysconfig -from 
subprocess import check_call -from distutils.sysconfig import get_python_inc -from miasm2.jitter import Jitgcc -from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base, gen_core - -is_win = platform.system() == "Windows" - -class JitCore_Gcc(JitCore_Cc_Base): - "JiT management, using a C compiler as backend" - - def __init__(self, ir_arch, bin_stream): - super(JitCore_Gcc, self).__init__(ir_arch, bin_stream) - self.exec_wrapper = Jitgcc.gcc_exec_block - - def deleteCB(self, offset): - """Free the state associated to @offset and delete it - @offset: gcc state offset - """ - flib = None - if is_win: - flib = _ctypes.FreeLibrary - else: - flib = _ctypes.dlclose - flib(self.states[offset]._handle) - del self.states[offset] - - def load_code(self, label, fname_so): - lib = ctypes.cdll.LoadLibrary(fname_so) - func = getattr(lib, self.FUNCNAME) - addr = ctypes.cast(func, ctypes.c_void_p).value - offset = self.ir_arch.loc_db.get_location_offset(label) - self.offset_to_jitted_func[offset] = addr - self.states[offset] = lib - - def add_block(self, block): - """Add a bloc to JiT and JiT it. 
- @block: block to jit - """ - block_hash = self.hash_block(block) - ext = sysconfig.get_config_var('EXT_SUFFIX') - if ext is None: - ext = ".so" if not is_win else ".pyd" - fname_out = os.path.join(self.tempdir, "%s%s" % (block_hash, ext)) - - if not os.access(fname_out, os.R_OK | os.X_OK): - func_code = self.gen_c_code(block) - - # Create unique C file - fdesc, fname_in = tempfile.mkstemp(suffix=".c") - os.write(fdesc, func_code.encode()) - os.close(fdesc) - - # Create unique SO file - fdesc, fname_tmp = tempfile.mkstemp(suffix=ext) - os.close(fdesc) - - inc_dir = ["-I%s" % inc for inc in self.include_files] - libs = ["%s" % lib for lib in self.libs] - if is_win: - libs.append( - os.path.join( - get_python_inc(), - "..", - "libs", - "python27.lib" - ) - ) - cl = [ - "cl", "/nologo", "/W3", "/MP", - "/Od", "/DNDEBUG", "/D_WINDOWS", "/Gm-", "/EHsc", - "/RTC1", "/MD", "/GS", - fname_in - ] + inc_dir + libs - cl += ["/link", "/DLL", "/OUT:" + fname_tmp] - out_dir, _ = os.path.split(fname_tmp) - check_call(cl, cwd = out_dir) - basename_out, _ = os.path.splitext(fname_tmp) - basename_in, _ = os.path.splitext(os.path.basename(fname_in)) - for ext in ('.obj', '.exp', '.lib'): - artifact_out_path = os.path.join( - out_dir, - basename_out + ext - ) - if os.path.isfile(artifact_out_path): - os.remove(artifact_out_path) - artifact_in_path = os.path.join( - out_dir, - basename_in + ext - ) - if os.path.isfile(artifact_in_path): - os.remove(artifact_in_path) - else: - args = [ - "cc", - "-O3", - "-shared", - "-fPIC", - fname_in, - "-o", - fname_tmp - ] + inc_dir + libs - check_call(args) - - # Move temporary file to final file - try: - os.rename(fname_tmp, fname_out) - except WindowsError as e: - # On Windows, os.rename works slightly differently than on - # Linux; quoting the documentation: - # "On Unix, if dst exists and is a file, it will be replaced - # silently if the user has permission. 
The operation may fail - # on some Unix flavors if src and dst are on different - # filesystems. If successful, the renaming will be an atomic - # operation (this is a POSIX requirement). On Windows, if dst - # already exists, OSError will be raised even if it is a file; - # there may be no way to implement an atomic rename when dst - # names an existing file." - # [Error 183] Cannot create a file when that file already exists - if e.winerror != 183: - raise - os.remove(fname_tmp) - os.remove(fname_in) - - self.load_code(block.loc_key, fname_out) - - @staticmethod - def gen_C_source(ir_arch, func_code): - c_source = "" - c_source += "\n".join(func_code) - - c_source = gen_core(ir_arch.arch, ir_arch.attrib) + c_source - c_source = "#define PARITY_IMPORT\n#include \n" + c_source - return c_source diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py deleted file mode 100644 index d017e122..00000000 --- a/miasm2/jitter/jitcore_llvm.py +++ /dev/null @@ -1,134 +0,0 @@ -from __future__ import print_function -import os -import importlib -import tempfile -import sysconfig - -from miasm2.jitter.llvmconvert import * -import miasm2.jitter.jitcore as jitcore -from miasm2.jitter import Jitllvm -import platform - -is_win = platform.system() == "Windows" - -class JitCore_LLVM(jitcore.JitCore): - "JiT management, using LLVM as backend" - - # Architecture dependent libraries - arch_dependent_libs = { - "x86": "JitCore_x86", - "arm": "JitCore_arm", - "msp430": "JitCore_msp430", - "mips32": "JitCore_mips32", - "aarch64": "JitCore_aarch64", - "ppc32": "JitCore_ppc32", - } - - def __init__(self, ir_arch, bin_stream): - super(JitCore_LLVM, self).__init__(ir_arch, bin_stream) - - self.options.update( - { - "safe_mode": True, # Verify each function - "optimise": True, # Optimise functions - "log_func": False, # Print LLVM functions - "log_assembly": False, # Print assembly executed - } - ) - - self.exec_wrapper = Jitllvm.llvm_exec_block - self.ir_arch = ir_arch - - # 
Cache temporary dir - self.tempdir = os.path.join(tempfile.gettempdir(), "miasm_cache") - try: - os.mkdir(self.tempdir, 0o755) - except OSError: - pass - if not os.access(self.tempdir, os.R_OK | os.W_OK): - raise RuntimeError( - 'Cannot access cache directory %s ' % self.tempdir) - - def load(self): - - # Library to load within Jit context - libs_to_load = [] - - # Get architecture dependent Jitcore library (if any) - lib_dir = os.path.dirname(os.path.realpath(__file__)) - lib_dir = os.path.join(lib_dir, 'arch') - ext = sysconfig.get_config_var('EXT_SUFFIX') - if ext is None: - ext = ".so" if not is_win else ".pyd" - try: - jit_lib = os.path.join( - lib_dir, self.arch_dependent_libs[self.ir_arch.arch.name] + ext - ) - libs_to_load.append(jit_lib) - except KeyError: - pass - - # Create a context - self.context = LLVMContext_JIT(libs_to_load, self.ir_arch) - - # Set the optimisation level - self.context.optimise_level() - - # Save the current architecture parameters - self.arch = self.ir_arch.arch - - # Get the correspondence between registers and vmcpu struct - mod_name = "miasm2.jitter.arch.JitCore_%s" % (self.ir_arch.arch.name) - mod = importlib.import_module(mod_name) - self.context.set_vmcpu(mod.get_gpreg_offset_all()) - - # Enable caching - self.context.enable_cache() - - def add_block(self, block): - """Add a block to JiT and JiT it. 
- @block: the block to add - """ - - block_hash = self.hash_block(block) - fname_out = os.path.join(self.tempdir, "%s.bc" % block_hash) - - if not os.access(fname_out, os.R_OK): - # Build a function in the context - func = LLVMFunction(self.context, self.FUNCNAME) - - # Set log level - func.log_regs = self.log_regs - func.log_mn = self.log_mn - - # Import asm block - func.from_asmblock(block) - - # Verify - if self.options["safe_mode"] is True: - func.verify() - - # Optimise - if self.options["optimise"] is True: - func.optimise() - - # Log - if self.options["log_func"] is True: - print(func) - if self.options["log_assembly"] is True: - print(func.get_assembly()) - - # Use propagate the cache filename - self.context.set_cache_filename(func, fname_out) - - # Get a pointer on the function for JiT - ptr = func.get_function_pointer() - - else: - # The cache file exists: function can be loaded from cache - ptr = self.context.get_ptr_from_cache(fname_out, self.FUNCNAME) - - # Store a pointer on the function jitted code - loc_key = block.loc_key - offset = self.ir_arch.loc_db.get_location_offset(loc_key) - self.offset_to_jitted_func[offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py deleted file mode 100644 index 4262c334..00000000 --- a/miasm2/jitter/jitcore_python.py +++ /dev/null @@ -1,219 +0,0 @@ -from __future__ import print_function -from builtins import zip -import miasm2.jitter.jitcore as jitcore -from miasm2.expression.expression import ExprInt, ExprLoc -import miasm2.jitter.csts as csts -from miasm2.expression.simplifications import expr_simp_explicit -from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec - -################################################################################ -# Python jitter Core # -################################################################################ - - -class JitCore_Python(jitcore.JitCore): - "JiT management, using Miasm2 Symbol Execution engine as backend" - - 
SymbExecClass = EmulatedSymbExec - - def __init__(self, ir_arch, bin_stream): - super(JitCore_Python, self).__init__(ir_arch, bin_stream) - self.ir_arch = ir_arch - self.ircfg = self.ir_arch.new_ircfg() - - # CPU & VM (None for now) will be set later - - self.symbexec = self.SymbExecClass( - None, None, - self.ir_arch, {}, - sb_expr_simp=expr_simp_explicit - ) - self.symbexec.enable_emulated_simplifications() - - def set_cpu_vm(self, cpu, vm): - self.symbexec.cpu = cpu - self.symbexec.vm = vm - - def load(self): - "Preload symbols according to current architecture" - self.symbexec.reset_regs() - - def arch_specific(self): - """Return arch specific information for the current architecture""" - arch = self.ir_arch.arch - has_delayslot = False - if arch.name == "mips32": - from miasm2.arch.mips32.jit import mipsCGen - cgen_class = mipsCGen - has_delayslot = True - elif arch.name == "arm": - from miasm2.arch.arm.jit import arm_CGen - cgen_class = arm_CGen - else: - from miasm2.jitter.codegen import CGen - cgen_class = CGen - return cgen_class(self.ir_arch), has_delayslot - - def add_block(self, asmblock): - """Create a python function corresponding to an AsmBlock - @asmblock: AsmBlock - """ - - # TODO: merge duplicate code with CGen, llvmconvert - codegen, has_delayslot = self.arch_specific() - irblocks_list = codegen.block2assignblks(asmblock) - instr_offsets = [line.offset for line in asmblock.lines] - - loc_db = self.ir_arch.loc_db - local_loc_keys = [] - for irblocks in irblocks_list: - for irblock in irblocks: - local_loc_keys.append(irblock.loc_key) - - def myfunc(cpu): - """Execute the function according to cpu and vmmngr states - @cpu: JitCpu instance - """ - # Get virtual memory handler - vmmngr = cpu.vmmngr - - # Get execution engine (EmulatedSymbExec instance) - exec_engine = self.symbexec - - # Refresh CPU values according to @cpu instance - exec_engine.update_engine_from_cpu() - - # Get initial loc_key - cur_loc_key = asmblock.loc_key - - # Update PC 
helper - update_pc = lambda value: setattr(cpu, self.ir_arch.pc.name, value) - - while True: - # Retrieve the expected irblock - for instr, irblocks in zip(asmblock.lines, irblocks_list): - for index, irblock in enumerate(irblocks): - if irblock.loc_key == cur_loc_key: - break - else: - continue - break - else: - raise RuntimeError("Unable to find the block for %r" % cur_loc_key) - - instr_attrib, irblocks_attributes = codegen.get_attributes( - instr, irblocks, self.log_mn, self.log_regs - ) - irblock_attributes = irblocks_attributes[index] - - # Do IRBlock - new_irblock = self.ir_arch.irbloc_fix_regs_for_mode( - irblock, self.ir_arch.attrib - ) - if index == 0: - # Pre code - if instr_attrib.log_mn: - print("%.8X %s" % ( - instr_attrib.instr.offset, - instr_attrib.instr.to_string(loc_db) - )) - - # Exec IRBlock - instr = instr_attrib.instr - - for index, assignblk in enumerate(irblock): - attributes = irblock_attributes[index] - - # Eval current instruction (in IR) - exec_engine.eval_updt_assignblk(assignblk) - - # Check memory access / write exception - # TODO: insert a check between memory reads and writes - if attributes.mem_read or attributes.mem_write: - # Restricted exception - flag = ~csts.EXCEPT_CODE_AUTOMOD & csts.EXCEPT_DO_NOT_UPDATE_PC - if (vmmngr.get_exception() & flag != 0): - # Do not update registers - update_pc(instr.offset) - return instr.offset - - # Update registers values - exec_engine.update_cpu_from_engine() - - # Check post assignblk exception flags - if attributes.set_exception: - # Restricted exception - if cpu.get_exception() > csts.EXCEPT_NUM_UPDT_EIP: - # Update PC - update_pc(instr.offset) - return instr.offset - - dst = exec_engine.eval_expr(self.ir_arch.IRDst) - if dst.is_int(): - loc_key = loc_db.get_or_create_offset_location(int(dst)) - dst = ExprLoc(loc_key, dst.size) - - assert dst.is_loc() - loc_key = dst.loc_key - offset = loc_db.get_location_offset(loc_key) - if offset is None: - # Avoid checks on generated label - 
cur_loc_key = loc_key - continue - - if instr_attrib.log_regs: - update_pc(offset) - cpu.dump_gpregs_with_attrib(self.ir_arch.attrib) - - # Post-instr checks - if instr_attrib.mem_read | instr_attrib.mem_write: - vmmngr.check_memory_breakpoint() - vmmngr.check_invalid_code_blocs() - if vmmngr.get_exception(): - update_pc(offset) - return offset - - if instr_attrib.set_exception: - if cpu.get_exception(): - update_pc(offset) - return offset - - if instr_attrib.mem_read | instr_attrib.mem_write: - vmmngr.reset_memory_access() - - # Manage resulting address - if (loc_key in local_loc_keys and - offset > instr.offset): - # Forward local jump - # Note: a backward local jump has to be promoted to extern, - # for max_exec_per_call support - cur_loc_key = loc_key - continue - - # Delay slot - if has_delayslot: - delay_slot_set = exec_engine.eval_expr(codegen.delay_slot_set) - if delay_slot_set.is_int() and int(delay_slot_set) != 0: - return int(exec_engine.eval_expr(codegen.delay_slot_dst)) - - # Extern of asmblock, must have an offset - assert offset is not None - return offset - - # Associate myfunc with current loc_key - offset = loc_db.get_location_offset(asmblock.loc_key) - assert offset is not None - self.offset_to_jitted_func[offset] = myfunc - - def exec_wrapper(self, loc_key, cpu, _offset_to_jitted_func, _stop_offsets, - _max_exec_per_call): - """Call the function @loc_key with @cpu - @loc_key: function's loc_key - @cpu: JitCpu instance - """ - - # Get Python function corresponding to @loc_key - fc_ptr = self.offset_to_jitted_func[loc_key] - - # Execute the function - return fc_ptr(cpu) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py deleted file mode 100644 index 3f3cf10f..00000000 --- a/miasm2/jitter/jitload.py +++ /dev/null @@ -1,547 +0,0 @@ -import logging -import warnings -from functools import wraps -from collections import Sequence, namedtuple - -from future.utils import viewitems - -from miasm2.jitter.csts import * -from miasm2.core.utils 
import * -from miasm2.core.bin_stream import bin_stream_vm -from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec -from miasm2.jitter.codegen import CGen -from miasm2.jitter.jitcore_cc_base import JitCore_Cc_Base - -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log = logging.getLogger('jitload.py') -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) -log_func = logging.getLogger('jit function call') -log_func.addHandler(hnd) -log_func.setLevel(logging.CRITICAL) - -try: - from miasm2.jitter import VmMngr -except ImportError: - log.error('cannot import VmMngr') - - -def named_arguments(func): - """Function decorator to allow the use of .func_args_*() methods - with either the number of arguments or the list of the argument - names. - - The wrapper is also used to log the argument values. - - @func: function - - """ - @wraps(func) - def newfunc(self, args): - if isinstance(args, Sequence): - ret_ad, arg_vals = func(self, len(args)) - arg_vals = namedtuple("args", args)(*arg_vals) - # func_name(arguments) return address - log_func.info( - '%s(%s) ret addr: %s', - get_caller_name(1), - ', '.join( - "%s=0x%x" % (field, value) - for field, value in viewitems(arg_vals._asdict()) - ), - hex(ret_ad) - ) - return ret_ad, namedtuple("args", args)(*arg_vals) - else: - ret_ad, arg_vals = func(self, args) - # func_name(arguments) return address - log_func.info('%s(%s) ret addr: %s', - get_caller_name(1), - ', '.join(hex(arg) for arg in arg_vals), - hex(ret_ad)) - return ret_ad, arg_vals - return newfunc - - -class CallbackHandler(object): - - "Handle a list of callback" - - def __init__(self): - self.callbacks = {} # Key -> [callback list] - - def add_callback(self, key, callback): - """Add a callback to the key @key, iff the @callback isn't already - assigned to it""" - if callback not in self.callbacks.get(key, []): - self.callbacks[key] = self.callbacks.get(key, []) + [callback] - - def set_callback(self, key, 
*args): - "Set the list of callback for key 'key'" - self.callbacks[key] = list(args) - - def get_callbacks(self, key): - "Return the list of callbacks associated to key 'key'" - return self.callbacks.get(key, []) - - def remove_callback(self, callback): - """Remove the callback from the list. - Return the list of empty keys (removed)""" - - to_check = set() - for key, cb_list in viewitems(self.callbacks): - try: - cb_list.remove(callback) - to_check.add(key) - except ValueError: - pass - - empty_keys = [] - for key in to_check: - if len(self.callbacks[key]) == 0: - empty_keys.append(key) - del(self.callbacks[key]) - - return empty_keys - - def has_callbacks(self, key): - return key in self.callbacks - - def remove_key(self, key): - """Remove and return all callbacks associated to @key""" - callbacks = self.callbacks.get(key, []) - del self.callbacks[key] - return callbacks - - def call_callbacks(self, key, *args): - """Call callbacks associated to key 'key' with arguments args. While - callbacks return True, continue with next callback. - Iterator on other results.""" - - res = True - - for c in self.get_callbacks(key): - res = c(*args) - if res is not True: - yield res - - def __call__(self, key, *args): - "Wrapper for call_callbacks" - return self.call_callbacks(key, *args) - - -class CallbackHandlerBitflag(CallbackHandler): - - "Handle a list of callback with conditions on bitflag" - - def call_callbacks(self, bitflag, *args): - """Call each callbacks associated with bit set in bitflag. While - callbacks return True, continue with next callback. 
- Iterator on other results""" - - for bitflag_expected in self.callbacks: - if bitflag_expected & bitflag == bitflag_expected: - # If the flag matched - for res in super(CallbackHandlerBitflag, - self).call_callbacks(bitflag_expected, *args): - if res is not True: - yield res - - -class ExceptionHandle(object): - - "Return type for exception handler" - - def __init__(self, except_flag): - self.except_flag = except_flag - - @classmethod - def memoryBreakpoint(cls): - return cls(EXCEPT_BREAKPOINT_MEMORY) - - def __eq__(self, to_cmp): - if not isinstance(to_cmp, ExceptionHandle): - return False - return (self.except_flag == to_cmp.except_flag) - - def __ne__(self, to_cmp): - return not self.__eq__(to_cmp) - - -class Jitter(object): - - "Main class for JIT handling" - - C_Gen = CGen - - def __init__(self, ir_arch, jit_type="gcc"): - """Init an instance of jitter. - @ir_arch: ir instance for this architecture - @jit_type: JiT backend to use. Available options are: - - "gcc" - - "llvm" - - "python" - """ - - self.arch = ir_arch.arch - self.attrib = ir_arch.attrib - arch_name = ir_arch.arch.name # (ir_arch.arch.name, ir_arch.attrib) - - try: - if arch_name == "x86": - from miasm2.jitter.arch import JitCore_x86 as jcore - elif arch_name == "arm": - from miasm2.jitter.arch import JitCore_arm as jcore - elif arch_name == "armt": - from miasm2.jitter.arch import JitCore_arm as jcore - ir_arch.arch.name = 'arm' - elif arch_name == "aarch64": - from miasm2.jitter.arch import JitCore_aarch64 as jcore - elif arch_name == "msp430": - from miasm2.jitter.arch import JitCore_msp430 as jcore - elif arch_name == "mips32": - from miasm2.jitter.arch import JitCore_mips32 as jcore - elif arch_name == "ppc32": - from miasm2.jitter.arch import JitCore_ppc32 as jcore - elif arch_name == "mep": - from miasm2.jitter.arch import JitCore_mep as jcore - else: - raise ValueError("unknown jit arch: %s" % arch_name) - except ImportError: - raise RuntimeError('Unsupported jit arch: %s' % arch_name) 
- - self.vm = VmMngr.Vm() - self.cpu = jcore.JitCpu() - self.ir_arch = ir_arch - self.bs = bin_stream_vm(self.vm) - self.ircfg = self.ir_arch.new_ircfg() - - self.symbexec = EmulatedSymbExec( - self.cpu, self.vm, self.ir_arch, {} - ) - self.symbexec.reset_regs() - - try: - if jit_type == "llvm": - from miasm2.jitter.jitcore_llvm import JitCore_LLVM as JitCore - elif jit_type == "python": - from miasm2.jitter.jitcore_python import JitCore_Python as JitCore - elif jit_type == "gcc": - from miasm2.jitter.jitcore_gcc import JitCore_Gcc as JitCore - else: - raise ValueError("Unknown jitter %s" % jit_type) - except ImportError: - raise RuntimeError('Unsupported jitter: %s' % jit_type) - - self.jit = JitCore(self.ir_arch, self.bs) - if isinstance(self.jit, JitCore_Cc_Base): - self.jit.init_codegen(self.C_Gen(self.ir_arch)) - elif jit_type == "python": - self.jit.set_cpu_vm(self.cpu, self.vm) - - self.cpu.init_regs() - self.vm.init_memory_page_pool() - self.vm.init_code_bloc_pool() - self.vm.init_memory_breakpoint() - - self.jit.load() - self.cpu.vmmngr = self.vm - self.cpu.jitter = self.jit - self.stack_size = 0x10000 - self.stack_base = 0x1230000 - - # Init callback handler - self.breakpoints_handler = CallbackHandler() - self.exceptions_handler = CallbackHandlerBitflag() - self.init_exceptions_handler() - self.exec_cb = None - - def init_exceptions_handler(self): - "Add common exceptions handlers" - - def exception_automod(jitter): - "Tell the JiT backend to update blocks modified" - - self.jit.updt_automod_code(jitter.vm) - self.vm.set_exception(0) - - return True - - def exception_memory_breakpoint(jitter): - "Stop the execution and return an identifier" - return ExceptionHandle.memoryBreakpoint() - - self.add_exception_handler(EXCEPT_CODE_AUTOMOD, exception_automod) - self.add_exception_handler(EXCEPT_BREAKPOINT_MEMORY, - exception_memory_breakpoint) - - def add_breakpoint(self, addr, callback): - """Add a callback associated with addr. 
- @addr: breakpoint address - @callback: function with definition (jitter instance) - """ - self.breakpoints_handler.add_callback(addr, callback) - self.jit.add_disassembly_splits(addr) - # De-jit previously jitted blocks - self.jit.updt_automod_code_range(self.vm, [(addr, addr)]) - - def set_breakpoint(self, addr, *args): - """Set callbacks associated with addr. - @addr: breakpoint address - @args: functions with definition (jitter instance) - """ - self.breakpoints_handler.set_callback(addr, *args) - self.jit.add_disassembly_splits(addr) - - def get_breakpoint(self, addr): - """ - Return breakpoints handlers for address @addr - @addr: integer - """ - return self.breakpoints_handler.get_callbacks(addr) - - def remove_breakpoints_by_callback(self, callback): - """Remove callbacks associated with breakpoint. - @callback: callback to remove - """ - empty_keys = self.breakpoints_handler.remove_callback(callback) - for key in empty_keys: - self.jit.remove_disassembly_splits(key) - - def remove_breakpoints_by_address(self, address): - """Remove all breakpoints associated with @address. - @address: address of breakpoints to remove - """ - callbacks = self.breakpoints_handler.remove_key(address) - if callbacks: - self.jit.remove_disassembly_splits(address) - - def add_exception_handler(self, flag, callback): - """Add a callback associated with an exception flag. - @flag: bitflag - @callback: function with definition (jitter instance) - """ - self.exceptions_handler.add_callback(flag, callback) - - def run_at(self, pc): - """Wrapper on JiT backend. Run the code at PC and return the next PC. - @pc: address of code to run""" - - return self.jit.run_at( - self.cpu, pc, - set(self.breakpoints_handler.callbacks) - ) - - def runiter_once(self, pc): - """Iterator on callbacks results on code running from PC. 
- Check exceptions before breakpoints.""" - - self.pc = pc - # Callback called before exec - if self.exec_cb is not None: - res = self.exec_cb(self) - if res is not True: - yield res - - # Check breakpoints - old_pc = self.pc - for res in self.breakpoints_handler.call_callbacks(self.pc, self): - if res is not True: - if isinstance(res, collections.Iterator): - # If the breakpoint is a generator, yield it step by step - for tmp in res: - yield tmp - else: - yield res - - # Check exceptions (raised by breakpoints) - exception_flag = self.get_exception() - for res in self.exceptions_handler(exception_flag, self): - if res is not True: - if isinstance(res, collections.Iterator): - for tmp in res: - yield tmp - else: - yield res - - # If a callback changed pc, re call every callback - if old_pc != self.pc: - return - - # Exceptions should never be activated before run - assert(self.get_exception() == 0) - - # Run the bloc at PC - self.pc = self.run_at(self.pc) - - # Check exceptions (raised by the execution of the block) - exception_flag = self.get_exception() - for res in self.exceptions_handler(exception_flag, self): - if res is not True: - if isinstance(res, collections.Iterator): - for tmp in res: - yield tmp - else: - yield res - - def init_run(self, pc): - """Create an iterator on pc with runiter. - @pc: address of code to run - """ - self.run_iterator = self.runiter_once(pc) - self.pc = pc - self.run = True - - def continue_run(self, step=False): - """PRE: init_run. - Continue the run of the current session until iterator returns or run is - set to False. - If step is True, run only one time. 
- Return the iterator value""" - - while self.run: - try: - return next(self.run_iterator) - except StopIteration: - pass - - self.run_iterator = self.runiter_once(self.pc) - - if step is True: - return None - - return None - - def init_stack(self): - self.vm.add_memory_page( - self.stack_base, - PAGE_READ | PAGE_WRITE, - b"\x00" * self.stack_size, - "Stack") - sp = self.arch.getsp(self.attrib) - setattr(self.cpu, sp.name, self.stack_base + self.stack_size) - # regs = self.cpu.get_gpreg() - # regs[sp.name] = self.stack_base+self.stack_size - # self.cpu.set_gpreg(regs) - - def get_exception(self): - return self.cpu.get_exception() | self.vm.get_exception() - - # commun functions - def get_str_ansi(self, addr, max_char=None): - """Get ansi str from vm. - @addr: address in memory - @max_char: maximum len""" - l = 0 - tmp = addr - while ((max_char is None or l < max_char) and - self.vm.get_mem(tmp, 1) != b"\x00"): - tmp += 1 - l += 1 - return self.vm.get_mem(addr, l) - - def get_str_unic(self, addr, max_char=None): - """Get unicode str from vm. - @addr: address in memory - @max_char: maximum len""" - l = 0 - tmp = addr - while ((max_char is None or l < max_char) and - self.vm.get_mem(tmp, 2) != b"\x00\x00"): - tmp += 2 - l += 2 - s = self.vm.get_mem(addr, l) - s = s.decode("utf-16le") - return s - - def set_str_ansi(self, addr, s): - """Set an ansi string in memory""" - s = s + b"\x00" - self.vm.set_mem(addr, s) - - def set_str_unic(self, addr, s): - """Set an unicode string in memory""" - s = b"\x00".join(list(s)) + b'\x00' * 3 - self.vm.set_mem(addr, s) - - @staticmethod - def handle_lib(jitter): - """Resolve the name of the function which cause the handler call. Then - call the corresponding handler from users callback. 
- """ - fname = jitter.libs.fad2cname[jitter.pc] - if fname in jitter.user_globals: - func = jitter.user_globals[fname] - else: - log.debug('%r', fname) - raise ValueError('unknown api', hex(jitter.pc), repr(fname)) - ret = func(jitter) - jitter.pc = getattr(jitter.cpu, jitter.ir_arch.pc.name) - - # Don't break on a None return - if ret is None: - return True - else: - return ret - - def handle_function(self, f_addr): - """Add a breakpoint which will trigger the function handler""" - self.add_breakpoint(f_addr, self.handle_lib) - - def add_lib_handler(self, libs, user_globals=None): - """Add a function to handle libs call with breakpoints - @libs: libimp instance - @user_globals: dictionary for defined user function - """ - if user_globals is None: - user_globals = {} - - self.libs = libs - out = {} - for name, func in viewitems(user_globals): - name = force_bytes(name) - out[name] = func - self.user_globals = out - - for f_addr in libs.fad2cname: - self.handle_function(f_addr) - - def eval_expr(self, expr): - """Eval expression @expr in the context of the current instance. 
Side - effects are passed on it""" - self.symbexec.update_engine_from_cpu() - ret = self.symbexec.eval_updt_expr(expr) - self.symbexec.update_cpu_from_engine() - - return ret - - def set_trace_log(self, - trace_instr=True, trace_regs=True, - trace_new_blocks=False): - """ - Activate/Deactivate trace log options - - @trace_instr: activate instructions tracing log - @trace_regs: activate registers tracing log - @trace_new_blocks: dump new code blocks log - """ - - # As trace state changes, clear already jitted blocks - self.jit.clear_jitted_blocks() - - self.jit.log_mn = trace_instr - self.jit.log_regs = trace_regs - self.jit.log_newbloc = trace_new_blocks - - -class jitter(Jitter): - """ - DEPRECATED object - Use Jitter instead of jitter - """ - - - def __init__(self, *args, **kwargs): - warnings.warn("Deprecated API: use Jitter") - super(jitter, self).__init__(*args, **kwargs) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py deleted file mode 100644 index bea8cd36..00000000 --- a/miasm2/jitter/llvmconvert.py +++ /dev/null @@ -1,1926 +0,0 @@ -# -# -# Miasm2 Extension: # -# - Miasm2 IR to LLVM IR # -# - JiT # -# -# Requires: # -# - llvmlite (tested on v0.15) # -# -# Authors : Fabrice DESCLAUX (CEA/DAM), Camille MOUGEY (CEA/DAM) # -# -# - -from builtins import zip -from builtins import range -import os -from llvmlite import binding as llvm -from llvmlite import ir as llvm_ir -from builtins import int as int_types - -from future.utils import viewitems, viewvalues - -from miasm2.expression.expression import ExprId, ExprInt, ExprMem, ExprSlice, \ - ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey, Expr, \ - TOK_EQUAL, \ - TOK_INF_SIGNED, TOK_INF_UNSIGNED, \ - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED - -import miasm2.jitter.csts as m2_csts -import miasm2.core.asmblock as m2_asmblock -from miasm2.jitter.codegen import CGen, Attributes -from miasm2.expression.expression_helper import possible_values - - -class LLVMType(llvm_ir.Type): - - "Handle 
LLVM Type" - - int_cache = {} - - @classmethod - def IntType(cls, size=32): - try: - return cls.int_cache[size] - except KeyError: - cls.int_cache[size] = llvm_ir.IntType(size) - return cls.int_cache[size] - - @classmethod - def pointer(cls, addr): - "Generic pointer for execution" - return llvm_e.GenericValue.pointer(addr) - - @classmethod - def generic(cls, e): - "Generic value for execution" - if isinstance(e, ExprInt): - return llvm_e.GenericValue.int(LLVMType.IntType(e.size), int(e.arg)) - elif isinstance(e, llvm_e.GenericValue): - return e - else: - raise ValueError() - - @classmethod - def fptype(cls, size): - """Return the floating type corresponding to precision @size""" - if size == 32: - precision = llvm_ir.FloatType() - elif size == 64: - precision = llvm_ir.DoubleType() - else: - raise RuntimeError("Unsupported precision: %x", size) - return precision - - -class LLVMContext(object): - - "Context for llvm binding. Stand for a LLVM Module" - - known_fc = {} - - def __init__(self, name="mod"): - "Initialize a context with a module named 'name'" - # Initialize llvm - llvm.initialize() - llvm.initialize_native_target() - llvm.initialize_native_asmprinter() - - # Initialize target for compilation - target = llvm.Target.from_default_triple() - self.target_machine = target.create_target_machine() - self.init_exec_engine() - - def canonize_label_name(self, label): - """Canonize @label names to a common form. 
- @label: str or asmlabel instance""" - if isinstance(label, str): - return label - elif isinstance(label, LocKey): - return str(label) - else: - raise ValueError("label must either be str or LocKey") - - def optimise_level(self, level=2): - """Set the optimisation level to @level from 0 to 2 - 0: non-optimized - 2: optimized - """ - - # Set up the optimiser pipeline - pmb = llvm.create_pass_manager_builder() - pmb.opt_level = level - pm = llvm.create_module_pass_manager() - pmb.populate(pm) - self.pass_manager = pm - - def init_exec_engine(self): - mod = llvm.parse_assembly("") - engine = llvm.create_mcjit_compiler(mod, - self.target_machine) - self.exec_engine = engine - - def new_module(self, name="mod"): - """Create a module, with needed functions""" - self.mod = llvm_ir.Module(name=name) - self.add_fc(self.known_fc) - self.add_op() - - def get_execengine(self): - "Return the Execution Engine associated with this context" - return self.exec_engine - - def get_passmanager(self): - "Return the Pass Manager associated with this context" - return self.pass_manager - - def get_module(self): - "Return the module associated with this context" - return self.mod - - def add_shared_library(self, filename): - "Load the shared library 'filename'" - return llvm.load_library_permanently(filename) - - def add_fc(self, fc, readonly=False): - "Add function into known_fc" - - for name, detail in viewitems(fc): - fnty = llvm_ir.FunctionType(detail["ret"], detail["args"]) - fn = llvm_ir.Function(self.mod, fnty, name=name) - if readonly: - fn.attributes.add("readonly") - - def add_op(self): - "Add operations functions" - - i8 = LLVMType.IntType(8) - p8 = llvm_ir.PointerType(i8) - itype = LLVMType.IntType(64) - ftype = llvm_ir.FloatType() - dtype = llvm_ir.DoubleType() - fc = {"llvm.ctpop.i8": {"ret": i8, - "args": [i8]}, - "llvm.nearbyint.f32": {"ret": ftype, - "args": [ftype]}, - "llvm.nearbyint.f64": {"ret": dtype, - "args": [dtype]}, - "llvm.trunc.f32": {"ret": ftype, - "args": 
[ftype]}, - "segm2addr": {"ret": itype, - "args": [p8, - itype, - itype]}, - "x86_cpuid": {"ret": itype, - "args": [itype, - itype]}, - "fpu_fcom_c0": {"ret": itype, - "args": [dtype, - dtype]}, - "fpu_fcom_c1": {"ret": itype, - "args": [dtype, - dtype]}, - "fpu_fcom_c2": {"ret": itype, - "args": [dtype, - dtype]}, - "fpu_fcom_c3": {"ret": itype, - "args": [dtype, - dtype]}, - "llvm.sqrt.f32": {"ret": ftype, - "args": [ftype]}, - "llvm.sqrt.f64": {"ret": dtype, - "args": [dtype]}, - "llvm.fabs.f32": {"ret": ftype, - "args": [ftype]}, - "llvm.fabs.f64": {"ret": dtype, - "args": [dtype]}, - } - - for k in [8, 16]: - fc["bcdadd_%s" % k] = {"ret": LLVMType.IntType(k), - "args": [LLVMType.IntType(k), - LLVMType.IntType(k)]} - fc["bcdadd_cf_%s" % k] = {"ret": LLVMType.IntType(k), - "args": [LLVMType.IntType(k), - LLVMType.IntType(k)]} - self.add_fc(fc, readonly=True) - - - def memory_lookup(self, func, addr, size): - """Perform a memory lookup at @addr of size @size (in bit)""" - raise NotImplementedError("Abstract method") - - def memory_write(self, func, addr, size, value): - """Perform a memory write at @addr of size @size (in bit) with LLVM IR @value""" - raise NotImplementedError("Abstract method") - - -class LLVMContext_JIT(LLVMContext): - - """Extend LLVMContext_JIT in order to handle memory management and custom - operations""" - - def __init__(self, library_filenames, ir_arch, name="mod"): - "Init a LLVMContext object, and load the mem management shared library" - self.library_filenames = library_filenames - self.ir_arch = ir_arch - self.arch_specific() - self.load_libraries() - LLVMContext.__init__(self, name) - self.vmcpu = {} - - def load_libraries(self): - # Get LLVM specific functions - name = "libLLVM-%d.%d" % (llvm.llvm_version_info[0], - llvm.llvm_version_info[1], - ) - try: - # On Windows, no need to add ".dll" - self.add_shared_library(name) - except RuntimeError: - try: - # On Linux, ".so" is needed - self.add_shared_library("%s.so" % name) - except 
RuntimeError: - pass - - # Load additional libraries - for lib_fname in self.library_filenames: - self.add_shared_library(lib_fname) - - def new_module(self, name="mod"): - LLVMContext.new_module(self, name) - self.add_memlookups() - self.add_get_exceptionflag() - self.add_log_functions() - - def arch_specific(self): - arch = self.ir_arch.arch - if arch.name == "x86": - self.PC = arch.regs.RIP - self.logging_func = "dump_gpregs_%d" % self.ir_arch.attrib - else: - self.PC = self.ir_arch.pc - self.logging_func = "dump_gpregs" - if arch.name == "mips32": - from miasm2.arch.mips32.jit import mipsCGen - self.cgen_class = mipsCGen - self.has_delayslot = True - elif arch.name == "arm": - from miasm2.arch.arm.jit import arm_CGen - self.cgen_class = arm_CGen - self.has_delayslot = False - else: - self.cgen_class = CGen - self.has_delayslot = False - - def add_memlookups(self): - "Add MEM_LOOKUP functions" - - fc = {} - p8 = llvm_ir.PointerType(LLVMType.IntType(8)) - for i in [8, 16, 32, 64]: - fc["MEM_LOOKUP_%02d" % i] = {"ret": LLVMType.IntType(i), - "args": [p8, - LLVMType.IntType(64)]} - - fc["MEM_WRITE_%02d" % i] = {"ret": llvm_ir.VoidType(), - "args": [p8, - LLVMType.IntType(64), - LLVMType.IntType(i)]} - - fc["MEM_LOOKUP_INT_BN_TO_PTR"] = {"ret": llvm_ir.VoidType(), - "args": [ - p8, - LLVMType.IntType(32), - LLVMType.IntType(64), - p8 - ]} - fc["MEM_WRITE_INT_BN_FROM_PTR"] = {"ret": llvm_ir.VoidType(), - "args": [ - p8, - LLVMType.IntType(32), - LLVMType.IntType(64), - p8, - ]} - - fc["reset_memory_access"] = {"ret": llvm_ir.VoidType(), - "args": [p8, - ]} - fc["check_memory_breakpoint"] = {"ret": llvm_ir.VoidType(), - "args": [p8, - ]} - fc["check_invalid_code_blocs"] = {"ret": llvm_ir.VoidType(), - "args": [p8, - ]} - self.add_fc(fc) - - def add_get_exceptionflag(self): - "Add 'get_exception_flag' function" - p8 = llvm_ir.PointerType(LLVMType.IntType(8)) - self.add_fc({"get_exception_flag": {"ret": LLVMType.IntType(64), - "args": [p8]}}, readonly=True) - - def 
add_log_functions(self): - "Add functions for state logging" - - p8 = llvm_ir.PointerType(LLVMType.IntType(8)) - self.add_fc({self.logging_func: {"ret": llvm_ir.VoidType(), - "args": [p8]}}, - readonly=True) - - def set_vmcpu(self, lookup_table): - "Set the correspondence between register name and vmcpu offset" - - self.vmcpu = lookup_table - - def memory_lookup(self, func, addr, size): - """Perform a memory lookup at @addr of size @size (in bit)""" - builder = func.builder - if size <= 64: - fc_name = "MEM_LOOKUP_%02d" % size - fc_ptr = self.mod.get_global(fc_name) - addr_casted = builder.zext(addr, LLVMType.IntType(64)) - ret = builder.call( - fc_ptr, [func.local_vars["jitcpu"],addr_casted] - ) - else: - # Miasm uses a memory lookup function which returns a bn_t for its - # result. We cannot simply translate this into IntType. The trick - # here is to use the function MEM_LOOKUP_INT_BN_TO_PTR which has a - # different interface: the resulting bn_t is passed through a char* - # argument. - # - # WARNING: Here, we use the fact that the serialisation of LLVM - # IntType is the *same* as the bn_t structure. 
- - fc_name = "MEM_LOOKUP_INT_BN_TO_PTR" - fc_ptr = self.mod.get_global(fc_name) - addr_casted = builder.zext(addr, LLVMType.IntType(64)) - size_cst = llvm_ir.Constant(LLVMType.IntType(32), size) - - value_ptr = builder.alloca(llvm_ir.IntType(size)) - value_ptr_u8 = builder.bitcast( - value_ptr, - LLVMType.IntType(8).as_pointer() - ) - - - builder.call( - fc_ptr, - [ - func.local_vars["jitcpu"], - size_cst, - addr_casted, - value_ptr_u8 - ] - ) - ret = builder.load(value_ptr) - - return ret - - def memory_write(self, func, addr, size, value): - """Perform a memory write at @addr of size @size (in bit) with LLVM IR @value""" - # Function call - builder = func.builder - if size <= 64: - fc_name = "MEM_WRITE_%02d" % size - fc_ptr = self.mod.get_global(fc_name) - dst_casted = builder.zext(addr, LLVMType.IntType(64)) - builder.call( - fc_ptr, - [ - func.local_vars["jitcpu"], - dst_casted, - value - ] - ) - else: - # The same trick as described in MEM_LOOKUP_INT_BN_TO_PTR is used - # here. - - fc_name = "MEM_WRITE_INT_BN_FROM_PTR" - fc_ptr = self.mod.get_global(fc_name) - addr_casted = builder.zext(addr, LLVMType.IntType(64)) - size_cst = llvm_ir.Constant(LLVMType.IntType(32), size) - - ret = builder.alloca(value.type) - builder.store(value, ret) - value_ptr = builder.bitcast(ret, llvm_ir.IntType(8).as_pointer()) - - builder.call( - fc_ptr, - [ - func.local_vars["jitcpu"], - size_cst, - addr_casted, - value_ptr, - ] - ) - - - @staticmethod - def cache_notify(module, buffer): - """Called when @module has been compiled to @buffer""" - if not hasattr(module, "fname_out"): - return - fname_out = module.fname_out - - if os.access(fname_out, os.R_OK): - # No need to overwrite - return - - open(fname_out, "wb").write(buffer) - - @staticmethod - def cache_getbuffer(module): - """Return a compiled buffer for @module if available""" - if not hasattr(module, "fname_out"): - return None - - fname_out = module.fname_out - if os.access(fname_out, os.R_OK): - return open(fname_out, 
"rb").read() - return None - - def enable_cache(self): - "Enable cache of compiled object" - # Load shared libraries - for lib_fname in self.library_filenames: - self.add_shared_library(lib_fname) - - # Activate cache - self.exec_engine.set_object_cache( - self.cache_notify, - self.cache_getbuffer - ) - - def set_cache_filename(self, func, fname_out): - "Set the filename @fname_out to use for cache for @func" - # Use a custom attribute to propagate the cache filename - func.as_llvm_mod().fname_out = fname_out - - def get_ptr_from_cache(self, file_name, func_name): - "Load @file_name and return a pointer on the jitter @func_name" - # We use an empty module to avoid losing time on function building - empty_module = llvm.parse_assembly("") - empty_module.fname_out = file_name - - engine = self.exec_engine - engine.add_module(empty_module) - engine.finalize_object() - return engine.get_function_address(func_name) - - -class LLVMContext_IRCompilation(LLVMContext): - - """Extend LLVMContext in order to handle memory management and custom - operations for Miasm IR compilation""" - - def memory_lookup(self, func, addr, size): - """Perform a memory lookup at @addr of size @size (in bit)""" - builder = func.builder - int_size = LLVMType.IntType(size) - ptr_casted = builder.inttoptr( - addr, - llvm_ir.PointerType(int_size) - ) - return builder.load(ptr_casted) - - def memory_write(self, func, addr, size, value): - """Perform a memory write at @addr of size @size (in bit) with LLVM IR @value""" - builder = func.builder - int_size = LLVMType.IntType(size) - ptr_casted = builder.inttoptr( - addr, - llvm_ir.PointerType(int_size) - ) - return builder.store(value, ptr_casted) - - -class LLVMFunction(object): - """Represent a LLVM function - - Implementation note: - A new module is created each time to avoid cumulative lag (if @new_module) - """ - - # Default logging values - log_mn = False - log_regs = True - - # Operation translation - ## Basics - op_translate = {'x86_cpuid': 
'x86_cpuid', - } - ## Add the size as first argument - op_translate_with_size = {} - ## Add the size as suffix - op_translate_with_suffix_size = { - 'bcdadd': 'bcdadd', - 'bcdadd_cf': 'bcdadd_cf', - } - - def __init__(self, llvm_context, name="fc", new_module=True): - "Create a new function with name @name" - self.llvm_context = llvm_context - if new_module: - self.llvm_context.new_module() - self.mod = self.llvm_context.get_module() - - self.my_args = [] # (Expr, LLVMType, Name) - self.ret_type = None - self.builder = None - self.entry_bbl = None - - self.branch_counter = 0 - self.name = name - self._llvm_mod = None - - # Constructor utils - - def new_branch_name(self): - "Return a new branch name" - self.branch_counter += 1 - return str(self.branch_counter) - - def append_basic_block(self, label, overwrite=True): - """Add a new basic block to the current function. - @label: str or asmlabel - @overwrite: if False, do nothing if a bbl with the same name already exists - Return the corresponding LLVM Basic Block""" - name = self.llvm_context.canonize_label_name(label) - bbl = self.name2bbl.get(name, None) - if not overwrite and bbl is not None: - return bbl - bbl = self.fc.append_basic_block(name) - self.name2bbl[name] = bbl - - return bbl - - def CreateEntryBlockAlloca(self, var_type, default_value=None): - """Create an alloca instruction at the beginning of the current fc - @default_value: if set, store the default_value just after the allocation - """ - builder = self.builder - current_bbl = builder.basic_block - builder.position_at_start(self.entry_bbl) - - ret = builder.alloca(var_type) - if default_value is not None: - builder.store(default_value, ret) - builder.position_at_end(current_bbl) - return ret - - def get_ptr_by_expr(self, expr): - """"Return a pointer casted corresponding to ExprId expr. 
If it is not - already computed, compute it at the end of entry_bloc""" - - name = expr.name - - ptr_casted = self.local_vars_pointers.get(name, None) - if ptr_casted is not None: - # If the pointer has already been computed - return ptr_casted - - # Get current objects - builder = self.builder - current_bbl = builder.basic_block - - # Go at the right position - entry_bloc_bbl = self.entry_bbl - builder.position_at_end(entry_bloc_bbl) - - # Compute the pointer address - offset = self.llvm_context.vmcpu[name] - - # Pointer cast - ptr = builder.gep( - self.local_vars["vmcpu"], - [ - llvm_ir.Constant( - LLVMType.IntType(), - offset - ) - ] - ) - pointee_type = LLVMType.IntType(expr.size) - ptr_casted = builder.bitcast( - ptr, - llvm_ir.PointerType(pointee_type) - ) - # Store in cache - self.local_vars_pointers[name] = ptr_casted - - # Reset builder - builder.position_at_end(current_bbl) - - return ptr_casted - - def update_cache(self, name, value): - "Add 'name' = 'value' to the cache iff main_stream = True" - - if self.main_stream is True: - self.expr_cache[name] = value - - def set_ret(self, var): - "Cast @var and return it at the end of current bbl" - if var.type.width < 64: - var_casted = self.builder.zext(var, LLVMType.IntType(64)) - else: - var_casted = var - self.builder.ret(var_casted) - - def get_basic_block_by_loc_key(self, loc_key): - "Return the bbl corresponding to label, None otherwise" - return self.name2bbl.get( - self.llvm_context.canonize_label_name(loc_key), - None - ) - - def global_constant(self, name, value): - """ - Inspired from numba/cgutils.py - - Get or create a (LLVM module-)global constant with *name* or *value*. - """ - if name in self.mod.globals: - return self.mod.globals[name] - data = llvm_ir.GlobalVariable(self.mod, value.type, name=name) - data.global_constant = True - data.initializer = value - return data - - def make_bytearray(self, buf): - """ - Inspired from numba/cgutils.py - - Make a byte array constant from *buf*. 
- """ - b = bytearray(buf) - n = len(b) - return llvm_ir.Constant(llvm_ir.ArrayType(llvm_ir.IntType(8), n), b) - - def printf(self, format, *args): - """ - Inspired from numba/cgutils.py - - Calls printf(). - Argument `format` is expected to be a Python string. - Values to be printed are listed in `args`. - - Note: There is no checking to ensure there is correct number of values - in `args` and there type matches the declaration in the format string. - """ - assert isinstance(format, str) - mod = self.mod - # Make global constant for format string - cstring = llvm_ir.IntType(8).as_pointer() - fmt_bytes = self.make_bytearray((format + '\00').encode('ascii')) - - base_name = "printf_format" - count = 0 - while "%s_%d" % (base_name, count) in self.mod.globals: - count += 1 - global_fmt = self.global_constant( - "%s_%d" % (base_name, count), - fmt_bytes - ) - fnty = llvm_ir.FunctionType( - llvm_ir.IntType(32), - [cstring], - var_arg=True - ) - # Insert printf() - fn = mod.globals.get('printf', None) - if fn is None: - fn = llvm_ir.Function(mod, fnty, name="printf") - # Call - ptr_fmt = self.builder.bitcast(global_fmt, cstring) - return self.builder.call(fn, [ptr_fmt] + list(args)) - - # Effective constructors - - def assign(self, src, dst): - "Assign from LLVM src to M2 dst" - - # Destination - builder = self.builder - - if isinstance(dst, ExprId): - ptr_casted = self.get_ptr_by_expr(dst) - builder.store(src, ptr_casted) - - elif isinstance(dst, ExprMem): - addr = self.add_ir(dst.ptr) - self.llvm_context.memory_write(self, addr, dst.size, src) - else: - raise Exception("UnknownAssignmentType") - - def init_fc(self): - "Init the function" - - # Build type for fc signature - fc_type = llvm_ir.FunctionType( - self.ret_type, - [k[1] for k in self.my_args] - ) - - # Add fc in module - try: - fc = llvm_ir.Function(self.mod, fc_type, name=self.name) - except llvm.LLVMException: - # Overwrite the previous function - previous_fc = self.mod.get_global(self.name) - 
previous_fc.delete() - fc = self.mod.add_function(fc_type, self.name) - - # Name args - for i, a in enumerate(self.my_args): - fc.args[i].name = a[2] - - # Initialize local variable pool - self.local_vars = {} - self.local_vars_pointers = {} - for i, a in enumerate(self.my_args): - self.local_vars[a[2]] = fc.args[i] - - # Init cache - self.expr_cache = {} - self.main_stream = True - self.name2bbl = {} - - # Function link - self.fc = fc - - # Add a first BasicBlock - self.entry_bbl = self.append_basic_block("entry") - - # Instruction builder - self.builder = llvm_ir.IRBuilder(self.entry_bbl) - - def add_ir(self, expr): - "Add a Miasm2 IR to the last bbl. Return the var created" - - if self.main_stream is True and expr in self.expr_cache: - return self.expr_cache[expr] - - builder = self.builder - - if isinstance(expr, ExprInt): - ret = llvm_ir.Constant(LLVMType.IntType(expr.size), int(expr.arg)) - self.update_cache(expr, ret) - return ret - - if expr.is_loc(): - offset = self.llvm_context.ir_arch.loc_db.get_location_offset( - expr.loc_key - ) - ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) - self.update_cache(expr, ret) - return ret - - if isinstance(expr, ExprId): - name = expr.name - try: - # If expr.name is already known (args) - return self.local_vars[name] - except KeyError: - pass - - ptr_casted = self.get_ptr_by_expr(expr) - - var = builder.load(ptr_casted, name) - self.update_cache(expr, var) - return var - - if isinstance(expr, ExprOp): - op = expr.op - - if (op in self.op_translate or - op in self.op_translate_with_size or - op in self.op_translate_with_suffix_size): - args = [self.add_ir(arg) for arg in expr.args] - arg_size = expr.args[0].size - - if op in self.op_translate_with_size: - fc_name = self.op_translate_with_size[op] - arg_size_cst = llvm_ir.Constant(LLVMType.IntType(64), - arg_size) - args = [arg_size_cst] + args - elif op in self.op_translate: - fc_name = self.op_translate[op] - elif op in self.op_translate_with_suffix_size: - 
fc_name = "%s_%s" % (self.op_translate[op], arg_size) - - fc_ptr = self.mod.get_global(fc_name) - - # Cast args if needed - casted_args = [] - for i, arg in enumerate(args): - if arg.type.width < fc_ptr.args[i].type.width: - casted_args.append( - builder.zext( - arg, - fc_ptr.args[i].type - ) - ) - else: - casted_args.append(arg) - ret = builder.call(fc_ptr, casted_args) - - # Cast ret if needed - ret_size = fc_ptr.return_value.type.width - if ret_size > expr.size: - ret = builder.trunc(ret, LLVMType.IntType(expr.size)) - - self.update_cache(expr, ret) - return ret - - if op == "-": - # Unsupported op '-' with more than 1 arg - assert len(expr.args) == 1 - zero = LLVMType.IntType(expr.size)(0) - ret = builder.sub(zero, self.add_ir(expr.args[0])) - self.update_cache(expr, ret) - return ret - - if op == "parity": - assert len(expr.args) == 1 - arg = self.add_ir(expr.args[0]) - truncated = builder.trunc(arg, LLVMType.IntType(8)) - bitcount = builder.call( - self.mod.get_global("llvm.ctpop.i8"), - [truncated] - ) - ret = builder.not_(builder.trunc(bitcount, LLVMType.IntType(1))) - self.update_cache(expr, ret) - return ret - - if op in ["cntleadzeros", "cnttrailzeros"]: - assert len(expr.args) == 1 - arg = self.add_ir(expr.args[0]) - func_name = { - "cntleadzeros": "ctlz", - "cnttrailzeros": "cttz", - }[op] - func_llvm_name = "llvm.%s.i%d" % (func_name, expr.size) - func_sig = { - func_llvm_name: { - "ret": LLVMType.IntType(expr.size), - "args": [LLVMType.IntType(expr.args[0].size)] - } - } - try: - self.mod.get_global(func_llvm_name) - except KeyError: - self.llvm_context.add_fc(func_sig, readonly=True) - ret = builder.call( - self.mod.get_global(func_llvm_name), - [arg] - ) - self.update_cache(expr, ret) - return ret - - - if op.startswith('zeroExt_'): - arg = expr.args[0] - if expr.size == arg.size: - return arg - new_expr = ExprCompose(arg, ExprInt(0, expr.size - arg.size)) - return self.add_ir(new_expr) - - if op.startswith("signExt_"): - arg = expr.args[0] - 
add_size = expr.size - arg.size - new_expr = ExprCompose( - arg, - ExprCond( - arg.msb(), - ExprInt(size2mask(add_size), add_size), - ExprInt(0, add_size) - ) - ) - return self.add_ir(new_expr) - - - if op == "segm": - fc_ptr = self.mod.get_global("segm2addr") - - # Cast args if needed - args = [self.add_ir(arg) for arg in expr.args] - casted_args = [] - for i, arg in enumerate(args, 1): - if arg.type.width < fc_ptr.args[i].type.width: - casted_args.append( - builder.zext( - arg, - fc_ptr.args[i].type - ) - ) - else: - casted_args.append(arg) - - ret = builder.call( - fc_ptr, - [self.local_vars["jitcpu"]] + casted_args - ) - if ret.type.width > expr.size: - ret = builder.trunc(ret, LLVMType.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op in ["smod", "sdiv", "umod", "udiv"]: - assert len(expr.args) == 2 - - arg_b = self.add_ir(expr.args[1]) - arg_a = self.add_ir(expr.args[0]) - - if op == "smod": - callback = builder.srem - elif op == "sdiv": - callback = builder.sdiv - elif op == "umod": - callback = builder.urem - elif op == "udiv": - callback = builder.udiv - - ret = callback(arg_a, arg_b) - self.update_cache(expr, ret) - return ret - - unsigned_cmps = { - "==": "==", - ">", "<<", "a>>"]: - assert len(expr.args) == 2 - # Undefined behavior must be enforced to 0 - count = self.add_ir(expr.args[1]) - value = self.add_ir(expr.args[0]) - itype = LLVMType.IntType(expr.size) - cond_ok = self.builder.icmp_unsigned( - "<", - count, - itype(expr.size) - ) - zero = itype(0) - if op == ">>": - callback = builder.lshr - elif op == "<<": - callback = builder.shl - elif op == "a>>": - callback = builder.ashr - # x a>> size is 0 or -1, depending on x sign - cond_neg = self.builder.icmp_signed("<", value, zero) - zero = self.builder.select(cond_neg, itype(-1), zero) - - ret = self.builder.select( - cond_ok, - callback(value, count), - zero - ) - self.update_cache(expr, ret) - return ret - - - if op in ['<<<', '>>>']: - assert len(expr.args) == 2 - # 
First compute rotation modulus size - count = self.add_ir(expr.args[1]) - value = self.add_ir(expr.args[0]) - itype = LLVMType.IntType(expr.size) - expr_size = itype(expr.size) - - # As shift of expr_size is undefined, we urem the shifters - shift = builder.urem(count, expr_size) - shift_inv = builder.urem( - builder.sub(expr_size, shift), - expr_size - ) - - if op == '<<<': - part_a = builder.shl(value, shift) - part_b = builder.lshr(value, shift_inv) - else: - part_a = builder.lshr(value, shift) - part_b = builder.shl(value, shift_inv) - ret = builder.or_(part_a, part_b) - self.update_cache(expr, ret) - return ret - - if op == "sint_to_fp": - fptype = LLVMType.fptype(expr.size) - arg = self.add_ir(expr.args[0]) - ret = builder.sitofp(arg, fptype) - ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op.startswith("fp_to_sint"): - size_arg = expr.args[0].size - fptype_orig = LLVMType.fptype(size_arg) - arg = self.add_ir(expr.args[0]) - arg = builder.bitcast(arg, fptype_orig) - # Enforce IEEE-754 behavior. 
This could be enhanced with - # 'llvm.experimental.constrained.nearbyint' - if size_arg == 32: - func = self.mod.get_global("llvm.nearbyint.f32") - elif size_arg == 64: - func = self.mod.get_global("llvm.nearbyint.f64") - else: - raise RuntimeError("Unsupported size") - rounded = builder.call(func, [arg]) - ret = builder.fptoui(rounded, llvm_ir.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op.startswith("fpconvert_fp"): - assert len(expr.args) == 1 - size_arg = expr.args[0].size - fptype = LLVMType.fptype(expr.size) - fptype_orig = LLVMType.fptype(size_arg) - arg = self.add_ir(expr.args[0]) - arg = builder.bitcast(arg, fptype_orig) - if expr.size > size_arg: - fc = builder.fpext - elif expr.size < size_arg: - fc = builder.fptrunc - else: - raise RuntimeError("Not supported, same size") - ret = fc(arg, fptype) - ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op.startswith("fpround_"): - assert len(expr.args) == 1 - fptype = LLVMType.fptype(expr.size) - arg = self.add_ir(expr.args[0]) - arg = builder.bitcast(arg, fptype) - if op == "fpround_towardszero" and expr.size == 32: - fc = self.mod.get_global("llvm.trunc.f32") - else: - raise RuntimeError("Not supported, same size") - rounded = builder.call(fc, [arg]) - ret = builder.bitcast(rounded, llvm_ir.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op in ["fcom_c0", "fcom_c1", "fcom_c2", "fcom_c3"]: - arg1 = self.add_ir(expr.args[0]) - arg2 = self.add_ir(expr.args[0]) - fc_name = "fpu_%s" % op - fc_ptr = self.mod.get_global(fc_name) - casted_args = [ - builder.bitcast(arg1, llvm_ir.DoubleType()), - builder.bitcast(arg2, llvm_ir.DoubleType()), - ] - ret = builder.call(fc_ptr, casted_args) - - # Cast ret if needed - ret_size = fc_ptr.return_value.type.width - if ret_size > expr.size: - ret = builder.trunc(ret, LLVMType.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op in ["fsqrt", "fabs"]: - 
arg = self.add_ir(expr.args[0]) - if op == "fsqrt": - op = "sqrt" - - # Apply the correct func - if expr.size == 32: - arg = builder.bitcast(arg, llvm_ir.FloatType()) - ret = builder.call( - self.mod.get_global("llvm.%s.f32" % op), - [arg] - ) - elif expr.size == 64: - arg = builder.bitcast(arg, llvm_ir.DoubleType()) - ret = builder.call( - self.mod.get_global("llvm.%s.f64" % op), - [arg] - ) - else: - raise RuntimeError("Unsupported precision: %x", expr.size) - - ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op in ["fadd", "fmul", "fsub", "fdiv"]: - # More than 2 args not yet supported - assert len(expr.args) == 2 - arg1 = self.add_ir(expr.args[0]) - arg2 = self.add_ir(expr.args[1]) - precision = LLVMType.fptype(expr.size) - arg1 = builder.bitcast(arg1, precision) - arg2 = builder.bitcast(arg2, precision) - if op == "fadd": - ret = builder.fadd(arg1, arg2) - elif op == "fmul": - ret = builder.fmul(arg1, arg2) - elif op == "fsub": - ret = builder.fsub(arg1, arg2) - elif op == "fdiv": - ret = builder.fdiv(arg1, arg2) - ret = builder.bitcast(ret, llvm_ir.IntType(expr.size)) - self.update_cache(expr, ret) - return ret - - if op in [ - TOK_EQUAL, - TOK_INF_SIGNED, - TOK_INF_EQUAL_SIGNED, - TOK_INF_UNSIGNED, - TOK_INF_EQUAL_UNSIGNED, - ]: - if op == TOK_EQUAL: - opname = "==" - callback = builder.icmp_unsigned - elif op == TOK_INF_SIGNED: - opname = "<" - callback = builder.icmp_signed - elif op == TOK_INF_UNSIGNED: - opname = "<" - callback = builder.icmp_unsigned - elif op == TOK_INF_EQUAL_SIGNED: - opname = "<=" - callback = builder.icmp_signed - elif op == TOK_INF_EQUAL_UNSIGNED: - opname = "<" - callback = builder.icmp_unsigned - - left = self.add_ir(expr.args[0]) - right = self.add_ir(expr.args[1]) - - ret = callback(opname, left, right) - self.update_cache(expr, ret) - - return ret - - if len(expr.args) > 1: - - if op == "*": - callback = builder.mul - elif op == "+": - callback = builder.add - elif op 
== "&": - callback = builder.and_ - elif op == "^": - callback = builder.xor - elif op == "|": - callback = builder.or_ - elif op == "%": - callback = builder.urem - elif op == "/": - callback = builder.udiv - else: - raise NotImplementedError('Unknown op: %s' % op) - - last = self.add_ir(expr.args[0]) - - for i in range(1, len(expr.args)): - last = callback(last, - self.add_ir(expr.args[i])) - - self.update_cache(expr, last) - - return last - - raise NotImplementedError() - - if isinstance(expr, ExprMem): - - addr = self.add_ir(expr.ptr) - return self.llvm_context.memory_lookup(self, addr, expr.size) - - if isinstance(expr, ExprCond): - # Compute cond - cond = self.add_ir(expr.cond) - zero_casted = LLVMType.IntType(expr.cond.size)(0) - condition_bool = builder.icmp_unsigned("!=", cond, - zero_casted) - then_value = self.add_ir(expr.src1) - else_value = self.add_ir(expr.src2) - ret = builder.select(condition_bool, then_value, else_value) - - self.update_cache(expr, ret) - return ret - - if isinstance(expr, ExprSlice): - - src = self.add_ir(expr.arg) - - # Remove trailing bits - if expr.start != 0: - to_shr = llvm_ir.Constant( - LLVMType.IntType(expr.arg.size), - expr.start - ) - shred = builder.lshr(src, to_shr) - else: - shred = src - - # Remove leading bits - to_and = llvm_ir.Constant( - LLVMType.IntType(expr.arg.size), - (1 << (expr.stop - expr.start)) - 1 - ) - anded = builder.and_(shred, - to_and) - - # Cast into e.size - ret = builder.trunc( - anded, - LLVMType.IntType(expr.size) - ) - - self.update_cache(expr, ret) - return ret - - if isinstance(expr, ExprCompose): - - args = [] - - # Build each part - for start, src in expr.iter_args(): - # src & size - src = self.add_ir(src) - src_casted = builder.zext( - src, - LLVMType.IntType(expr.size) - ) - to_and = llvm_ir.Constant( - LLVMType.IntType(expr.size), - (1 << src.type.width) - 1 - ) - anded = builder.and_(src_casted, - to_and) - - if (start != 0): - # result << start - to_shl = llvm_ir.Constant( - 
LLVMType.IntType(expr.size), - start - ) - shled = builder.shl(anded, to_shl) - final = shled - else: - # Optimisation - final = anded - - args.append(final) - - # result = part1 | part2 | ... - last = args[0] - for i in range(1, len(expr.args)): - last = builder.or_(last, args[i]) - - self.update_cache(expr, last) - return last - - raise Exception("UnkownExpression", expr.__class__.__name__) - - # JiT specifics - - def check_memory_exception(self, offset, restricted_exception=False): - """Add a check for memory errors. - @offset: offset of the current exception (int or Instruction) - If restricted_exception, check only for exception which do not - require a pc update, and do not consider automod exception""" - - # VmMngr "get_exception_flag" return's size - size = 64 - t_size = LLVMType.IntType(size) - - # Get exception flag value - # TODO: avoid costly call using a structure deref - builder = self.builder - fc_ptr = self.mod.get_global("get_exception_flag") - exceptionflag = builder.call(fc_ptr, [self.local_vars["vmmngr"]]) - - if restricted_exception is True: - flag = ~m2_csts.EXCEPT_CODE_AUTOMOD & m2_csts.EXCEPT_DO_NOT_UPDATE_PC - m2_flag = llvm_ir.Constant(t_size, flag) - exceptionflag = builder.and_(exceptionflag, m2_flag) - - # Compute cond - zero_casted = llvm_ir.Constant(t_size, 0) - condition_bool = builder.icmp_unsigned( - "!=", - exceptionflag, - zero_casted - ) - - # Create bbls - branch_id = self.new_branch_name() - then_block = self.append_basic_block('then%s' % branch_id) - merge_block = self.append_basic_block('ifcond%s' % branch_id) - - builder.cbranch(condition_bool, then_block, merge_block) - - # Deactivate object caching - current_main_stream = self.main_stream - self.main_stream = False - - # Then Bloc - builder.position_at_end(then_block) - PC = self.llvm_context.PC - if isinstance(offset, int_types): - offset = self.add_ir(ExprInt(offset, PC.size)) - self.assign(offset, PC) - self.assign(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) - 
self.set_ret(offset) - - builder.position_at_end(merge_block) - # Reactivate object caching - self.main_stream = current_main_stream - - def check_cpu_exception(self, offset, restricted_exception=False): - """Add a check for CPU errors. - @offset: offset of the current exception (int or Instruction) - If restricted_exception, check only for exception which do not - require a pc update""" - - # Get exception flag value - builder = self.builder - m2_exception_flag = self.llvm_context.ir_arch.arch.regs.exception_flags - t_size = LLVMType.IntType(m2_exception_flag.size) - exceptionflag = self.add_ir(m2_exception_flag) - - # Compute cond - if restricted_exception is True: - flag = m2_csts.EXCEPT_NUM_UPDT_EIP - condition_bool = builder.icmp_unsigned( - ">", - exceptionflag, - llvm_ir.Constant(t_size, flag) - ) - else: - zero_casted = llvm_ir.Constant(t_size, 0) - condition_bool = builder.icmp_unsigned( - "!=", - exceptionflag, - zero_casted - ) - - # Create bbls - branch_id = self.new_branch_name() - then_block = self.append_basic_block('then%s' % branch_id) - merge_block = self.append_basic_block('ifcond%s' % branch_id) - - builder.cbranch(condition_bool, then_block, merge_block) - - # Deactivate object caching - current_main_stream = self.main_stream - self.main_stream = False - - # Then Bloc - builder.position_at_end(then_block) - PC = self.llvm_context.PC - if isinstance(offset, int_types): - offset = self.add_ir(ExprInt(offset, PC.size)) - self.assign(offset, PC) - self.assign(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) - self.set_ret(offset) - - builder.position_at_end(merge_block) - # Reactivate object caching - self.main_stream = current_main_stream - - def gen_pre_code(self, instr_attrib): - if instr_attrib.log_mn: - loc_db = self.llvm_context.ir_arch.loc_db - self.printf( - "%.8X %s\n" % ( - instr_attrib.instr.offset, - instr_attrib.instr.to_string(loc_db) - ) - ) - - def gen_post_code(self, attributes, pc_value): - if attributes.log_regs: - # Update PC 
for dump_gpregs - PC = self.llvm_context.PC - t_size = LLVMType.IntType(PC.size) - dst = self.builder.zext(t_size(pc_value), t_size) - self.assign(dst, PC) - - fc_ptr = self.mod.get_global(self.llvm_context.logging_func) - self.builder.call(fc_ptr, [self.local_vars["vmcpu"]]) - - def gen_post_instr_checks(self, attrib, next_instr): - if attrib.mem_read | attrib.mem_write: - fc_ptr = self.mod.get_global("check_memory_breakpoint") - self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) - fc_ptr = self.mod.get_global("check_invalid_code_blocs") - self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) - self.check_memory_exception(next_instr, restricted_exception=False) - - if attrib.set_exception: - self.check_cpu_exception(next_instr, restricted_exception=False) - - if attrib.mem_read | attrib.mem_write: - fc_ptr = self.mod.get_global("reset_memory_access") - self.builder.call(fc_ptr, [self.local_vars["vmmngr"]]) - - def expr2cases(self, expr): - """ - Evaluate @expr and return: - - switch value -> dst - - evaluation of the switch value (if any) - """ - - to_eval = expr - dst2case = {} - case2dst = {} - for i, solution in enumerate(possible_values(expr)): - value = solution.value - index = dst2case.get(value, i) - to_eval = to_eval.replace_expr({value: ExprInt(index, value.size)}) - dst2case[value] = index - if value.is_int() or value.is_loc(): - case2dst[i] = value - else: - case2dst[i] = self.add_ir(value) - - - evaluated = self.add_ir(to_eval) - return case2dst, evaluated - - def gen_jump2dst(self, attrib, instr_offsets, dst): - """Generate the code for a jump to @dst with final check for error - - Several cases have to be considered: - - jump to an offset out of the current ASM BBL (JMP 0x11223344) - - jump to an offset inside the current ASM BBL (Go to next instruction) - - jump to an offset back in the current ASM BBL (For max_exec jit - option on self loops) - - jump to a generated IR label, which must be jitted in this same - function (REP MOVSB) - - jump to 
a computed offset (CALL @32[0x11223344]) - - """ - PC = self.llvm_context.PC - # We are no longer in the main stream, deactivate cache - self.main_stream = False - - offset = None - if isinstance(dst, ExprInt): - offset = int(dst) - loc_key = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(offset) - dst = ExprLoc(loc_key, dst.size) - - if isinstance(dst, ExprLoc): - loc_key = dst.loc_key - bbl = self.get_basic_block_by_loc_key(loc_key) - offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) - if bbl is not None: - # "local" jump, inside this function - if offset is None: - # Avoid checks on generated label - self.builder.branch(bbl) - return - - if (offset in instr_offsets and - offset > attrib.instr.offset): - # forward local jump (ie. next instruction) - self.gen_post_code(attrib, offset) - self.gen_post_instr_checks(attrib, offset) - self.builder.branch(bbl) - return - - # reaching this point means a backward local jump, promote it to - # extern - - # "extern" jump on a defined offset, return to the caller - dst = self.add_ir(ExprInt(offset, PC.size)) - - # "extern" jump with a computed value, return to the caller - assert isinstance(dst, (llvm_ir.Instruction, llvm_ir.Value)) - # Cast @dst, if needed - # for instance, x86_32: IRDst is 32 bits, so is @dst; PC is 64 bits - if dst.type.width != PC.size: - dst = self.builder.zext(dst, LLVMType.IntType(PC.size)) - - self.gen_post_code(attrib, offset) - self.assign(dst, PC) - self.gen_post_instr_checks(attrib, dst) - self.assign(self.add_ir(ExprInt(0, 8)), ExprId("status", 32)) - self.set_ret(dst) - - - def gen_irblock(self, instr_attrib, attributes, instr_offsets, irblock): - """ - Generate the code for an @irblock - @instr_attrib: an Attributes instance or the instruction to translate - @attributes: list of Attributes corresponding to irblock assignments - @instr_offsets: offset of all asmblock's instructions - @irblock: an irblock instance - """ - - case2dst = None - case_value = 
None - instr = instr_attrib.instr - - for index, assignblk in enumerate(irblock): - # Enable cache - self.main_stream = True - self.expr_cache = {} - - # Prefetch memory - for element in assignblk.get_r(mem_read=True): - if isinstance(element, ExprMem): - self.add_ir(element) - - # Evaluate expressions - values = {} - for dst, src in viewitems(assignblk): - if dst == self.llvm_context.ir_arch.IRDst: - case2dst, case_value = self.expr2cases(src) - else: - values[dst] = self.add_ir(src) - - # Check memory access exception - if attributes[index].mem_read: - self.check_memory_exception( - instr.offset, - restricted_exception=True - ) - - # Update the memory - for dst, src in viewitems(values): - if isinstance(dst, ExprMem): - self.assign(src, dst) - - # Check memory write exception - if attributes[index].mem_write: - self.check_memory_exception( - instr.offset, - restricted_exception=True - ) - - # Update registers values - for dst, src in viewitems(values): - if not isinstance(dst, ExprMem): - self.assign(src, dst) - - # Check post assignblk exception flags - if attributes[index].set_exception: - self.check_cpu_exception( - instr.offset, - restricted_exception=True - ) - - # Destination - assert case2dst is not None - if len(case2dst) == 1: - # Avoid switch in this common case - self.gen_jump2dst( - instr_attrib, - instr_offsets, - next(iter(viewvalues(case2dst))) - ) - else: - current_bbl = self.builder.basic_block - - # Gen the out cases - branch_id = self.new_branch_name() - case2bbl = {} - for case, dst in list(viewitems(case2dst)): - name = "switch_%s_%d" % (branch_id, case) - bbl = self.append_basic_block(name) - case2bbl[case] = bbl - self.builder.position_at_start(bbl) - self.gen_jump2dst(instr_attrib, instr_offsets, dst) - - # Jump on the correct output - self.builder.position_at_end(current_bbl) - switch = self.builder.switch(case_value, case2bbl[0]) - for i, bbl in viewitems(case2bbl): - if i == 0: - # Default case is case 0, arbitrary - continue - 
switch.add_case(i, bbl) - - def gen_bad_block(self, asmblock): - """ - Translate an asm_bad_block into a CPU exception - """ - builder = self.builder - m2_exception_flag = self.llvm_context.ir_arch.arch.regs.exception_flags - t_size = LLVMType.IntType(m2_exception_flag.size) - self.assign( - self.add_ir(ExprInt(1, 8)), - ExprId("status", 32) - ) - self.assign( - t_size(m2_csts.EXCEPT_UNK_MNEMO), - m2_exception_flag - ) - offset = self.llvm_context.ir_arch.loc_db.get_location_offset( - asmblock.loc_key - ) - self.set_ret(LLVMType.IntType(64)(offset)) - - def gen_finalize(self, asmblock, codegen): - """ - In case of delayslot, generate a dummy BBL which return on the computed - IRDst or on next_label - """ - if self.llvm_context.has_delayslot: - next_label = codegen.get_block_post_label(asmblock) - builder = self.builder - - builder.position_at_end(self.get_basic_block_by_loc_key(next_label)) - - # Common code - self.assign(self.add_ir(ExprInt(0, 8)), - ExprId("status", 32)) - - # Check if IRDst has been set - zero_casted = LLVMType.IntType(codegen.delay_slot_set.size)(0) - condition_bool = builder.icmp_unsigned( - "!=", - self.add_ir(codegen.delay_slot_set), - zero_casted - ) - - # Create bbls - branch_id = self.new_branch_name() - then_block = self.append_basic_block('then%s' % branch_id) - else_block = self.append_basic_block('else%s' % branch_id) - - builder.cbranch(condition_bool, then_block, else_block) - - # Deactivate object caching - self.main_stream = False - - # Then Block - builder.position_at_end(then_block) - PC = self.llvm_context.PC - to_ret = self.add_ir(codegen.delay_slot_dst) - self.assign(to_ret, PC) - self.assign(self.add_ir(ExprInt(0, 8)), - ExprId("status", 32)) - self.set_ret(to_ret) - - # Else Block - builder.position_at_end(else_block) - PC = self.llvm_context.PC - next_label_offset = self.llvm_context.ir_arch.loc_db.get_location_offset(next_label) - to_ret = LLVMType.IntType(PC.size)(next_label_offset) - self.assign(to_ret, PC) - 
self.set_ret(to_ret) - - def from_asmblock(self, asmblock): - """Build the function from an asmblock (asm_block instance). - Prototype : f(i8* jitcpu, i8* vmcpu, i8* vmmngr, i8* status)""" - - # Build function signature - self.my_args.append((ExprId("jitcpu", 32), - llvm_ir.PointerType(LLVMType.IntType(8)), - "jitcpu")) - self.my_args.append((ExprId("vmcpu", 32), - llvm_ir.PointerType(LLVMType.IntType(8)), - "vmcpu")) - self.my_args.append((ExprId("vmmngr", 32), - llvm_ir.PointerType(LLVMType.IntType(8)), - "vmmngr")) - self.my_args.append((ExprId("status", 32), - llvm_ir.PointerType(LLVMType.IntType(8)), - "status")) - ret_size = 64 - - self.ret_type = LLVMType.IntType(ret_size) - - # Initialise the function - self.init_fc() - self.local_vars_pointers["status"] = self.local_vars["status"] - - if isinstance(asmblock, m2_asmblock.AsmBlockBad): - self.gen_bad_block(asmblock) - return - - # Create basic blocks (for label branchs) - entry_bbl, builder = self.entry_bbl, self.builder - for instr in asmblock.lines: - lbl = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(instr.offset) - self.append_basic_block(lbl) - - # TODO: merge duplicate code with CGen - codegen = self.llvm_context.cgen_class(self.llvm_context.ir_arch) - irblocks_list = codegen.block2assignblks(asmblock) - instr_offsets = [line.offset for line in asmblock.lines] - - # Prepare for delayslot - if self.llvm_context.has_delayslot: - for element in (codegen.delay_slot_dst, codegen.delay_slot_set): - eltype = LLVMType.IntType(element.size) - ptr = self.CreateEntryBlockAlloca( - eltype, - default_value=eltype(0) - ) - self.local_vars_pointers[element.name] = ptr - loc_key = codegen.get_block_post_label(asmblock) - offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) - instr_offsets.append(offset) - self.append_basic_block(loc_key) - - # Add content - builder.position_at_end(entry_bbl) - - - for instr, irblocks in zip(asmblock.lines, irblocks_list): - instr_attrib, 
irblocks_attributes = codegen.get_attributes( - instr, - irblocks, - self.log_mn, - self.log_regs - ) - - # Pre-create basic blocks - for irblock in irblocks: - self.append_basic_block(irblock.loc_key, overwrite=False) - - # Generate the corresponding code - for index, irblock in enumerate(irblocks): - new_irblock = self.llvm_context.ir_arch.irbloc_fix_regs_for_mode( - irblock, self.llvm_context.ir_arch.attrib) - - # Set the builder at the beginning of the correct bbl - self.builder.position_at_end(self.get_basic_block_by_loc_key(new_irblock.loc_key)) - - if index == 0: - self.gen_pre_code(instr_attrib) - self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, new_irblock) - - # Gen finalize (see codegen::CGen) is unrecheable, except with delayslot - self.gen_finalize(asmblock, codegen) - - # Branch entry_bbl on first label - builder.position_at_end(entry_bbl) - first_label_bbl = self.get_basic_block_by_loc_key(asmblock.loc_key) - builder.branch(first_label_bbl) - - - # LLVMFunction manipulation - - def __str__(self): - "Print the llvm IR corresponding to the current module" - return str(self.mod) - - def dot(self): - "Return the CFG of the current function" - return llvm.get_function_cfg(self.fc) - - def as_llvm_mod(self): - """Return a ModuleRef standing for the current function""" - if self._llvm_mod is None: - self._llvm_mod = llvm.parse_assembly(str(self.mod)) - return self._llvm_mod - - def verify(self): - "Verify the module syntax" - return self.as_llvm_mod().verify() - - def get_bytecode(self): - "Return LLVM bitcode corresponding to the current module" - return self.as_llvm_mod().as_bitcode() - - def get_assembly(self): - "Return native assembly corresponding to the current module" - return self.llvm_context.target_machine.emit_assembly(self.as_llvm_mod()) - - def optimise(self): - "Optimise the function in place" - return self.llvm_context.pass_manager.run(self.as_llvm_mod()) - - def __call__(self, *args): - "Eval the function with 
arguments args" - - e = self.llvm_context.get_execengine() - - genargs = [LLVMType.generic(a) for a in args] - ret = e.run_function(self.fc, genargs) - - return ret.as_int() - - def get_function_pointer(self): - "Return a pointer on the Jitted function" - engine = self.llvm_context.get_execengine() - - # Add the module and make sure it is ready for execution - engine.add_module(self.as_llvm_mod()) - engine.finalize_object() - - return engine.get_function_address(self.fc.name) - - -class LLVMFunction_IRCompilation(LLVMFunction): - """LLVMFunction made for IR export, in conjunction with - LLVMContext_IRCompilation. - - This class offers only the basics, and decision must be made by the class - user on how actual registers, ABI, etc. are reflected - - - Example of use: - >>> context = LLVMContext_IRCompilation() - >>> context.ir_arch = ir - >>> - >>> func = LLVMFunction_IRCompilation(context, name="test") - >>> func.ret_type = llvm_ir.VoidType() - >>> func.init_fc() - >>> - >>> # Insert here function additional inits - >>> XX = func.builder.alloca(...) 
- >>> func.local_vars_pointers["EAX"] = XX - >>> # - >>> - >>> func.from_ircfg(ircfg) - """ - - def init_fc(self): - super(LLVMFunction_IRCompilation, self).init_fc() - - # Create a global IRDst if not any - IRDst = self.llvm_context.ir_arch.IRDst - if str(IRDst) not in self.mod.globals: - llvm_ir.GlobalVariable(self.mod, LLVMType.IntType(IRDst.size), - name=str(IRDst)) - - # Create an 'exit' basic block, the final leave - self.exit_bbl = self.append_basic_block("exit") - - def gen_jump2dst(self, _attrib, _instr_offsets, dst): - self.main_stream = False - - if isinstance(dst, Expr): - if dst.is_int(): - loc = self.llvm_context.ir_arch.loc_db.getby_offset_create(int(dst)) - dst = ExprLoc(loc, dst.size) - assert dst.is_loc() - bbl = self.get_basic_block_by_loc_key(dst.loc_key) - if bbl is not None: - # "local" jump, inside this function - self.builder.branch(bbl) - return - - # extern jump - dst = self.add_ir(dst) - - # Emulate indirect jump with: - # @IRDst = dst - # goto exit - self.builder.store(dst, self.mod.get_global("IRDst")) - self.builder.branch(self.exit_bbl) - - def gen_irblock(self, irblock): - instr_attrib = Attributes() - attributes = [Attributes() for _ in range(len(irblock.assignblks))] - instr_offsets = None - return super(LLVMFunction_IRCompilation, self).gen_irblock( - instr_attrib, attributes, instr_offsets, irblock - ) - - def from_ircfg(self, ircfg, append_ret=True): - # Create basic blocks - for loc_key, irblock in viewitems(ircfg.blocks): - self.append_basic_block(loc_key) - - # Add IRBlocks - for label, irblock in viewitems(ircfg.blocks): - self.builder.position_at_end(self.get_basic_block_by_loc_key(label)) - self.gen_irblock(irblock) - - # Branch the entry BBL on the IRCFG head - self.builder.position_at_end(self.entry_bbl) - heads = ircfg.heads() - assert len(heads) == 1 - starting_label = list(heads).pop() - self.builder.branch(self.get_basic_block_by_loc_key(starting_label)) - - # Returns with the builder on the exit block - 
self.builder.position_at_end(self.exit_bbl) - - if append_ret: - self.builder.ret_void() diff --git a/miasm2/jitter/loader/__init__.py b/miasm2/jitter/loader/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/miasm2/jitter/loader/elf.py b/miasm2/jitter/loader/elf.py deleted file mode 100644 index 1044fe73..00000000 --- a/miasm2/jitter/loader/elf.py +++ /dev/null @@ -1,337 +0,0 @@ -import struct -from collections import defaultdict - -from future.utils import viewitems - -from elfesteem import cstruct -from elfesteem import * -import elfesteem.elf as elf_csts - -from miasm2.jitter.csts import * -from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp -from miasm2.core.interval import interval - -import logging - -log = logging.getLogger('loader_elf') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.CRITICAL) - - -def get_import_address_elf(e): - import2addr = defaultdict(set) - for sh in e.sh: - if not hasattr(sh, 'rel'): - continue - for k, v in viewitems(sh.rel): - import2addr[('xxx', k)].add(v.offset) - return import2addr - - -def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): - # XXX quick hack - fa = get_import_address_elf(e) - dyn_funcs = {} - for (libname, libfunc), ads in viewitems(fa): - # Quick hack - if a symbol is already known, do not stub it - if loc_db and loc_db.get_name_location(libfunc) is not None: - continue - for ad in ads: - ad_base_lib = runtime_lib.lib_get_add_base(libname) - ad_libfunc = runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) - - libname_s = canon_libname_libfunc(libname, libfunc) - dyn_funcs[libname_s] = ad_libfunc - if patch_vm_imp: - log.debug('patch 0x%x 0x%x %s', ad, ad_libfunc, libfunc) - set_endianness = { elf_csts.ELFDATA2MSB: ">", - elf_csts.ELFDATA2LSB: "<", - elf_csts.ELFDATANONE: "" }[e.sex] - vm.set_mem(ad, - struct.pack(set_endianness + - cstruct.size2type[e.size], 
- ad_libfunc)) - return runtime_lib, dyn_funcs - -def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): - """Parse the elfesteem's ELF @elf to extract symbols, and fill the LocationDB - instance @loc_db with parsed symbols. - - The ELF is considered mapped at @base_addr - @elf: elfesteem's ELF instance - @loc_db: LocationDB used to retrieve symbols'offset - @base_addr: addr to reloc to (if any) - """ - # Get symbol sections - symbol_sections = [] - for section_header in elf.sh: - if hasattr(section_header, 'symbols'): - for name, sym in viewitems(section_header.symbols): - if not name or sym.value == 0: - continue - name = loc_db.find_free_name(name) - loc_db.add_location(name, sym.value, strict=False) - - if hasattr(section_header, 'reltab'): - for rel in section_header.reltab: - if not rel.sym or rel.offset == 0: - continue - name = loc_db.find_free_name(rel.sym) - loc_db.add_location(name, rel.offset, strict=False) - - if hasattr(section_header, 'symtab'): - log.debug("Find %d symbols in %r", len(section_header.symtab), - section_header) - symbol_sections.append(section_header) - elif isinstance(section_header, ( - elf_init.GNUVerDef, elf_init.GNUVerSym, elf_init.GNUVerNeed - )): - log.debug("Find GNU version related section, unsupported for now") - - for section in symbol_sections: - for symbol_entry in section.symtab: - # Here, the computation of vaddr assumes 'elf' is an executable or a - # shared object file - - # For relocatable file, symbol_entry.value is an offset from the section - # base -> not handled here - st_bind = symbol_entry.info >> 4 - st_type = symbol_entry.info & 0xF - - if st_type not in [ - elf_csts.STT_NOTYPE, - elf_csts.STT_OBJECT, - elf_csts.STT_FUNC, - elf_csts.STT_COMMON, - elf_csts.STT_GNU_IFUNC, - ]: - # Ignore symbols useless in linking - continue - - if st_bind == elf_csts.STB_GLOBAL: - # Global symbol - weak = False - elif st_bind == elf_csts.STB_WEAK: - # Weak symbol - weak = True - else: - # Ignore local & others symbols - 
continue - - absolute = False - if symbol_entry.shndx == 0: - # SHN_UNDEF - continue - elif symbol_entry.shndx == 0xfff1: - # SHN_ABS - absolute = True - log.debug("Absolute symbol %r - %x", symbol_entry.name, - symbol_entry.value) - elif 0xff00 <= symbol_entry.shndx <= 0xffff: - # Reserved index (between SHN_LORESERV and SHN_HIRESERVE) - raise RuntimeError("Unsupported reserved index: %r" % symbol_entry) - - name = symbol_entry.name - if name == "": - # Ignore empty symbol - log.debug("Empty symbol %r", symbol_entry) - continue - - if absolute: - vaddr = symbol_entry.value - else: - vaddr = symbol_entry.value + base_addr - - # 'weak' information is only used to force global symbols for now - already_existing_loc = loc_db.get_name_location(name) - if already_existing_loc is not None: - if weak: - # Weak symbol, this is ok to already exists, skip it - continue - else: - # Global symbol, force it - loc_db.remove_location_name(already_existing_loc, - name) - already_existing_off = loc_db.get_offset_location(vaddr) - if already_existing_off is not None: - loc_db.add_location_name(already_existing_off, name) - else: - loc_db.add_location(name=name, offset=vaddr) - - -def apply_reloc_x86(elf, vm, section, base_addr, loc_db): - """Apply relocation for x86 ELF contained in the section @section - @elf: elfesteem's ELF instance - @vm: VmMngr instance - @section: elf's section containing relocation to perform - @base_addr: addr to reloc to - @loc_db: LocationDB used to retrieve symbols'offset - """ - if elf.size == 64: - addr_writer = lambda vaddr, addr: vm.set_mem(vaddr, - struct.pack("> 32) & 0xFFFFFFFF - r_info_type = r_info & 0xFFFFFFFF - elif elf.size == 32: - r_info_sym = (r_info >> 8) & 0xFFFFFF - r_info_type = r_info & 0xFF - - is_ifunc = False - symbol_entry = None - if r_info_sym > 0: - symbol_entry = symb_section.symtab[r_info_sym] - - r_offset = reloc.offset - r_addend = reloc.cstr.sym - - if (elf.size, reloc.type) in [ - (64, elf_csts.R_X86_64_RELATIVE), - (64, 
elf_csts.R_X86_64_IRELATIVE), - (32, elf_csts.R_386_RELATIVE), - (32, elf_csts.R_386_IRELATIVE), - ]: - # B + A - addr = base_addr + r_addend - where = base_addr + r_offset - elif reloc.type == elf_csts.R_X86_64_64: - # S + A - addr_symb = loc_db.get_name_offset(symbol_entry.name) - if addr_symb is None: - log.warning("Unable to find symbol %r" % symbol_entry.name) - continue - addr = addr_symb + r_addend - where = base_addr + r_offset - elif (elf.size, reloc.type) in [ - (64, elf_csts.R_X86_64_TPOFF64), - (64, elf_csts.R_X86_64_DTPMOD64), - (32, elf_csts.R_386_TLS_TPOFF), - ]: - # Thread dependent, ignore for now - log.debug("Skip relocation TPOFF64 %r", reloc) - continue - elif (elf.size, reloc.type) in [ - (64, elf_csts.R_X86_64_GLOB_DAT), - (64, elf_csts.R_X86_64_JUMP_SLOT), - (32, elf_csts.R_386_JMP_SLOT), - (32, elf_csts.R_386_GLOB_DAT), - ]: - # S - addr = loc_db.get_name_offset(symbol_entry.name) - if addr is None: - log.warning("Unable to find symbol %r" % symbol_entry.name) - continue - is_ifunc = symbol_entry.info & 0xF == elf_csts.STT_GNU_IFUNC - where = base_addr + r_offset - else: - raise ValueError( - "Unknown relocation type: %d (%r)" % (reloc.type, - reloc) - ) - if is_ifunc: - # Resolve at runtime - not implemented for now - log.warning("Relocation for %r (at %x, currently pointing on %x) " - "has to be resolved at runtime", - name, where, sym_addr) - continue - - log.debug("Write %x at %x", addr, where) - addr_writer(where, addr) - - -def vm_load_elf(vm, fdata, name="", base_addr=0, loc_db=None, apply_reloc=False, - **kargs): - """ - Very dirty elf loader - TODO XXX: implement real loader - """ - elf = elf_init.ELF(fdata, **kargs) - i = interval() - all_data = {} - - for p in elf.ph.phlist: - if p.ph.type != elf_csts.PT_LOAD: - continue - log.debug( - '0x%x 0x%x 0x%x 0x%x 0x%x', p.ph.vaddr, p.ph.memsz, p.ph.offset, - p.ph.filesz, p.ph.type) - data_o = elf._content[p.ph.offset:p.ph.offset + p.ph.filesz] - addr_o = p.ph.vaddr + base_addr - a_addr = 
addr_o & ~0xFFF - b_addr = addr_o + max(p.ph.memsz, p.ph.filesz) - b_addr = (b_addr + 0xFFF) & ~0xFFF - all_data[addr_o] = data_o - # -2: Trick to avoid merging 2 consecutive pages - i += [(a_addr, b_addr - 2)] - for a, b in i.intervals: - vm.add_memory_page( - a, - PAGE_READ | PAGE_WRITE, - b"\x00" * (b + 2 - a), - repr(name) - ) - - for r_vaddr, data in viewitems(all_data): - vm.set_mem(r_vaddr, data) - - if loc_db is not None: - fill_loc_db_with_symbols(elf, loc_db, base_addr) - - if apply_reloc: - arch = guess_arch(elf) - sections = [] - for section in elf.sh: - if not hasattr(section, 'reltab'): - continue - if isinstance(section, elf_init.RelATable): - pass - elif isinstance(section, elf_init.RelTable): - if arch == "x86_64": - log.warning("REL section should not happen in x86_64") - else: - raise RuntimeError("Unknown relocation section type: %r" % section) - sections.append(section) - for section in sections: - if arch in ["x86_64", "x86_32"]: - apply_reloc_x86(elf, vm, section, base_addr, loc_db) - else: - log.debug("Unsupported relocation for arch %r" % arch) - - return elf - - -class libimp_elf(libimp): - pass - - -# machine, size, sex -> arch_name -ELF_machine = {(elf_csts.EM_ARM, 32, elf_csts.ELFDATA2LSB): "arml", - (elf_csts.EM_ARM, 32, elf_csts.ELFDATA2MSB): "armb", - (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2LSB): "aarch64l", - (elf_csts.EM_AARCH64, 64, elf_csts.ELFDATA2MSB): "aarch64b", - (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2MSB): "mips32b", - (elf_csts.EM_MIPS, 32, elf_csts.ELFDATA2LSB): "mips32l", - (elf_csts.EM_386, 32, elf_csts.ELFDATA2LSB): "x86_32", - (elf_csts.EM_X86_64, 64, elf_csts.ELFDATA2LSB): "x86_64", - (elf_csts.EM_SH, 32, elf_csts.ELFDATA2LSB): "sh4", - (elf_csts.EM_PPC, 32, elf_csts.ELFDATA2MSB): "ppc32b", - } - - -def guess_arch(elf): - """Return the architecture specified by the ELF container @elf. 
- If unknown, return None""" - return ELF_machine.get((elf.Ehdr.machine, elf.size, elf.sex), None) diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py deleted file mode 100644 index a8e6ec0d..00000000 --- a/miasm2/jitter/loader/pe.py +++ /dev/null @@ -1,565 +0,0 @@ -from builtins import map -import os -import struct -import logging -from collections import defaultdict - -from future.utils import viewitems, viewvalues - -from elfesteem import pe -from elfesteem import cstruct -from elfesteem import * - -from miasm2.jitter.csts import * -from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp - -log = logging.getLogger('loader_pe') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.INFO) - - -def get_pe_dependencies(pe_obj): - """Return dependency set - @pe_obj: pe object""" - - if pe_obj.DirImport.impdesc is None: - return set() - out = set() - for dependency in pe_obj.DirImport.impdesc: - libname = dependency.dlldescname.name.lower() - out.add(libname) - return out - - -def get_import_address_pe(e): - import2addr = defaultdict(set) - if e.DirImport.impdesc is None: - return import2addr - for s in e.DirImport.impdesc: - # fthunk = e.rva2virt(s.firstthunk) - # l = "%2d %-25s %s" % (i, repr(s.dlldescname), repr(s)) - libname = s.dlldescname.name.lower() - for ii, imp in enumerate(s.impbynames): - if isinstance(imp, pe.ImportByName): - funcname = imp.name - else: - funcname = imp - # l = " %2d %-16s" % (ii, repr(funcname)) - import2addr[(libname, funcname)].add( - e.rva2virt(s.firstthunk + (e._wsize * ii) // 8) - ) - return import2addr - - -def preload_pe(vm, e, runtime_lib, patch_vm_imp=True): - fa = get_import_address_pe(e) - dyn_funcs = {} - # log.debug('imported funcs: %s' % fa) - for (libname, libfunc), ads in viewitems(fa): - for ad in ads: - ad_base_lib = runtime_lib.lib_get_add_base(libname) - ad_libfunc = 
runtime_lib.lib_get_add_func(ad_base_lib, libfunc, ad) - - libname_s = canon_libname_libfunc(libname, libfunc) - dyn_funcs[libname_s] = ad_libfunc - if patch_vm_imp: - vm.set_mem( - ad, struct.pack(cstruct.size2type[e._wsize], ad_libfunc)) - return dyn_funcs - - -def is_redirected_export(pe_obj, addr): - """Test if the @addr is a forwarded export address. If so, return - dllname/function name couple. If not, return False. - - An export address is a forwarded export if the rva is in the export - directory of the pe. - - @pe_obj: PE instance - @addr: virtual address of the function to test - """ - - export_dir = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT] - addr_rva = pe_obj.virt2rva(addr) - if not (export_dir.rva <= addr_rva < export_dir.rva + export_dir.size): - return False - addr_end = pe_obj.virt.find(b'\x00', addr) - data = pe_obj.virt.get(addr, addr_end) - - dllname, func_info = data.split('.', 1) - dllname = dllname.lower() - - # Test if function is forwarded using ordinal - if func_info.startswith('#'): - func_info = int(func_info[1:]) - return dllname, func_info - - -def get_export_name_addr_list(e): - out = [] - # add func name - for i, n in enumerate(e.DirExport.f_names): - addr = e.DirExport.f_address[e.DirExport.f_nameordinals[i].ordinal] - f_name = n.name.name - # log.debug('%s %s' % (f_name, hex(e.rva2virt(addr.rva)))) - out.append((f_name, e.rva2virt(addr.rva))) - - # add func ordinal - for i, o in enumerate(e.DirExport.f_nameordinals): - addr = e.DirExport.f_address[o.ordinal] - # log.debug('%s %s %s' % (o.ordinal, e.DirExport.expdesc.base, - # hex(e.rva2virt(addr.rva)))) - out.append( - (o.ordinal + e.DirExport.expdesc.base, e.rva2virt(addr.rva))) - - for i, s in enumerate(e.DirExport.f_address): - if not s.rva: - continue - out.append((i + e.DirExport.expdesc.base, e.rva2virt(s.rva))) - - return out - - -def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs): - """Load a PE in memory (@vm) from a data buffer @fdata - @vm: 
VmMngr instance - @fdata: data buffer to parse - @align_s: (optional) If False, keep gaps between section - @load_hdr: (optional) If False, do not load the NThdr in memory - Return the corresponding PE instance. - - Extra arguments are passed to PE instantiation. - If all sections are aligned, they will be mapped on several different pages - Otherwise, a big page is created, containing all sections - """ - - # Parse and build a PE instance - pe = pe_init.PE(fdata, **kargs) - - # Check if all section are aligned - aligned = True - for section in pe.SHList: - if section.addr & 0xFFF: - aligned = False - break - - if aligned: - # Loader NT header - if load_hdr: - # Header length - hdr_len = max(0x200, pe.NThdr.sizeofheaders) - # Page minimum size - min_len = min(pe.SHList[0].addr, 0x1000) - - # Get and pad the pe_hdr - pe_hdr = ( - pe.content[:hdr_len] + - max(0, (min_len - hdr_len)) * b"\x00" - ) - vm.add_memory_page( - pe.NThdr.ImageBase, - PAGE_READ | PAGE_WRITE, - pe_hdr, - "%r: PE Header" % name - ) - - # Align sections size - if align_s: - # Use the next section address to compute the new size - for i, section in enumerate(pe.SHList[:-1]): - new_size = pe.SHList[i + 1].addr - section.addr - section.size = new_size - section.rawsize = new_size - section.data = strpatchwork.StrPatchwork( - section.data[:new_size] - ) - section.offset = section.addr - - # Last section alignment - last_section = pe.SHList[-1] - last_section.size = (last_section.size + 0xfff) & 0xfffff000 - - # Pad sections with null bytes and map them - for section in pe.SHList: - data = bytes(section.data) - data += b"\x00" * (section.size - len(data)) - attrib = PAGE_READ - if section.flags & 0x80000000: - attrib |= PAGE_WRITE - vm.add_memory_page( - pe.rva2virt(section.addr), - attrib, - data, - "%r: %r" % (name, section.name) - ) - - return pe - - # At least one section is not aligned - log.warning('PE is not aligned, creating big section') - min_addr = 0 if load_hdr else None - max_addr = None 
- data = "" - - for i, section in enumerate(pe.SHList): - if i < len(pe.SHList) - 1: - # If it is not the last section, use next section address - section.size = pe.SHList[i + 1].addr - section.addr - section.rawsize = section.size - section.offset = section.addr - - # Update min and max addresses - if min_addr is None or section.addr < min_addr: - min_addr = section.addr - max_section_len = max(section.size, len(section.data)) - if max_addr is None or section.addr + max_section_len > max_addr: - max_addr = section.addr + max_section_len - - min_addr = pe.rva2virt(min_addr) - max_addr = pe.rva2virt(max_addr) - log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr, - (max_addr - min_addr)) - - # Create only one big section containing the whole PE - vm.add_memory_page( - min_addr, - PAGE_READ | PAGE_WRITE, - (max_addr - min_addr) * b"\x00" - ) - - # Copy each sections content in memory - for section in pe.SHList: - log.debug('Map 0x%x bytes to 0x%x', len(section.data), - pe.rva2virt(section.addr)) - vm.set_mem(pe.rva2virt(section.addr), bytes(section.data)) - - return pe - - -def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs): - """Call vm_load_pe on @fname_in and update @libs accordingly - @vm: VmMngr instance - @fname_in: library name - @libs: libimp_pe instance - @lib_path_base: DLLs relative path - Return the corresponding PE instance - Extra arguments are passed to vm_load_pe - """ - - log.info('Loading module %r', fname_in) - - fname = os.path.join(lib_path_base, fname_in) - with open(fname, "rb") as fstream: - pe = vm_load_pe(vm, fstream.read(), name=fname_in, **kargs) - libs.add_export_lib(pe, fname_in) - return pe - - -def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs): - """Call vm_load_pe_lib on each @libs_name filename - @vm: VmMngr instance - @libs_name: list of str - @libs: libimp_pe instance - @lib_path_base: (optional) DLLs relative path - Return a dictionary Filename -> PE instances - Extra arguments are passed to 
vm_load_pe_lib - """ - return {fname: vm_load_pe_lib(vm, fname, libs, lib_path_base, **kargs) - for fname in libs_name} - - -def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base, - patch_vm_imp=True, **kargs): - for e in viewvalues(lib_imgs): - preload_pe(e, libs, patch_vm_imp) - - -def vm2pe(myjit, fname, libs=None, e_orig=None, - min_addr=None, max_addr=None, - min_section_offset=0x1000, img_base=None, - added_funcs=None, **kwargs): - if e_orig: - size = e_orig._wsize - else: - size = 32 - mye = pe_init.PE(wsize=size) - - if min_addr is None and e_orig is not None: - min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList]) - if max_addr is None and e_orig is not None: - max_addr = max([e_orig.rva2virt(s.addr + s.size) - for s in e_orig.SHList]) - - if img_base is None: - img_base = e_orig.NThdr.ImageBase - - mye.NThdr.ImageBase = img_base - all_mem = myjit.vm.get_all_memory() - addrs = list(all_mem) - addrs.sort() - mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.pc) - first = True - for ad in addrs: - if not min_addr <= ad < max_addr: - continue - log.debug("0x%x", ad) - if first: - mye.SHList.add_section( - "%.8X" % ad, - addr=ad - mye.NThdr.ImageBase, - data=all_mem[ad]['data'], - offset=min_section_offset) - else: - mye.SHList.add_section( - "%.8X" % ad, - addr=ad - mye.NThdr.ImageBase, - data=all_mem[ad]['data']) - first = False - if libs: - if added_funcs is not None: - for addr, funcaddr in added_funcs: - libbase, dllname = libs.fad2info[funcaddr] - libs.lib_get_add_func(libbase, dllname, addr) - - filter_import = kwargs.get( - 'filter_import', lambda _, ad: mye.virt.is_addr_in(ad)) - new_dll = libs.gen_new_lib(mye, filter_import) - else: - new_dll = {} - - log.debug('%s', new_dll) - - mye.DirImport.add_dlldesc(new_dll) - s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport)) - mye.DirImport.set_rva(s_imp.addr) - log.debug('%r', mye.SHList) - if e_orig: - # resource - xx = bytes(mye) - mye.content = xx - ad = 
e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva - size = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size - log.debug('dirres 0x%x', ad) - if ad != 0: - mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad - mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = size - mye.DirRes = pe.DirRes.unpack(mye.img_rva, ad, mye) - log.debug('%r', mye.DirRes) - s_res = mye.SHList.add_section( - name="myres", - rawsize=len(mye.DirRes) - ) - mye.DirRes.set_rva(s_res.addr) - # generation - open(fname, 'wb').write(bytes(mye)) - return mye - - -class libimp_pe(libimp): - - def __init__(self, *args, **kwargs): - super(libimp_pe, self).__init__(*args, **kwargs) - # dependency -> redirector - self.created_redirected_imports = {} - - def add_export_lib(self, e, name): - if name in self.created_redirected_imports: - log.error("%r has previously been created due to redirect\ - imports due to %r. Change the loading order.", - name, self.created_redirected_imports[name]) - raise RuntimeError('Bad import: loading previously created import') - - self.all_exported_lib.append(e) - # will add real lib addresses to database - if name in self.name2off: - ad = self.name2off[name] - if e is not None and name in self.fake_libs: - log.error( - "You are trying to load %r but it has been faked previously. 
Try loading this module earlier.", name) - raise RuntimeError("Bad import") - else: - log.debug('new lib %s', name) - ad = e.NThdr.ImageBase - libad = ad - self.name2off[name] = ad - self.libbase2lastad[ad] = ad + 0x1 - self.lib_imp2ad[ad] = {} - self.lib_imp2dstad[ad] = {} - self.libbase_ad += 0x1000 - - ads = get_export_name_addr_list(e) - todo = ads - # done = [] - while todo: - # for imp_ord_or_name, ad in ads: - imp_ord_or_name, ad = todo.pop() - - # if export is a redirection, search redirected dll - # and get function real addr - ret = is_redirected_export(e, ad) - if ret: - exp_dname, exp_fname = ret - exp_dname = exp_dname + '.dll' - exp_dname = exp_dname.lower() - # if dll auto refes in redirection - if exp_dname == name: - libad_tmp = self.name2off[exp_dname] - if not exp_fname in self.lib_imp2ad[libad_tmp]: - # schedule func - todo = [(imp_ord_or_name, ad)] + todo - continue - else: - # import redirected lib from non loaded dll - if not exp_dname in self.name2off: - self.created_redirected_imports.setdefault( - exp_dname, set()).add(name) - - # Ensure import entry is created - new_lib_base = self.lib_get_add_base(exp_dname) - # Ensure function entry is created - _ = self.lib_get_add_func(new_lib_base, exp_fname) - - libad_tmp = self.name2off[exp_dname] - ad = self.lib_imp2ad[libad_tmp][exp_fname] - - self.lib_imp2ad[libad][imp_ord_or_name] = ad - name_inv = dict( - (value, key) for key, value in viewitems(self.name2off) - ) - c_name = canon_libname_libfunc( - name_inv[libad], imp_ord_or_name) - self.fad2cname[ad] = c_name - self.cname2addr[c_name] = ad - log.debug("Add func %s %s", hex(ad), c_name) - self.fad2info[ad] = libad, imp_ord_or_name - - def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs): - """Gen a new DirImport description - @target_pe: PE instance - @filter_import: (boolean f(pe, address)) restrict addresses to keep - """ - - new_lib = [] - for lib_name, ad in viewitems(self.name2off): - # Build an 
IMAGE_IMPORT_DESCRIPTOR - - # Get fixed addresses - out_ads = dict() # addr -> func_name - for func_name, dst_addresses in viewitems(self.lib_imp2dstad[ad]): - out_ads.update({addr: func_name for addr in dst_addresses}) - - # Filter available addresses according to @filter_import - all_ads = [ - addr for addr in list(out_ads) if filter_import(target_pe, addr) - ] - - if not all_ads: - continue - - # Keep non-NULL elements - all_ads.sort(key=str) - for i, x in enumerate(all_ads): - if x not in [0, None]: - break - all_ads = all_ads[i:] - log.debug('ads: %s', list(map(hex, all_ads))) - - while all_ads: - # Find libname's Import Address Table - othunk = all_ads[0] - i = 0 - while (i + 1 < len(all_ads) and - all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]): - i += 1 - # 'i + 1' is IAT's length - - # Effectively build an IMAGE_IMPORT_DESCRIPTOR - funcs = [out_ads[addr] for addr in all_ads[:i + 1]] - try: - rva = target_pe.virt2rva(othunk) - except pe.InvalidOffset: - pass - else: - new_lib.append(({"name": lib_name, - "firstthunk": rva}, - funcs) - ) - - # Update elements to handle - all_ads = all_ads[i + 1:] - - return new_lib - - -def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib, - lib_path_base, **kwargs): - """Load a binary and all its dependencies. Returns a dictionary containing - the association between binaries names and it's pe object - - @vm: virtual memory manager instance - @fname: full path of the binary - @name2module: dict containing association between name and pe - object. Updated. 
- @runtime_lib: libimp instance - @lib_path_base: directory of the libraries containing dependencies - - """ - - todo = [(fname, fname, 0)] - weight2name = {} - done = set() - - # Walk dependencies recursively - while todo: - name, fname, weight = todo.pop() - if name in done: - continue - done.add(name) - weight2name.setdefault(weight, set()).add(name) - if name in name2module: - pe_obj = name2module[name] - else: - try: - with open(fname, "rb") as fstream: - log.info('Loading module name %r', fname) - pe_obj = vm_load_pe( - vm, fstream.read(), name=fname, **kwargs) - except IOError: - log.error('Cannot open %s' % fname) - name2module[name] = None - continue - name2module[name] = pe_obj - - new_dependencies = get_pe_dependencies(pe_obj) - todo += [(name, os.path.join(lib_path_base, name), weight - 1) - for name in new_dependencies] - - ordered_modules = sorted(viewitems(weight2name)) - for _, modules in ordered_modules: - for name in modules: - pe_obj = name2module[name] - if pe_obj is None: - continue - # Fix imports - if pe_obj.DirExport: - runtime_lib.add_export_lib(pe_obj, name) - - for pe_obj in viewvalues(name2module): - if pe_obj is None: - continue - preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True) - - return name2module - -# machine -> arch -PE_machine = {0x14c: "x86_32", - 0x8664: "x86_64", - } - - -def guess_arch(pe): - """Return the architecture specified by the PE container @pe. 
- If unknown, return None""" - return PE_machine.get(pe.Coffhdr.machine, None) diff --git a/miasm2/jitter/loader/utils.py b/miasm2/jitter/loader/utils.py deleted file mode 100644 index 80e19310..00000000 --- a/miasm2/jitter/loader/utils.py +++ /dev/null @@ -1,100 +0,0 @@ -from builtins import int as int_types -import logging - -from future.utils import viewitems, viewvalues - -from miasm2.core.utils import force_bytes - -log = logging.getLogger('loader_common') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.INFO) - - -def canon_libname_libfunc(libname, libfunc): - libname = force_bytes(libname) - dn = libname.split(b'.')[0] - if isinstance(libfunc, int_types): - return str(dn), libfunc - else: - libfunc = force_bytes(libfunc) - return b"%s_%s" % (dn, libfunc) - - -class libimp(object): - - def __init__(self, lib_base_ad=0x71111000, **kargs): - self.name2off = {} - self.libbase2lastad = {} - self.libbase_ad = lib_base_ad - self.lib_imp2ad = {} - self.lib_imp2dstad = {} - self.fad2cname = {} - self.cname2addr = {} - self.fad2info = {} - self.all_exported_lib = [] - self.fake_libs = set() - - def lib_get_add_base(self, name): - name = force_bytes(name) - name = name.lower().strip(b' ') - if not b"." 
in name: - log.debug('warning adding .dll to modulename') - name += b'.dll' - log.debug(name) - - if name in self.name2off: - ad = self.name2off[name] - else: - ad = self.libbase_ad - log.warning("Create dummy entry for %r", name) - self.fake_libs.add(name) - self.name2off[name] = ad - self.libbase2lastad[ad] = ad + 0x4 - self.lib_imp2ad[ad] = {} - self.lib_imp2dstad[ad] = {} - self.libbase_ad += 0x1000 - return ad - - def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None): - if not libad in viewvalues(self.name2off): - raise ValueError('unknown lib base!', hex(libad)) - - # test if not ordinatl - # if imp_ord_or_name >0x10000: - # imp_ord_or_name = vm_get_str(imp_ord_or_name, 0x100) - # imp_ord_or_name = imp_ord_or_name[:imp_ord_or_name.find('\x00')] - - #/!\ can have multiple dst ad - if not imp_ord_or_name in self.lib_imp2dstad[libad]: - self.lib_imp2dstad[libad][imp_ord_or_name] = set() - self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad) - - if imp_ord_or_name in self.lib_imp2ad[libad]: - return self.lib_imp2ad[libad][imp_ord_or_name] - # log.debug('new imp %s %s' % (imp_ord_or_name, dst_ad)) - ad = self.libbase2lastad[libad] - self.libbase2lastad[libad] += 0x10 # arbitrary - self.lib_imp2ad[libad][imp_ord_or_name] = ad - - name_inv = dict( - (value, key) for key, value in viewitems(self.name2off) - ) - c_name = canon_libname_libfunc(name_inv[libad], imp_ord_or_name) - self.fad2cname[ad] = c_name - self.cname2addr[c_name] = ad - self.fad2info[ad] = libad, imp_ord_or_name - return ad - - def check_dst_ad(self): - for ad in self.lib_imp2dstad: - all_ads = sorted(viewvalues(self.lib_imp2dstad[ad])) - for i, x in enumerate(all_ads[:-1]): - if x is None or all_ads[i + 1] is None: - return False - if x + 4 != all_ads[i + 1]: - return False - return True - - diff --git a/miasm2/jitter/op_semantics.c b/miasm2/jitter/op_semantics.c deleted file mode 100644 index 46e6cca1..00000000 --- a/miasm2/jitter/op_semantics.c +++ /dev/null @@ -1,749 +0,0 @@ -#include 
-#include -#include -#include -#include -#include "op_semantics.h" - -const uint8_t parity_table[256] = { - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, - 0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P, -}; - -uint16_t bcdadd_16(uint16_t a, uint16_t b) -{ - int carry = 0; - int i,j = 0; - uint16_t res = 0; - int nib_a, nib_b; - for (i = 0; i < 16; i += 4) { - nib_a = (a >> i) & (0xF); - nib_b = (b >> i) & (0xF); - - j = (carry + nib_a + nib_b); - if (j >= 10) { - carry = 1; - j -= 10; - j &=0xf; - } - else { - carry = 0; - } - res += j << i; - } - return res; -} - -uint16_t bcdadd_cf_16(uint16_t a, uint16_t b) -{ - int carry = 0; - int i,j = 0; - int nib_a, nib_b; - for (i = 0; i < 16; i += 4) { - nib_a = (a >> i) & (0xF); - nib_b = (b >> i) & (0xF); - - j = (carry + nib_a + nib_b); - if (j >= 10) { - carry = 1; - j -= 10; - j &=0xf; - } - else { - carry = 0; - } - } 
- return carry; -} - -unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b) -{ - unsigned int mask; - - switch (size) { - case 8: mask = 0xff; break; - case 16: mask = 0xffff; break; - case 32: mask = 0xffffffff; break; - default: fprintf(stderr, "inv size in mul %d\n", size); exit(EXIT_FAILURE); - } - - a &= mask; - b &= mask; - return ((int64_t)a * (int64_t) b) & mask; -} - -unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b) -{ - uint64_t res = 0; - unsigned int mask; - - switch (size) { - case 8: mask = 0xff; break; - case 16: mask = 0xffff; break; - case 32: mask = 0xffffffff; break; - default: fprintf(stderr, "inv size in mul %d\n", size); exit(EXIT_FAILURE); - } - - a &= mask; - b &= mask; - res = ((uint64_t)a * (uint64_t)b); - return (res >> 32) & mask; -} - - -unsigned int imul_lo_op_08(char a, char b) -{ - return a*b; -} - -unsigned int imul_lo_op_16(short a, short b) -{ - return a*b; -} - -unsigned int imul_lo_op_32(int a, int b) -{ - return a*b; -} - -int imul_hi_op_08(char a, char b) -{ - int64_t res = 0; - res = a*b; - return (int)(res>>8); -} - -int imul_hi_op_16(short a, short b) -{ - int64_t res = 0; - res = a*b; - return (int)(res>>16); -} - -int imul_hi_op_32(int a, int b) -{ - int64_t res = 0; - res = (int64_t)a*(int64_t)b; - return (int)(res>>32ULL); -} - -unsigned int umul16_lo(unsigned short a, unsigned short b) -{ - return (a*b) & 0xffff; -} - -unsigned int umul16_hi(unsigned short a, unsigned short b) -{ - uint32_t c; - c = a*b; - return (c>>16) & 0xffff; -} - -uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b) -{ - uint64_t tmp; - - b = b & 0x3F; - b %= size; - switch(size){ - case 8: - tmp = (a << b) | ((a & 0xFF) >> (size - b)); - return tmp & 0xFF; - case 16: - tmp = (a << b) | ((a & 0xFFFF) >> (size - b)); - return tmp & 0xFFFF; - case 32: - tmp = (a << b) | ((a & 0xFFFFFFFF) >> (size - b)); - return tmp & 0xFFFFFFFF; - case 64: - tmp = (a << b) | ((a&0xFFFFFFFFFFFFFFFF) >> (size - b)); - 
return tmp & 0xFFFFFFFFFFFFFFFF; - - /* Support cases for rcl */ - case 9: - tmp = (a << b) | ((a & 0x1FF) >> (size - b)); - return tmp & 0x1FF; - case 17: - tmp = (a << b) | ((a & 0x1FFFF) >> (size - b)); - return tmp & 0x1FFFF; - case 33: - tmp = (a << b) | ((a & 0x1FFFFFFFF) >> (size - b)); - return tmp & 0x1FFFFFFFF; - /* TODO XXX: support rcl in 64 bit mode */ - - default: - fprintf(stderr, "inv size in rotleft %"PRIX64"\n", size); - exit(EXIT_FAILURE); - } -} - -uint64_t rot_right(uint64_t size, uint64_t a, uint64_t b) -{ - uint64_t tmp; - - b = b & 0x3F; - b %= size; - switch(size){ - case 8: - tmp = ((a & 0xFF) >> b) | (a << (size - b)); - return tmp & 0xff; - case 16: - tmp = ((a & 0xFFFF) >> b) | (a << (size - b)); - return tmp & 0xFFFF; - case 32: - tmp = ((a & 0xFFFFFFFF) >> b) | (a << (size - b)); - return tmp & 0xFFFFFFFF; - case 64: - tmp = ((a & 0xFFFFFFFFFFFFFFFF) >> b) | (a << (size - b)); - return tmp & 0xFFFFFFFFFFFFFFFF; - - /* Support cases for rcr */ - case 9: - tmp = ((a & 0x1FF) >> b) | (a << (size - b)); - return tmp & 0x1FF; - case 17: - tmp = ((a & 0x1FFFF) >> b) | (a << (size - b)); - return tmp & 0x1FFFF; - case 33: - tmp = ((a & 0x1FFFFFFFF) >> b) | (a << (size - b)); - return tmp & 0x1FFFFFFFF; - /* TODO XXX: support rcr in 64 bit mode */ - - default: - fprintf(stderr, "inv size in rotright %"PRIX64"\n", size); - exit(EXIT_FAILURE); - } -} - -/* - * Count leading zeros - count the number of zero starting at the most - * significant bit - * - * Example: - * - cntleadzeros(size=32, src=2): 30 - * - cntleadzeros(size=32, src=0): 32 - */ -uint64_t cntleadzeros(uint64_t size, uint64_t src) -{ - int64_t i; - - for (i=(int64_t)size-1; i>=0; i--){ - if (src & (1ull << i)) - return (uint64_t)(size - (i + 1)); - } - return (uint64_t)size; -} - -/* - * Count trailing zeros - count the number of zero starting at the least - * significant bit - * - * Example: - * - cnttrailzeros(size=32, src=2): 1 - * - cnttrailzeros(size=32, src=0): 32 - */ 
-unsigned int cnttrailzeros(uint64_t size, uint64_t src) -{ - uint64_t i; - for (i=0; i3){ - fprintf(stderr, "not implemented x86_cpuid reg %x\n", reg_num); - exit(EXIT_FAILURE); - } - // cases are output: EAX: 0; EBX: 1; ECX: 2; EDX: 3 - if (a == 0){ - switch(reg_num){ - case 0: - return 0xa; - // "GenuineIntel" - case 1: - return 0x756E6547; - case 2: - return 0x6C65746E; - case 3: - return 0x49656E69; - } - } - - else if (a == 1){ - switch(reg_num){ - case 0: - // Using a version too high will enable recent - // instruction set - return 0x000006FB; - //return 0x00020652; - case 1: - //return 0x02040800; - return 0x00000800; - case 2: - //return 0x0004E3BD; - return 0x00000209; - case 3: - return (/* fpu */ 1 << 0) | - (/* tsc */ 1 << 4) | - (/* cx8 */ 1 << 8) | - (/* cmov */ 1 << 15) | - (/* mmx */ 1 << 23) | - (/* sse */ 1 << 25) | - (/* sse2 */ 1 << 26) | - (/* ia64 */ 1 << 30); - } - } - // Cache and TLB - else if (a == 2){ - switch(reg_num){ - case 0: - return 0x00000000; - case 1: - return 0x00000000; - case 2: - return 0x00000000; - case 3: - return 0x00000000; - } - } - // Intel thread/core and cache topology - else if (a == 4){ - switch(reg_num){ - case 0: - return 0x00000000; - case 1: - return 0x00000000; - case 2: - return 0x00000000; - case 3: - return 0x00000000; - } - } - // Extended features - else if (a == 7){ - switch(reg_num){ - case 0: - return 0x00000000; - case 1: - return (/* fsgsbase */ 1 << 0) | (/* bmi1 */ 1 << 3); - case 2: - return 0x00000000; - case 3: - return 0x00000000; - } - } - else{ - fprintf(stderr, "WARNING not implemented x86_cpuid index %X!\n", a); - exit(EXIT_FAILURE); - } - return 0; -} - -//#define DEBUG_MIASM_DOUBLE - -void dump_float(void) -{ - /* - printf("%e\n", vmmngr.float_st0); - printf("%e\n", vmmngr.float_st1); - printf("%e\n", vmmngr.float_st2); - printf("%e\n", vmmngr.float_st3); - printf("%e\n", vmmngr.float_st4); - printf("%e\n", vmmngr.float_st5); - printf("%e\n", vmmngr.float_st6); - printf("%e\n", 
vmmngr.float_st7); - */ -} - -uint32_t fpu_fadd32(uint32_t a, uint32_t b) -{ - float c; - c = *((float*)&a) + *((float*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e + %e -> %e\n", a, b, c); -#endif - return *((uint32_t*)&c); -} - -uint64_t fpu_fadd64(uint64_t a, uint64_t b) -{ - double c; - c = *((double*)&a) + *((double*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e + %e -> %e\n", a, b, c); -#endif - return *((uint64_t*)&c); -} - -uint32_t fpu_fsub32(uint32_t a, uint32_t b) -{ - float c; - c = *((float*)&a) - *((float*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e + %e -> %e\n", a, b, c); -#endif - return *((uint32_t*)&c); -} - -uint64_t fpu_fsub64(uint64_t a, uint64_t b) -{ - double c; - c = *((double*)&a) - *((double*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e + %e -> %e\n", a, b, c); -#endif - return *((uint64_t*)&c); -} - -uint32_t fpu_fmul32(uint32_t a, uint32_t b) -{ - float c; - c = *((float*)&a) * *((float*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e * %e -> %e\n", a, b, c); -#endif - return *((uint32_t*)&c); -} - -uint64_t fpu_fmul64(uint64_t a, uint64_t b) -{ - double c; - c = *((double*)&a) * *((double*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e * %e -> %e\n", a, b, c); -#endif - return *((uint64_t*)&c); -} - -uint32_t fpu_fdiv32(uint32_t a, uint32_t b) -{ - float c; - c = *((float*)&a) / *((float*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e * %e -> %e\n", a, b, c); -#endif - return *((uint32_t*)&c); -} - -uint64_t fpu_fdiv64(uint64_t a, uint64_t b) -{ - double c; - c = *((double*)&a) / *((double*)&b); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e * %e -> %e\n", a, b, c); -#endif - return *((uint64_t*)&c); -} - -double fpu_ftan(double a) -{ - double b; - b = tan(a); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e tan %e\n", a, b); -#endif - return b; -} - -double fpu_frndint(double a) -{ - int64_t b; - double c; - b = 
(int64_t)a; - c = (double)b; -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e double %e\n", a, c); -#endif - return c; -} - -double fpu_fsin(double a) -{ - double b; - b = sin(a); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e sin %e\n", a, b); -#endif - return b; -} - -double fpu_fcos(double a) -{ - double b; - b = cos(a); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e cos %e\n", a, b); -#endif - return b; -} - - -double fpu_fscale(double a, double b) -{ - double c; - c = a * exp2(trunc(b)); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e *exp2 %e -> %e\n", a, b, c); -#endif - return c; -} - -double fpu_f2xm1(double a) -{ - double b; - b = exp2(a)-1; -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e exp2 -1 %e\n", a, b); -#endif - return b; -} - -uint32_t fpu_fsqrt32(uint32_t a) -{ - float b; - b = sqrtf(*((float*)&a)); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e sqrt %e\n", a, b); -#endif - return *((uint32_t*)&b); -} - -uint64_t fpu_fsqrt64(uint64_t a) -{ - double b; - b = sqrt(*((double*)&a)); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e sqrt %e\n", a, b); -#endif - return *((uint64_t*)&b); -} - -uint64_t fpu_fabs64(uint64_t a) -{ - double b; - b = abs(*((double*)&a)); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e abs %e\n", a, b); -#endif - return *((uint64_t*)&b); -} - -uint64_t fpu_fprem64(uint64_t a, uint64_t b) -{ - double c; - c = fmod(*((double*)&a), *((double*)&b)); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e %% %e -> %e\n", a, b, c); -#endif - return *((uint64_t*)&c); -} - -double fpu_fchs(double a) -{ - double b; - b = -a; -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf(" - %e -> %e\n", a, b); -#endif - return b; -} - -double fpu_fyl2x(double a, double b) -{ - double c; - c = b * (log(a) / log(2)); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("%e * log(%e) -> %e\n", b, a, c); -#endif - return c; -} - -double fpu_fpatan(double a, double b) -{ - double c; - c 
= atan2(b, a); -#ifdef DEBUG_MIASM_DOUBLE - dump_float(); - printf("arctan(%e / %e) -> %e\n", b, a, c); -#endif - return c; -} - -unsigned int fpu_fcom_c0(double a, double b) -{ - if (isnan(a) || isnan(b)) - return 1; - if (a>=b) - return 0; - return 1; -} -unsigned int fpu_fcom_c1(double a, double b) -{ - //XXX - return 0; -} -unsigned int fpu_fcom_c2(double a, double b) -{ - if (isnan(a) || isnan(b)) - return 1; - return 0; -} -unsigned int fpu_fcom_c3(double a, double b) -{ - if (isnan(a) || isnan(b)) - return 1; - if (a==b) - return 1; - return 0; -} - -uint64_t sint_to_fp_64(int64_t a) -{ - double result = (double) a; - return *((uint64_t*)&result); -} - -uint32_t sint_to_fp_32(int32_t a) -{ - float result = (float) a; - return *((uint32_t*)&result); -} - -int32_t fp32_to_sint32(uint32_t a) -{ - // Enforce nearbyint (IEEE-754 behavior) - float rounded = *((float*)&a); - rounded = nearbyintf(rounded); - return (int32_t) rounded; -} - -int64_t fp64_to_sint64(uint64_t a) -{ - // Enforce nearbyint (IEEE-754 behavior) - double rounded = *((double*)&a); - rounded = nearbyint(rounded); - return (int64_t) rounded; -} - -int32_t fp64_to_sint32(uint64_t a) -{ - // Enforce nearbyint (IEEE-754 behavior) - double rounded = *((double*)&a); - rounded = nearbyint(rounded); - return (int32_t) rounded; -} - -uint32_t fp64_to_fp32(uint64_t a) -{ - float result = (float) *((double*)&a); - return *((uint32_t*)&result); -} - -uint64_t fp32_to_fp64(uint32_t a) -{ - double result = (double) *((float*)&a); - return *((uint64_t*)&result); -} - -uint32_t fpround_towardszero_fp32(uint32_t a) -{ - float rounded = *((float*)&a); - rounded = truncf(rounded); - return *((uint32_t*)&rounded); -} - -uint64_t fpround_towardszero_fp64(uint64_t a) -{ - double rounded = *((float*)&a); - rounded = trunc(rounded); - return *((uint64_t*)&rounded); -} - - -UDIV(8) -UDIV(16) -UDIV(32) -UDIV(64) - -UMOD(8) -UMOD(16) -UMOD(32) -UMOD(64) - -SDIV(8) -SDIV(16) -SDIV(32) -SDIV(64) - -SMOD(8) -SMOD(16) 
-SMOD(32) -SMOD(64) diff --git a/miasm2/jitter/op_semantics.h b/miasm2/jitter/op_semantics.h deleted file mode 100644 index 690cfb35..00000000 --- a/miasm2/jitter/op_semantics.h +++ /dev/null @@ -1,167 +0,0 @@ -#ifndef OP_SEMANTICS_H -#define OP_SEMANTICS_H - -#include - -#if _WIN32 -#define _MIASM_EXPORT __declspec(dllexport) -#define _MIASM_IMPORT __declspec(dllimport) -#else -#define _MIASM_EXPORT -#define _MIASM_IMPORT -#endif - -#define CC_P 1 -#ifdef PARITY_IMPORT -_MIASM_IMPORT extern const uint8_t parity_table[256]; -#else -_MIASM_EXPORT extern const uint8_t parity_table[256]; -#endif -#define parity(a) parity_table[(a) & 0xFF] - -_MIASM_EXPORT unsigned int my_imul08(unsigned int a, unsigned int b); -_MIASM_EXPORT unsigned int mul_lo_op(unsigned int size, unsigned int a, unsigned int b); -_MIASM_EXPORT unsigned int mul_hi_op(unsigned int size, unsigned int a, unsigned int b); -_MIASM_EXPORT unsigned int imul_lo_op_08(char a, char b); -_MIASM_EXPORT unsigned int imul_lo_op_16(short a, short b); -_MIASM_EXPORT unsigned int imul_lo_op_32(int a, int b); -_MIASM_EXPORT int imul_hi_op_08(char a, char b); -_MIASM_EXPORT int imul_hi_op_16(short a, short b); -_MIASM_EXPORT int imul_hi_op_32(int a, int b); - - -_MIASM_EXPORT unsigned int umul16_lo(unsigned short a, unsigned short b); -_MIASM_EXPORT unsigned int umul16_hi(unsigned short a, unsigned short b); - - -_MIASM_EXPORT uint64_t rot_left(uint64_t size, uint64_t a, uint64_t b); -_MIASM_EXPORT uint64_t rot_right(uint64_t size, uint64_t a, uint64_t b); - -_MIASM_EXPORT uint64_t cntleadzeros(uint64_t size, uint64_t src); -_MIASM_EXPORT unsigned int cnttrailzeros(uint64_t size, uint64_t src); - -#define UDIV(sizeA) \ - uint ## sizeA ## _t udiv ## sizeA (uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ - { \ - uint ## sizeA ## _t r; \ - if (b == 0) { \ - fprintf(stderr, "Should not happen\n"); \ - exit(EXIT_FAILURE); \ - } \ - r = a/b; \ - return r; \ - } - - -#define UMOD(sizeA) \ - uint ## sizeA ## _t umod ## sizeA 
(uint ## sizeA ## _t a, uint ## sizeA ## _t b) \ - { \ - uint ## sizeA ## _t r; \ - if (b == 0) { \ - fprintf(stderr, "Should not happen\n"); \ - exit(EXIT_FAILURE); \ - } \ - r = a%b; \ - return r; \ - } - - -#define SDIV(sizeA) \ - int ## sizeA ## _t sdiv ## sizeA (int ## sizeA ## _t a, int ## sizeA ## _t b) \ - { \ - int ## sizeA ## _t r; \ - if (b == 0) { \ - fprintf(stderr, "Should not happen\n"); \ - exit(EXIT_FAILURE); \ - } \ - r = a/b; \ - return r; \ - } - - -#define SMOD(sizeA) \ - int ## sizeA ## _t smod ## sizeA (int ## sizeA ## _t a, int ## sizeA ## _t b) \ - { \ - int ## sizeA ## _t r; \ - if (b == 0) { \ - fprintf(stderr, "Should not happen\n"); \ - exit(EXIT_FAILURE); \ - } \ - r = a%b; \ - return r; \ - } - -_MIASM_EXPORT uint64_t udiv64(uint64_t a, uint64_t b); -_MIASM_EXPORT uint64_t umod64(uint64_t a, uint64_t b); -_MIASM_EXPORT int64_t sdiv64(int64_t a, int64_t b); -_MIASM_EXPORT int64_t smod64(int64_t a, int64_t b); - -_MIASM_EXPORT uint32_t udiv32(uint32_t a, uint32_t b); -_MIASM_EXPORT uint32_t umod32(uint32_t a, uint32_t b); -_MIASM_EXPORT int32_t sdiv32(int32_t a, int32_t b); -_MIASM_EXPORT int32_t smod32(int32_t a, int32_t b); - -_MIASM_EXPORT uint16_t udiv16(uint16_t a, uint16_t b); -_MIASM_EXPORT uint16_t umod16(uint16_t a, uint16_t b); -_MIASM_EXPORT int16_t sdiv16(int16_t a, int16_t b); -_MIASM_EXPORT int16_t smod16(int16_t a, int16_t b); - -_MIASM_EXPORT uint8_t udiv8(uint8_t a, uint8_t b); -_MIASM_EXPORT uint8_t umod8(uint8_t a, uint8_t b); -_MIASM_EXPORT int8_t sdiv8(int8_t a, int8_t b); -_MIASM_EXPORT int8_t smod8(int8_t a, int8_t b); - -_MIASM_EXPORT unsigned int x86_cpuid(unsigned int a, unsigned int reg_num); - -_MIASM_EXPORT uint32_t fpu_fadd32(uint32_t a, uint32_t b); -_MIASM_EXPORT uint64_t fpu_fadd64(uint64_t a, uint64_t b); -_MIASM_EXPORT uint32_t fpu_fsub32(uint32_t a, uint32_t b); -_MIASM_EXPORT uint64_t fpu_fsub64(uint64_t a, uint64_t b); -_MIASM_EXPORT uint32_t fpu_fmul32(uint32_t a, uint32_t b); -_MIASM_EXPORT 
uint64_t fpu_fmul64(uint64_t a, uint64_t b); -_MIASM_EXPORT uint32_t fpu_fdiv32(uint32_t a, uint32_t b); -_MIASM_EXPORT uint64_t fpu_fdiv64(uint64_t a, uint64_t b); -_MIASM_EXPORT double fpu_ftan(double a); -_MIASM_EXPORT double fpu_frndint(double a); -_MIASM_EXPORT double fpu_fsin(double a); -_MIASM_EXPORT double fpu_fcos(double a); -_MIASM_EXPORT double fpu_fscale(double a, double b); -_MIASM_EXPORT double fpu_f2xm1(double a); -_MIASM_EXPORT uint32_t fpu_fsqrt32(uint32_t a); -_MIASM_EXPORT uint64_t fpu_fsqrt64(uint64_t a); -_MIASM_EXPORT uint64_t fpu_fabs64(uint64_t a); -_MIASM_EXPORT uint64_t fpu_fprem64(uint64_t a, uint64_t b); -_MIASM_EXPORT double fpu_fchs(double a); -_MIASM_EXPORT double fpu_fyl2x(double a, double b); -_MIASM_EXPORT double fpu_fpatan(double a, double b); -_MIASM_EXPORT unsigned int fpu_fcom_c0(double a, double b); -_MIASM_EXPORT unsigned int fpu_fcom_c1(double a, double b); -_MIASM_EXPORT unsigned int fpu_fcom_c2(double a, double b); -_MIASM_EXPORT unsigned int fpu_fcom_c3(double a, double b); - -_MIASM_EXPORT uint64_t sint_to_fp_64(int64_t a); -_MIASM_EXPORT uint32_t sint_to_fp_32(int32_t a); -_MIASM_EXPORT int32_t fp32_to_sint32(uint32_t a); -_MIASM_EXPORT int64_t fp64_to_sint64(uint64_t a); -_MIASM_EXPORT int32_t fp64_to_sint32(uint64_t a); -_MIASM_EXPORT uint32_t fp64_to_fp32(uint64_t a); -_MIASM_EXPORT uint64_t fp32_to_fp64(uint32_t a); -_MIASM_EXPORT uint32_t fpround_towardszero_fp32(uint32_t a); -_MIASM_EXPORT uint64_t fpround_towardszero_fp64(uint64_t a); - -#define SHIFT_RIGHT_ARITH(size, value, shift) \ - ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? \ - (((int ## size ## _t) (value)) < 0 ? -1 : 0) : \ - (((int ## size ## _t) (value)) >> (shift)))) - -#define SHIFT_RIGHT_LOGIC(size, value, shift) \ - ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? 
\ - 0 : \ - (((uint ## size ## _t) (value)) >> (shift)))) - -#define SHIFT_LEFT_LOGIC(size, value, shift) \ - ((uint ## size ## _t)((((uint64_t) (shift)) > ((size) - 1))? \ - 0 : \ - (((uint ## size ## _t) (value)) << (shift)))) - -#endif diff --git a/miasm2/jitter/queue.h b/miasm2/jitter/queue.h deleted file mode 100644 index 0caf72fb..00000000 --- a/miasm2/jitter/queue.h +++ /dev/null @@ -1,553 +0,0 @@ -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)queue.h 8.5 (Berkeley) 8/20/94 - * $FreeBSD$ - */ - -#ifndef _SYS_QUEUE_H_ -#define _SYS_QUEUE_H_ - -//#include - -/* - * This file defines four types of data structures: singly-linked lists, - * singly-linked tail queues, lists and tail queues. - * - * A singly-linked list is headed by a single forward pointer. The elements - * are singly linked for minimum space and pointer manipulation overhead at - * the expense of O(n) removal for arbitrary elements. New elements can be - * added to the list after an existing element or at the head of the list. - * Elements being removed from the head of the list should use the explicit - * macro for this purpose for optimum efficiency. A singly-linked list may - * only be traversed in the forward direction. Singly-linked lists are ideal - * for applications with large datasets and few or no removals or for - * implementing a LIFO queue. - * - * A singly-linked tail queue is headed by a pair of pointers, one to the - * head of the list and the other to the tail of the list. The elements are - * singly linked for minimum space and pointer manipulation overhead at the - * expense of O(n) removal for arbitrary elements. New elements can be added - * to the list after an existing element, at the head of the list, or at the - * end of the list. Elements being removed from the head of the tail queue - * should use the explicit macro for this purpose for optimum efficiency. 
- * A singly-linked tail queue may only be traversed in the forward direction. - * Singly-linked tail queues are ideal for applications with large datasets - * and few or no removals or for implementing a FIFO queue. - * - * A list is headed by a single forward pointer (or an array of forward - * pointers for a hash table header). The elements are doubly linked - * so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before - * or after an existing element or at the head of the list. A list - * may only be traversed in the forward direction. - * - * A tail queue is headed by a pair of pointers, one to the head of the - * list and the other to the tail of the list. The elements are doubly - * linked so that an arbitrary element can be removed without a need to - * traverse the list. New elements can be added to the list before or - * after an existing element, at the head of the list, or at the end of - * the list. A tail queue may be traversed in either direction. - * - * For details on the use of these macros, see the queue(3) manual page. 
- * - * - * SLIST LIST STAILQ TAILQ - * _HEAD + + + + - * _HEAD_INITIALIZER + + + + - * _ENTRY + + + + - * _INIT + + + + - * _EMPTY + + + + - * _FIRST + + + + - * _NEXT + + + + - * _PREV - - - + - * _LAST - - + + - * _FOREACH + + + + - * _FOREACH_SAFE + + + + - * _FOREACH_REVERSE - - - + - * _FOREACH_REVERSE_SAFE - - - + - * _INSERT_HEAD + + + + - * _INSERT_BEFORE - + - + - * _INSERT_AFTER + + + + - * _INSERT_TAIL - - + + - * _CONCAT - - + + - * _REMOVE_HEAD + - + - - * _REMOVE + + + + - * - */ -#define QUEUE_MACRO_DEBUG 0 -#if QUEUE_MACRO_DEBUG -/* Store the last 2 places the queue element or head was altered */ -struct qm_trace { - char * lastfile; - int lastline; - char * prevfile; - int prevline; -}; - -#define TRACEBUF struct qm_trace trace; -#define TRASHIT(x) do {(x) = (void *)-1;} while (0) - -#define QMD_TRACE_HEAD(head) do { \ - (head)->trace.prevline = (head)->trace.lastline; \ - (head)->trace.prevfile = (head)->trace.lastfile; \ - (head)->trace.lastline = __LINE__; \ - (head)->trace.lastfile = __FILE__; \ -} while (0) - -#define QMD_TRACE_ELEM(elem) do { \ - (elem)->trace.prevline = (elem)->trace.lastline; \ - (elem)->trace.prevfile = (elem)->trace.lastfile; \ - (elem)->trace.lastline = __LINE__; \ - (elem)->trace.lastfile = __FILE__; \ -} while (0) - -#else -#define QMD_TRACE_ELEM(elem) -#define QMD_TRACE_HEAD(head) -#define TRACEBUF -#define TRASHIT(x) -#endif /* QUEUE_MACRO_DEBUG */ - -/* - * Singly-linked List declarations. - */ -#define SLIST_HEAD(name, type) \ -struct name { \ - struct type *slh_first; /* first element */ \ -} - -#define SLIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define SLIST_ENTRY(type) \ -struct { \ - struct type *sle_next; /* next element */ \ -} - -/* - * Singly-linked List functions. 
- */ -#define SLIST_EMPTY(head) ((head)->slh_first == NULL) - -#define SLIST_FIRST(head) ((head)->slh_first) - -#define SLIST_FOREACH(var, head, field) \ - for ((var) = SLIST_FIRST((head)); \ - (var); \ - (var) = SLIST_NEXT((var), field)) - -#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = SLIST_FIRST((head)); \ - (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ - for ((varp) = &SLIST_FIRST((head)); \ - ((var) = *(varp)) != NULL; \ - (varp) = &SLIST_NEXT((var), field)) - -#define SLIST_INIT(head) do { \ - SLIST_FIRST((head)) = NULL; \ -} while (0) - -#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ - SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ - SLIST_NEXT((slistelm), field) = (elm); \ -} while (0) - -#define SLIST_INSERT_HEAD(head, elm, field) do { \ - SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ - SLIST_FIRST((head)) = (elm); \ -} while (0) - -#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) - -#define SLIST_REMOVE(head, elm, type, field) do { \ - if (SLIST_FIRST((head)) == (elm)) { \ - SLIST_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = SLIST_FIRST((head)); \ - while (SLIST_NEXT(curelm, field) != (elm)) \ - curelm = SLIST_NEXT(curelm, field); \ - SLIST_NEXT(curelm, field) = \ - SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ - } \ -} while (0) - -#define SLIST_REMOVE_HEAD(head, field) do { \ - SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ -} while (0) - -/* - * Singly-linked Tail queue declarations. - */ -#define STAILQ_HEAD(name, type) \ -struct name { \ - struct type *stqh_first;/* first element */ \ - struct type **stqh_last;/* addr of last next element */ \ -} - -#define STAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).stqh_first } - -#define STAILQ_ENTRY(type) \ -struct { \ - struct type *stqe_next; /* next element */ \ -} - -/* - * Singly-linked Tail queue functions. 
- */ -#define STAILQ_CONCAT(head1, head2) do { \ - if (!STAILQ_EMPTY((head2))) { \ - *(head1)->stqh_last = (head2)->stqh_first; \ - (head1)->stqh_last = (head2)->stqh_last; \ - STAILQ_INIT((head2)); \ - } \ -} while (0) - -#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) - -#define STAILQ_FIRST(head) ((head)->stqh_first) - -#define STAILQ_FOREACH(var, head, field) \ - for((var) = STAILQ_FIRST((head)); \ - (var); \ - (var) = STAILQ_NEXT((var), field)) - - -#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = STAILQ_FIRST((head)); \ - (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define STAILQ_INIT(head) do { \ - STAILQ_FIRST((head)) = NULL; \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ - if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ - STAILQ_NEXT((tqelm), field) = (elm); \ -} while (0) - -#define STAILQ_INSERT_HEAD(head, elm, field) do { \ - if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ - STAILQ_FIRST((head)) = (elm); \ -} while (0) - -#define STAILQ_INSERT_TAIL(head, elm, field) do { \ - STAILQ_NEXT((elm), field) = NULL; \ - *(head)->stqh_last = (elm); \ - (head)->stqh_last = &STAILQ_NEXT((elm), field); \ -} while (0) - -#define STAILQ_LAST(head, type, field) \ - (STAILQ_EMPTY((head)) ? 
\ - NULL : \ - ((struct type *) \ - ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) - -#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) - -#define STAILQ_REMOVE(head, elm, type, field) do { \ - if (STAILQ_FIRST((head)) == (elm)) { \ - STAILQ_REMOVE_HEAD((head), field); \ - } \ - else { \ - struct type *curelm = STAILQ_FIRST((head)); \ - while (STAILQ_NEXT(curelm, field) != (elm)) \ - curelm = STAILQ_NEXT(curelm, field); \ - if ((STAILQ_NEXT(curelm, field) = \ - STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ - (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ - } \ -} while (0) - -#define STAILQ_REMOVE_HEAD(head, field) do { \ - if ((STAILQ_FIRST((head)) = \ - STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -#define STAILQ_REMOVE_HEAD_UNTIL(head, elm, field) do { \ - if ((STAILQ_FIRST((head)) = STAILQ_NEXT((elm), field)) == NULL) \ - (head)->stqh_last = &STAILQ_FIRST((head)); \ -} while (0) - -/* - * List declarations. - */ -#define LIST_HEAD(name, type) \ -struct name { \ - struct type *lh_first; /* first element */ \ -} - -#define LIST_HEAD_INITIALIZER(head) \ - { NULL } - -#define LIST_ENTRY(type) \ -struct { \ - struct type *le_next; /* next element */ \ - struct type **le_prev; /* address of previous next element */ \ -} - -/* - * List functions. 
- */ - -#define LIST_EMPTY(head) ((head)->lh_first == NULL) - -#define LIST_FIRST(head) ((head)->lh_first) - -#define LIST_FOREACH(var, head, field) \ - for ((var) = LIST_FIRST((head)); \ - (var); \ - (var) = LIST_NEXT((var), field)) - -#define LIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = LIST_FIRST((head)); \ - (var) && ((tvar) = LIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define LIST_INIT(head) do { \ - LIST_FIRST((head)) = NULL; \ -} while (0) - -#define LIST_INSERT_AFTER(listelm, elm, field) do { \ - if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ - LIST_NEXT((listelm), field)->field.le_prev = \ - &LIST_NEXT((elm), field); \ - LIST_NEXT((listelm), field) = (elm); \ - (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ -} while (0) - -#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ - (elm)->field.le_prev = (listelm)->field.le_prev; \ - LIST_NEXT((elm), field) = (listelm); \ - *(listelm)->field.le_prev = (elm); \ - (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ -} while (0) - -#define LIST_INSERT_HEAD(head, elm, field) do { \ - if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ - LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ - LIST_FIRST((head)) = (elm); \ - (elm)->field.le_prev = &LIST_FIRST((head)); \ -} while (0) - -#define LIST_NEXT(elm, field) ((elm)->field.le_next) - -#define LIST_REMOVE(elm, field) do { \ - if (LIST_NEXT((elm), field) != NULL) \ - LIST_NEXT((elm), field)->field.le_prev = \ - (elm)->field.le_prev; \ - *(elm)->field.le_prev = LIST_NEXT((elm), field); \ -} while (0) - -/* - * Tail queue declarations. 
- */ -#define TAILQ_HEAD(name, type) \ -struct name { \ - struct type *tqh_first; /* first element */ \ - struct type **tqh_last; /* addr of last next element */ \ - TRACEBUF \ -} - -#define TAILQ_HEAD_INITIALIZER(head) \ - { NULL, &(head).tqh_first } - -#define TAILQ_ENTRY(type) \ -struct { \ - struct type *tqe_next; /* next element */ \ - struct type **tqe_prev; /* address of previous next element */ \ - TRACEBUF \ -} - -/* - * Tail queue functions. - */ -#define TAILQ_CONCAT(head1, head2, field) do { \ - if (!TAILQ_EMPTY(head2)) { \ - *(head1)->tqh_last = (head2)->tqh_first; \ - (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ - (head1)->tqh_last = (head2)->tqh_last; \ - TAILQ_INIT((head2)); \ - QMD_TRACE_HEAD(head); \ - QMD_TRACE_HEAD(head2); \ - } \ -} while (0) - -#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) - -#define TAILQ_FIRST(head) ((head)->tqh_first) - -#define TAILQ_FOREACH(var, head, field) \ - for ((var) = TAILQ_FIRST((head)); \ - (var); \ - (var) = TAILQ_NEXT((var), field)) - -#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = TAILQ_FIRST((head)); \ - (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ - for ((var) = TAILQ_LAST((head), headname); \ - (var); \ - (var) = TAILQ_PREV((var), headname, field)) - -#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \ - for ((var) = TAILQ_LAST((head), headname); \ - (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ - (var) = (tvar)) - -#define TAILQ_INIT(head) do { \ - TAILQ_FIRST((head)) = NULL; \ - (head)->tqh_last = &TAILQ_FIRST((head)); \ - QMD_TRACE_HEAD(head); \ -} while (0) - -#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ - if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ - &TAILQ_NEXT((elm), field); \ - else { \ - (head)->tqh_last = &TAILQ_NEXT((elm), field); \ - 
QMD_TRACE_HEAD(head); \ - } \ - TAILQ_NEXT((listelm), field) = (elm); \ - (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ - QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ -} while (0) - -#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ - (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ - TAILQ_NEXT((elm), field) = (listelm); \ - *(listelm)->field.tqe_prev = (elm); \ - (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ - QMD_TRACE_ELEM(&(elm)->field); \ - QMD_TRACE_ELEM(&listelm->field); \ -} while (0) - -#define TAILQ_INSERT_HEAD(head, elm, field) do { \ - if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ - TAILQ_FIRST((head))->field.tqe_prev = \ - &TAILQ_NEXT((elm), field); \ - else \ - (head)->tqh_last = &TAILQ_NEXT((elm), field); \ - TAILQ_FIRST((head)) = (elm); \ - (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ - QMD_TRACE_HEAD(head); \ - QMD_TRACE_ELEM(&(elm)->field); \ -} while (0) - -#define TAILQ_INSERT_TAIL(head, elm, field) do { \ - TAILQ_NEXT((elm), field) = NULL; \ - (elm)->field.tqe_prev = (head)->tqh_last; \ - *(head)->tqh_last = (elm); \ - (head)->tqh_last = &TAILQ_NEXT((elm), field); \ - QMD_TRACE_HEAD(head); \ - QMD_TRACE_ELEM(&(elm)->field); \ -} while (0) - -#define TAILQ_LAST(head, headname) \ - (*(((struct headname *)((head)->tqh_last))->tqh_last)) - -#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) - -#define TAILQ_PREV(elm, headname, field) \ - (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) - -#define TAILQ_REMOVE(head, elm, field) do { \ - if ((TAILQ_NEXT((elm), field)) != NULL) \ - TAILQ_NEXT((elm), field)->field.tqe_prev = \ - (elm)->field.tqe_prev; \ - else { \ - (head)->tqh_last = (elm)->field.tqe_prev; \ - QMD_TRACE_HEAD(head); \ - } \ - *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ - TRASHIT((elm)->field.tqe_next); \ - TRASHIT((elm)->field.tqe_prev); \ - QMD_TRACE_ELEM(&(elm)->field); \ -} while (0) - - -#ifdef _KERNEL - -/* - * XXX insque() and 
remque() are an old way of handling certain queues. - * They bogusly assumes that all queue heads look alike. - */ - -struct quehead { - struct quehead *qh_link; - struct quehead *qh_rlink; -}; - -#if defined(__GNUC__) || defined(__INTEL_COMPILER) - -static __inline void -insque(void *a, void *b) -{ - struct quehead *element = (struct quehead *)a, - *head = (struct quehead *)b; - - element->qh_link = head->qh_link; - element->qh_rlink = head; - head->qh_link = element; - element->qh_link->qh_rlink = element; -} - -static __inline void -remque(void *a) -{ - struct quehead *element = (struct quehead *)a; - - element->qh_link->qh_rlink = element->qh_rlink; - element->qh_rlink->qh_link = element->qh_link; - element->qh_rlink = 0; -} - -#else /* !(__GNUC__ || __INTEL_COMPILER) */ - -void insque(void *a, void *b); -void remque(void *a); - -#endif /* __GNUC__ || __INTEL_COMPILER */ - -#endif /* _KERNEL */ - -#endif /* !_SYS_QUEUE_H_ */ diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c deleted file mode 100644 index bd1de2f4..00000000 --- a/miasm2/jitter/vm_mngr.c +++ /dev/null @@ -1,926 +0,0 @@ -/* -** Copyright (C) 2011 EADS France, Fabrice Desclaux -** -** This program is free software; you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation; either version 2 of the License, or -** (at your option) any later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License along -** with this program; if not, write to the Free Software Foundation, Inc., -** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ -#include "vm_mngr.h" - -#include - -#include -#include - -#include "queue.h" - - - -/****************memory manager**************/ - - - - -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) - -// #define DEBUG_MIASM_AUTOMOD_CODE - -void memory_access_list_init(struct memory_access_list * access) -{ - access->array = NULL; - access->allocated = 0; - access->num = 0; -} - -void memory_access_list_reset(struct memory_access_list * access) -{ - if (access->array) { - free(access->array); - access->array = NULL; - } - access->allocated = 0; - access->num = 0; -} - -void memory_access_list_add(struct memory_access_list * access, uint64_t start, uint64_t stop) -{ - if (access->num >= access->allocated) { - if (access->allocated == 0) - access->allocated = 1; - else - access->allocated *= 2; - access->array = realloc(access->array, access->allocated * sizeof(struct memory_access)); - if (access->array == NULL) { - fprintf(stderr, "cannot realloc struct memory_access access->array\n"); - exit(EXIT_FAILURE); - } - } - access->array[access->num].start = start; - access->array[access->num].stop = stop; - access->num += 1; -} - - - -uint16_t set_endian16(vm_mngr_t* vm_mngr, uint16_t val) -{ - if (vm_mngr->sex == __BYTE_ORDER) - return val; - else - return Endian16_Swap(val); -} - -uint32_t set_endian32(vm_mngr_t* vm_mngr, uint32_t val) -{ - if (vm_mngr->sex == __BYTE_ORDER) - return val; - else - return Endian32_Swap(val); -} - -uint64_t set_endian64(vm_mngr_t* vm_mngr, uint64_t val) -{ - if (vm_mngr->sex == __BYTE_ORDER) - return val; - else - return Endian64_Swap(val); -} - -void print_val(uint64_t base, uint64_t addr) -{ - uint64_t *ptr = (uint64_t *) (intptr_t) addr; - fprintf(stderr, "addr 0x%"PRIX64" val 0x%"PRIX64"\n", addr-base, *ptr); -} - -int midpoint(int imin, int imax) -{ - return (imin + imax) / 2; -} - - -int find_page_node(struct memory_page_node * array, uint64_t key, int imin, int imax) -{ - // continue searching while [imin,imax] 
is not empty - while (imin <= imax) { - // calculate the midpoint for roughly equal partition - int imid = midpoint(imin, imax); - if(array[imid].ad <= key && key < array[imid].ad + array[imid].size) - // key found at index imid - return imid; - // determine which subarray to search - else if (array[imid].ad < key) - // change min index to search upper subarray - imin = imid + 1; - else - // change max index to search lower subarray - imax = imid - 1; - } - // key was not found - return -1; -} - -struct memory_page_node * get_memory_page_from_address(vm_mngr_t* vm_mngr, uint64_t ad, int raise_exception) -{ - struct memory_page_node * mpn; - int i; - - i = find_page_node(vm_mngr->memory_pages_array, - ad, - 0, - vm_mngr->memory_pages_number - 1); - if (i >= 0) { - mpn = &vm_mngr->memory_pages_array[i]; - if ((mpn->ad <= ad) && (ad < mpn->ad + mpn->size)) - return mpn; - } - if (raise_exception) { - fprintf(stderr, "WARNING: address 0x%"PRIX64" is not mapped in virtual memory:\n", ad); - vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; - } - return NULL; -} - -static uint64_t memory_page_read(vm_mngr_t* vm_mngr, unsigned int my_size, uint64_t ad) -{ - struct memory_page_node * mpn; - unsigned char * addr; - uint64_t ret = 0; - struct memory_breakpoint_info * b; - - - mpn = get_memory_page_from_address(vm_mngr, ad, 1); - if (!mpn) - return 0; - - if ((mpn->access & PAGE_READ) == 0){ - fprintf(stderr, "access to non readable page!! 
%"PRIX64"\n", ad); - vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; - return 0; - } - - /* check read breakpoint */ - LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ - if ((b->access & BREAKPOINT_READ) == 0) - continue; - if ((b->ad <= ad) && (ad < b->ad + b->size)) - vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; - } - - - addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; - - /* read fits in a page */ - if (ad - mpn->ad + my_size/8 <= mpn->size){ - switch(my_size){ - case 8: - ret = *((unsigned char*)addr)&0xFF; - break; - case 16: - ret = *((unsigned short*)addr)&0xFFFF; - ret = set_endian16(vm_mngr, (uint16_t)ret); - break; - case 32: - ret = *((unsigned int*)addr)&0xFFFFFFFF; - ret = set_endian32(vm_mngr, (uint32_t)ret); - break; - case 64: - ret = *((uint64_t*)addr)&0xFFFFFFFFFFFFFFFFULL; - ret = set_endian64(vm_mngr, ret); - break; - default: - fprintf(stderr, "Bad memory access size %d\n", my_size); - exit(EXIT_FAILURE); - break; - } - } - /* read is multiple page wide */ - else{ - unsigned int new_size = my_size; - int index = 0; - while (new_size){ - mpn = get_memory_page_from_address(vm_mngr, ad, 1); - if (!mpn) - return 0; - addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; - ret |= ((uint64_t)(*((unsigned char*)addr)&0xFF))<<(index); - index +=8; - new_size -= 8; - ad ++; - } - switch(my_size){ - case 8: - ret = ret; - break; - case 16: - ret = set_endian16(vm_mngr, (uint16_t)ret); - break; - case 32: - ret = set_endian32(vm_mngr, (uint32_t)ret); - break; - case 64: - ret = set_endian64(vm_mngr, ret); - break; - default: - fprintf(stderr, "Bad memory access size %d\n", my_size); - exit(EXIT_FAILURE); - break; - } - } - return ret; -} - -static void memory_page_write(vm_mngr_t* vm_mngr, unsigned int my_size, - uint64_t ad, uint64_t src) -{ - struct memory_page_node * mpn; - unsigned char * addr; - struct memory_breakpoint_info * b; - - mpn = get_memory_page_from_address(vm_mngr, ad, 1); - if (!mpn) - return; - - if ((mpn->access & 
PAGE_WRITE) == 0){ - fprintf(stderr, "access to non writable page!! %"PRIX64"\n", ad); - vm_mngr->exception_flags |= EXCEPT_ACCESS_VIOL; - return ; - } - - /* check read breakpoint*/ - LIST_FOREACH(b, &vm_mngr->memory_breakpoint_pool, next){ - if ((b->access & BREAKPOINT_WRITE) == 0) - continue; - if ((b->ad <= ad) && (ad < b->ad + b->size)) - vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; - } - - addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; - - /* write fits in a page */ - if (ad - mpn->ad + my_size/8 <= mpn->size){ - switch(my_size){ - case 8: - *((unsigned char*)addr) = src&0xFF; - break; - case 16: - src = set_endian16(vm_mngr, (uint16_t)src); - *((unsigned short*)addr) = src&0xFFFF; - break; - case 32: - src = set_endian32(vm_mngr, (uint32_t)src); - *((unsigned int*)addr) = src&0xFFFFFFFF; - break; - case 64: - src = set_endian64(vm_mngr, src); - *((uint64_t*)addr) = src&0xFFFFFFFFFFFFFFFFULL; - break; - default: - fprintf(stderr, "Bad memory access size %d\n", my_size); - exit(EXIT_FAILURE); - break; - } - } - /* write is multiple page wide */ - else{ - switch(my_size){ - - case 8: - src = src; - break; - case 16: - src = set_endian16(vm_mngr, (uint16_t)src); - break; - case 32: - src = set_endian32(vm_mngr, (uint32_t)src); - break; - case 64: - src = set_endian64(vm_mngr, src); - break; - default: - fprintf(stderr, "Bad memory access size %d\n", my_size); - exit(EXIT_FAILURE); - break; - } - while (my_size){ - mpn = get_memory_page_from_address(vm_mngr, ad, 1); - if (!mpn) - return; - - addr = &((unsigned char*)mpn->ad_hp)[ad - mpn->ad]; - *((unsigned char*)addr) = src&0xFF; - my_size -= 8; - src >>=8; - ad ++; - } - } -} - -// ################## - -void dump_code_bloc(vm_mngr_t* vm_mngr) -{ - struct code_bloc_node * cbp; - LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ - fprintf(stderr, "%"PRIX64"%"PRIX64"\n", cbp->ad_start, cbp->ad_stop); - } - -} - -void add_range_to_list(struct memory_access_list * access, uint64_t addr1, uint64_t 
addr2) -{ - if (access->num > 0) { - /* Check match on upper bound */ - if (access->array[access->num-1].stop == addr1) { - access->array[access->num-1].stop = addr2; - return; - } - - /* Check match on lower bound */ - if (access->array[0].start == addr2) { - access->array[0].start = addr1; - return; - } - } - - /* No merge, add to the list */ - memory_access_list_add(access, addr1, addr2); -} - - -void add_mem_read(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t size) -{ - add_range_to_list(&(vm_mngr->memory_r), addr, addr + size); -} - -void add_mem_write(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t size) -{ - add_range_to_list(&(vm_mngr->memory_w), addr, addr + size); -} - -void check_invalid_code_blocs(vm_mngr_t* vm_mngr) -{ - int i; - struct code_bloc_node * cbp; - for (i=0;imemory_w.num; i++) { - if (vm_mngr->exception_flags & EXCEPT_CODE_AUTOMOD) - break; - if (vm_mngr->memory_w.array[i].stop <= vm_mngr->code_bloc_pool_ad_min || - vm_mngr->memory_w.array[i].start >=vm_mngr->code_bloc_pool_ad_max) - continue; - - LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ - if ((cbp->ad_start < vm_mngr->memory_w.array[i].stop) && - (vm_mngr->memory_w.array[i].start < cbp->ad_stop)){ -#ifdef DEBUG_MIASM_AUTOMOD_CODE - fprintf(stderr, "**********************************\n"); - fprintf(stderr, "self modifying code %"PRIX64" %"PRIX64"\n", - vm_mngr->memory_w.array[i].start, - vm_mngr->memory_w.array[i].stop); - fprintf(stderr, "**********************************\n"); -#endif - vm_mngr->exception_flags |= EXCEPT_CODE_AUTOMOD; - break; - } - } - } -} - - -void check_memory_breakpoint(vm_mngr_t* vm_mngr) -{ - int i; - struct memory_breakpoint_info * memory_bp; - - /* Check memory breakpoints */ - LIST_FOREACH(memory_bp, &vm_mngr->memory_breakpoint_pool, next) { - if (vm_mngr->exception_flags & EXCEPT_BREAKPOINT_MEMORY) - break; - if (memory_bp->access & BREAKPOINT_READ) { - for (i=0;imemory_r.num; i++) { - if ((memory_bp->ad < vm_mngr->memory_r.array[i].stop) && - 
(vm_mngr->memory_r.array[i].start < memory_bp->ad + memory_bp->size)) { - vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; - break; - } - } - } - if (memory_bp->access & BREAKPOINT_WRITE) { - for (i=0;imemory_w.num; i++) { - if ((memory_bp->ad < vm_mngr->memory_w.array[i].stop) && - (vm_mngr->memory_w.array[i].start < memory_bp->ad + memory_bp->size)) { - vm_mngr->exception_flags |= EXCEPT_BREAKPOINT_MEMORY; - break; - } - } - } - } -} - - -PyObject* get_memory_pylist(vm_mngr_t* vm_mngr, struct memory_access_list* memory_list) -{ - int i; - PyObject *pylist; - PyObject *range; - pylist = PyList_New(memory_list->num); - for (i=0;inum;i++) { - range = PyTuple_New(2); - PyTuple_SetItem(range, 0, PyLong_FromUnsignedLongLong((uint64_t)memory_list->array[i].start)); - PyTuple_SetItem(range, 1, PyLong_FromUnsignedLongLong((uint64_t)memory_list->array[i].stop)); - PyList_SetItem(pylist, i, range); - } - return pylist; - -} - -PyObject* get_memory_read(vm_mngr_t* vm_mngr) -{ - return get_memory_pylist(vm_mngr, &vm_mngr->memory_r); -} - -PyObject* get_memory_write(vm_mngr_t* vm_mngr) -{ - return get_memory_pylist(vm_mngr, &vm_mngr->memory_w); -} - -void vm_MEM_WRITE_08(vm_mngr_t* vm_mngr, uint64_t addr, unsigned char src) -{ - add_mem_write(vm_mngr, addr, 1); - memory_page_write(vm_mngr, 8, addr, src); -} - -void vm_MEM_WRITE_16(vm_mngr_t* vm_mngr, uint64_t addr, unsigned short src) -{ - add_mem_write(vm_mngr, addr, 2); - memory_page_write(vm_mngr, 16, addr, src); -} -void vm_MEM_WRITE_32(vm_mngr_t* vm_mngr, uint64_t addr, unsigned int src) -{ - add_mem_write(vm_mngr, addr, 4); - memory_page_write(vm_mngr, 32, addr, src); -} -void vm_MEM_WRITE_64(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t src) -{ - add_mem_write(vm_mngr, addr, 8); - memory_page_write(vm_mngr, 64, addr, src); -} - -unsigned char vm_MEM_LOOKUP_08(vm_mngr_t* vm_mngr, uint64_t addr) -{ - unsigned char ret; - add_mem_read(vm_mngr, addr, 1); - ret = (unsigned char)memory_page_read(vm_mngr, 8, addr); - return 
ret; -} -unsigned short vm_MEM_LOOKUP_16(vm_mngr_t* vm_mngr, uint64_t addr) -{ - unsigned short ret; - add_mem_read(vm_mngr, addr, 2); - ret = (unsigned short)memory_page_read(vm_mngr, 16, addr); - return ret; -} -unsigned int vm_MEM_LOOKUP_32(vm_mngr_t* vm_mngr, uint64_t addr) -{ - unsigned int ret; - add_mem_read(vm_mngr, addr, 4); - ret = (unsigned int)memory_page_read(vm_mngr, 32, addr); - return ret; -} -uint64_t vm_MEM_LOOKUP_64(vm_mngr_t* vm_mngr, uint64_t addr) -{ - uint64_t ret; - add_mem_read(vm_mngr, addr, 8); - ret = memory_page_read(vm_mngr, 64, addr); - return ret; -} - - -int vm_read_mem(vm_mngr_t* vm_mngr, uint64_t addr, char** buffer_ptr, uint64_t size) -{ - char* buffer; - uint64_t len; - struct memory_page_node * mpn; - - buffer = malloc(size); - *buffer_ptr = buffer; - if (!buffer){ - fprintf(stderr, "Error: cannot alloc read\n"); - exit(EXIT_FAILURE); - } - - /* read is multiple page wide */ - while (size){ - mpn = get_memory_page_from_address(vm_mngr, addr, 1); - if (!mpn){ - free(*buffer_ptr); - PyErr_SetString(PyExc_RuntimeError, "Error: cannot find address"); - return -1; - } - - len = MIN(size, mpn->size - (addr - mpn->ad)); - memcpy(buffer, (char*)mpn->ad_hp + (addr - mpn->ad), len); - buffer += len; - addr += len; - size -= len; - } - - return 0; -} - -int vm_write_mem(vm_mngr_t* vm_mngr, uint64_t addr, char *buffer, uint64_t size) -{ - uint64_t len; - struct memory_page_node * mpn; - - /* write is multiple page wide */ - while (size){ - mpn = get_memory_page_from_address(vm_mngr, addr, 1); - if (!mpn){ - PyErr_SetString(PyExc_RuntimeError, "Error: cannot find address"); - return -1; - } - - len = MIN(size, mpn->size - (addr - mpn->ad)); - memcpy((char*)mpn->ad_hp + (addr-mpn->ad), buffer, len); - buffer += len; - addr += len; - size -= len; - } - - return 0; -} - - - -int is_mapped(vm_mngr_t* vm_mngr, uint64_t addr, uint64_t size) -{ - uint64_t len; - struct memory_page_node * mpn; - - /* test multiple page wide */ - while (size){ - mpn 
= get_memory_page_from_address(vm_mngr, addr, 0); - if (!mpn) - return 0; - - len = MIN(size, mpn->size - (addr - mpn->ad)); - addr += len; - size -= len; - } - - return 1; -} - -struct memory_page_node * create_memory_page_node(uint64_t ad, unsigned int size, unsigned int access, char* name) -{ - struct memory_page_node * mpn; - void* ad_hp; - - mpn = malloc(sizeof(*mpn)); - if (!mpn){ - fprintf(stderr, "Error: cannot alloc mpn\n"); - return NULL; - } - ad_hp = malloc(size); - if (!ad_hp){ - free(mpn); - fprintf(stderr, "Error: cannot alloc %d\n", size); - return NULL; - } - mpn->name = malloc(strlen(name) + 1); - if (!mpn->name){ - free(mpn); - free(ad_hp); - fprintf(stderr, "Error: cannot alloc\n"); - return NULL; - } - - mpn->ad = ad; - mpn->size = size; - mpn->access = access; - mpn->ad_hp = ad_hp; - strcpy(mpn->name, name); - - return mpn; -} - - -struct code_bloc_node * create_code_bloc_node(uint64_t ad_start, uint64_t ad_stop) -{ - struct code_bloc_node * cbp; - - cbp = malloc(sizeof(*cbp)); - if (!cbp){ - fprintf(stderr, "Error: cannot alloc cbp\n"); - exit(EXIT_FAILURE); - } - - cbp->ad_start = ad_start; - cbp->ad_stop = ad_stop; - - return cbp; -} - - -void add_code_bloc(vm_mngr_t* vm_mngr, struct code_bloc_node* cbp) -{ - LIST_INSERT_HEAD(&vm_mngr->code_bloc_pool, cbp, next); - if (vm_mngr->code_bloc_pool_ad_min> cbp->ad_start) - vm_mngr->code_bloc_pool_ad_min = cbp->ad_start; - if (vm_mngr->code_bloc_pool_ad_max< cbp->ad_stop) - vm_mngr->code_bloc_pool_ad_max = cbp->ad_stop; -} - -void dump_code_bloc_pool(vm_mngr_t* vm_mngr) -{ - struct code_bloc_node * cbp; - - LIST_FOREACH(cbp, &vm_mngr->code_bloc_pool, next){ - printf("ad start %"PRIX64" ad_stop %"PRIX64"\n", - cbp->ad_start, - cbp->ad_stop); - } -} - - -void init_memory_page_pool(vm_mngr_t* vm_mngr) -{ - - vm_mngr->memory_pages_number = 0; - vm_mngr->memory_pages_array = NULL; -} - -void init_code_bloc_pool(vm_mngr_t* vm_mngr) -{ - LIST_INIT(&vm_mngr->code_bloc_pool); - 
vm_mngr->code_bloc_pool_ad_min = 0xffffffffffffffffULL; - vm_mngr->code_bloc_pool_ad_max = 0; - - memory_access_list_init(&(vm_mngr->memory_r)); - memory_access_list_init(&(vm_mngr->memory_w)); - - -} - -void init_memory_breakpoint(vm_mngr_t* vm_mngr) -{ - LIST_INIT(&vm_mngr->memory_breakpoint_pool); -} - - -void reset_memory_page_pool(vm_mngr_t* vm_mngr) -{ - struct memory_page_node * mpn; - int i; - for (i=0;imemory_pages_number; i++) { - mpn = &vm_mngr->memory_pages_array[i]; - free(mpn->ad_hp); - free(mpn->name); - } - free(vm_mngr->memory_pages_array); - vm_mngr->memory_pages_array = NULL; - vm_mngr->memory_pages_number = 0; -} - - -void reset_code_bloc_pool(vm_mngr_t* vm_mngr) -{ - struct code_bloc_node * cbp; - - - while (!LIST_EMPTY(&vm_mngr->code_bloc_pool)) { - cbp = LIST_FIRST(&vm_mngr->code_bloc_pool); - LIST_REMOVE(cbp, next); - free(cbp); - } - vm_mngr->code_bloc_pool_ad_min = 0xffffffffffffffffULL; - vm_mngr->code_bloc_pool_ad_max = 0; -} - -void reset_memory_access(vm_mngr_t* vm_mngr) -{ - memory_access_list_reset(&(vm_mngr->memory_r)); - memory_access_list_reset(&(vm_mngr->memory_w)); -} - -void reset_memory_breakpoint(vm_mngr_t* vm_mngr) -{ - struct memory_breakpoint_info * mpn; - - while (!LIST_EMPTY(&vm_mngr->memory_breakpoint_pool)) { - mpn = LIST_FIRST(&vm_mngr->memory_breakpoint_pool); - LIST_REMOVE(mpn, next); - free(mpn); - } - -} - - - -/* We don't use dichotomy here for the insertion */ -int is_mpn_in_tab(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) -{ - struct memory_page_node * mpn; - int i; - - for (i=0;imemory_pages_number; i++) { - mpn = &vm_mngr->memory_pages_array[i]; - if (mpn->ad >= mpn_a->ad + mpn_a->size) - continue; - if (mpn->ad + mpn->size <= mpn_a->ad) - continue; - fprintf(stderr, - "Error: attempt to add page (0x%"PRIX64" 0x%"PRIX64") " - "overlapping page (0x%"PRIX64" 0x%"PRIX64")\n", - mpn_a->ad, mpn_a->ad + mpn_a->size, - mpn->ad, mpn->ad + mpn->size); - - return 1; - } - - return 0; -} - - -/* We don't use 
dichotomy here for the insertion */ -void add_memory_page(vm_mngr_t* vm_mngr, struct memory_page_node* mpn_a) -{ - struct memory_page_node * mpn; - int i; - - for (i=0; i < vm_mngr->memory_pages_number; i++) { - mpn = &vm_mngr->memory_pages_array[i]; - if (mpn->ad < mpn_a->ad) - continue; - break; - } - vm_mngr->memory_pages_array = realloc(vm_mngr->memory_pages_array, - sizeof(struct memory_page_node) * - (vm_mngr->memory_pages_number+1)); - if (vm_mngr->memory_pages_array == NULL) { - fprintf(stderr, "cannot realloc struct memory_page_node vm_mngr->memory_pages_array\n"); - exit(EXIT_FAILURE); - } - - - memmove(&vm_mngr->memory_pages_array[i+1], - &vm_mngr->memory_pages_array[i], - sizeof(struct memory_page_node) * (vm_mngr->memory_pages_number - i) - ); - - vm_mngr->memory_pages_array[i] = *mpn_a; - vm_mngr->memory_pages_number ++; - -} - -/* Return a char* representing the repr of vm_mngr_t object */ -char* dump(vm_mngr_t* vm_mngr) -{ - char buf[0x100]; - int length; - char *buf_final; - int i; - char buf_addr[0x20]; - char buf_size[0x20]; - struct memory_page_node * mpn; - /* 0x1234567812345678 0x1234567812345678 */ - char* intro = "Addr Size Access Comment\n"; - size_t total_len = strlen(intro) + 1; - - buf_final = malloc(total_len); - if (buf_final == NULL) { - fprintf(stderr, "Error: cannot alloc char* buf_final\n"); - exit(EXIT_FAILURE); - } - strcpy(buf_final, intro); - for (i=0; i< vm_mngr->memory_pages_number; i++) { - mpn = &vm_mngr->memory_pages_array[i]; - snprintf(buf_addr, sizeof(buf_addr), - "0x%"PRIX64, (uint64_t)mpn->ad); - snprintf(buf_size, sizeof(buf_size), - "0x%"PRIX64, (uint64_t)mpn->size); - - length = snprintf(buf, sizeof(buf) - 1, - "%-18s %-18s %c%c%c %s", - buf_addr, - buf_size, - mpn->access & PAGE_READ? 'R':'_', - mpn->access & PAGE_WRITE? 'W':'_', - mpn->access & PAGE_EXEC? 
'X':'_', - mpn->name - ); - strcat(buf, "\n"); - total_len += length + 1 + 1; - buf_final = realloc(buf_final, total_len); - if (buf_final == NULL) { - fprintf(stderr, "cannot realloc char* buf_final\n"); - exit(EXIT_FAILURE); - } - strcat(buf_final, buf); - } - - return buf_final; -} - -void dump_memory_breakpoint_pool(vm_mngr_t* vm_mngr) -{ - struct memory_breakpoint_info * mpn; - - LIST_FOREACH(mpn, &vm_mngr->memory_breakpoint_pool, next){ - printf("ad %"PRIX64" size %"PRIX64" access %"PRIX64"\n", - mpn->ad, - mpn->size, - mpn->access - ); - } -} - - -void add_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, uint64_t size, unsigned int access) -{ - struct memory_breakpoint_info * mpn_a; - mpn_a = malloc(sizeof(*mpn_a)); - if (!mpn_a) { - fprintf(stderr, "Error: cannot alloc\n"); - exit(EXIT_FAILURE); - } - mpn_a->ad = ad; - mpn_a->size = size; - mpn_a->access = access; - - LIST_INSERT_HEAD(&vm_mngr->memory_breakpoint_pool, mpn_a, next); - -} - -void remove_memory_breakpoint(vm_mngr_t* vm_mngr, uint64_t ad, unsigned int access) -{ - struct memory_breakpoint_info * mpn; - - LIST_FOREACH(mpn, &vm_mngr->memory_breakpoint_pool, next){ - if (mpn->ad == ad && mpn->access == access) - LIST_REMOVE(mpn, next); - } - -} - - -/********************************************/ - -void hexdump(char* m, unsigned int l) -{ - unsigned int i, j, last; - last = 0; - for (i=0;iexception_flags; -} - - diff --git a/miasm2/jitter/vm_mngr.h b/miasm2/jitter/vm_mngr.h deleted file mode 100644 index 660e6998..00000000 --- a/miasm2/jitter/vm_mngr.h +++ /dev/null @@ -1,302 +0,0 @@ -/* -** Copyright (C) 2011 EADS France, Fabrice Desclaux -** -** This program is free software; you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation; either version 2 of the License, or -** (at your option) any later version. 
-** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License along -** with this program; if not, write to the Free Software Foundation, Inc., -** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ -#ifndef CODENAT_H -#define CODENAT_H - -#if defined(_WIN32) || defined(_WIN64) -#define _CRT_SECURE_NO_WARNINGS -#endif - -#if _WIN32 -#define _MIASM_EXPORT __declspec(dllexport) -#else -#define _MIASM_EXPORT -#endif - -#include -#include - -#include "queue.h" - -#ifdef __APPLE__ -#define __BYTE_ORDER __BYTE_ORDER__ -#elif defined(__NetBSD__) || defined(__OpenBSD__) -#define __BYTE_ORDER _BYTE_ORDER -#define __BIG_ENDIAN _BIG_ENDIAN -#define __LITTLE_ENDIAN _LITTLE_ENDIAN -#elif defined(_WIN32) || defined(_WIN64) -#define __BIG_ENDIAN '>' -#define __LITTLE_ENDIAN '<' -#define __BYTE_ORDER __LITTLE_ENDIAN -#endif - - -#define Endian16_Swap(value) \ - ((((uint16_t)((value) & 0x00FF)) << 8) | \ - (((uint16_t)((value) & 0xFF00)) >> 8)) - -#define Endian32_Swap(value) \ - ((((uint32_t)((value) & 0x000000FF)) << 24) | \ - (((uint32_t)((value) & 0x0000FF00)) << 8) | \ - (((uint32_t)((value) & 0x00FF0000)) >> 8) | \ - (((uint32_t)((value) & 0xFF000000)) >> 24)) - -#define Endian64_Swap(value) \ - (((((uint64_t)value)<<56) & 0xFF00000000000000ULL) | \ - ((((uint64_t)value)<<40) & 0x00FF000000000000ULL) | \ - ((((uint64_t)value)<<24) & 0x0000FF0000000000ULL) | \ - ((((uint64_t)value)<< 8) & 0x000000FF00000000ULL) | \ - ((((uint64_t)value)>> 8) & 0x00000000FF000000ULL) | \ - ((((uint64_t)value)>>24) & 0x0000000000FF0000ULL) | \ - ((((uint64_t)value)>>40) & 0x000000000000FF00ULL) | \ - ((((uint64_t)value)>>56) & 0x00000000000000FFULL)) - - -LIST_HEAD(code_bloc_list_head, code_bloc_node); 
-LIST_HEAD(memory_breakpoint_info_head, memory_breakpoint_info); - - -#define BREAKPOINT_READ 1 -#define BREAKPOINT_WRITE 2 - -#define BREAK_SIGALARM 1<<5 - -#define MAX_MEMORY_PAGE_POOL_TAB 0x100000 -#define MEMORY_PAGE_POOL_MASK_BIT 12 -#define PAGE_SIZE (1< -** -** This program is free software; you can redistribute it and/or modify -** it under the terms of the GNU General Public License as published by -** the Free Software Foundation; either version 2 of the License, or -** (at your option) any later version. -** -** This program is distributed in the hope that it will be useful, -** but WITHOUT ANY WARRANTY; without even the implied warranty of -** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -** GNU General Public License for more details. -** -** You should have received a copy of the GNU General Public License along -** with this program; if not, write to the Free Software Foundation, Inc., -** 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ -#include -#include "structmember.h" -#include -#include -#include -#include "compat_py23.h" -#include "queue.h" -#include "vm_mngr.h" -#include "vm_mngr_py.h" - -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) - -extern struct memory_page_list_head memory_page_pool; -extern struct code_bloc_list_head code_bloc_pool; - -#define RAISE(errtype, msg) {PyObject* p; p = PyErr_Format( errtype, msg ); return p;} - - - -/* XXX POC signals */ -VmMngr* global_vmmngr; - -PyObject* _vm_get_exception(unsigned int xcpt) -{ - PyObject*p; - - if (!xcpt) - p = NULL; - else if (xcpt & EXCEPT_CODE_AUTOMOD) - p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_CODE_AUTOMOD" ); - else if (xcpt & EXCEPT_UNK_EIP) - p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNK_EIP" ); - else if (xcpt & EXCEPT_UNK_MEM_AD) - p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNK_MEM_AD" ); - - else p = PyErr_Format( PyExc_RuntimeError, "EXCEPT_UNKNOWN" ); - return p; -} - -static void sig_alarm(int signo) -{ 
- global_vmmngr->vm_mngr.exception_flags |= BREAK_SIGALARM; - return; -} - -PyObject* set_alarm(VmMngr* self) -{ - global_vmmngr = self; - signal(SIGALRM, sig_alarm); - - Py_INCREF(Py_None); - return Py_None; -} - - - -PyObject* vm_add_memory_page(VmMngr* self, PyObject* args) -{ - PyObject *addr; - PyObject *access; - PyObject *item_str; - PyObject *name=NULL; - uint64_t buf_size; - char* buf_data; - Py_ssize_t length; - uint64_t page_addr; - uint64_t page_access; - char* name_ptr; - - struct memory_page_node * mpn; - - if (!PyArg_ParseTuple(args, "OOO|O", &addr, &access, &item_str, &name)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(addr, page_addr); - PyGetInt(access, page_access); - - if(!PyBytes_Check(item_str)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - buf_size = PyBytes_Size(item_str); - PyBytes_AsStringAndSize(item_str, &buf_data, &length); - - if (name == NULL) { - name_ptr = (char*)""; - } else { - PyGetStr(name_ptr, name); - } - mpn = create_memory_page_node(page_addr, (unsigned int)buf_size, (unsigned int)page_access, name_ptr); - if (mpn == NULL) - RAISE(PyExc_TypeError,"cannot create page"); - if (is_mpn_in_tab(&self->vm_mngr, mpn)) { - free(mpn->ad_hp); - free(mpn); - RAISE(PyExc_TypeError,"known page in memory"); - } - - memcpy(mpn->ad_hp, buf_data, buf_size); - add_memory_page(&self->vm_mngr, mpn); - - Py_INCREF(Py_None); - return Py_None; -} - - - -PyObject* vm_set_mem_access(VmMngr* self, PyObject* args) -{ - PyObject *addr; - PyObject *access; - uint64_t page_addr; - uint64_t page_access; - struct memory_page_node * mpn; - - if (!PyArg_ParseTuple(args, "OO", &addr, &access)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(addr, page_addr); - PyGetInt(access, page_access); - - mpn = get_memory_page_from_address(&self->vm_mngr, page_addr, 1); - if (!mpn){ - PyErr_SetString(PyExc_RuntimeError, "cannot find address"); - return 0; - } - - mpn->access = page_access; - - Py_INCREF(Py_None); - return Py_None; 
-} - -PyObject* vm_set_mem(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_buffer; - Py_ssize_t py_length; - - char * buffer; - uint64_t size; - uint64_t addr; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_buffer)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - if (!PyBytes_Check(py_buffer)) - RAISE(PyExc_TypeError,"arg must be bytes"); - - size = PyBytes_Size(py_buffer); - PyBytes_AsStringAndSize(py_buffer, &buffer, &py_length); - - ret = vm_write_mem(&self->vm_mngr, addr, buffer, size); - if (ret < 0) - RAISE(PyExc_TypeError, "Error in set_mem"); - - add_mem_write(&self->vm_mngr, addr, size); - check_invalid_code_blocs(&self->vm_mngr); - - Py_INCREF(Py_None); - return Py_None; -} - - - -PyObject* vm_get_mem_access(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - uint64_t page_addr; - struct memory_page_node * mpn; - - if (!PyArg_ParseTuple(args, "O", &py_addr)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, page_addr); - - mpn = get_memory_page_from_address(&self->vm_mngr, page_addr, 1); - if (!mpn){ - PyErr_SetString(PyExc_RuntimeError, "cannot find address"); - return 0; - } - - return PyLong_FromUnsignedLongLong((uint64_t)mpn->access); -} - -PyObject* vm_get_mem(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_len; - - uint64_t addr; - uint64_t size; - PyObject *obj_out; - char * buf_out; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_len)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - PyGetInt(py_len, size); - - ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, size); - if (ret < 0) { - RAISE(PyExc_RuntimeError,"Cannot find address"); - } - - obj_out = PyBytes_FromStringAndSize(buf_out, size); - free(buf_out); - return obj_out; -} - -PyObject* vm_get_u8(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - - uint64_t addr; - PyObject *obj_out; - char * buf_out; - int ret; - 
uint32_t value; - - if (!PyArg_ParseTuple(args, "O", &py_addr)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 1); - if (ret < 0) { - RAISE(PyExc_RuntimeError,"Cannot find address"); - } - - value = *(uint8_t*)buf_out; - - obj_out = PyLong_FromUnsignedLongLong(value); - free(buf_out); - return obj_out; -} - -PyObject* vm_get_u16(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - - uint64_t addr; - PyObject *obj_out; - char * buf_out; - int ret; - uint16_t value; - - if (!PyArg_ParseTuple(args, "O", &py_addr)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 2); - if (ret < 0) { - RAISE(PyExc_RuntimeError,"Cannot find address"); - } - - value = set_endian16(&self->vm_mngr, *(uint16_t*)buf_out); - - obj_out = PyLong_FromUnsignedLongLong(value); - free(buf_out); - return obj_out; -} - -PyObject* vm_get_u32(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - - uint64_t addr; - PyObject *obj_out; - char * buf_out; - int ret; - uint32_t value; - - if (!PyArg_ParseTuple(args, "O", &py_addr)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 4); - if (ret < 0) { - RAISE(PyExc_RuntimeError,"Cannot find address"); - } - - value = set_endian32(&self->vm_mngr, *(uint32_t*)buf_out); - - obj_out = PyLong_FromUnsignedLongLong(value); - free(buf_out); - return obj_out; -} - - -PyObject* vm_get_u64(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - - uint64_t addr; - PyObject *obj_out; - char * buf_out; - int ret; - uint64_t value; - - if (!PyArg_ParseTuple(args, "O", &py_addr)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - - ret = vm_read_mem(&self->vm_mngr, addr, &buf_out, 8); - if (ret < 0) { - RAISE(PyExc_RuntimeError,"Cannot find address"); - } - - value = 
set_endian64(&self->vm_mngr, *(uint64_t*)buf_out); - - obj_out = PyLong_FromUnsignedLongLong(value); - free(buf_out); - return obj_out; -} - - -PyObject* vm_set_u8(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_val; - uint64_t value; - uint64_t addr; - uint8_t final_value; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - PyGetInt(py_val, value); - - if (value > 0xFF) { - fprintf(stderr, "Warning: int to big\n"); - } - - final_value = value; - - ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 1); - if (ret < 0) - RAISE(PyExc_TypeError, "Error in set_mem"); - - add_mem_write(&self->vm_mngr, addr, 1); - check_invalid_code_blocs(&self->vm_mngr); - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_set_u16(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_val; - uint64_t value; - uint64_t addr; - uint16_t final_value; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - PyGetInt(py_val, value); - - if (value > 0xFFFF) { - fprintf(stderr, "Warning: int to big\n"); - } - - final_value = set_endian16(&self->vm_mngr, value); - - ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 2); - if (ret < 0) - RAISE(PyExc_TypeError, "Error in set_mem"); - - add_mem_write(&self->vm_mngr, addr, 2); - check_invalid_code_blocs(&self->vm_mngr); - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_set_u32(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_val; - uint64_t value; - uint64_t addr; - uint32_t final_value; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - PyGetInt(py_val, value); - - if (value > 0xFFFFFFFF) { - fprintf(stderr, "Warning: int to big\n"); - } - - final_value = 
set_endian32(&self->vm_mngr, value); - - ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 4); - if (ret < 0) - RAISE(PyExc_TypeError, "Error in set_mem"); - - add_mem_write(&self->vm_mngr, addr, 4); - check_invalid_code_blocs(&self->vm_mngr); - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_set_u64(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_val; - uint64_t value; - uint64_t addr; - uint64_t final_value; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_val)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - PyGetInt(py_val, value); - - final_value = set_endian64(&self->vm_mngr, value); - - ret = vm_write_mem(&self->vm_mngr, addr, (char*)&final_value, 8); - if (ret < 0) - RAISE(PyExc_TypeError, "Error in set_mem"); - - add_mem_write(&self->vm_mngr, addr, 8); - check_invalid_code_blocs(&self->vm_mngr); - - Py_INCREF(Py_None); - return Py_None; -} - - - - - -PyObject* vm_add_memory_breakpoint(VmMngr* self, PyObject* args) -{ - PyObject *ad; - PyObject *size; - PyObject *access; - - uint64_t b_ad; - uint64_t b_size; - uint64_t b_access; - - if (!PyArg_ParseTuple(args, "OOO", &ad, &size, &access)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(ad, b_ad); - PyGetInt(size, b_size); - PyGetInt(access, b_access); - - add_memory_breakpoint(&self->vm_mngr, b_ad, b_size, (unsigned int)b_access); - - /* Raise exception in the following pattern: - - set_mem(XXX) - - add_memory_breakpoint(XXX) - -> Here, there is a pending breakpoint not raise - */ - check_memory_breakpoint(&self->vm_mngr); - - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject* vm_remove_memory_breakpoint(VmMngr* self, PyObject* args) -{ - PyObject *ad; - PyObject *access; - uint64_t b_ad; - uint64_t b_access; - - if (!PyArg_ParseTuple(args, "OO", &ad, &access)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(ad, b_ad); - PyGetInt(access, b_access); - 
remove_memory_breakpoint(&self->vm_mngr, b_ad, (unsigned int)b_access); - - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject* vm_set_exception(VmMngr* self, PyObject* args) -{ - PyObject *item1; - uint64_t i; - - if (!PyArg_ParseTuple(args, "O", &item1)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, i); - - self->vm_mngr.exception_flags = i; - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_get_exception(VmMngr* self, PyObject* args) -{ - return PyLong_FromUnsignedLongLong((uint64_t)self->vm_mngr.exception_flags); -} - - - - -PyObject* vm_init_memory_page_pool(VmMngr* self, PyObject* args) -{ - init_memory_page_pool(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_init_code_bloc_pool(VmMngr* self, PyObject* args) -{ - init_code_bloc_pool(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; - -} - -PyObject* vm_init_memory_breakpoint(VmMngr* self, PyObject* args) -{ - init_memory_breakpoint(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; - -} - -PyObject* vm_reset_memory_breakpoint(VmMngr* self, PyObject* args) -{ - reset_memory_breakpoint(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; - -} - -PyObject* vm_reset_memory_access(VmMngr* self, PyObject* args) -{ - reset_memory_access(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* py_add_mem_read(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_size; - uint64_t addr; - uint64_t size; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_size)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(py_addr, addr); - PyGetInt(py_size, size); - add_mem_read(&self->vm_mngr, addr, size); - Py_INCREF(Py_None); - return Py_None; - -} - -PyObject* py_add_mem_write(VmMngr* self, PyObject* args) -{ - PyObject *py_addr; - PyObject *py_size; - uint64_t addr; - uint64_t size; - - if (!PyArg_ParseTuple(args, "OO", &py_addr, &py_size)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - 
PyGetInt(py_addr, addr); - PyGetInt(py_size, size); - add_mem_write(&self->vm_mngr, addr, size); - Py_INCREF(Py_None); - return Py_None; - -} - -PyObject* vm_check_invalid_code_blocs(VmMngr* self, PyObject* args) -{ - check_invalid_code_blocs(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_check_memory_breakpoint(VmMngr* self, PyObject* args) -{ - check_memory_breakpoint(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; -} - -PyObject *vm_dump(PyObject* self) -{ - char* buf_final; - PyObject* ret_obj; - - buf_final = dump(&((VmMngr* )self)->vm_mngr); - ret_obj = PyUnicode_FromString(buf_final); - free(buf_final); - return ret_obj; -} - -PyObject* vm_dump_memory_breakpoint(VmMngr* self, PyObject* args) -{ - dump_memory_breakpoint_pool(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; -} - - -PyObject* vm_get_all_memory(VmMngr* self, PyObject* args) -{ - PyObject *o; - struct memory_page_node * mpn; - PyObject *dict; - PyObject *dict2; - int i; - - - dict = PyDict_New(); - - for (i=0;ivm_mngr.memory_pages_number; i++) { - mpn = &self->vm_mngr.memory_pages_array[i]; - - dict2 = PyDict_New(); - - o = PyBytes_FromStringAndSize(mpn->ad_hp, mpn->size); - PyDict_SetItemString(dict2, "data", o); - Py_DECREF(o); - - o = PyLong_FromLong((long)mpn->size); - PyDict_SetItemString(dict2, "size", o); - Py_DECREF(o); - - o = PyLong_FromLong((long)mpn->access); - PyDict_SetItemString(dict2, "access", o); - Py_DECREF(o); - - o = PyLong_FromUnsignedLongLong(mpn->ad); - PyDict_SetItem(dict, o, dict2); - Py_DECREF(o); - Py_DECREF(dict2); - } - return dict; -} - - -PyObject* vm_reset_memory_page_pool(VmMngr* self, PyObject* args) -{ - reset_memory_page_pool(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; - -} - -PyObject* vm_reset_code_bloc_pool(VmMngr* self, PyObject* args) -{ - reset_code_bloc_pool(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; - -} - - -PyObject* vm_add_code_bloc(VmMngr *self, PyObject *args) -{ - PyObject 
*item1; - PyObject *item2; - uint64_t ad_start, ad_stop, ad_code = 0; - - struct code_bloc_node * cbp; - - if (!PyArg_ParseTuple(args, "OO", &item1, &item2)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(item1, ad_start); - PyGetInt(item2, ad_stop); - - cbp = create_code_bloc_node(ad_start, ad_stop); - cbp->ad_start = ad_start; - cbp->ad_stop = ad_stop; - cbp->ad_code = ad_code; - add_code_bloc(&self->vm_mngr, cbp); - - Py_INCREF(Py_None); - return Py_None; -} - -PyObject* vm_dump_code_bloc_pool(VmMngr* self) -{ - dump_code_bloc_pool(&self->vm_mngr); - Py_INCREF(Py_None); - return Py_None; - -} - - - -PyObject* vm_is_mapped(VmMngr* self, PyObject* args) -{ - PyObject *ad; - PyObject *size; - uint64_t b_ad; - uint64_t b_size; - int ret; - - if (!PyArg_ParseTuple(args, "OO", &ad, &size)) - RAISE(PyExc_TypeError,"Cannot parse arguments"); - - PyGetInt(ad, b_ad); - PyGetInt(size, b_size); - ret = is_mapped(&self->vm_mngr, b_ad, b_size); - return PyLong_FromUnsignedLongLong((uint64_t)ret); -} - -PyObject* vm_get_memory_read(VmMngr* self, PyObject* args) -{ - PyObject* result; - result = get_memory_read(&self->vm_mngr); - Py_INCREF(result); - return result; -} - -PyObject* vm_get_memory_write(VmMngr* self, PyObject* args) -{ - PyObject* result; - result = get_memory_write(&self->vm_mngr); - Py_INCREF(result); - return result; -} - - - -static PyObject * -vm_set_big_endian(VmMngr *self, PyObject *value, void *closure) -{ - self->vm_mngr.sex = __BIG_ENDIAN; - Py_INCREF(Py_None); - return Py_None; -} - -static PyObject * -vm_set_little_endian(VmMngr *self, PyObject *value, void *closure) -{ - self->vm_mngr.sex = __LITTLE_ENDIAN; - Py_INCREF(Py_None); - return Py_None; -} - - -static PyObject * -vm_is_little_endian(VmMngr *self, PyObject *value, void *closure) -{ - if (self->vm_mngr.sex == __BIG_ENDIAN) { - return PyLong_FromUnsignedLongLong(0); - } else { - return PyLong_FromUnsignedLongLong(1); - } -} - - -static void -VmMngr_dealloc(VmMngr* self) -{ - 
vm_reset_memory_page_pool(self, NULL); - vm_reset_code_bloc_pool(self, NULL); - vm_reset_memory_breakpoint(self, NULL); - Py_TYPE(self)->tp_free((PyObject*)self); -} - - -static PyObject * -VmMngr_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - VmMngr *self; - - self = (VmMngr *)type->tp_alloc(type, 0); - return (PyObject *)self; -} - -static PyObject * -VmMngr_get_vmmngr(VmMngr *self, void *closure) -{ - return PyLong_FromUnsignedLongLong((uint64_t)(intptr_t)&(self->vm_mngr)); -} - -static int -VmMngr_set_vmmngr(VmMngr *self, PyObject *value, void *closure) -{ - PyErr_SetString(PyExc_TypeError, "immutable vmmngr"); - return -1; -} - -static PyMemberDef VmMngr_members[] = { - {NULL} /* Sentinel */ -}; - -static PyMethodDef VmMngr_methods[] = { - {"init_memory_page_pool", (PyCFunction)vm_init_memory_page_pool, METH_VARARGS, - "init_memory_page_pool() -> Initialize the VmMngr memory"}, - {"init_memory_breakpoint", (PyCFunction)vm_init_memory_breakpoint, METH_VARARGS, - "init_memory_breakpoint() -> Initialize the VmMngr memory breakpoints"}, - {"init_code_bloc_pool",(PyCFunction)vm_init_code_bloc_pool, METH_VARARGS, - "init_code_bloc_pool() -> Initialize the VmMngr jitted code blocks"}, - {"set_mem_access", (PyCFunction)vm_set_mem_access, METH_VARARGS, - "set_mem_access(address, access) -> Change the protection of the page at @address with @access"}, - {"set_mem", (PyCFunction)vm_set_mem, METH_VARARGS, - "set_mem(address, data) -> Set a @data in memory at @address"}, - {"is_mapped", (PyCFunction)vm_is_mapped, METH_VARARGS, - "is_mapped(address, size) -> Check if the memory region at @address of @size bytes is fully mapped"}, - {"add_code_bloc",(PyCFunction)vm_add_code_bloc, METH_VARARGS, - "add_code_bloc(address_start, address_stop) -> Add a jitted code block between [@address_start, @address_stop["}, - {"get_mem_access", (PyCFunction)vm_get_mem_access, METH_VARARGS, - "get_mem_access(address) -> Retrieve the memory protection of the page at @address"}, - 
{"get_mem", (PyCFunction)vm_get_mem, METH_VARARGS, - "get_mem(addr, size) -> Get the memory content at @address of @size bytes"}, - - {"get_u8", (PyCFunction)vm_get_u8, METH_VARARGS, - "get_u8(addr) -> Get a u8 at @address of @size bytes (vm endianness)"}, - {"get_u16", (PyCFunction)vm_get_u16, METH_VARARGS, - "get_u16(addr) -> Get a u16 at @address of @size bytes (vm endianness)"}, - {"get_u32", (PyCFunction)vm_get_u32, METH_VARARGS, - "get_u32(addr) -> Get a u32 at @address of @size bytes (vm endianness)"}, - {"get_u64", (PyCFunction)vm_get_u64, METH_VARARGS, - "get_u64(addr) -> Get a u64 at @address of @size bytes (vm endianness)"}, - - - {"set_u8", (PyCFunction)vm_set_u8, METH_VARARGS, - "set_u8(addr, value) -> Set a u8 at @address of @size bytes (vm endianness)"}, - {"set_u16", (PyCFunction)vm_set_u16, METH_VARARGS, - "set_u16(addr, value) -> Set a u16 at @address of @size bytes (vm endianness)"}, - {"set_u32", (PyCFunction)vm_set_u32, METH_VARARGS, - "set_u32(addr, value) -> Set a u32 at @address of @size bytes (vm endianness)"}, - {"set_u64", (PyCFunction)vm_set_u64, METH_VARARGS, - "set_u64(addr, value) -> Set a u64 at @address of @size bytes (vm endianness)"}, - - {"add_memory_page",(PyCFunction)vm_add_memory_page, METH_VARARGS, - "add_memory_page(address, access, content [, cmt]) -> Maps a memory page at @address of len(@content) bytes containing @content with protection @access\n" - "@cmt is a comment linked to the memory page"}, - {"add_memory_breakpoint",(PyCFunction)vm_add_memory_breakpoint, METH_VARARGS, - "add_memory_breakpoint(address, size, access) -> Add a memory breakpoint at @address of @size bytes with @access type"}, - {"remove_memory_breakpoint",(PyCFunction)vm_remove_memory_breakpoint, METH_VARARGS, - "remove_memory_breakpoint(address, access) -> Remove a memory breakpoint at @address with @access type"}, - {"set_exception", (PyCFunction)vm_set_exception, METH_VARARGS, - "set_exception(exception) -> Set the VmMngr exception flags to 
@exception"}, - {"dump_memory_breakpoint", (PyCFunction)vm_dump_memory_breakpoint, METH_VARARGS, - "dump_memory_breakpoint() -> Lists each memory breakpoint"}, - {"get_all_memory",(PyCFunction)vm_get_all_memory, METH_VARARGS, - "get_all_memory() -> Returns a dictionary representing the VmMngr memory.\n" - "Keys are the addresses of each memory page.\n" - "Values are another dictionary containing page properties ('data', 'size', 'access')" - }, - {"reset_memory_page_pool", (PyCFunction)vm_reset_memory_page_pool, METH_VARARGS, - "reset_memory_page_pool() -> Remove all memory pages"}, - {"reset_memory_breakpoint", (PyCFunction)vm_reset_memory_breakpoint, METH_VARARGS, - "reset_memory_breakpoint() -> Remove all memory breakpoints"}, - {"reset_code_bloc_pool", (PyCFunction)vm_reset_code_bloc_pool, METH_VARARGS, - "reset_code_bloc_pool() -> Remove all jitted blocks"}, - {"set_alarm", (PyCFunction)set_alarm, METH_VARARGS, - "set_alarm() -> Force a timer based alarm during a code emulation"}, - {"get_exception",(PyCFunction)vm_get_exception, METH_VARARGS, - "get_exception() -> Returns the VmMngr exception flags"}, - {"set_big_endian",(PyCFunction)vm_set_big_endian, METH_VARARGS, - "set_big_endian() -> Set the VmMngr to Big Endian"}, - {"set_little_endian",(PyCFunction)vm_set_little_endian, METH_VARARGS, - "set_little_endian() -> Set the VmMngr to Little Endian"}, - {"is_little_endian",(PyCFunction)vm_is_little_endian, METH_VARARGS, - "is_little_endian() -> Return True if the VmMngr is Little Endian"}, - {"get_memory_read",(PyCFunction)vm_get_memory_read, METH_VARARGS, - "get_memory_read() -> Retrieve last instruction READ access\n" - "This function is only valid in a memory breakpoint callback." - }, - {"get_memory_write",(PyCFunction)vm_get_memory_write, METH_VARARGS, - "get_memory_write() -> Retrieve last instruction WRITE access\n" - "This function is only valid in a memory breakpoint callback." 
- }, - {"reset_memory_access",(PyCFunction)vm_reset_memory_access, METH_VARARGS, - "reset_memory_access() -> Reset last memory READ/WRITE"}, - {"add_mem_read",(PyCFunction)py_add_mem_read, METH_VARARGS, - "add_mem_read(address, size) -> Add a READ access at @address of @size bytes"}, - {"add_mem_write",(PyCFunction)py_add_mem_write, METH_VARARGS, - "add_mem_write(address, size) -> Add a WRITE access at @address of @size bytes"}, - {"check_invalid_code_blocs",(PyCFunction)vm_check_invalid_code_blocs, METH_VARARGS, - "check_invalid_code_blocs() -> Set the AUTOMOD flag in exception in case of automodified code"}, - {"check_memory_breakpoint",(PyCFunction)vm_check_memory_breakpoint, METH_VARARGS, - "check_memory_breakpoint() -> Set the BREAKPOINT_MEMORY flag in exception in case of memory breakpoint occurred"}, - - {NULL} /* Sentinel */ -}; - -static int -VmMngr_init(VmMngr *self, PyObject *args, PyObject *kwds) -{ - memset(&(self->vm_mngr), 0, sizeof(self->vm_mngr)); - return 0; -} - -static PyGetSetDef VmMngr_getseters[] = { - {"vmmngr", - (getter)VmMngr_get_vmmngr, (setter)VmMngr_set_vmmngr, - "vmmngr object", - NULL}, - {NULL} /* Sentinel */ -}; - -static PyTypeObject VmMngrType = { - PyVarObject_HEAD_INIT(NULL, 0) - "VmMngr", /*tp_name*/ - sizeof(VmMngr), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - (destructor)VmMngr_dealloc,/*tp_dealloc*/ - 0, /*tp_print*/ - 0, /*tp_getattr*/ - 0, /*tp_setattr*/ - 0, /*tp_compare*/ - vm_dump, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - 0, /*tp_hash */ - 0, /*tp_call*/ - 0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ - "VmMngr object", /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - VmMngr_methods, /* tp_methods */ - VmMngr_members, /* tp_members */ - VmMngr_getseters, /* tp_getset */ - 0, /* tp_base */ - 
0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)VmMngr_init, /* tp_init */ - 0, /* tp_alloc */ - VmMngr_new, /* tp_new */ -}; - -static PyMethodDef VmMngr_Methods[] = { - {NULL, NULL, 0, NULL} /* Sentinel */ - -}; - -char vm_mngr_mod_docs[] = "vm_mngr module."; -char vm_mngr_mod_name[] = "VmMngr"; - - -MOD_INIT(VmMngr) -{ - PyObject *module; - - MOD_DEF(module, "VmMngr", "vm_mngr module", VmMngr_Methods); - - if (module == NULL) - return NULL; - - if (PyType_Ready(&VmMngrType) < 0) - return NULL; - - Py_INCREF(&VmMngrType); - if (PyModule_AddObject(module, "Vm", (PyObject *)&VmMngrType) < 0) - return NULL; - - return module; -} diff --git a/miasm2/jitter/vm_mngr_py.h b/miasm2/jitter/vm_mngr_py.h deleted file mode 100644 index e2e43c65..00000000 --- a/miasm2/jitter/vm_mngr_py.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef VM_MNGR_PY_H -#define VM_MNGR_PY_H - -#ifdef _WIN32 -#define SIGALRM 0 -#endif - -typedef struct { - PyObject_HEAD - PyObject *vmmngr; - vm_mngr_t vm_mngr; -} VmMngr; - - -#endif// VM_MNGR_PY_H diff --git a/miasm2/os_dep/__init__.py b/miasm2/os_dep/__init__.py deleted file mode 100644 index 6aa660d8..00000000 --- a/miasm2/os_dep/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"Operating System specific methods" diff --git a/miasm2/os_dep/common.py b/miasm2/os_dep/common.py deleted file mode 100644 index ed68185f..00000000 --- a/miasm2/os_dep/common.py +++ /dev/null @@ -1,168 +0,0 @@ -import os - -from future.utils import viewitems - -from miasm2.core.utils import force_bytes -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.core.utils import get_caller_name -from miasm2.core.utils import pck64, upck64 - -BASE_SB_PATH = "file_sb" - - -def get_str_ansi(jitter, ad_str, max_char=None): - l = 0 - tmp = ad_str - while ((max_char is None or l < max_char) and - jitter.vm.get_mem(tmp, 1) != b"\x00"): - tmp += 1 - l += 1 - return jitter.vm.get_mem(ad_str, l) - - -def get_str_unic(jitter, ad_str, 
max_char=None): - l = 0 - tmp = ad_str - while ((max_char is None or l < max_char) and - jitter.vm.get_mem(tmp, 2) != b"\x00\x00"): - tmp += 2 - l += 2 - s = jitter.vm.get_mem(ad_str, l) - s = s.decode("utf-16le") - return s - - -def set_str_ansi(value): - value = force_bytes(value) - return value + b"\x00" - - -def set_str_unic(value): - try: - value = value.decode() - except AttributeError: - pass - return value.encode("utf-16le") + b'\x00' * 2 - - -class heap(object): - - "Light heap simulation" - - addr = 0x20000000 - align = 0x1000 - size = 32 - mask = (1 << size) - 1 - - def next_addr(self, size): - """ - @size: the size to allocate - return the future checnk address - """ - ret = self.addr - self.addr = (self.addr + size + self.align - 1) - self.addr &= self.mask ^ (self.align - 1) - return ret - - def alloc(self, jitter, size, perm=PAGE_READ | PAGE_WRITE): - """ - @jitter: a jitter instance - @size: the size to allocate - @perm: permission flags (see vm_alloc doc) - """ - return self.vm_alloc(jitter.vm, size, perm) - - def vm_alloc(self, vm, size, perm=PAGE_READ | PAGE_WRITE): - """ - @vm: a VmMngr instance - @size: the size to allocate - @perm: permission flags (PAGE_READ, PAGE_WRITE, PAGE_EXEC or any `|` - combination of them); default is PAGE_READ|PAGE_WRITE - """ - addr = self.next_addr(size) - vm.add_memory_page( - addr, - perm, - b"\x00" * (size), - "Heap alloc by %s" % get_caller_name(2) - ) - return addr - - def get_size(self, vm, ptr): - """ - @vm: a VmMngr instance - @size: ptr to get the size of the associated allocation. - - `ptr` can be the base address of a previous allocation, or an address - within the allocated range. The size of the whole allocation is always - returned, regardless ptr is the base address or not. 
- """ - assert vm.is_mapped(ptr, 1) - data = vm.get_all_memory() - ptr_page = data.get(ptr, None) - if ptr_page is None: - for address, page_info in viewitems(data): - if address <= ptr < address + page_info["size"]: - ptr_page = page_info - break - else: - raise RuntimeError("Must never happen (unmapped but mark as mapped by API)") - return ptr_page["size"] - - -def windows_to_sbpath(path): - """Convert a Windows path to a valid filename within the sandbox - base directory. - - """ - path = [elt for elt in path.lower().replace('/', '_').split('\\') if elt] - return os.path.join(BASE_SB_PATH, *path) - - -def unix_to_sbpath(path): - """Convert a POSIX path to a valid filename within the sandbox - base directory. - - """ - path = [elt for elt in path.split('/') if elt] - return os.path.join(BASE_SB_PATH, *path) - -def get_fmt_args(fmt, cur_arg, get_str, get_arg_n): - idx = 0 - fmt = get_str(fmt) - if isinstance(fmt, bytes): - chars_format = b'%cdfsuxX' - char_percent = b'%' - char_string = b's' - output = b"" - else: - chars_format = u'%cdfsuxX' - char_percent = u'%' - char_string = u's' - output = u"" - - while True: - if idx == len(fmt): - break - char = fmt[idx:idx+1] - idx += 1 - if char == char_percent: - token = char_percent - while True: - char = fmt[idx:idx+1] - idx += 1 - token += char - if char in chars_format: - break - if char == char_percent: - output += char - continue - if token.endswith(char_string): - addr = get_arg_n(cur_arg) - arg = get_str(addr) - else: - arg = get_arg_n(cur_arg) - char = token % arg - cur_arg += 1 - output += char - return output diff --git a/miasm2/os_dep/linux/__init__.py b/miasm2/os_dep/linux/__init__.py deleted file mode 100644 index 4434ce50..00000000 --- a/miasm2/os_dep/linux/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Linux emulation diff --git a/miasm2/os_dep/linux/environment.py b/miasm2/os_dep/linux/environment.py deleted file mode 100644 index ae9c3317..00000000 --- a/miasm2/os_dep/linux/environment.py +++ /dev/null @@ 
-1,916 +0,0 @@ -from __future__ import print_function -from collections import namedtuple -import functools -import os -import struct -import termios - -from future.utils import viewitems - -from miasm2.core.interval import interval -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE - - -StatInfo = namedtuple("StatInfo", [ - "st_dev", "st_ino", "st_nlink", "st_mode", "st_uid", "st_gid", "st_rdev", - "st_size", "st_blksize", "st_blocks", "st_atime", "st_atimensec", - "st_mtime", "st_mtimensec", "st_ctime", "st_ctimensec" -]) -StatFSInfo = namedtuple("StatFSInfo", [ - "f_type", "f_bsize", "f_blocks", "f_bfree", "f_bavail", "f_files", - "f_ffree", "f_fsid", "f_namelen", "f_frsize", "f_flags", "f_spare", -]) - - -class FileDescriptor(object): - """Stand for a file descriptor on a system - - According to inode(7), following types are possibles: - - socket - - symbolic link - - regular file - - block device - - directory - - character device - - FIFO - """ - - # st_mode's file type - file_type = None - # st_mode's file mode (9 least bits are file permission bits) - file_mode = 0o0777 - # st_dev / st_rdev - cont_device_id = None - device_id = 0 - # inode number (st_ino) - inode = None - # Number of hardlink (st_nlink) - nlink = 0 - # Owner / group - uid = None - gid = None - # Size (st_size / st_blksize / st_blocks) - size = 0 - blksize = 0 - blocks = 0 - # Times - atime = 0 - atimensec = 0 - mtime = 0 - mtimensec = 0 - ctime = 0 - ctimensec = 0 - - def __init__(self, number): - self.number = number - self.is_closed = False - - def stat(self): - mode = self.file_type | self.file_mode - return StatInfo( - st_dev=self.cont_device_id, st_ino=self.inode, - st_nlink=self.nlink, st_mode=mode, - st_uid=self.uid, st_gid=self.gid, - st_rdev=self.device_id, st_size=self.size, - st_blksize=self.blksize, st_blocks=self.blocks, - st_atime=self.atime, st_atimensec=self.atimensec, - st_mtime=self.mtime, st_mtimensec=self.mtimensec, - st_ctime=self.ctime, st_ctimensec=self.ctimensec - 
) - - def close(self): - self.is_closed = True - - -class FileDescriptorCharDevice(FileDescriptor): - file_type = 0o0020000 # S_IFCHR - file_mode = 0o0620 - cont_device_id = 1 - device_id = 1 - - -class FileDescriptorSTDIN(FileDescriptorCharDevice): - """Special file descriptor standinf for STDIN""" - inode = 0 - - def read(self, count): - raise RuntimeError("Not implemented") - - -class FileDescriptorSTDOUT(FileDescriptorCharDevice): - """Special file descriptor standinf for STDOUT""" - inode = 1 - - def write(self, data): - print("[STDOUT] %s" % data.rstrip()) - - -class FileDescriptorSTDERR(FileDescriptorCharDevice): - """Special file descriptor standinf for STDERR""" - inode = 2 - - def write(self, data): - print("[STDERR] %s" % data.rstrip()) - - -class FileDescriptorDirectory(FileDescriptor): - """FileDescription designing a directory""" - - file_type = 0o0040000 # S_IFDIR - - def __init__(self, number, flags, filesystem, real_path): - super(FileDescriptorDirectory, self).__init__(number) - self.filesystem = filesystem - self.real_path = real_path - self.cur_listdir = None - self.flags = flags - - def listdir(self): - if self.cur_listdir is None: - self.cur_listdir = os.listdir(self.real_path) - while self.cur_listdir: - yield self.cur_listdir.pop() - - -class FileDescriptorRegularFile(FileDescriptor): - """FileDescriptor designing a regular file""" - - file_type = 0o0100000 # S_IFREG - - def __init__(self, number, flags, filesystem, real_fd): - super(FileDescriptorRegularFile, self).__init__(number) - self.flags = flags - self.filesystem = filesystem - self.real_fd = real_fd - - def write(self, data): - raise RuntimeError("Not implemented") - - def read(self, count): - return os.read(self.real_fd, count) - - def close(self): - super(FileDescriptorRegularFile, self).close() - return os.close(self.real_fd) - - def lseek(self, offset, whence): - return os.lseek(self.real_fd, offset, whence) # SEEK_SET - - def tell(self): - return self.lseek(0, 1) # SEEK_CUR - - 
def seek(self, offset): - return self.lseek(offset, 0) # SEEK_SET - - -class FileDescriptorSocket(FileDescriptor): - """FileDescription standing for a socket""" - - file_type = 0o0140000 # S_IFSOCK - - def __init__(self, number, family, type_, protocol): - super(FileDescriptorSocket, self).__init__(number) - self.family = family - self.type_ = type_ - self.protocol = protocol - - -class FileSystem(object): - """File system abstraction - Provides standard operations on the filesystem, (a bit like FUSE) - - API using FileSystem only used sandbox-side path. FileSystem should be the - only object able to interact with real path, outside the sandbox. - - Thus, if `resolve_path` is correctly implemented and used, it should not be - possible to modify files outside the sandboxed path - """ - - device_id = 0x1234 # ID of device containing file (stat.st_dev) - blocksize = 0x1000 # Size of block on this filesystem - f_type = 0xef53 # (Type of filesystem) EXT4_SUPER_MAGIC - nb_total_block = 0x1000 - nb_free_block = 0x100 - nb_avail_block = nb_free_block # Available to unprivileged user - nb_total_fnode = 100 # Total file nodes in filesystem - nb_free_fnode = 50 - max_filename_len = 256 - fragment_size = 0 - mount_flags = 0 - - def __init__(self, base_path, linux_env): - self.base_path = base_path - self.linux_env = linux_env - self.passthrough = [] - self.path_to_inode = {} # Real path (post-resolution) -> inode number - - def resolve_path(self, path, follow_link=True): - """Resolve @path to the corresponding sandboxed path""" - # Remove '../', etc. 
- path = os.path.normpath(path) - - # Passthrough - for passthrough in self.passthrough: - if hasattr(passthrough, "match"): - if passthrough.match(path): - return path - elif passthrough == path: - return path - - # Remove leading '/' if any (multiple '//' are handled by 'abspath' - if path.startswith(os.path.sep): - path = path[1:] - - base_path = os.path.abspath(self.base_path) - out_path = os.path.join(base_path, path) - assert out_path.startswith(base_path + os.path.sep) - if os.path.islink(out_path): - link_target = os.readlink(out_path) - # Link can be absolute or relative -> absolute - link = os.path.normpath(os.path.join(os.path.dirname(path), link_target)) - if follow_link: - out_path = self.resolve_path(link) - else: - out_path = link - return out_path - - def get_path_inode(self, real_path): - inode = self.path_to_inode.setdefault(real_path, len(self.path_to_inode)) - return inode - - def exists(self, path): - sb_path = self.resolve_path(path) - return os.path.exists(sb_path) - - def readlink(self, path): - sb_path = self.resolve_path(path, follow_link=False) - if not os.path.islink(sb_path): - return None - return os.readlink(sb_path) - - def statfs(self): - return StatFSInfo( - f_type=self.f_type, f_bsize=self.blocksize, - f_blocks=self.nb_total_block, f_bfree=self.nb_free_block, - f_bavail=self.nb_avail_block, f_files=self.nb_total_fnode, - f_ffree=self.nb_free_fnode, f_fsid=self.device_id, - f_namelen=self.max_filename_len, - f_frsize=self.fragment_size, f_flags=self.mount_flags, f_spare=0) - - def getattr_(self, path, follow_link=True): - sb_path = self.resolve_path(path, follow_link=follow_link) - flags = self.linux_env.O_RDONLY - if os.path.isdir(sb_path): - flags |= self.linux_env.O_DIRECTORY - - fd = self.open_(path, flags, follow_link=follow_link) - info = self.linux_env.fstat(fd) - self.linux_env.close(fd) - return info - - def open_(self, path, flags, follow_link=True): - path = self.resolve_path(path, follow_link=follow_link) - if not 
os.path.exists(path): - # ENOENT (No such file or directory) - return -1 - fd = self.linux_env.next_fd() - acc_mode = flags & self.linux_env.O_ACCMODE - - if os.path.isdir(path): - assert flags & self.linux_env.O_DIRECTORY == self.linux_env.O_DIRECTORY - if acc_mode == self.linux_env.O_RDONLY: - fdesc = FileDescriptorDirectory(fd, flags, self, path) - else: - raise RuntimeError("Not implemented") - elif os.path.isfile(path): - if acc_mode == os.O_RDONLY: - # Read only - real_fd = os.open(path, os.O_RDONLY) - else: - raise RuntimeError("Not implemented") - fdesc = FileDescriptorRegularFile(fd, flags, self, real_fd) - - elif os.path.islink(path): - raise RuntimeError("Not implemented") - else: - raise RuntimeError("Unknown file type for %r" % path) - - self.linux_env.file_descriptors[fd] = fdesc - # Set stat info - fdesc.cont_device_id = self.device_id - fdesc.inode = self.get_path_inode(path) - fdesc.uid = self.linux_env.user_uid - fdesc.gid = self.linux_env.user_gid - size = os.path.getsize(path) - fdesc.size = size - fdesc.blksize = self.blocksize - fdesc.blocks = (size + ((512 - (size % 512)) % 512)) // 512 - return fd - - -class Networking(object): - """Network abstraction""" - - def __init__(self, linux_env): - self.linux_env = linux_env - - def socket(self, family, type_, protocol): - fd = self.linux_env.next_fd() - fdesc = FileDescriptorSocket(fd, family, type_, protocol) - self.linux_env.file_descriptors[fd] = fdesc - return fd - - -class LinuxEnvironment(object): - """A LinuxEnvironment regroups information to simulate a Linux-like - environment""" - - # To be overridden - platform_arch = None - - # User information - user_uid = 1000 - user_euid = 1000 - user_gid = 1000 - user_egid = 1000 - user_name = b"user" - - # Memory mapping information - brk_current = 0x74000000 - mmap_current = 0x75000000 - - # System information - sys_sysname = b"Linux" - sys_nodename = b"user-pc" - sys_release = b"4.13.0-19-generic" - sys_version = b"#22-Ubuntu" - sys_machine = 
None - - # Filesystem - filesystem_base = "file_sb" - file_descriptors = None - - # Current process - process_tid = 1000 - process_pid = 1000 - - # Syscall restrictions - ioctl_allowed = None # list of (fd, cmd), None value for wildcard - ioctl_disallowed = None # list of (fd, cmd), None value for wildcard - - # Time - base_time = 1531900000 - - # Arch specific constant - O_ACCMODE = None - O_CLOEXEC = None - O_DIRECTORY = None - O_LARGEFILE = None - O_NONBLOCK = None - O_RDONLY = None - - def __init__(self): - stdin = FileDescriptorSTDIN(0) - stdout = FileDescriptorSTDOUT(1) - stderr = FileDescriptorSTDERR(2) - for std in [stdin, stdout, stderr]: - std.uid = self.user_uid - std.gid = self.user_gid - self.file_descriptors = { - 0: stdin, - 1: stdout, - 2: stderr, - } - self.ioctl_allowed = [ - (0, termios.TCGETS), - (0, termios.TIOCGWINSZ), - (0, termios.TIOCSWINSZ), - (1, termios.TCGETS), - (1, termios.TIOCGWINSZ), - (1, termios.TIOCSWINSZ), - ] - self.ioctl_disallowed = [ - (2, termios.TCGETS), - (0, termios.TCSETSW), - ] - self.filesystem = FileSystem(self.filesystem_base, self) - self.network = Networking(self) - - def next_fd(self): - return len(self.file_descriptors) - - def clock_gettime(self): - out = self.base_time - self.base_time += 1 - return out - - def open_(self, path, flags, follow_link=True): - """Stub for 'open' syscall""" - return self.filesystem.open_(path, flags, follow_link=follow_link) - - def socket(self, family, type_, protocol): - """Stub for 'socket' syscall""" - return self.network.socket(family, type_, protocol) - - def fstat(self, fd): - """Get file status through fd""" - fdesc = self.file_descriptors.get(fd) - if fdesc is None: - return None - return fdesc.stat() - - def stat(self, path): - """Get file status through path""" - return self.filesystem.getattr_(path) - - def lstat(self, path): - """Get file status through path (not following links)""" - return self.filesystem.getattr_(path, follow_link=False) - - def close(self, fd): - 
"""Stub for 'close' syscall""" - fdesc = self.file_descriptors.get(fd) - if fdesc is None: - return None - return fdesc.close() - - def write(self, fd, data): - """Stub for 'write' syscall""" - fdesc = self.file_descriptors.get(fd) - if fdesc is None: - return None - fdesc.write(data) - return len(data) - - def read(self, fd, count): - """Stub for 'read' syscall""" - fdesc = self.file_descriptors.get(fd) - if fdesc is None: - return None - return fdesc.read(count) - - def getdents(self, fd, count, packing_callback): - """Stub for 'getdents' syscall - - 'getdents64' must be handled by caller (only the structure layout is - modified) - - @fd: getdents' fd argument - @count: getdents' count argument - @packing_callback(cur_len, d_ino, d_type, name) -> entry - """ - fdesc = self.file_descriptors[fd] - if not isinstance(fdesc, FileDescriptorDirectory): - raise RuntimeError("Not implemented") - - out = "" - # fdesc.listdir continues from where it stopped - for name in fdesc.listdir(): - d_ino = 1 # Not the real one - d_type = 0 # DT_UNKNOWN (getdents(2) "All applications must properly - # handle a return of DT_UNKNOWN.") - entry = packing_callback(len(out), d_ino, d_type, name) - - if len(out) + len(entry) > count: - # Report to a further call - fdesc.cur_listdir.append(name) - break - out = out + entry - return out - - def ioctl(self, fd, cmd, arg): - """Stub for 'ioctl' syscall - Return the list of element to pack back depending on target ioctl - If the ioctl is disallowed, return False - """ - allowed = False - disallowed = False - for test in [(fd, cmd), (None, cmd), (fd, None)]: - if test in self.ioctl_allowed: - allowed = True - if test in self.ioctl_disallowed: - disallowed = True - - if allowed and disallowed: - raise ValueError("fd: %x, cmd: %x is allowed and disallowed" % (fd, cmd)) - - if allowed: - if cmd == termios.TCGETS: - return 0, 0, 0, 0 - elif cmd == termios.TIOCGWINSZ: - # struct winsize - # { - # unsigned short ws_row; /* rows, in characters */ - # 
unsigned short ws_col; /* columns, in characters */ - # unsigned short ws_xpixel; /* horizontal size, pixels */ - # unsigned short ws_ypixel; /* vertical size, pixels */ - # }; - return 1000, 360, 1000, 1000 - elif cmd == termios.TIOCSWINSZ: - # Ignore it - return - else: - raise RuntimeError("Not implemented") - - elif disallowed: - return False - - else: - raise KeyError("Unknown ioctl fd:%x cmd:%x" % (fd, cmd)) - - def mmap(self, addr, len_, prot, flags, fd, off, vmmngr): - """Stub for 'mmap' syscall - - 'mmap2' must be implemented by calling this function with off * 4096 - """ - if addr == 0: - addr = self.mmap_current - self.mmap_current += (len_ + 0x1000) & ~0xfff - - all_mem = vmmngr.get_all_memory() - mapped = interval( - [ - (start, start + info["size"] - 1) - for start, info in viewitems(all_mem) - ] - ) - - MAP_FIXED = 0x10 - if flags & MAP_FIXED: - # Alloc missing and override - missing = interval([(addr, addr + len_ - 1)]) - mapped - for start, stop in missing: - vmmngr.add_memory_page( - start, - PAGE_READ|PAGE_WRITE, - b"\x00" * (stop - start + 1), - "mmap allocated" - ) - else: - # Find first candidate segment nearby addr - for start, stop in mapped: - if stop < addr: - continue - rounded = (stop + 1 + 0x1000) & ~0xfff - if (interval([(rounded, rounded + len_)]) & mapped).empty: - addr = rounded - break - else: - assert (interval([(addr, addr + len_)]) & mapped).empty - - vmmngr.add_memory_page( - addr, - PAGE_READ|PAGE_WRITE, - b"\x00" * len_, - "mmap allocated" - ) - - - if fd == 0xffffffff: - if off != 0: - raise RuntimeError("Not implemented") - data = b"\x00" * len_ - else: - fdesc = self.file_descriptors[fd] - cur_pos = fdesc.tell() - fdesc.seek(off) - data = fdesc.read(len_) - fdesc.seek(cur_pos) - - vmmngr.set_mem(addr, data) - return addr - - def brk(self, addr, vmmngr): - """Stub for 'brk' syscall""" - if addr == 0: - addr = self.brk_current - else: - all_mem = vmmngr.get_all_memory() - mapped = interval( - [ - (start, start + info["size"] 
- 1) - for start, info in viewitems(all_mem) - ] - ) - - # Alloc missing and override - missing = interval([(self.brk_current, addr)]) - mapped - for start, stop in missing: - vmmngr.add_memory_page( - start, - PAGE_READ|PAGE_WRITE, - b"\x00" * (stop - start + 1), - "BRK" - ) - - self.brk_current = addr - return addr - - -class LinuxEnvironment_x86_64(LinuxEnvironment): - platform_arch = b"x86_64" - sys_machine = b"x86_64" - - O_ACCMODE = 0x3 - O_CLOEXEC = 0x80000 - O_DIRECTORY = 0x10000 - O_LARGEFILE = 0x8000 - O_NONBLOCK = 0x800 - O_RDONLY = 0 - - -class LinuxEnvironment_arml(LinuxEnvironment): - platform_arch = b"arml" - sys_machine = b"arml" - - O_ACCMODE = 0x3 - O_CLOEXEC = 0x80000 - O_DIRECTORY = 0x4000 - O_LARGEFILE = 0x20000 - O_NONBLOCK = 0x800 - O_RDONLY = 0 - - # ARM specific - tls = 0 - # get_tls: __kuser_helper_version >= 1 - # cmpxchg: __kuser_helper_version >= 2 - # memory_barrier: __kuser_helper_version >= 3 - kuser_helper_version = 3 - - -class AuxVec(object): - """Auxiliary vector abstraction, filled with default values - (mainly based on https://lwn.net/Articles/519085) - - # Standard usage - >>> auxv = AuxVec(elf_base_addr, cont_target.entry_point, linux_env) - - # Enable AT_SECURE - >>> auxv = AuxVec(..., AuxVec.AT_SECURE=1) - # Modify AT_RANDOM - >>> auxv = AuxVec(..., AuxVec.AT_RANDOM="\x00"*0x10) - - # Using AuxVec instance for stack preparation - # First, fill memory with vectors data - >>> for AT_number, data in auxv.data_to_map(): - dest_ptr = ... - copy_to_dest(data, dest_ptr) - auxv.ptrs[AT_number] = dest_ptr - # Then, get the key: value (with value being sometime a pointer) - >>> for auxid, auxval in auxv.iteritems(): - ... 
- """ - - AT_PHDR = 3 - AT_PHNUM = 5 - AT_PAGESZ = 6 - AT_ENTRY = 9 - AT_UID = 11 - AT_EUID = 12 - AT_GID = 13 - AT_EGID = 14 - AT_PLATFORM = 15 - AT_HWCAP = 16 - AT_SECURE = 23 - AT_RANDOM = 25 - AT_SYSINFO_EHDR = 33 - - def __init__(self, elf_phdr_vaddr, entry_point, linux_env, **kwargs): - """Instantiate an AuxVec, with required elements: - - elf_phdr_vaddr: virtual address of the ELF's PHDR in memory - - entry_point: virtual address of the ELF entry point - - linux_env: LinuxEnvironment instance, used to provides some of the - option values - - Others options can be overridden by named arguments - - """ - self.info = { - self.AT_PHDR: elf_phdr_vaddr, - self.AT_PHNUM: 9, - self.AT_PAGESZ: 0x1000, - self.AT_ENTRY: entry_point, - self.AT_UID: linux_env.user_uid, - self.AT_EUID: linux_env.user_euid, - self.AT_GID: linux_env.user_gid, - self.AT_EGID: linux_env.user_egid, - self.AT_PLATFORM: linux_env.platform_arch, - self.AT_HWCAP: 0, - self.AT_SECURE: 0, - self.AT_RANDOM: b"\x00" * 0x10, - # vDSO is not mandatory - self.AT_SYSINFO_EHDR: None, - } - self.info.update(kwargs) - self.ptrs = {} # info key -> corresponding virtual address - - def data_to_map(self): - """Iterator on (AT_number, data) - Once the data has been mapped, the corresponding ptr must be set in - 'self.ptrs[AT_number]' - """ - for AT_number in [self.AT_PLATFORM, self.AT_RANDOM]: - yield (AT_number, self.info[AT_number]) - - def iteritems(self): - """Iterator on auxiliary vector id and values""" - for AT_number, value in viewitems(self.info): - if AT_number in self.ptrs: - value = self.ptrs[AT_number] - if value is None: - # AT to ignore - continue - yield (AT_number, value) - - items = iteritems - -def prepare_loader_x86_64(jitter, argv, envp, auxv, linux_env, - hlt_address=0x13371acc): - """Fill the environment with enough information to run a linux loader - - @jitter: Jitter instance - @argv: list of strings - @envp: dict of environment variables names to their values - @auxv: AuxVec instance - 
@hlt_address (default to 0x13371acc): stopping address - - Example of use: - >>> jitter = machine.jitter() - >>> jitter.init_stack() - >>> linux_env = LinuxEnvironment_x86_64() - >>> argv = ["/bin/ls", "-lah"] - >>> envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} - >>> auxv = AuxVec(elf_base_addr, entry_point, linux_env) - >>> prepare_loader_x86_64(jitter, argv, envp, auxv, linux_env) - # One may want to enable syscall handling here - # The program can now run from the loader - >>> jitter.init_run(ld_entry_point) - >>> jitter.continue_run() - """ - # Stack layout looks like - # [data] - # - auxv values - # - envp name=value - # - argv arguments - # [auxiliary vector] - # [environment pointer] - # [argument vector] - - for AT_number, data in auxv.data_to_map(): - data += b"\x00" - jitter.cpu.RSP -= len(data) - ptr = jitter.cpu.RSP - jitter.vm.set_mem(ptr, data) - auxv.ptrs[AT_number] = ptr - - env_ptrs = [] - for name, value in viewitems(envp): - env = b"%s=%s\x00" % (name, value) - jitter.cpu.RSP -= len(env) - ptr = jitter.cpu.RSP - jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - - argv_ptrs = [] - for arg in argv: - arg += b"\x00" - jitter.cpu.RSP -= len(arg) - ptr = jitter.cpu.RSP - jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - jitter.push_uint64_t(hlt_address) - jitter.push_uint64_t(0) - jitter.push_uint64_t(0) - for auxid, auxval in viewitems(auxv): - jitter.push_uint64_t(auxval) - jitter.push_uint64_t(auxid) - jitter.push_uint64_t(0) - for ptr in reversed(env_ptrs): - jitter.push_uint64_t(ptr) - jitter.push_uint64_t(0) - for ptr in reversed(argv_ptrs): - jitter.push_uint64_t(ptr) - jitter.push_uint64_t(len(argv)) - - - -def _arml__kuser_get_tls(linux_env, jitter): - # __kuser_get_tls - jitter.pc = jitter.cpu.LR - jitter.cpu.R0 = linux_env.tls - return True - -def _arml__kuser_cmpxchg(jitter): - oldval = jitter.cpu.R0 - newval = jitter.cpu.R1 - ptr = jitter.cpu.R2 - - value = struct.unpack(">> jitter = machine.jitter() - >>> 
jitter.init_stack() - >>> linux_env = LinuxEnvironment_arml() - >>> argv = ["/bin/ls", "-lah"] - >>> envp = {"PATH": "/usr/local/bin", "USER": linux_env.user_name} - >>> auxv = AuxVec(elf_base_addr, entry_point, linux_env) - >>> prepare_loader_arml(jitter, argv, envp, auxv, linux_env) - # One may want to enable syscall handling here - # The program can now run from the loader - >>> jitter.init_run(ld_entry_point) - >>> jitter.continue_run() - """ - # Stack layout looks like - # [data] - # - auxv values - # - envp name=value - # - argv arguments - # [auxiliary vector] - # [environment pointer] - # [argument vector] - - for AT_number, data in auxv.data_to_map(): - data += b"\x00" - jitter.cpu.SP -= len(data) - ptr = jitter.cpu.SP - jitter.vm.set_mem(ptr, data) - auxv.ptrs[AT_number] = ptr - - env_ptrs = [] - for name, value in viewitems(envp): - env = b"%s=%s\x00" % (name, value) - jitter.cpu.SP -= len(env) - ptr = jitter.cpu.SP - jitter.vm.set_mem(ptr, env) - env_ptrs.append(ptr) - - argv_ptrs = [] - for arg in argv: - arg += b"\x00" - jitter.cpu.SP -= len(arg) - ptr = jitter.cpu.SP - jitter.vm.set_mem(ptr, arg) - argv_ptrs.append(ptr) - - jitter.push_uint32_t(hlt_address) - jitter.push_uint32_t(0) - jitter.push_uint32_t(0) - for auxid, auxval in viewitems(auxv): - jitter.push_uint32_t(auxval) - jitter.push_uint32_t(auxid) - jitter.push_uint32_t(0) - for ptr in reversed(env_ptrs): - jitter.push_uint32_t(ptr) - jitter.push_uint32_t(0) - for ptr in reversed(argv_ptrs): - jitter.push_uint32_t(ptr) - jitter.push_uint32_t(len(argv)) - - # Add kernel user helpers - # from Documentation/arm/kernel_user_helpers.txt - - if linux_env.kuser_helper_version >= 1: - jitter.add_breakpoint( - 0xFFFF0FE0, - functools.partial(_arml__kuser_get_tls, linux_env) - ) - - if linux_env.kuser_helper_version >= 2: - jitter.add_breakpoint(0XFFFF0FC0, _arml__kuser_cmpxchg) - - if linux_env.kuser_helper_version >= 3: - jitter.add_breakpoint(0xFFFF0FA0, _arml__kuser_memory_barrier) - - 
jitter.add_breakpoint(0xffff0ffc, _arml__kuser_helper_version) diff --git a/miasm2/os_dep/linux/syscall.py b/miasm2/os_dep/linux/syscall.py deleted file mode 100644 index cd4de49f..00000000 --- a/miasm2/os_dep/linux/syscall.py +++ /dev/null @@ -1,1040 +0,0 @@ -from builtins import range -import fcntl -import functools -import logging -import struct -import termios - -from miasm2.jitter.csts import EXCEPT_PRIV_INSN, EXCEPT_INT_XX - -log = logging.getLogger('syscalls') -hnd = logging.StreamHandler() -hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) -log.addHandler(hnd) -log.setLevel(logging.WARNING) - - -def _dump_struct_stat_x86_64(info): - data = struct.pack( - "QQQIIIIQQQQQQQQQQQQQ", - info.st_dev, - info.st_ino, - info.st_nlink, - info.st_mode, - info.st_uid, - info.st_gid, - 0, # 32 bit padding - info.st_rdev, - info.st_size, - info.st_blksize, - info.st_blocks, - info.st_atime, - info.st_atimensec, - info.st_mtime, - info.st_mtimensec, - info.st_ctime, - info.st_ctimensec, - 0, # unused - 0, # unused - 0, # unused - ) - return data - - -def _dump_struct_stat_arml(info): - data = struct.pack( - "QIIIIIIIIIIIIIIIIII", - info.st_dev, - 0, # pad - info.st_ino, - info.st_mode, - info.st_nlink, - info.st_uid, - info.st_gid, - info.st_rdev, - info.st_size, - info.st_blksize, - info.st_blocks, - info.st_atime, - info.st_atimensec, - info.st_mtime, - info.st_mtimensec, - info.st_ctime, - info.st_ctimensec, - 0, # unused - 0, # unused - ) - return data - - -def sys_x86_64_rt_sigaction(jitter, linux_env): - # Parse arguments - sig, act, oact, sigsetsize = jitter.syscall_args_systemv(4) - log.debug("sys_rt_sigaction(%x, %x, %x, %x)", sig, act, oact, sigsetsize) - - # Stub - if oact != 0: - # Return an empty old action - jitter.vm.set_mem(oact, b"\x00" * sigsetsize) - jitter.syscall_ret_systemv(0) - - -def sys_generic_brk(jitter, linux_env): - # Parse arguments - addr, = jitter.syscall_args_systemv(1) - log.debug("sys_brk(%d)", addr) - - # Stub - 
jitter.syscall_ret_systemv(linux_env.brk(addr, jitter.vm)) - - -def sys_x86_64_newuname(jitter, linux_env): - # struct utsname { - # char sysname[]; /* Operating system name (e.g., "Linux") */ - # char nodename[]; /* Name within "some implementation-defined - # network" */ - # char release[]; /* Operating system release (e.g., "2.6.28") */ - # char version[]; /* Operating system version */ - # char machine[]; /* Hardware identifier */ - # } - - # Parse arguments - nameptr, = jitter.syscall_args_systemv(1) - log.debug("sys_newuname(%x)", nameptr) - - # Stub - info = [ - linux_env.sys_sysname, - linux_env.sys_nodename, - linux_env.sys_release, - linux_env.sys_version, - linux_env.sys_machine - ] - # TODO: Elements start at 0x41 multiples on my tests... - output = b"" - for elem in info: - output += elem - output += b"\x00" * (0x41 - len(elem)) - jitter.vm.set_mem(nameptr, output) - jitter.syscall_ret_systemv(0) - - -def sys_arml_newuname(jitter, linux_env): - # struct utsname { - # char sysname[]; /* Operating system name (e.g., "Linux") */ - # char nodename[]; /* Name within "some implementation-defined - # network" */ - # char release[]; /* Operating system release (e.g., "2.6.28") */ - # char version[]; /* Operating system version */ - # char machine[]; /* Hardware identifier */ - # } - - # Parse arguments - nameptr, = jitter.syscall_args_systemv(1) - log.debug("sys_newuname(%x)", nameptr) - - # Stub - info = [ - linux_env.sys_sysname, - linux_env.sys_nodename, - linux_env.sys_release, - linux_env.sys_version, - linux_env.sys_machine - ] - # TODO: Elements start at 0x41 multiples on my tests... 
- output = b"" - for elem in info: - output += elem - output += b"\x00" * (0x41 - len(elem)) - jitter.vm.set_mem(nameptr, output) - jitter.syscall_ret_systemv(0) - - -def sys_generic_access(jitter, linux_env): - # Parse arguments - pathname, mode = jitter.syscall_args_systemv(2) - rpathname = jitter.get_str_ansi(pathname) - rmode = mode - if mode == 1: - rmode = "F_OK" - elif mode == 2: - rmode = "R_OK" - log.debug("sys_access(%s, %s)", rpathname, rmode) - - # Stub - # Do not check the mode - if linux_env.filesystem.exists(rpathname): - jitter.syscall_ret_systemv(0) - else: - jitter.syscall_ret_systemv(-1) - - -def sys_x86_64_openat(jitter, linux_env): - # Parse arguments - dfd, filename, flags, mode = jitter.syscall_args_systemv(4) - rpathname = jitter.get_str_ansi(filename) - log.debug("sys_openat(%x, %r, %x, %x)", dfd, rpathname, flags, mode) - - # Stub - # flags, openat particularity over 'open' are ignored - jitter.syscall_ret_systemv(linux_env.open_(rpathname, flags)) - - -def sys_x86_64_newstat(jitter, linux_env): - # Parse arguments - filename, statbuf = jitter.syscall_args_systemv(2) - rpathname = jitter.get_str_ansi(filename) - log.debug("sys_newstat(%r, %x)", rpathname, statbuf) - - # Stub - if linux_env.filesystem.exists(rpathname): - info = linux_env.stat(rpathname) - data = _dump_struct_stat_x86_64(info) - jitter.vm.set_mem(statbuf, data) - jitter.syscall_ret_systemv(0) - else: - # ENOENT (No such file or directory) - jitter.syscall_ret_systemv(-1) - - -def sys_arml_stat64(jitter, linux_env): - # Parse arguments - filename, statbuf = jitter.syscall_args_systemv(2) - rpathname = jitter.get_str_ansi(filename) - log.debug("sys_newstat(%r, %x)", rpathname, statbuf) - - # Stub - if linux_env.filesystem.exists(rpathname): - info = linux_env.stat(rpathname) - data = _dump_struct_stat_arml(info) - jitter.vm.set_mem(statbuf, data) - jitter.syscall_ret_systemv(0) - else: - # ENOENT (No such file or directory) - jitter.syscall_ret_systemv(-1) - - -def 
sys_x86_64_writev(jitter, linux_env): - # Parse arguments - fd, vec, vlen = jitter.syscall_args_systemv(3) - log.debug("sys_writev(%d, %d, %x)", fd, vec, vlen) - - # Stub - fdesc = linux_env.file_descriptors[fd] - for iovec_num in range(vlen): - # struct iovec { - # void *iov_base; /* Starting address */ - # size_t iov_len; /* Number of bytes to transfer */ - # }; - iovec = jitter.vm.get_mem(vec + iovec_num * 8 * 2, 8*2) - iov_base, iov_len = struct.unpack("QQ", iovec) - fdesc.write(jitter.get_str_ansi(iov_base)[:iov_len]) - - jitter.syscall_ret_systemv(vlen) - - -def sys_arml_writev(jitter, linux_env): - # Parse arguments - fd, vec, vlen = jitter.syscall_args_systemv(3) - log.debug("sys_writev(%d, %d, %x)", fd, vec, vlen) - - # Stub - fdesc = linux_env.file_descriptors[fd] - for iovec_num in range(vlen): - # struct iovec { - # void *iov_base; /* Starting address */ - # size_t iov_len; /* Number of bytes to transfer */ - # }; - iovec = jitter.vm.get_mem(vec + iovec_num * 4 * 2, 4*2) - iov_base, iov_len = struct.unpack("II", iovec) - fdesc.write(jitter.get_str_ansi(iov_base)[:iov_len]) - - jitter.syscall_ret_systemv(vlen) - - -def sys_generic_exit_group(jitter, linux_env): - # Parse arguments - status, = jitter.syscall_args_systemv(1) - log.debug("sys_exit_group(%d)", status) - - # Stub - log.debug("Exit with status code %d", status) - jitter.run = False - - -def sys_generic_read(jitter, linux_env): - # Parse arguments - fd, buf, count = jitter.syscall_args_systemv(3) - log.debug("sys_read(%d, %x, %x)", fd, buf, count) - - # Stub - data = linux_env.read(fd, count) - jitter.vm.set_mem(buf, data) - jitter.syscall_ret_systemv(len(data)) - - -def sys_x86_64_fstat(jitter, linux_env): - # Parse arguments - fd, statbuf = jitter.syscall_args_systemv(2) - log.debug("sys_fstat(%d, %x)", fd, statbuf) - - # Stub - info = linux_env.fstat(fd) - data = _dump_struct_stat_x86_64(info) - jitter.vm.set_mem(statbuf, data) - jitter.syscall_ret_systemv(0) - - -def 
sys_arml_fstat64(jitter, linux_env): - # Parse arguments - fd, statbuf = jitter.syscall_args_systemv(2) - log.debug("sys_fstat(%d, %x)", fd, statbuf) - - # Stub - info = linux_env.fstat(fd) - data = _dump_struct_stat_arml(info) - jitter.vm.set_mem(statbuf, data) - jitter.syscall_ret_systemv(0) - - -def sys_generic_mmap(jitter, linux_env): - # Parse arguments - addr, len_, prot, flags, fd, off = jitter.syscall_args_systemv(6) - log.debug("sys_mmap(%x, %x, %x, %x, %x, %x)", addr, len_, prot, flags, fd, off) - - # Stub - addr = linux_env.mmap(addr, len_, prot & 0xFFFFFFFF, flags & 0xFFFFFFFF, - fd & 0xFFFFFFFF, off, jitter.vm) - jitter.syscall_ret_systemv(addr) - - -def sys_generic_mmap2(jitter, linux_env): - # Parse arguments - addr, len_, prot, flags, fd, off = jitter.syscall_args_systemv(6) - log.debug("sys_mmap2(%x, %x, %x, %x, %x, %x)", addr, len_, prot, flags, fd, off) - off = off * 4096 - - # Stub - addr = linux_env.mmap(addr, len_, prot & 0xFFFFFFFF, flags & 0xFFFFFFFF, - fd & 0xFFFFFFFF, off, jitter.vm) - jitter.syscall_ret_systemv(addr) - - -def sys_generic_mprotect(jitter, linux_env): - # Parse arguments - start, len_, prot = jitter.syscall_args_systemv(3) - assert jitter.vm.is_mapped(start, len_) - log.debug("sys_mprotect(%x, %x, %x)", start, len_, prot) - - # Do nothing - jitter.syscall_ret_systemv(0) - - -def sys_generic_close(jitter, linux_env): - # Parse arguments - fd, = jitter.syscall_args_systemv(1) - log.debug("sys_close(%x)", fd) - - # Stub - linux_env.close(fd) - jitter.syscall_ret_systemv(0) - - -def sys_x86_64_arch_prctl(jitter, linux_env): - # Parse arguments - code_name = { - 0x1001: "ARCH_SET_GS", - 0x1002: "ARCH_SET_FS", - 0x1003: "ARCH_GET_FS", - 0x1004: "ARCH_GET_GS", - } - code = jitter.cpu.RDI - rcode = code_name[code] - addr = jitter.cpu.RSI - log.debug("sys_arch_prctl(%s, %x)", rcode, addr) - - if code == 0x1002: - jitter.cpu.set_segm_base(jitter.cpu.FS, addr) - else: - raise RuntimeError("Not implemented") - jitter.cpu.RAX = 0 - - 
-def sys_x86_64_set_tid_address(jitter, linux_env): - # Parse arguments - tidptr = jitter.cpu.RDI - # clear_child_tid = tidptr - log.debug("sys_set_tid_address(%x)", tidptr) - - jitter.cpu.RAX = linux_env.process_tid - - -def sys_x86_64_set_robust_list(jitter, linux_env): - # Parse arguments - head = jitter.cpu.RDI - len_ = jitter.cpu.RSI - # robust_list = head - log.debug("sys_set_robust_list(%x, %x)", head, len_) - jitter.cpu.RAX = 0 - -def sys_x86_64_rt_sigprocmask(jitter, linux_env): - # Parse arguments - how = jitter.cpu.RDI - nset = jitter.cpu.RSI - oset = jitter.cpu.RDX - sigsetsize = jitter.cpu.R10 - log.debug("sys_rt_sigprocmask(%x, %x, %x, %x)", how, nset, oset, sigsetsize) - if oset != 0: - raise RuntimeError("Not implemented") - jitter.cpu.RAX = 0 - - -def sys_x86_64_prlimit64(jitter, linux_env): - # Parse arguments - pid = jitter.cpu.RDI - resource = jitter.cpu.RSI - new_rlim = jitter.cpu.RDX - if new_rlim != 0: - raise RuntimeError("Not implemented") - old_rlim = jitter.cpu.R10 - log.debug("sys_prlimit64(%x, %x, %x, %x)", pid, resource, new_rlim, - old_rlim) - - # Stub - if resource == 3: - # RLIMIT_STACK - jitter.vm.set_mem(old_rlim, - struct.pack("QQ", - 0x100000, - 0x7fffffffffffffff, # RLIM64_INFINITY - )) - else: - raise RuntimeError("Not implemented") - jitter.cpu.RAX = 0 - - -def sys_x86_64_statfs(jitter, linux_env): - # Parse arguments - pathname = jitter.cpu.RDI - buf = jitter.cpu.RSI - rpathname = jitter.get_str_ansi(pathname) - log.debug("sys_statfs(%r, %x)", rpathname, buf) - - # Stub - if not linux_env.filesystem.exists(rpathname): - jitter.cpu.RAX = -1 - else: - info = linux_env.filesystem.statfs() - raise RuntimeError("Not implemented") - - -def sys_x86_64_ioctl(jitter, linux_env): - # Parse arguments - fd, cmd, arg = jitter.syscall_args_systemv(3) - log.debug("sys_ioctl(%x, %x, %x)", fd, cmd, arg) - - info = linux_env.ioctl(fd, cmd, arg) - if info is False: - jitter.syscall_ret_systemv(-1) - else: - if cmd == termios.TCGETS: - data = 
struct.pack("BBBB", *info) - jitter.vm.set_mem(arg, data) - elif cmd == termios.TIOCGWINSZ: - data = struct.pack("HHHH", *info) - jitter.vm.set_mem(arg, data) - else: - assert data is None - jitter.syscall_ret_systemv(0) - - -def sys_arml_ioctl(jitter, linux_env): - # Parse arguments - fd, cmd, arg = jitter.syscall_args_systemv(3) - log.debug("sys_ioctl(%x, %x, %x)", fd, cmd, arg) - - info = linux_env.ioctl(fd, cmd, arg) - if info is False: - jitter.syscall_ret_systemv(-1) - else: - if cmd == termios.TCGETS: - data = struct.pack("BBBB", *info) - jitter.vm.set_mem(arg, data) - elif cmd == termios.TIOCGWINSZ: - data = struct.pack("HHHH", *info) - jitter.vm.set_mem(arg, data) - else: - assert data is None - jitter.syscall_ret_systemv(0) - -def sys_generic_open(jitter, linux_env): - # Parse arguments - filename, flags, mode = jitter.syscall_args_systemv(3) - rpathname = jitter.get_str_ansi(filename) - log.debug("sys_open(%r, %x, %x)", rpathname, flags, mode) - # Stub - # 'mode' is ignored - jitter.syscall_ret_systemv(linux_env.open_(rpathname, flags)) - - -def sys_generic_write(jitter, linux_env): - # Parse arguments - fd, buf, count = jitter.syscall_args_systemv(3) - log.debug("sys_write(%d, %x, %x)", fd, buf, count) - - # Stub - data = jitter.vm.get_mem(buf, count) - jitter.syscall_ret_systemv(linux_env.write(fd, data)) - - -def sys_x86_64_getdents(jitter, linux_env): - # Parse arguments - fd = jitter.cpu.RDI - dirent = jitter.cpu.RSI - count = jitter.cpu.RDX - log.debug("sys_getdents(%x, %x, %x)", fd, dirent, count) - - # Stub - def packing_callback(cur_len, d_ino, d_type, name): - # struct linux_dirent { - # unsigned long d_ino; /* Inode number */ - # unsigned long d_off; /* Offset to next linux_dirent */ - # unsigned short d_reclen; /* Length of this linux_dirent */ - # char d_name[]; /* Filename (null-terminated) */ - # /* length is actually (d_reclen - 2 - - # offsetof(struct linux_dirent, d_name)) */ - # /* - # char pad; // Zero padding byte - # char d_type; // 
File type (only since Linux - # // 2.6.4); offset is (d_reclen - 1) - # */ - # } - d_reclen = 8 * 2 + 2 + 1 + len(name) + 1 - d_off = cur_len + d_reclen - entry = struct.pack("QqH", d_ino, d_off, d_reclen) + \ - name + b"\x00" + struct.pack("B", d_type) - assert len(entry) == d_reclen - return entry - - out = linux_env.getdents(fd, count, packing_callback) - jitter.vm.set_mem(dirent, out) - jitter.cpu.RAX = len(out) - - -def sys_arml_getdents64(jitter, linux_env): - # Parse arguments - fd = jitter.cpu.R0 - dirent = jitter.cpu.R1 - count = jitter.cpu.R2 - log.debug("sys_getdents64(%x, %x, %x)", fd, dirent, count) - - # Stub - def packing_callback(cur_len, d_ino, d_type, name): - # struct linux_dirent64 { - # ino64_t d_ino; /* 64-bit inode number */ - # off64_t d_off; /* 64-bit offset to next structure */ - # unsigned short d_reclen; /* Size of this dirent */ - # unsigned char d_type; /* File type */ - # char d_name[]; /* Filename (null-terminated) */ - # }; - d_reclen = 8 * 2 + 2 + 1 + len(name) + 1 - d_off = cur_len + d_reclen - entry = struct.pack("QqHB", d_ino, d_off, d_reclen, d_type) + \ - name + b"\x00" - assert len(entry) == d_reclen - return entry - - out = linux_env.getdents(fd, count, packing_callback) - jitter.vm.set_mem(dirent, out) - jitter.cpu.R0 = len(out) - - -def sys_x86_64_newlstat(jitter, linux_env): - # Parse arguments - filename = jitter.cpu.RDI - statbuf = jitter.cpu.RSI - rpathname = jitter.get_str_ansi(filename) - log.debug("sys_newlstat(%s, %x)", rpathname, statbuf) - - # Stub - if not linux_env.filesystem.exists(rpathname): - # ENOENT (No such file or directory) - jitter.cpu.RAX = -1 - else: - info = linux_env.lstat(rpathname) - data = _dump_struct_stat_x86_64(info) - jitter.vm.set_mem(statbuf, data) - jitter.cpu.RAX = 0 - - -def sys_arml_lstat64(jitter, linux_env): - # Parse arguments - filename = jitter.cpu.R0 - statbuf = jitter.cpu.R1 - rpathname = jitter.get_str_ansi(filename) - log.debug("sys_newlstat(%s, %x)", rpathname, statbuf) - - 
# Stub - if not linux_env.filesystem.exists(rpathname): - # ENOENT (No such file or directory) - jitter.cpu.R0 = -1 - else: - info = linux_env.lstat(rpathname) - data = _dump_struct_stat_arml(info) - jitter.vm.set_mem(statbuf, data) - jitter.cpu.R0 = 0 - - -def sys_x86_64_lgetxattr(jitter, linux_env): - # Parse arguments - pathname = jitter.cpu.RDI - name = jitter.cpu.RSI - value = jitter.cpu.RDX - size = jitter.cpu.R10 - rpathname = jitter.get_str_ansi(pathname) - rname = jitter.get_str_ansi(name) - log.debug("sys_lgetxattr(%r, %r, %x, %x)", rpathname, rname, value, size) - - # Stub - jitter.vm.set_mem(value, b"\x00" * size) - jitter.cpu.RAX = 0 - - -def sys_x86_64_getxattr(jitter, linux_env): - # Parse arguments - pathname = jitter.cpu.RDI - name = jitter.cpu.RSI - value = jitter.cpu.RDX - size = jitter.cpu.R10 - rpathname = jitter.get_str_ansi(pathname) - rname = jitter.get_str_ansi(name) - log.debug("sys_getxattr(%r, %r, %x, %x)", rpathname, rname, value, size) - - # Stub - jitter.vm.set_mem(value, b"\x00" * size) - jitter.cpu.RAX = 0 - - -def sys_x86_64_socket(jitter, linux_env): - # Parse arguments - family = jitter.cpu.RDI - type_ = jitter.cpu.RSI - protocol = jitter.cpu.RDX - log.debug("sys_socket(%x, %x, %x)", family, type_, protocol) - - jitter.cpu.RAX = linux_env.socket(family, type_, protocol) - - -def sys_x86_64_connect(jitter, linux_env): - # Parse arguments - fd = jitter.cpu.RDI - uservaddr = jitter.cpu.RSI - addrlen = jitter.cpu.RDX - raddr = jitter.get_str_ansi(uservaddr + 2) - log.debug("sys_connect(%x, %r, %x)", fd, raddr, addrlen) - - # Stub - # Always refuse the connexion - jitter.cpu.RAX = -1 - - -def sys_x86_64_clock_gettime(jitter, linux_env): - # Parse arguments - which_clock = jitter.cpu.RDI - tp = jitter.cpu.RSI - log.debug("sys_clock_gettime(%x, %x)", which_clock, tp) - - # Stub - value = linux_env.clock_gettime() - jitter.vm.set_mem(tp, struct.pack("Q", value)) - jitter.cpu.RAX = 0 - - -def sys_x86_64_lseek(jitter, linux_env): - # Parse 
arguments - fd = jitter.cpu.RDI - offset = jitter.cpu.RSI - whence = jitter.cpu.RDX - log.debug("sys_lseek(%d, %x, %x)", fd, offset, whence) - - # Stub - fdesc = linux_env.file_descriptors[fd] - mask = (1 << 64) - 1 - if offset > (1 << 63): - offset = - ((offset ^ mask) + 1) - - new_offset = fdesc.lseek(offset, whence) - jitter.cpu.RAX = new_offset - - -def sys_x86_64_munmap(jitter, linux_env): - # Parse arguments - addr = jitter.cpu.RDI - len_ = jitter.cpu.RSI - log.debug("sys_munmap(%x, %x)", addr, len_) - - # Do nothing - jitter.cpu.RAX = 0 - - -def sys_x86_64_readlink(jitter, linux_env): - # Parse arguments - path = jitter.cpu.RDI - buf = jitter.cpu.RSI - bufsize = jitter.cpu.RDX - rpath = jitter.get_str_ansi(path) - log.debug("sys_readlink(%r, %x, %x)", rpath, buf, bufsize) - - # Stub - link = linux_env.filesystem.readlink(rpath) - if link is None: - # Not a link - jitter.cpu.RAX = -1 - else: - data = link[:bufsize - 1] + b"\x00" - jitter.vm.set_mem(buf, data) - jitter.cpu.RAX = len(data) - 1 - -def sys_x86_64_getpid(jitter, linux_env): - # Parse arguments - log.debug("sys_getpid()") - - # Stub - jitter.cpu.RAX = linux_env.process_pid - - -def sys_x86_64_sysinfo(jitter, linux_env): - # Parse arguments - info = jitter.cpu.RDI - log.debug("sys_sysinfo(%x)", info) - - # Stub - data = struct.pack("QQQQQQQQQQHQQI", - 0x1234, # uptime - 0x2000, # loads (1 min) - 0x2000, # loads (5 min) - 0x2000, # loads (15 min) - 0x10000000, # total ram - 0x10000000, # free ram - 0x10000000, # shared memory - 0x0, # memory used by buffers - 0x0, # total swap - 0x0, # free swap - 0x1, # nb current processes - 0x0, # total high mem - 0x0, # available high mem - 0x1, # memory unit size - ) - jitter.vm.set_mem(info, data) - jitter.cpu.RAX = 0 - - -def sys_generic_geteuid(jitter, linux_env): - # Parse arguments - log.debug("sys_geteuid()") - - # Stub - jitter.syscall_ret_systemv(linux_env.user_euid) - - -def sys_generic_getegid(jitter, linux_env): - # Parse arguments - 
log.debug("sys_getegid()") - - # Stub - jitter.syscall_ret_systemv(linux_env.user_egid) - - -def sys_generic_getuid(jitter, linux_env): - # Parse arguments - log.debug("sys_getuid()") - - # Stub - jitter.syscall_ret_systemv(linux_env.user_uid) - - -def sys_generic_getgid(jitter, linux_env): - # Parse arguments - log.debug("sys_getgid()") - - # Stub - jitter.syscall_ret_systemv(linux_env.user_gid) - - -def sys_generic_setgid(jitter, linux_env): - # Parse arguments - gid, = jitter.syscall_args_systemv(1) - log.debug("sys_setgid(%x)", gid) - - # Stub - # Denied if different - if gid != linux_env.user_gid: - jitter.syscall_ret_systemv(-1) - else: - jitter.syscall_ret_systemv(0) - - -def sys_generic_setuid(jitter, linux_env): - # Parse arguments - uid, = jitter.syscall_args_systemv(1) - log.debug("sys_setuid(%x)", uid) - - # Stub - # Denied if different - if uid != linux_env.user_uid: - jitter.syscall_ret_systemv(-1) - else: - jitter.syscall_ret_systemv(0) - - -def sys_arml_set_tls(jitter, linux_env): - # Parse arguments - ptr = jitter.cpu.R0 - log.debug("sys_set_tls(%x)", ptr) - - # Stub - linux_env.tls = ptr - jitter.cpu.R0 = 0 - - -def sys_generic_fcntl64(jitter, linux_env): - # Parse arguments - fd, cmd, arg = jitter.syscall_args_systemv(3) - log.debug("sys_fcntl(%x, %x, %x)", fd, cmd, arg) - - # Stub - fdesc = linux_env.file_descriptors[fd] - if cmd == fcntl.F_GETFL: - jitter.syscall_ret_systemv(fdesc.flags) - elif cmd == fcntl.F_SETFL: - # Ignore flag change - jitter.syscall_ret_systemv(0) - elif cmd == fcntl.F_GETFD: - jitter.syscall_ret_systemv(fdesc.flags) - elif cmd == fcntl.F_SETFD: - # Ignore flag change - jitter.syscall_ret_systemv(0) - else: - raise RuntimeError("Not implemented") - - -def sys_x86_64_pread64(jitter, linux_env): - # Parse arguments - fd = jitter.cpu.RDI - buf = jitter.cpu.RSI - count = jitter.cpu.RDX - pos = jitter.cpu.R10 - log.debug("sys_pread64(%x, %x, %x, %x)", fd, buf, count, pos) - - # Stub - fdesc = linux_env.file_descriptors[fd] - 
cur_pos = fdesc.tell() - fdesc.seek(pos) - data = fdesc.read(count) - jitter.vm.set_mem(buf, data) - fdesc.seek(cur_pos) - jitter.cpu.RAX = len(data) - - -def sys_arml_gettimeofday(jitter, linux_env): - # Parse arguments - tv = jitter.cpu.R0 - tz = jitter.cpu.R1 - log.debug("sys_gettimeofday(%x, %x)", tv, tz) - - # Stub - value = linux_env.clock_gettime() - if tv: - jitter.vm.set_mem(tv, struct.pack("II", value, 0)) - if tz: - jitter.vm.set_mem(tz, struct.pack("II", 0, 0)) - jitter.cpu.R0 = 0 - - -syscall_callbacks_x86_64 = { - 0x0: sys_generic_read, - 0x1: sys_generic_write, - 0x2: sys_generic_open, - 0x3: sys_generic_close, - 0x4: sys_x86_64_newstat, - 0x5: sys_x86_64_fstat, - 0x6: sys_x86_64_newlstat, - 0x8: sys_x86_64_lseek, - 0x9: sys_generic_mmap, - 0x10: sys_x86_64_ioctl, - 0xA: sys_generic_mprotect, - 0xB: sys_x86_64_munmap, - 0xC: sys_generic_brk, - 0xD: sys_x86_64_rt_sigaction, - 0xE: sys_x86_64_rt_sigprocmask, - 0x11: sys_x86_64_pread64, - 0x14: sys_x86_64_writev, - 0x15: sys_generic_access, - 0x27: sys_x86_64_getpid, - 0x29: sys_x86_64_socket, - 0x2A: sys_x86_64_connect, - 0x3F: sys_x86_64_newuname, - 0x48: sys_generic_fcntl64, - 0x4E: sys_x86_64_getdents, - 0x59: sys_x86_64_readlink, - 0x63: sys_x86_64_sysinfo, - 0x66: sys_generic_getuid, - 0x68: sys_generic_getgid, - 0x6B: sys_generic_geteuid, - 0x6C: sys_generic_getegid, - 0xE4: sys_x86_64_clock_gettime, - 0x89: sys_x86_64_statfs, - 0x9E: sys_x86_64_arch_prctl, - 0xBF: sys_x86_64_getxattr, - 0xC0: sys_x86_64_lgetxattr, - 0xDA: sys_x86_64_set_tid_address, - 0xE7: sys_generic_exit_group, - 0x101: sys_x86_64_openat, - 0x111: sys_x86_64_set_robust_list, - 0x12E: sys_x86_64_prlimit64, -} - - -syscall_callbacks_arml = { - - 0x3: sys_generic_read, - 0x4: sys_generic_write, - 0x5: sys_generic_open, - 0x6: sys_generic_close, - 0x2d: sys_generic_brk, - 0x21: sys_generic_access, - 0x36: sys_arml_ioctl, - 0x7a: sys_arml_newuname, - 0x7d: sys_generic_mprotect, - 0x92: sys_arml_writev, - 0xc0: sys_generic_mmap2, - 
0xc3: sys_arml_stat64, - 0xc4: sys_arml_lstat64, - 0xc5: sys_arml_fstat64, - 0xc7: sys_generic_getuid, - 0xc8: sys_generic_getgid, - 0xc9: sys_generic_geteuid, - 0xcA: sys_generic_getegid, - 0x4e: sys_arml_gettimeofday, - 0xd5: sys_generic_setuid, - 0xd6: sys_generic_setgid, - 0xd9: sys_arml_getdents64, - 0xdd: sys_generic_fcntl64, - 0xf8: sys_generic_exit_group, - - # ARM-specific ARM_NR_BASE == 0x0f0000 - 0xf0005: sys_arml_set_tls, -} - -def syscall_x86_64_exception_handler(linux_env, syscall_callbacks, jitter): - """Call to actually handle an EXCEPT_PRIV_INSN exception - In the case of an error raised by a SYSCALL, call the corresponding - syscall_callbacks - @linux_env: LinuxEnvironment_x86_64 instance - @syscall_callbacks: syscall number -> func(jitter, linux_env) - """ - # Ensure the jitter has break on a SYSCALL - cur_instr = jitter.jit.mdis.dis_instr(jitter.pc) - if cur_instr.name != "SYSCALL": - return True - - # Dispatch to SYSCALL stub - syscall_number = jitter.cpu.RAX - callback = syscall_callbacks.get(syscall_number) - if callback is None: - raise KeyError( - "No callback found for syscall number 0x%x" % syscall_number - ) - callback(jitter, linux_env) - log.debug("-> %x", jitter.cpu.RAX) - - # Clean exception and move pc to the next instruction, to let the jitter - # continue - jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_PRIV_INSN) - jitter.pc += cur_instr.l - return True - - - -def syscall_x86_32_exception_handler(linux_env, syscall_callbacks, jitter): - """Call to actually handle an EXCEPT_PRIV_INSN exception - In the case of an error raised by a SYSCALL, call the corresponding - syscall_callbacks - @linux_env: LinuxEnvironment_x86_32 instance - @syscall_callbacks: syscall number -> func(jitter, linux_env) - """ - # Ensure the jitter has break on a SYSCALL - if jitter.cpu.interrupt_num != 0x80: - return True - - # Dispatch to SYSCALL stub - syscall_number = jitter.cpu.EAX - callback = syscall_callbacks.get(syscall_number) - if 
callback is None: - raise KeyError( - "No callback found for syscall number 0x%x" % syscall_number - ) - callback(jitter, linux_env) - log.debug("-> %x", jitter.cpu.EAX) - - # Clean exception and move pc to the next instruction, to let the jitter - # continue - jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_INT_XX) - return True - - - -def syscall_arml_exception_handler(linux_env, syscall_callbacks, jitter): - """Call to actually handle an EXCEPT_PRIV_INSN exception - In the case of an error raised by a SYSCALL, call the corresponding - syscall_callbacks - @linux_env: LinuxEnvironment_arml instance - @syscall_callbacks: syscall number -> func(jitter, linux_env) - """ - # Ensure the jitter has break on a SYSCALL - if jitter.cpu.interrupt_num != 0x0: - return True - - # Dispatch to SYSCALL stub - syscall_number = jitter.cpu.R7 - callback = syscall_callbacks.get(syscall_number) - if callback is None: - raise KeyError( - "No callback found for syscall number 0x%x" % syscall_number - ) - callback(jitter, linux_env) - log.debug("-> %x", jitter.cpu.R0) - - # Clean exception and move pc to the next instruction, to let the jitter - # continue - jitter.cpu.set_exception(jitter.cpu.get_exception() ^ EXCEPT_INT_XX) - return True - - - -def enable_syscall_handling(jitter, linux_env, syscall_callbacks): - """Activate handling of syscall for the current jitter instance. 
- Syscall handlers are provided by @syscall_callbacks - @linux_env: LinuxEnvironment instance - @syscall_callbacks: syscall number -> func(jitter, linux_env) - - Example of use: - >>> linux_env = LinuxEnvironment_x86_64() - >>> enable_syscall_handling(jitter, linux_env, syscall_callbacks_x86_64) - """ - arch_name = jitter.jit.arch_name - if arch_name == "x8664": - handler = syscall_x86_64_exception_handler - handler = functools.partial(handler, linux_env, syscall_callbacks) - jitter.add_exception_handler(EXCEPT_PRIV_INSN, handler) - elif arch_name == "x8632": - handler = syscall_x86_32_exception_handler - handler = functools.partial(handler, linux_env, syscall_callbacks) - jitter.add_exception_handler(EXCEPT_INT_XX, handler) - elif arch_name == "arml": - handler = syscall_arml_exception_handler - handler = functools.partial(handler, linux_env, syscall_callbacks) - jitter.add_exception_handler(EXCEPT_INT_XX, handler) - else: - raise ValueError("No syscall handler implemented for %s" % arch_name) - diff --git a/miasm2/os_dep/linux_stdlib.py b/miasm2/os_dep/linux_stdlib.py deleted file mode 100644 index f12284ee..00000000 --- a/miasm2/os_dep/linux_stdlib.py +++ /dev/null @@ -1,213 +0,0 @@ -#-*- coding:utf-8 -*- - -from __future__ import print_function -import struct -from sys import stdout - -try: - # Python3 binary stdout - stdout = stdout.buffer -except AttributeError: - pass - -from miasm2.core.utils import int_to_byte, cmp_elts -from miasm2.os_dep.common import heap -from miasm2.os_dep.common import get_fmt_args as _get_fmt_args - - -class c_linobjs(object): - - base_addr = 0x20000000 - align_addr = 0x1000 - def __init__(self): - self.alloc_ad = self.base_addr - self.alloc_align = self.align_addr - self.heap = heap() - -linobjs = c_linobjs() - -ABORT_ADDR = 0x1337beef - -def xxx___libc_start_main(jitter): - """Basic implementation of __libc_start_main - - int __libc_start_main(int *(main) (int, char * *, char * *), int argc, - char * * ubp_av, void (*init) (void), 
- void (*fini) (void), void (*rtld_fini) (void), - void (* stack_end)); - - Note: - - init, fini, rtld_fini are ignored - - return address is forced to ABORT_ADDR, to avoid calling abort/hlt/... - - in powerpc, signature is: - - int __libc_start_main (int argc, char **argv, char **ev, ElfW (auxv_t) * - auxvec, void (*rtld_fini) (void), struct startup_info - *stinfo, char **stack_on_entry) - - """ - global ABORT_ADDR - if jitter.arch.name == "ppc32": - ret_ad, args = jitter.func_args_systemv( - ["argc", "argv", "ev", "aux_vec", "rtld_fini", "st_info", - "stack_on_entry"] - ) - - # Mimic glibc implementation - if args.stack_on_entry != 0: - argc = struct.unpack(">I", - jitter.vm.get_mem(args.stack_on_entry, 4))[0] - argv = args.stack_on_entry + 4 - envp = argv + ((argc + 1) * 4) - else: - argc = args.argc - argv = args.argv - envp = args.ev - # sda_base, main, init, fini - _, main, _, _ = struct.unpack(">IIII", - jitter.vm.get_mem(args.st_info, 4 * 4)) - - else: - ret_ad, args = jitter.func_args_systemv( - ["main", "argc", "ubp_av", "init", "fini", "rtld_fini", "stack_end"] - ) - - main = args.main - # done by __libc_init_first - size = jitter.ir_arch.pc.size // 8 - argc = args.argc - argv = args.ubp_av - envp = argv + (args.argc + 1) * size - - - # Call int main(int argc, char** argv, char** envp) - jitter.func_ret_systemv(main) - ret_ad = ABORT_ADDR - jitter.func_prepare_systemv(ret_ad, argc, argv, envp) - return True - - -def xxx_isprint(jitter): - ''' - #include - int isprint(int c); - - checks for any printable character including space. - ''' - ret_addr, args = jitter.func_args_systemv(['c']) - ret = 1 if 0x20 <= args.c & 0xFF < 0x7f else 0 - return jitter.func_ret_systemv(ret_addr, ret) - - -def xxx_memcpy(jitter): - ''' - #include - void *memcpy(void *dest, const void *src, size_t n); - - copies n bytes from memory area src to memory area dest. 
- ''' - ret_addr, args = jitter.func_args_systemv(['dest', 'src', 'n']) - jitter.vm.set_mem(args.dest, jitter.vm.get_mem(args.src, args.n)) - return jitter.func_ret_systemv(ret_addr, args.dest) - - -def xxx_memset(jitter): - ''' - #include - void *memset(void *s, int c, size_t n); - - fills the first n bytes of the memory area pointed to by s with the constant - byte c.''' - - ret_addr, args = jitter.func_args_systemv(['dest', 'c', 'n']) - jitter.vm.set_mem(args.dest, int_to_byte(args.c & 0xFF) * args.n) - return jitter.func_ret_systemv(ret_addr, args.dest) - - -def xxx_puts(jitter): - ''' - #include - int puts(const char *s); - - writes the string s and a trailing newline to stdout. - ''' - ret_addr, args = jitter.func_args_systemv(['s']) - index = args.s - char = jitter.vm.get_mem(index, 1) - while char != b'\x00': - stdout.write(char) - index += 1 - char = jitter.vm.get_mem(index, 1) - stdout.write(b'\n') - return jitter.func_ret_systemv(ret_addr, 1) - - -def get_fmt_args(jitter, fmt, cur_arg): - return _get_fmt_args(fmt, cur_arg, jitter.get_str_ansi, jitter.get_arg_n_systemv) - - -def xxx_snprintf(jitter): - ret_addr, args = jitter.func_args_systemv(['string', 'size', 'fmt']) - cur_arg, fmt = 3, args.fmt - size = args.size if args.size else 1 - output = get_fmt_args(jitter, fmt, cur_arg) - output = output[:size - 1] - ret = len(output) - jitter.vm.set_mem(args.string, output + b'\x00') - return jitter.func_ret_systemv(ret_addr, ret) - - -def xxx_sprintf(jitter): - ret_addr, args = jitter.func_args_systemv(['string', 'fmt']) - cur_arg, fmt = 2, args.fmt - output = get_fmt_args(jitter, fmt, cur_arg) - ret = len(output) - jitter.vm.set_mem(args.string, output + b'\x00') - return jitter.func_ret_systemv(ret_addr, ret) - - -def xxx_printf(jitter): - ret_addr, args = jitter.func_args_systemv(['fmt']) - cur_arg, fmt = 1, args.fmt - output = get_fmt_args(jitter, fmt, cur_arg) - ret = len(output) - stdout.write(output) - return jitter.func_ret_systemv(ret_addr, ret) - - 
-def xxx_strcpy(jitter): - ret_ad, args = jitter.func_args_systemv(["dst", "src"]) - str_src = jitter.get_str_ansi(args.src) + b'\x00' - jitter.vm.set_mem(args.dst, str_src) - jitter.func_ret_systemv(ret_ad, args.dst) - - -def xxx_strlen(jitter): - ret_ad, args = jitter.func_args_systemv(["src"]) - str_src = jitter.get_str_ansi(args.src) - jitter.func_ret_systemv(ret_ad, len(str_src)) - - -def xxx_malloc(jitter): - ret_ad, args = jitter.func_args_systemv(["msize"]) - addr = linobjs.heap.alloc(jitter, args.msize) - jitter.func_ret_systemv(ret_ad, addr) - - -def xxx_free(jitter): - ret_ad, args = jitter.func_args_systemv(["ptr"]) - jitter.func_ret_systemv(ret_ad, 0) - - -def xxx_strcmp(jitter): - ret_ad, args = jitter.func_args_systemv(["ptr_str1", "ptr_str2"]) - s1 = jitter.get_str_ansi(args.ptr_str1) - s2 = jitter.get_str_ansi(args.ptr_str2) - jitter.func_ret_systemv(ret_ad, cmp_elts(s1, s2)) - - -def xxx_strncmp(jitter): - ret_ad, args = jitter.func_args_systemv(["ptr_str1", "ptr_str2", "size"]) - s1 = jitter.get_str_ansi(args.ptr_str1, args.size) - s2 = jitter.get_str_ansi(args.ptr_str2, args.size) - jitter.func_ret_systemv(ret_ad, cmp_elts(s1, s2)) diff --git a/miasm2/os_dep/win_32_structs.py b/miasm2/os_dep/win_32_structs.py deleted file mode 100644 index ffe6afc4..00000000 --- a/miasm2/os_dep/win_32_structs.py +++ /dev/null @@ -1,231 +0,0 @@ -from miasm2.core.types import MemStruct, Num, Ptr, Str, \ - Array, RawStruct, Union, \ - BitField, Self, Void, Bits, \ - set_allocator, MemUnion, Struct - - -class UnicodeString(MemStruct): - fields = [ - ("length", Num("H")), - ("maxlength", Num("H")), - ("data", Ptr(" -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -from past.builtins import cmp -import struct -import os -import stat -import time -import string -import logging -from zlib import crc32 -from io import StringIO -import time -import datetime - -from future.utils import PY3, viewitems - -try: - from Crypto.Hash import MD5, SHA -except ImportError: - print("cannot find crypto, skipping") - -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, PAGE_EXEC -from miasm2.core.utils import pck16, pck32, hexdump, whoami -from miasm2.os_dep.common import heap, windows_to_sbpath -from miasm2.os_dep.common import set_str_unic, set_str_ansi -from miasm2.os_dep.common import get_fmt_args as _get_fmt_args -from miasm2.os_dep.win_api_x86_32_seh import tib_address - -log = logging.getLogger("win_api_x86_32") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - -DATE_1601_TO_1970 = 116444736000000000 - -MAX_PATH = 260 - - -""" -typedef struct tagPROCESSENTRY32 { - DWORD dwSize; - DWORD cntUsage; - DWORD th32ProcessID; - ULONG_PTR th32DefaultHeapID; - DWORD th32ModuleID; - DWORD cntThreads; - DWORD th32ParentProcessID; - LONG pcPriClassBase; - DWORD dwFlags; - TCHAR szExeFile[MAX_PATH]; -} PROCESSENTRY32, *PPROCESSENTRY32; -""" - - -ACCESS_DICT = {0x0: 0, - 0x1: 0, - 0x2: PAGE_READ, - 0x4: PAGE_READ | PAGE_WRITE, - 0x10: PAGE_EXEC, - 0x20: PAGE_EXEC | PAGE_READ, - 0x40: PAGE_EXEC | PAGE_READ | PAGE_WRITE, - 0x80: PAGE_EXEC | PAGE_READ | 
PAGE_WRITE, - # 0x80: PAGE_EXECUTE_WRITECOPY - 0x100: 0 - } - -ACCESS_DICT_INV = dict((x[1], x[0]) for x in viewitems(ACCESS_DICT)) - - -class whandle(object): - - def __init__(self, name, info): - self.name = name - self.info = info - - def __repr__(self): - return '<%r %r %r>' % (self.__class__.__name__, self.name, self.info) - - -class handle_generator(object): - - def __init__(self): - self.offset = 600 - self.all_handles = {} - - def add(self, name, info=None): - self.offset += 1 - h = whandle(name, info) - self.all_handles[self.offset] = h - - log.debug(repr(self)) - return self.offset - - def __repr__(self): - out = '<%r\n' % self.__class__.__name__ - ks = list(self.all_handles) - ks.sort() - - for k in ks: - out += " %r %r\n" % (k, self.all_handles[k]) - out += '>' - return out - - def __contains__(self, e): - return e in self.all_handles - - def __getitem__(self, item): - return self.all_handles.__getitem__(item) - - def __delitem__(self, item): - self.all_handles.__delitem__(item) - - -class c_winobjs(object): - - def __init__(self): - self.alloc_ad = 0x20000000 - self.alloc_align = 0x1000 - self.heap = heap() - self.handle_toolhelpsnapshot = 0xaaaa00 - self.toolhelpsnapshot_info = {} - self.handle_curprocess = 0xaaaa01 - self.dbg_present = 0 - self.tickcount = 0 - self.dw_pid_dummy1 = 0x111 - self.dw_pid_explorer = 0x222 - self.dw_pid_dummy2 = 0x333 - self.dw_pid_cur = 0x444 - self.module_fname_nux = None - self.module_name = b"test.exe" - self.module_path = b"c:\\mydir\\" + self.module_name - self.hcurmodule = None - self.module_filesize = None - self.getversion = 0x0A280105 - self.getforegroundwindow = 0x333333 - self.cryptcontext_hwnd = 0x44400 - self.cryptcontext_bnum = 0x44000 - self.cryptcontext_num = 0 - self.cryptcontext = {} - self.phhash_crypt_md5 = 0x55555 - self.files_hwnd = {} - self.windowlong_dw = 0x77700 - self.module_cur_hwnd = 0x88800 - self.module_file_nul = 0x999000 - self.runtime_dll = None - self.current_pe = None - self.tls_index = 
0xf - self.tls_values = {} - self.handle_pool = handle_generator() - self.handle_mapped = {} - self.hkey_handles = { - 0x80000001: b"hkey_current_user", - 0x80000002: b"hkey_local_machine" - } - self.cur_dir = b"c:\\tmp" - - self.nt_mdl = {} - self.nt_mdl_ad = None - self.nt_mdl_cur = 0 - self.win_event_num = 0x13370 - self.cryptdll_md5_h = {} - - self.lastwin32error = 0 - self.mutex = {} - self.env_variables = {} - self.events_pool = {} - self.find_data = None - - self.current_datetime = datetime.datetime( - year=2017, month=8, day=21, - hour=13, minute=37, - second=11, microsecond=123456 - ) - -winobjs = c_winobjs() - - -process_list = [ - [ - 0x40, # DWORD dwSize; - 0, # DWORD cntUsage; - winobjs.dw_pid_dummy1, # DWORD th32ProcessID; - 0x11111111, # ULONG_PTR th32DefaultHeapID; - 0x11111112, # DWORD th32ModuleID; - 1, # DWORD cntThreads; - winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; - 0xbeef, # LONG pcPriClassBase; - 0x0, # DWORD dwFlags; - b"dummy1.exe" # TCHAR szExeFile[MAX_PATH]; - ], - [ - 0x40, # DWORD dwSize; - 0, # DWORD cntUsage; - winobjs.dw_pid_explorer, # DWORD th32ProcessID; - 0x11111111, # ULONG_PTR th32DefaultHeapID; - 0x11111112, # DWORD th32ModuleID; - 1, # DWORD cntThreads; - 4, # DWORD th32ParentProcessID; - 0xbeef, # LONG pcPriClassBase; - 0x0, # DWORD dwFlags; - b"explorer.exe" # TCHAR szExeFile[MAX_PATH]; - ], - - [ - 0x40, # DWORD dwSize; - 0, # DWORD cntUsage; - winobjs.dw_pid_dummy2, # DWORD th32ProcessID; - 0x11111111, # ULONG_PTR th32DefaultHeapID; - 0x11111112, # DWORD th32ModuleID; - 1, # DWORD cntThreads; - winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; - 0xbeef, # LONG pcPriClassBase; - 0x0, # DWORD dwFlags; - b"dummy2.exe" # TCHAR szExeFile[MAX_PATH]; - ], - - [ - 0x40, # DWORD dwSize; - 0, # DWORD cntUsage; - winobjs.dw_pid_cur, # DWORD th32ProcessID; - 0x11111111, # ULONG_PTR th32DefaultHeapID; - 0x11111112, # DWORD th32ModuleID; - 1, # DWORD cntThreads; - winobjs.dw_pid_explorer, # DWORD th32ParentProcessID; - 
0xbeef, # LONG pcPriClassBase; - 0x0, # DWORD dwFlags; - winobjs.module_name # TCHAR szExeFile[MAX_PATH]; - ], - - -] - - -class hobj(object): - pass - - -class mdl(object): - - def __init__(self, ad, l): - self.ad = ad - self.l = l - - def __bytes__(self): - return struct.pack('LL', self.ad, self.l) - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - -def kernel32_HeapAlloc(jitter): - ret_ad, args = jitter.func_args_stdcall(["heap", "flags", "size"]) - alloc_addr = winobjs.heap.alloc(jitter, args.size) - jitter.func_ret_stdcall(ret_ad, alloc_addr) - - -def kernel32_HeapFree(jitter): - ret_ad, _ = jitter.func_args_stdcall(["heap", "flags", "pmem"]) - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_GlobalAlloc(jitter): - ret_ad, args = jitter.func_args_stdcall(["uflags", "msize"]) - alloc_addr = winobjs.heap.alloc(jitter, args.msize) - jitter.func_ret_stdcall(ret_ad, alloc_addr) - - -def kernel32_LocalFree(jitter): - ret_ad, _ = jitter.func_args_stdcall(["lpvoid"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_LocalAlloc(jitter): - ret_ad, args = jitter.func_args_stdcall(["uflags", "msize"]) - alloc_addr = winobjs.heap.alloc(jitter, args.msize) - jitter.func_ret_stdcall(ret_ad, alloc_addr) - -def msvcrt_new(jitter): - ret_ad, args = jitter.func_args_cdecl(["size"]) - alloc_addr = winobjs.heap.alloc(jitter, args.size) - jitter.func_ret_cdecl(ret_ad, alloc_addr) - -globals()['msvcrt_??2@YAPAXI@Z'] = msvcrt_new - -def msvcrt_delete(jitter): - ret_ad, args = jitter.func_args_cdecl(["ptr"]) - jitter.func_ret_cdecl(ret_ad, 0) - -globals()['msvcrt_??3@YAXPAX@Z'] = msvcrt_delete - -def kernel32_GlobalFree(jitter): - ret_ad, _ = jitter.func_args_stdcall(["addr"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_IsDebuggerPresent(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, winobjs.dbg_present) - - -def kernel32_CreateToolhelp32Snapshot(jitter): - ret_ad, _ = 
jitter.func_args_stdcall(["dwflags", "th32processid"]) - jitter.func_ret_stdcall(ret_ad, winobjs.handle_toolhelpsnapshot) - - -def kernel32_GetCurrentProcess(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, winobjs.handle_curprocess) - - -def kernel32_GetCurrentProcessId(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, winobjs.dw_pid_cur) - - -def kernel32_Process32First(jitter): - ret_ad, args = jitter.func_args_stdcall(["s_handle", "ad_pentry"]) - - pentry = struct.pack( - 'IIIIIIIII', *process_list[0][:-1] - ) + process_list[0][-1] - jitter.vm.set_mem(args.ad_pentry, pentry) - winobjs.toolhelpsnapshot_info[args.s_handle] = 0 - - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_Process32Next(jitter): - ret_ad, args = jitter.func_args_stdcall(["s_handle", "ad_pentry"]) - - winobjs.toolhelpsnapshot_info[args.s_handle] += 1 - if winobjs.toolhelpsnapshot_info[args.s_handle] >= len(process_list): - ret = 0 - else: - ret = 1 - n = winobjs.toolhelpsnapshot_info[args.s_handle] - pentry = struct.pack( - 'IIIIIIIII', *process_list[n][:-1]) + process_list[n][-1] - jitter.vm.set_mem(args.ad_pentry, pentry) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetTickCount(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - winobjs.tickcount += 1 - jitter.func_ret_stdcall(ret_ad, winobjs.tickcount) - - -def kernel32_GetVersion(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, winobjs.getversion) - - -def kernel32_GetVersionEx(jitter, str_size, set_str): - ret_ad, args = jitter.func_args_stdcall(["ptr_struct"]) - - size = jitter.vm.get_u32(args.ptr_struct) - if size in [0x14+str_size, 0x1c+str_size]: - tmp = struct.pack( - "IIIII%dsHHHBB" % str_size, - 0x114, # struct size - 0x5, # maj vers - 0x2, # min vers - 0xa28, # build nbr - 0x2, # platform id - set_str("Service pack 4"), - 3, # wServicePackMajor - 0, # wServicePackMinor - 0x100, # wSuiteMask - 1, # 
wProductType - 0 # wReserved - ) - tmp = tmp[:size] - jitter.vm.set_mem(args.ptr_struct, tmp) - ret = 1 - else: - ret = 0 - jitter.func_ret_stdcall(ret_ad, ret) - - -kernel32_GetVersionExA = lambda jitter: kernel32_GetVersionEx(jitter, 128, - set_str_ansi) -kernel32_GetVersionExW = lambda jitter: kernel32_GetVersionEx(jitter, 256, - set_str_unic) - - -def kernel32_GetPriorityClass(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hwnd"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_SetPriorityClass(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hwnd", "dwpclass"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_CloseHandle(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hwnd"]) - jitter.func_ret_stdcall(ret_ad, 1) - - -def user32_GetForegroundWindow(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, winobjs.getforegroundwindow) - - -def user32_FindWindowA(jitter): - ret_ad, args = jitter.func_args_stdcall(["pclassname", "pwindowname"]) - if args.pclassname: - classname = jitter.get_str_ansi(args.pclassname) - log.info("FindWindowA classname %s", classname) - if args.pwindowname: - windowname = jitter.get_str_ansi(args.pwindowname) - log.info("FindWindowA windowname %s", windowname) - jitter.func_ret_stdcall(ret_ad, 0) - - -def user32_GetTopWindow(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hwnd"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def user32_BlockInput(jitter): - ret_ad, _ = jitter.func_args_stdcall(["blockit"]) - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptAcquireContext(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["phprov", "pszcontainer", - "pszprovider", "dwprovtype", - "dwflags"]) - prov = get_str(args.pszprovider) if args.pszprovider else "NONE" - log.debug('prov: %r', prov) - jitter.vm.set_u32(args.phprov, winobjs.cryptcontext_hwnd) - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptAcquireContextA(jitter): - 
advapi32_CryptAcquireContext(jitter, whoami(), jitter.get_str_ansi) - - -def advapi32_CryptAcquireContextW(jitter): - advapi32_CryptAcquireContext(jitter, whoami(), jitter.get_str_unic) - - -def advapi32_CryptCreateHash(jitter): - ret_ad, args = jitter.func_args_stdcall(["hprov", "algid", "hkey", - "dwflags", "phhash"]) - - winobjs.cryptcontext_num += 1 - - if args.algid == 0x00008003: - log.debug('algo is MD5') - jitter.vm.set_u32( - args.phhash, - winobjs.cryptcontext_bnum + winobjs.cryptcontext_num - ) - winobjs.cryptcontext[ - winobjs.cryptcontext_bnum + winobjs.cryptcontext_num] = hobj() - winobjs.cryptcontext[ - winobjs.cryptcontext_bnum + winobjs.cryptcontext_num].h = MD5.new() - elif args.algid == 0x00008004: - log.debug('algo is SHA1') - jitter.vm.set_u32( - args.phhash, - winobjs.cryptcontext_bnum + winobjs.cryptcontext_num - ) - winobjs.cryptcontext[ - winobjs.cryptcontext_bnum + winobjs.cryptcontext_num] = hobj() - winobjs.cryptcontext[ - winobjs.cryptcontext_bnum + winobjs.cryptcontext_num].h = SHA.new() - else: - raise ValueError('un impl algo1') - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptHashData(jitter): - ret_ad, args = jitter.func_args_stdcall(["hhash", "pbdata", "dwdatalen", - "dwflags"]) - - if not args.hhash in winobjs.cryptcontext: - raise ValueError("unknown crypt context") - - data = jitter.vm.get_mem(args.pbdata, args.dwdatalen) - log.debug('will hash %X', args.dwdatalen) - log.debug(repr(data[:10]) + "...") - winobjs.cryptcontext[args.hhash].h.update(data) - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptGetHashParam(jitter): - ret_ad, args = jitter.func_args_stdcall(["hhash", "param", "pbdata", - "dwdatalen", "dwflags"]) - - if not args.hhash in winobjs.cryptcontext: - raise ValueError("unknown crypt context") - - if args.param == 2: - # XXX todo: save h state? 
- h = winobjs.cryptcontext[args.hhash].h.digest() - else: - raise ValueError('not impl', args.param) - jitter.vm.set_mem(args.pbdata, h) - jitter.vm.set_u32(args.dwdatalen, len(h)) - - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptReleaseContext(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hhash", "flags"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def advapi32_CryptDeriveKey(jitter): - ret_ad, args = jitter.func_args_stdcall(["hprov", "algid", "hbasedata", - "dwflags", "phkey"]) - - if args.algid == 0x6801: - log.debug('using DES') - else: - raise ValueError('un impl algo2') - h = winobjs.cryptcontext[args.hbasedata].h.digest() - log.debug('hash %r', h) - winobjs.cryptcontext[args.hbasedata].h_result = h - jitter.vm.set_u32(args.phkey, args.hbasedata) - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptDestroyHash(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hhash"]) - jitter.func_ret_stdcall(ret_ad, 1) - - -def advapi32_CryptDecrypt(jitter): - # ret_ad, _ = jitter.func_args_stdcall(["hkey", "hhash", "final", - # "dwflags", "pbdata", - # "pdwdatalen"]) - raise ValueError("Not implemented") - # jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_CreateFile(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["lpfilename", "access", - "dwsharedmode", - "lpsecurityattr", - "dwcreationdisposition", - "dwflagsandattr", - "htemplatefile"]) - if args.lpfilename == 0: - jitter.func_ret_stdcall(ret_ad, 0xffffffff) - return - - fname = get_str(args.lpfilename) - log.info('CreateFile fname %s', fname) - ret = 0xffffffff - - log.debug("%r %r", fname.lower(), winobjs.module_path.lower()) - is_original_file = fname.lower() == winobjs.module_path.lower() - - if fname.upper() in [r"\\.\SICE", r"\\.\NTICE", r"\\.\SIWVID", r'\\.\SIWDEBUG']: - pass - elif fname.upper() in ['NUL']: - ret = winobjs.module_cur_hwnd - else: - # sandox path - sb_fname = windows_to_sbpath(fname) - if args.access & 0x80000000 or args.access == 1: - # read - 
if args.dwcreationdisposition == 2: - # create_always - if os.access(sb_fname, os.R_OK): - # but file exist - pass - else: - raise NotImplementedError("Untested case") # to test - # h = open(sb_fname, 'rb+') - elif args.dwcreationdisposition == 3: - # open_existing - if os.access(sb_fname, os.R_OK): - s = os.stat(sb_fname) - if stat.S_ISDIR(s.st_mode): - ret = winobjs.handle_pool.add(sb_fname, 0x1337) - else: - h = open(sb_fname, 'r+b') - ret = winobjs.handle_pool.add(sb_fname, h) - else: - log.warning("FILE %r DOES NOT EXIST!", fname) - elif args.dwcreationdisposition == 1: - # create new - if os.access(sb_fname, os.R_OK): - # file exist - # ret = 80 - winobjs.lastwin32error = 80 - else: - # first create an empty file - open(sb_fname, 'w').close() - # then open - h = open(sb_fname, 'r+b') - ret = winobjs.handle_pool.add(sb_fname, h) - elif args.dwcreationdisposition == 4: - # open_always - if os.access(sb_fname, os.R_OK): - s = os.stat(sb_fname) - if stat.S_ISDIR(s.st_mode): - ret = winobjs.handle_pool.add(sb_fname, 0x1337) - else: - h = open(sb_fname, 'r+b') - ret = winobjs.handle_pool.add(sb_fname, h) - else: - raise NotImplementedError("Untested case") - else: - raise NotImplementedError("Untested case") - elif args.access & 0x40000000: - # write - if args.dwcreationdisposition == 3: - # open existing - if is_original_file: - # cannot open self in write mode! 
- pass - elif os.access(sb_fname, os.R_OK): - s = os.stat(sb_fname) - if stat.S_ISDIR(s.st_mode): - # open dir - ret = winobjs.handle_pool.add(sb_fname, 0x1337) - else: - h = open(sb_fname, 'r+b') - ret = winobjs.handle_pool.add(sb_fname, h) - else: - raise NotImplementedError("Untested case") # to test - elif args.dwcreationdisposition == 5: - # truncate_existing - if is_original_file: - pass - else: - raise NotImplementedError("Untested case") # to test - else: - # raise NotImplementedError("Untested case") # to test - h = open(sb_fname, 'w') - ret = winobjs.handle_pool.add(sb_fname, h) - else: - raise NotImplementedError("Untested case") - - # h = open(sb_fname, 'rb+') - # ret = winobjs.handle_pool.add(sb_fname, h) - log.debug('CreateFile ret %x', ret) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_CreateFileA(jitter): - kernel32_CreateFile(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_CreateFileW(jitter): - kernel32_CreateFile(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_ReadFile(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpbuffer", - "nnumberofbytestoread", - "lpnumberofbytesread", - "lpoverlapped"]) - if args.hwnd == winobjs.module_cur_hwnd: - pass - elif args.hwnd in winobjs.handle_pool: - pass - else: - raise ValueError('unknown hwnd!') - - data = None - if args.hwnd in winobjs.files_hwnd: - data = winobjs.files_hwnd[ - winobjs.module_cur_hwnd].read(args.nnumberofbytestoread) - elif args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - data = wh.info.read(args.nnumberofbytestoread) - else: - raise ValueError('unknown filename') - - if data is not None: - if (args.lpnumberofbytesread): - jitter.vm.set_u32(args.lpnumberofbytesread, len(data)) - jitter.vm.set_mem(args.lpbuffer, data) - - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_GetFileSize(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpfilesizehight"]) - - if args.hwnd == winobjs.module_cur_hwnd: - ret = 
len(open(winobjs.module_fname_nux, "rb").read()) - elif args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - ret = len(open(wh.name, "rb").read()) - else: - raise ValueError('unknown hwnd!') - - if args.lpfilesizehight != 0: - jitter.vm.set_u32(args.lpfilesizehight, ret) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetFileSizeEx(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpfilesizehight"]) - - if args.hwnd == winobjs.module_cur_hwnd: - l = len(open(winobjs.module_fname_nux, "rb").read()) - elif args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - l = len(open(wh.name, "rb").read()) - else: - raise ValueError('unknown hwnd!') - - if args.lpfilesizehight == 0: - raise NotImplementedError("Untested case") - jitter.vm.set_mem(args.lpfilesizehight, pck32( - l & 0xffffffff) + pck32((l >> 32) & 0xffffffff)) - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_FlushInstructionCache(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hprocess", "lpbasead", "dwsize"]) - jitter.func_ret_stdcall(ret_ad, 0x1337) - - -def kernel32_VirtualProtect(jitter): - ret_ad, args = jitter.func_args_stdcall(['lpvoid', 'dwsize', - 'flnewprotect', - 'lpfloldprotect']) - # XXX mask hpart - flnewprotect = args.flnewprotect & 0xFFF - if not flnewprotect in ACCESS_DICT: - raise ValueError('unknown access dw!') - - if args.lpfloldprotect: - old = jitter.vm.get_mem_access(args.lpvoid) - jitter.vm.set_u32(args.lpfloldprotect, ACCESS_DICT_INV[old]) - - for addr in jitter.vm.get_all_memory(): - # Multi-page - if args.lpvoid <= addr < args.lpvoid + args.dwsize: - jitter.vm.set_mem_access(addr, ACCESS_DICT[flnewprotect]) - - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_VirtualAlloc(jitter): - ret_ad, args = jitter.func_args_stdcall(['lpvoid', 'dwsize', - 'alloc_type', 'flprotect']) - - - if not args.flprotect in ACCESS_DICT: - raise ValueError('unknown access dw!') - - if args.lpvoid == 0: - alloc_addr = 
winobjs.heap.next_addr(args.dwsize) - jitter.vm.add_memory_page( - alloc_addr, ACCESS_DICT[args.flprotect], "\x00" * args.dwsize, - "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) - else: - all_mem = jitter.vm.get_all_memory() - if args.lpvoid in all_mem: - alloc_addr = args.lpvoid - jitter.vm.set_mem_access(args.lpvoid, ACCESS_DICT[args.flprotect]) - else: - alloc_addr = winobjs.heap.next_addr(args.dwsize) - # alloc_addr = args.lpvoid - jitter.vm.add_memory_page( - alloc_addr, ACCESS_DICT[args.flprotect], "\x00" * args.dwsize, - "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) - - log.info('VirtualAlloc addr: 0x%x', alloc_addr) - jitter.func_ret_stdcall(ret_ad, alloc_addr) - - -def kernel32_VirtualFree(jitter): - ret_ad, _ = jitter.func_args_stdcall(["lpvoid", "dwsize", "alloc_type"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def user32_GetWindowLongA(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hwnd", "nindex"]) - jitter.func_ret_stdcall(ret_ad, winobjs.windowlong_dw) - - -def user32_SetWindowLongA(jitter): - ret_ad, _ = jitter.func_args_stdcall(["hwnd", "nindex", "newlong"]) - jitter.func_ret_stdcall(ret_ad, winobjs.windowlong_dw) - - -def kernel32_GetModuleFileName(jitter, funcname, set_str): - ret_ad, args = jitter.func_args_stdcall(["hmodule", "lpfilename", "nsize"]) - - if args.hmodule in [0, winobjs.hcurmodule]: - p = winobjs.module_path[:] - elif (winobjs.runtime_dll and - args.hmodule in viewvalues(winobjs.runtime_dll.name2off)): - name_inv = dict( - [ - (x[1], x[0]) - for x in viewitems(winobjs.runtime_dll.name2off) - ] - ) - p = name_inv[args.hmodule] - else: - log.warning(('Unknown module 0x%x.' 
+ - 'Set winobjs.hcurmodule and retry'), args.hmodule) - p = None - - if p is None: - l = 0 - elif args.nsize < len(p): - p = p[:args.nsize] - l = len(p) - else: - l = len(p) - - if p: - set_str(args.lpfilename, p) - - jitter.func_ret_stdcall(ret_ad, l) - - -def kernel32_GetModuleFileNameA(jitter): - kernel32_GetModuleFileName(jitter, whoami(), jitter.set_str_ansi) - - -def kernel32_GetModuleFileNameW(jitter): - kernel32_GetModuleFileName(jitter, whoami(), jitter.set_str_unic) - - -def kernel32_CreateMutex(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["mutexattr", "initowner", - "lpname"]) - - if args.lpname: - name = get_str(args.lpname) - log.info("CreateMutex %r", name) - else: - name = None - if args.initowner: - if name in winobjs.mutex: - raise NotImplementedError("Untested case") - # ret = 0 - else: - winobjs.mutex[name] = id(name) - ret = winobjs.mutex[name] - else: - if name in winobjs.mutex: - raise NotImplementedError("Untested case") - # ret = 0 - else: - winobjs.mutex[name] = id(name) - ret = winobjs.mutex[name] - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_CreateMutexA(jitter): - kernel32_CreateMutex(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_CreateMutexW(jitter): - kernel32_CreateMutex(jitter, whoami(), jitter.get_str_unic) - - -def shell32_SHGetSpecialFolderLocation(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwndowner", "nfolder", "ppidl"]) - jitter.vm.set_u32(args.ppidl, args.nfolder) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_SHGetPathFromIDList(jitter, funcname, set_str): - ret_ad, args = jitter.func_args_stdcall(["pidl", "ppath"]) - - if args.pidl == 7: # CSIDL_STARTUP: - s = "c:\\doc\\user\\startmenu\\programs\\startup" - set_str(args.ppath, s) - else: - raise ValueError('pidl not implemented', args.pidl) - jitter.func_ret_stdcall(ret_ad, 1) - - -def shell32_SHGetPathFromIDListW(jitter): - kernel32_SHGetPathFromIDList(jitter, whoami(), jitter.set_str_unic) - - -def 
shell32_SHGetPathFromIDListA(jitter): - kernel32_SHGetPathFromIDList(jitter, whoami(), jitter.set_str_ansi) - - -def kernel32_GetLastError(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, winobjs.lastwin32error) - - -def kernel32_SetLastError(jitter): - ret_ad, args = jitter.func_args_stdcall(["errcode"]) - # lasterr addr - # ad = tib_address + 0x34 - # jitter.vm.set_mem(ad, pck32(args.errcode)) - winobjs.lastwin32error = args.errcode - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_RestoreLastError(jitter): - kernel32_SetLastError(jitter) - - -def kernel32_LoadLibrary(jitter, get_str): - ret_ad, args = jitter.func_args_stdcall(["dllname"]) - - libname = get_str(args.dllname, 0x100) - ret = winobjs.runtime_dll.lib_get_add_base(libname) - log.info("Loading %r ret 0x%x", libname, ret) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_LoadLibraryA(jitter): - kernel32_LoadLibrary(jitter, jitter.get_str_ansi) - - -def kernel32_LoadLibraryW(jitter): - kernel32_LoadLibrary(jitter, jitter.get_str_unic) - - -def kernel32_LoadLibraryEx(jitter, get_str): - ret_ad, args = jitter.func_args_stdcall(["dllname", "hfile", "flags"]) - - if args.hfile != 0: - raise NotImplementedError("Untested case") - libname = get_str(args.dllname, 0x100) - ret = winobjs.runtime_dll.lib_get_add_base(libname) - log.info("Loading %r ret 0x%x", libname, ret) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_LoadLibraryExA(jitter): - kernel32_LoadLibraryEx(jitter, jitter.get_str_ansi) - - -def kernel32_LoadLibraryExW(jitter): - kernel32_LoadLibraryEx(jitter, jitter.get_str_unic) - - -def kernel32_GetProcAddress(jitter): - ret_ad, args = jitter.func_args_stdcall(["libbase", "fname"]) - fname = args.fname - if fname >= 0x10000: - fname = jitter.get_str_ansi(fname, 0x100) - if not fname: - fname = None - if fname is not None: - ad = winobjs.runtime_dll.lib_get_add_func(args.libbase, fname) - else: - ad = 0 - log.info("GetProcAddress %r %r ret 0x%x", 
args.libbase, fname, ad) - jitter.add_breakpoint(ad, jitter.handle_lib) - jitter.func_ret_stdcall(ret_ad, ad) - - -def kernel32_GetModuleHandle(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["dllname"]) - - if args.dllname: - libname = get_str(args.dllname) - if libname: - ret = winobjs.runtime_dll.lib_get_add_base(libname) - else: - log.warning('unknown module!') - ret = 0 - log.info("GetModuleHandle %r ret 0x%x", libname, ret) - else: - ret = winobjs.current_pe.NThdr.ImageBase - log.info("GetModuleHandle default ret 0x%x", ret) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetModuleHandleA(jitter): - kernel32_GetModuleHandle(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_GetModuleHandleW(jitter): - kernel32_GetModuleHandle(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_VirtualLock(jitter): - ret_ad, _ = jitter.func_args_stdcall(["lpaddress", "dwsize"]) - jitter.func_ret_stdcall(ret_ad, 1) - - -class systeminfo(object): - oemId = 0 - dwPageSize = 0x1000 - lpMinimumApplicationAddress = 0x10000 - lpMaximumApplicationAddress = 0x7ffeffff - dwActiveProcessorMask = 0x1 - numberOfProcessors = 0x1 - ProcessorsType = 586 - dwAllocationgranularity = 0x10000 - wProcessorLevel = 0x6 - ProcessorRevision = 0xf0b - - def pack(self): - return struct.pack('IIIIIIIIHH', - self.oemId, - self.dwPageSize, - self.lpMinimumApplicationAddress, - self.lpMaximumApplicationAddress, - self.dwActiveProcessorMask, - self.numberOfProcessors, - self.ProcessorsType, - self.dwAllocationgranularity, - self.wProcessorLevel, - self.ProcessorRevision) - - -def kernel32_GetSystemInfo(jitter): - ret_ad, args = jitter.func_args_stdcall(["sys_ptr"]) - sysinfo = systeminfo() - jitter.vm.set_mem(args.sys_ptr, sysinfo.pack()) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_IsWow64Process(jitter): - ret_ad, args = jitter.func_args_stdcall(["process", "bool_ptr"]) - jitter.vm.set_u32(args.bool_ptr, 0) - jitter.func_ret_stdcall(ret_ad, 1) - - -def 
kernel32_GetCommandLine(jitter, set_str): - ret_ad, _ = jitter.func_args_stdcall(0) - alloc_addr = winobjs.heap.alloc(jitter, 0x1000) - s = set_str('"%s"' % winobjs.module_path) - jitter.vm.set_mem(alloc_addr, s) - jitter.func_ret_stdcall(ret_ad, alloc_addr) - - -def kernel32_GetCommandLineA(jitter): - kernel32_GetCommandLine(jitter, set_str_ansi) - - -def kernel32_GetCommandLineW(jitter): - kernel32_GetCommandLine(jitter, set_str_unic) - - -def shell32_CommandLineToArgvW(jitter): - ret_ad, args = jitter.func_args_stdcall(["pcmd", "pnumargs"]) - cmd = jitter.get_str_unic(args.pcmd) - log.info("CommandLineToArgv %r", cmd) - tks = cmd.split(' ') - addr = winobjs.heap.alloc(jitter, len(cmd) * 2 + 4 * len(tks)) - addr_ret = winobjs.heap.alloc(jitter, 4 * (len(tks) + 1)) - o = 0 - for i, t in enumerate(tks): - jitter.set_str_unic(addr + o, t) - jitter.vm.set_u32(addr_ret + 4 * i, addr + o) - o += len(t)*2 + 2 - - jitter.vm.set_u32(addr_ret + 4 * i, 0) - jitter.vm.set_u32(args.pnumargs, len(tks)) - jitter.func_ret_stdcall(ret_ad, addr_ret) - - -def cryptdll_MD5Init(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad_ctx"]) - index = len(winobjs.cryptdll_md5_h) - h = MD5.new() - winobjs.cryptdll_md5_h[index] = h - - jitter.vm.set_u32(args.ad_ctx, index) - jitter.func_ret_stdcall(ret_ad, 0) - - -def cryptdll_MD5Update(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad_ctx", "ad_input", "inlen"]) - - index = jitter.vm.get_u32(args.ad_ctx) - if not index in winobjs.cryptdll_md5_h: - raise ValueError('unknown h context', index) - - data = jitter.vm.get_mem(args.ad_input, args.inlen) - winobjs.cryptdll_md5_h[index].update(data) - log.debug(hexdump(data)) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def cryptdll_MD5Final(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad_ctx"]) - - index = jitter.vm.get_u32(args.ad_ctx) - if not index in winobjs.cryptdll_md5_h: - raise ValueError('unknown h context', index) - h = winobjs.cryptdll_md5_h[index].digest() - 
jitter.vm.set_mem(args.ad_ctx + 88, h) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_RtlInitAnsiString(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad_ctx", "ad_str"]) - - s = jitter.get_str_ansi(args.ad_str) - l = len(s) - jitter.vm.set_mem(args.ad_ctx, - pck16(l) + pck16(l + 1) + pck32(args.ad_str)) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_RtlHashUnicodeString(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad_ctxu", "case_i", "h_id", - "phout"]) - - if args.h_id != 1: - raise ValueError('unk hash unicode', args.h_id) - - l1, l2, ptra = struct.unpack('HHL', jitter.vm.get_mem(args.ad_ctxu, 8)) - s = jitter.vm.get_mem(ptra, l1) - s = s[:-1] - hv = 0 - - if args.case_i: - s = s.lower() - for c in s: - hv = ((65599 * hv) + ord(c)) & 0xffffffff - jitter.vm.set_u32(args.phout, hv) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_RtlMoveMemory(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad_dst", "ad_src", "m_len"]) - data = jitter.vm.get_mem(args.ad_src, args.m_len) - jitter.vm.set_mem(args.ad_dst, data) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_RtlAnsiCharToUnicodeChar(jitter): - ret_ad, args = jitter.func_args_stdcall(['ad_ad_ch']) - ad_ch = jitter.vm.get_u32(args.ad_ad_ch) - ch = ord(jitter.vm.get_mem(ad_ch, 1)) - jitter.vm.set_u32(args.ad_ad_ch, ad_ch + 1) - jitter.func_ret_stdcall(ret_ad, ch) - - -def ntdll_RtlFindCharInUnicodeString(jitter): - ret_ad, args = jitter.func_args_stdcall(["flags", "main_str_ad", - "search_chars_ad", "pos_ad"]) - - if args.flags != 0: - raise ValueError('unk flags') - - ml1, ml2, mptra = struct.unpack('HHL', - jitter.vm.get_mem(args.main_str_ad, 8)) - sl1, sl2, sptra = struct.unpack( - 'HHL', jitter.vm.get_mem(args.search_chars_ad, 8)) - main_data = jitter.vm.get_mem(mptra, ml1)[:-1] - search_data = jitter.vm.get_mem(sptra, sl1)[:-1] - - pos = None - for i, c in enumerate(main_data): - for s in search_data: - if s == c: - pos = i - break - if pos: - break - if pos is None: - ret = 
0xC0000225 - jitter.vm.set_u32(args.pos_ad, 0) - else: - ret = 0 - jitter.vm.set_u32(args.pos_ad, pos) - - jitter.func_ret_stdcall(ret_ad, ret) - - -def ntdll_RtlComputeCrc32(jitter): - ret_ad, args = jitter.func_args_stdcall(["dwinit", "pdata", "ilen"]) - data = jitter.vm.get_mem(args.pdata, args.ilen) - crc_r = crc32(data, args.dwinit) - jitter.func_ret_stdcall(ret_ad, crc_r) - - -def ntdll_RtlExtendedIntegerMultiply(jitter): - ret_ad, args = jitter.func_args_stdcall(['multiplicand_low', - 'multiplicand_high', - 'multiplier']) - a = (args.multiplicand_high << 32) + args.multiplicand_low - a = a * args.multiplier - jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) - - -def ntdll_RtlLargeIntegerAdd(jitter): - ret_ad, args = jitter.func_args_stdcall(['a_low', 'a_high', - 'b_low', 'b_high']) - a = (args.a_high << 32) + args.a_low + (args.b_high << 32) + args.b_low - jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) - - -def ntdll_RtlLargeIntegerShiftRight(jitter): - ret_ad, args = jitter.func_args_stdcall(['a_low', 'a_high', 's_count']) - a = ((args.a_high << 32) + args.a_low) >> args.s_count - jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) - - -def ntdll_RtlEnlargedUnsignedMultiply(jitter): - ret_ad, args = jitter.func_args_stdcall(['a', 'b']) - a = args.a * args.b - jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) - - -def ntdll_RtlLargeIntegerSubtract(jitter): - ret_ad, args = jitter.func_args_stdcall(['a_low', 'a_high', - 'b_low', 'b_high']) - a = (args.a_high << 32) + args.a_low - (args.b_high << 32) + args.b_low - jitter.func_ret_stdcall(ret_ad, a & 0xffffffff, (a >> 32) & 0xffffffff) - - -def ntdll_RtlCompareMemory(jitter): - ret_ad, args = jitter.func_args_stdcall(['ad1', 'ad2', 'm_len']) - data1 = jitter.vm.get_mem(args.ad1, args.m_len) - data2 = jitter.vm.get_mem(args.ad2, args.m_len) - - i = 0 - while data1[i] == data2[i]: - i += 1 - if i >= args.m_len: - break - - 
jitter.func_ret_stdcall(ret_ad, i) - - -def user32_GetMessagePos(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, 0x00110022) - - -def kernel32_Sleep(jitter): - ret_ad, _ = jitter.func_args_stdcall(['t']) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_ZwUnmapViewOfSection(jitter): - ret_ad, _ = jitter.func_args_stdcall(['h', 'ad']) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_IsBadReadPtr(jitter): - ret_ad, _ = jitter.func_args_stdcall(['lp', 'ucb']) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_KeInitializeEvent(jitter): - ret_ad, args = jitter.func_args_stdcall(['my_event', 'my_type', - 'my_state']) - jitter.vm.set_u32(args.my_event, winobjs.win_event_num) - winobjs.win_event_num += 1 - - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_RtlGetVersion(jitter): - ret_ad, args = jitter.func_args_stdcall(['ptr_version']) - - s = struct.pack("IIIII", - 0x114, # struct size - 0x5, # maj vers - 0x2, # min vers - 0x666, # build nbr - 0x2, # platform id - ) + jitter.set_str_unic("Service pack 4") - - jitter.vm.set_mem(args.ptr_version, s) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_RtlVerifyVersionInfo(jitter): - ret_ad, args = jitter.func_args_stdcall(['ptr_version']) - - s = jitter.vm.get_mem(args.ptr_version, 0x5 * 4) - s_size, s_majv, s_minv, s_buildn, s_platform = struct.unpack('IIIII', s) - raise NotImplementedError("Untested case") - # jitter.vm.set_mem(args.ptr_version, s) - # jitter.func_ret_stdcall(ret_ad, 0) - - -def hal_ExAcquireFastMutex(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, 0) - - -def mdl2ad(n): - return winobjs.nt_mdl_ad + 0x10 * n - - -def ad2mdl(ad): - return ((ad - winobjs.nt_mdl_ad) & 0xFFFFFFFF) // 0x10 - - -def ntoskrnl_IoAllocateMdl(jitter): - ret_ad, args = jitter.func_args_stdcall(["v_addr", "l", "second_buf", - "chargequota", "pirp"]) - m = mdl(args.v_addr, args.l) - winobjs.nt_mdl[winobjs.nt_mdl_cur] = m - 
jitter.vm.set_mem(mdl2ad(winobjs.nt_mdl_cur), bytes(m)) - jitter.func_ret_stdcall(ret_ad, mdl2ad(winobjs.nt_mdl_cur)) - winobjs.nt_mdl_cur += 1 - - -def ntoskrnl_MmProbeAndLockPages(jitter): - ret_ad, args = jitter.func_args_stdcall(["p_mdl", "access_mode", "op"]) - - if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: - raise ValueError('unk mdl', hex(args.p_mdl)) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_MmMapLockedPagesSpecifyCache(jitter): - ret_ad, args = jitter.func_args_stdcall(["p_mdl", "access_mode", - "cache_type", "base_ad", - "bugcheckonfailure", - "priority"]) - if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: - raise ValueError('unk mdl', hex(args.p_mdl)) - - jitter.func_ret_stdcall(ret_ad, winobjs.nt_mdl[ad2mdl(args.p_mdl)].ad) - - -def ntoskrnl_MmProtectMdlSystemAddress(jitter): - ret_ad, args = jitter.func_args_stdcall(["p_mdl", "prot"]) - if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: - raise ValueError('unk mdl', hex(args.p_mdl)) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_MmUnlockPages(jitter): - ret_ad, args = jitter.func_args_stdcall(['p_mdl']) - if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: - raise ValueError('unk mdl', hex(args.p_mdl)) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_IoFreeMdl(jitter): - ret_ad, args = jitter.func_args_stdcall(['p_mdl']) - if not ad2mdl(args.p_mdl) in winobjs.nt_mdl: - raise ValueError('unk mdl', hex(args.p_mdl)) - del(winobjs.nt_mdl[ad2mdl(args.p_mdl)]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def hal_ExReleaseFastMutex(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_RtlQueryRegistryValues(jitter): - ret_ad, args = jitter.func_args_stdcall(["relativeto", "path", - "querytable", - "context", - "environ"]) - # path = get_str_unic(jitter, args.path) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntoskrnl_ExAllocatePoolWithTagPriority(jitter): - ret_ad, args = jitter.func_args_stdcall(["pool_type", - "nbr_of_bytes", - "tag", 
"priority"]) - alloc_addr = winobjs.heap.next_addr(args.nbr_of_bytes) - jitter.vm.add_memory_page( - alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * args.nbr_of_bytes, - "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) - - jitter.func_ret_stdcall(ret_ad, alloc_addr) - - -def my_lstrcmp(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2"]) - s1 = get_str(args.ptr_str1) - s2 = get_str(args.ptr_str2) - log.info("Compare %r with %r", s1, s2) - jitter.func_ret_stdcall(ret_ad, cmp(s1, s2)) - -def msvcrt_wcscmp(jitter): - ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"]) - s1 = jitter.get_str_unic(args.ptr_str1) - s2 = jitter.get_str_unic(args.ptr_str2) - log.debug("%s('%s','%s')" % (whoami(), s1, s2)) - jitter.func_ret_cdecl(ret_ad, cmp(s1, s2)) - -def msvcrt__wcsicmp(jitter): - ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"]) - s1 = jitter.get_str_unic(args.ptr_str1) - s2 = jitter.get_str_unic(args.ptr_str2) - log.debug("%s('%s','%s')" % (whoami(), s1, s2)) - jitter.func_ret_cdecl(ret_ad, cmp(s1.lower(), s2.lower())) - -def msvcrt__wcsnicmp(jitter): - ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2", "count"]) - s1 = jitter.get_str_unic(args.ptr_str1) - s2 = jitter.get_str_unic(args.ptr_str2) - log.debug("%s('%s','%s',%d)" % (whoami(), s1, s2, args.count)) - jitter.func_ret_cdecl(ret_ad, cmp(s1.lower()[:args.count], s2.lower()[:args.count])) - -def msvcrt_wcsncpy(jitter): - ret_ad, args = jitter.func_args_cdecl(["dst", "src", "n"]) - src = jitter.get_str_unic(args.src) - dst = src[:args.n] - dst += "\x00\x00" * (args.n-len(dst)+1) - jitter.vm.set_mem(args.dst, dst) - jitter.func_ret_cdecl(ret_ad, args.dst) - -def kernel32_lstrcmpA(jitter): - my_lstrcmp(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_lstrcmpiA(jitter): - my_lstrcmp(jitter, whoami(), lambda x: jitter.get_str_ansi(x).lower()) - - -def kernel32_lstrcmpW(jitter): - my_lstrcmp(jitter, whoami(), jitter.get_str_unic) - - -def 
kernel32_lstrcmpiW(jitter): - my_lstrcmp(jitter, whoami(), lambda x: jitter.get_str_unic(x).lower()) - - -def kernel32_lstrcmpi(jitter): - my_lstrcmp(jitter, whoami(), lambda x: jitter.get_str_ansi(x).lower()) - - -def my_strcpy(jitter, funcname, get_str, set_str): - ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2"]) - s2 = get_str(args.ptr_str2) - set_str(args.ptr_str1, s2) - log.info("Copy '%r'", s2) - jitter.func_ret_stdcall(ret_ad, args.ptr_str1) - - -def kernel32_lstrcpyW(jitter): - my_strcpy(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) - - -def kernel32_lstrcpyA(jitter): - my_strcpy(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) - - -def kernel32_lstrcpy(jitter): - my_strcpy(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) - -def msvcrt__mbscpy(jitter): - ret_ad, args = jitter.func_args_cdecl(["ptr_str1", "ptr_str2"]) - s2 = jitter.get_str_unic(args.ptr_str2) - jitter.set_str_unic(args.ptr_str1, s2) - jitter.func_ret_cdecl(ret_ad, args.ptr_str1) - -def msvcrt_wcscpy(jitter): - return msvcrt__mbscpy(jitter) - - -def kernel32_lstrcpyn(jitter): - ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2", - "mlen"]) - s2 = jitter.get_str_ansi(args.ptr_str2) - if len(s2) >= args.mlen: - s2 = s2[:args.mlen - 1] - log.info("Copy '%r'", s2) - jitter.set_str_ansi(args.ptr_str1, s2) - jitter.func_ret_stdcall(ret_ad, args.ptr_str1) - - -def my_strlen(jitter, funcname, get_str, mylen): - ret_ad, args = jitter.func_args_stdcall(["src"]) - src = get_str(args.src) - length = mylen(src) - log.info("Len of '%r' -> 0x%x", src, length) - jitter.func_ret_stdcall(ret_ad, length) - - -def kernel32_lstrlenA(jitter): - my_strlen(jitter, whoami(), jitter.get_str_ansi, len) - - -def kernel32_lstrlenW(jitter): - my_strlen(jitter, whoami(), jitter.get_str_unic, len) - - -def kernel32_lstrlen(jitter): - my_strlen(jitter, whoami(), jitter.get_str_ansi, len) - - -def my_lstrcat(jitter, funcname, get_str, set_str): - ret_ad, args = 
jitter.func_args_stdcall(['ptr_str1', 'ptr_str2']) - s1 = get_str(args.ptr_str1) - s2 = get_str(args.ptr_str2) - set_str(args.ptr_str1, s1 + s2) - jitter.func_ret_stdcall(ret_ad, args.ptr_str1) - - -def kernel32_lstrcatA(jitter): - my_lstrcat(jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) - - -def kernel32_lstrcatW(jitter): - my_lstrcat(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) - - -def kernel32_GetUserGeoID(jitter): - ret_ad, args = jitter.func_args_stdcall(["geoclass"]) - if args.geoclass == 14: - ret = 12345678 - elif args.geoclass == 16: - ret = 55667788 - else: - raise ValueError('unknown geolcass') - jitter.func_ret_stdcall(ret_ad, ret) - - -def my_GetVolumeInformation(jitter, funcname, get_str, set_str): - ret_ad, args = jitter.func_args_stdcall(["lprootpathname", - "lpvolumenamebuffer", - "nvolumenamesize", - "lpvolumeserialnumber", - "lpmaximumcomponentlength", - "lpfilesystemflags", - "lpfilesystemnamebuffer", - "nfilesystemnamesize"]) - if args.lprootpathname: - s = get_str(args.lprootpathname) - log.info('GetVolumeInformation %r', s) - - - if args.lpvolumenamebuffer: - s = "volumename" - s = s[:args.nvolumenamesize] - set_str(args.lpvolumenamebuffer, s) - - if args.lpvolumeserialnumber: - jitter.vm.set_u32(args.lpvolumeserialnumber, 11111111) - if args.lpmaximumcomponentlength: - jitter.vm.set_u32(args.lpmaximumcomponentlength, 0xff) - if args.lpfilesystemflags: - jitter.vm.set_u32(args.lpfilesystemflags, 22222222) - - if args.lpfilesystemnamebuffer: - s = "filesystemname" - s = s[:args.nfilesystemnamesize] - set_str(args.lpfilesystemnamebuffer, s) - - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_GetVolumeInformationA(jitter): - my_GetVolumeInformation( - jitter, whoami(), jitter.get_str_ansi, jitter.set_str_ansi) - - -def kernel32_GetVolumeInformationW(jitter): - my_GetVolumeInformation(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) - - -def kernel32_MultiByteToWideChar(jitter): - ret_ad, args = 
jitter.func_args_stdcall(["codepage", "dwflags", - "lpmultibytestr", - "cbmultibyte", - "lpwidecharstr", - "cchwidechar"]) - src = jitter.get_str_ansi(args.lpmultibytestr) + '\x00' - l = len(src) - - src = "\x00".join(list(src)) - jitter.vm.set_mem(args.lpwidecharstr, src) - jitter.func_ret_stdcall(ret_ad, l) - - -def my_GetEnvironmentVariable(jitter, funcname, get_str, set_str, mylen): - ret_ad, args = jitter.func_args_stdcall(["lpname", "lpbuffer", - "nsize"]) - - s = get_str(args.lpname) - log.info('GetEnvironmentVariable %r', s) - if s in winobjs.env_variables: - v = winobjs.env_variables[s] - else: - log.warning('WARNING unknown env variable %r', s) - v = "" - set_str(args.lpbuffer, v) - jitter.func_ret_stdcall(ret_ad, mylen(v)) - - -def kernel32_GetEnvironmentVariableA(jitter): - my_GetEnvironmentVariable(jitter, whoami(), - jitter.get_str_ansi, - jitter.set_str_ansi, - len) - - -def kernel32_GetEnvironmentVariableW(jitter): - my_GetEnvironmentVariable(jitter, whoami(), - jitter.get_str_unic, - jitter.set_str_ansi, - len) - - -def my_GetSystemDirectory(jitter, funcname, set_str): - ret_ad, args = jitter.func_args_stdcall(["lpbuffer", "usize"]) - s = "c:\\windows\\system32" - l = len(s) - set_str(args.lpbuffer, s) - jitter.func_ret_stdcall(ret_ad, l) - - - -def kernel32_GetSystemDirectoryA(jitter): - my_GetSystemDirectory(jitter, whoami(), jitter.set_str_ansi) - - -def kernel32_GetSystemDirectoryW(jitter): - my_GetSystemDirectory(jitter, whoami(), jitter.set_str_unic) - - -def my_CreateDirectory(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(['lppath', 'secattrib']) - # path = get_str(jitter, args.lppath) - jitter.func_ret_stdcall(ret_ad, 0x1337) - - -def kernel32_CreateDirectoryW(jitter): - my_CreateDirectory(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_CreateDirectoryA(jitter): - my_CreateDirectory(jitter, whoami(), jitter.get_str_ansi) - - - -def my_CreateEvent(jitter, funcname, get_str): - ret_ad, args = 
jitter.func_args_stdcall(["lpeventattributes", - "bmanualreset", - "binitialstate", - "lpname"]) - s = get_str(args.lpname) if args.lpname else None - if not s in winobjs.events_pool: - winobjs.events_pool[s] = (args.bmanualreset, args.binitialstate) - else: - log.warning('WARNING: known event') - jitter.func_ret_stdcall(ret_ad, id(s)) - - -def kernel32_CreateEventA(jitter): - my_CreateEvent(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_CreateEventW(jitter): - my_CreateEvent(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_WaitForSingleObject(jitter): - ret_ad, args = jitter.func_args_stdcall(['handle', 'dwms']) - - t_start = time.time() * 1000 - found = False - while True: - if args.dwms and args.dwms + t_start > time.time() * 1000: - ret = 0x102 - break - for key, value in viewitems(winobjs.events_pool): - if key != args.handle: - continue - found = True - if value[1] == 1: - ret = 0 - break - if not found: - log.warning('unknown handle') - ret = 0xffffffff - break - time.sleep(0.1) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_SetFileAttributesA(jitter): - ret_ad, args = jitter.func_args_stdcall(["lpfilename", - "dwfileattributes"]) - if args.lpfilename: - # fname = get_str_ansi(jitter, args.lpfilename) - ret = 1 - else: - ret = 0 - jitter.vm.set_u32(tib_address + 0x34, 3) - - jitter.func_ret_stdcall(ret_ad, ret) - - -def ntdll_RtlMoveMemory(jitter): - ret_ad, args = jitter.func_args_stdcall(["dst", "src", "l"]) - s = jitter.vm.get_mem(args.src, args.l) - jitter.vm.set_mem(args.dst, s) - jitter.func_ret_stdcall(ret_ad, 1) - - -def ntdll_ZwQuerySystemInformation(jitter): - ret_ad, args = jitter.func_args_stdcall(["systeminformationclass", - "systeminformation", - "systeminformationl", - "returnl"]) - if args.systeminformationclass == 2: - # SYSTEM_PERFORMANCE_INFORMATION - o = struct.pack('II', 0x22222222, 0x33333333) - o += "\x00" * args.systeminformationl - o = o[:args.systeminformationl] - jitter.vm.set_mem(args.systeminformation, 
o) - else: - raise ValueError('unknown sysinfo class', - args.systeminformationclass) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_ZwProtectVirtualMemory(jitter): - ret_ad, args = jitter.func_args_stdcall(["handle", "lppvoid", - "pdwsize", - "flnewprotect", - "lpfloldprotect"]) - - ad = jitter.vm.get_u32(args.lppvoid) - # dwsize = upck32(jitter.vm.get_mem(args.pdwsize, 4)) - # XXX mask hpart - flnewprotect = args.flnewprotect & 0xFFF - - if not flnewprotect in ACCESS_DICT: - raise ValueError('unknown access dw!') - jitter.vm.set_mem_access(ad, ACCESS_DICT[flnewprotect]) - - # XXX todo real old protect - jitter.vm.set_u32(args.lpfloldprotect, 0x40) - - jitter.func_ret_stdcall(ret_ad, 1) - - -def ntdll_ZwAllocateVirtualMemory(jitter): - ret_ad, args = jitter.func_args_stdcall(["handle", "lppvoid", - "zerobits", "pdwsize", - "alloc_type", - "flprotect"]) - - # ad = upck32(jitter.vm.get_mem(args.lppvoid, 4)) - dwsize = jitter.vm.get_u32(args.pdwsize) - - if not args.flprotect in ACCESS_DICT: - raise ValueError('unknown access dw!') - - alloc_addr = winobjs.heap.next_addr(dwsize) - jitter.vm.add_memory_page( - alloc_addr, ACCESS_DICT[args.flprotect], "\x00" * dwsize, - "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) - jitter.vm.set_u32(args.lppvoid, alloc_addr) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_ZwFreeVirtualMemory(jitter): - ret_ad, args = jitter.func_args_stdcall(["handle", "lppvoid", - "pdwsize", "alloc_type"]) - # ad = upck32(jitter.vm.get_mem(args.lppvoid, 4)) - # dwsize = upck32(jitter.vm.get_mem(args.pdwsize, 4)) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_RtlInitString(jitter): - ret_ad, args = jitter.func_args_stdcall(["pstring", "source"]) - s = jitter.get_str_ansi(args.source) - l = len(s) + 1 - o = struct.pack('HHI', l, l, args.source) - jitter.vm.set_mem(args.pstring, o) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_RtlAnsiStringToUnicodeString(jitter): - ret_ad, args = jitter.func_args_stdcall(["dst", "src", 
"alloc_str"]) - - l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.src, 0x8)) - s = jitter.get_str_ansi(p_src) - s = ("\x00".join(s + "\x00")) - l = len(s) + 1 - if args.alloc_str: - alloc_addr = winobjs.heap.next_addr(l) - jitter.vm.add_memory_page( - alloc_addr, PAGE_READ | PAGE_WRITE, "\x00" * l, - "Alloc in %s ret 0x%X" % (whoami(), ret_ad)) - else: - alloc_addr = p_src - jitter.vm.set_mem(alloc_addr, s) - o = struct.pack('HHI', l, l, alloc_addr) - jitter.vm.set_mem(args.dst, o) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_LdrLoadDll(jitter): - ret_ad, args = jitter.func_args_stdcall(["path", "flags", - "modname", "modhandle"]) - - l1, l2, p_src = struct.unpack('HHI', - jitter.vm.get_mem(args.modname, 0x8)) - s = jitter.get_str_unic(p_src) - libname = s.lower() - - ad = winobjs.runtime_dll.lib_get_add_base(libname) - jitter.vm.set_u32(args.modhandle, ad) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_RtlFreeUnicodeString(jitter): - ret_ad, args = jitter.func_args_stdcall(['src']) - # l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.src, 0x8)) - # s = get_str_unic(jitter, p_src) - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_LdrGetProcedureAddress(jitter): - ret_ad, args = jitter.func_args_stdcall(["libbase", "pfname", - "opt", "p_ad"]) - - l1, l2, p_src = struct.unpack('HHI', jitter.vm.get_mem(args.pfname, 0x8)) - fname = jitter.get_str_ansi(p_src) - - ad = winobjs.runtime_dll.lib_get_add_func(args.libbase, fname) - jitter.add_breakpoint(ad, jitter.handle_lib) - - jitter.vm.set_u32(args.p_ad, ad) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def ntdll_memset(jitter): - ret_ad, args = jitter.func_args_cdecl(['addr', 'c', 'size']) - jitter.vm.set_mem(args.addr, int_to_byte(args.c) * args.size) - jitter.func_ret_cdecl(ret_ad, args.addr) - - -def msvcrt_memset(jitter): - ret_ad, args = jitter.func_args_cdecl(['addr', 'c', 'size']) - jitter.vm.set_mem(args.addr, int_to_byte(args.c) * args.size) - jitter.func_ret_cdecl(ret_ad, 
args.addr) - -def msvcrt_strrchr(jitter): - ret_ad, args = jitter.func_args_cdecl(['pstr','c']) - s = jitter.get_str_ansi(args.pstr) - c = int_to_byte(args.c) - ret = args.pstr + s.rfind(c) - log.info("strrchr(%x '%s','%s') = %x" % (args.pstr,s,c,ret)) - jitter.func_ret_cdecl(ret_ad, ret) - -def msvcrt_wcsrchr(jitter): - ret_ad, args = jitter.func_args_cdecl(['pstr','c']) - s = jitter.get_str_unic(args.pstr) - c = int_to_byte(args.c) - ret = args.pstr + (s.rfind(c)*2) - log.info("wcsrchr(%x '%s',%s) = %x" % (args.pstr,s,c,ret)) - jitter.func_ret_cdecl(ret_ad, ret) - -def msvcrt_memcpy(jitter): - ret_ad, args = jitter.func_args_cdecl(['dst', 'src', 'size']) - s = jitter.vm.get_mem(args.src, args.size) - jitter.vm.set_mem(args.dst, s) - jitter.func_ret_cdecl(ret_ad, args.dst) - -def msvcrt_realloc(jitter): - ret_ad,args = jitter.func_args_cdecl(['ptr','new_size']) - if args.ptr == 0: - addr = winobjs.heap.alloc(jitter, args.new_size) - else: - addr = winobjs.heap.alloc(jitter, args.new_size) - size = winobjs.heap.get_size(jitter.vm, args.ptr) - data = jitter.vm.get_mem(args.ptr, size) - jitter.vm.set_mem(addr, data) - jitter.func_ret_cdecl(ret_ad, addr) - -def msvcrt_memcmp(jitter): - ret_ad, args = jitter.func_args_cdecl(['ps1', 'ps2', 'size']) - s1 = jitter.vm.get_mem(args.ps1, args.size) - s2 = jitter.vm.get_mem(args.ps2, args.size) - ret = cmp(s1, s2) - jitter.func_ret_cdecl(ret_ad, ret) - - -def shlwapi_PathFindExtensionA(jitter): - ret_ad, args = jitter.func_args_stdcall(['path_ad']) - path = jitter.get_str_ansi(args.path_ad) - i = path.rfind('.') - if i == -1: - i = args.path_ad + len(path) - else: - i = args.path_ad + i - jitter.func_ret_stdcall(ret_ad, i) - - -def shlwapi_PathRemoveFileSpecW(jitter): - ret_ad, args = jitter.func_args_stdcall(['path_ad']) - path = jitter.get_str_unic(args.path_ad) - i = path.rfind('\\') - if i == -1: - i = 0 - jitter.vm.set_mem(args.path_ad + i * 2, "\x00\x00") - path = jitter.get_str_unic(args.path_ad) - 
jitter.func_ret_stdcall(ret_ad, 1) - - -def shlwapi_PathIsPrefixW(jitter): - ret_ad, args = jitter.func_args_stdcall(['ptr_prefix', 'ptr_path']) - prefix = jitter.get_str_unic(args.ptr_prefix) - path = jitter.get_str_unic(args.ptr_path) - - if path.startswith(prefix): - ret = 1 - else: - ret = 0 - jitter.func_ret_stdcall(ret_ad, ret) - - -def shlwapi_PathIsDirectoryW(jitter): - ret_ad, args = jitter.func_args_stdcall(['ptr_path']) - fname = jitter.get_str_unic(args.ptr_path) - - sb_fname = windows_to_sbpath(fname) - - s = os.stat(sb_fname) - ret = 0 - if stat.S_ISDIR(s.st_mode): - ret = 1 - - jitter.func_ret_cdecl(ret_ad, ret) - - -def shlwapi_PathIsFileSpec(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(['path_ad']) - path = get_str(args.path_ad) - if path.find(':') != -1 and path.find('\\') != -1: - ret = 0 - else: - ret = 1 - - jitter.func_ret_stdcall(ret_ad, ret) - - -def shlwapi_PathGetDriveNumber(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(['path_ad']) - path = get_str(args.path_ad) - l = ord(path[0].upper()) - ord('A') - if 0 <= l <= 25: - ret = l - else: - ret = -1 - - jitter.func_ret_stdcall(ret_ad, ret) - - -def shlwapi_PathGetDriveNumberA(jitter): - shlwapi_PathGetDriveNumber(jitter, whoami(), jitter.get_str_ansi) - - -def shlwapi_PathGetDriveNumberW(jitter): - shlwapi_PathGetDriveNumber(jitter, whoami(), jitter.get_str_unic) - - -def shlwapi_PathIsFileSpecA(jitter): - shlwapi_PathIsFileSpec(jitter, whoami(), jitter.get_str_ansi) - - -def shlwapi_PathIsFileSpecW(jitter): - shlwapi_PathIsFileSpec(jitter, whoami(), jitter.get_str_unic) - - -def shlwapi_StrToIntA(jitter): - ret_ad, args = jitter.func_args_stdcall(['i_str_ad']) - i_str = jitter.get_str_ansi(args.i_str_ad) - try: - i = int(i_str) - except: - log.warning('WARNING cannot convert int') - i = 0 - - jitter.func_ret_stdcall(ret_ad, i) - - -def shlwapi_StrToInt64Ex(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(['pstr', 'flags', 
'pret']) - i_str = get_str(args.pstr) - - if args.flags == 0: - r = int(i_str) - elif args.flags == 1: - r = int(i_str, 16) - else: - raise ValueError('cannot decode int') - - jitter.vm.set_mem(args.pret, struct.pack('q', r)) - jitter.func_ret_stdcall(ret_ad, 1) - - -def shlwapi_StrToInt64ExA(jitter): - shlwapi_StrToInt64Ex(jitter, whoami(), jitter.get_str_ansi) - - -def shlwapi_StrToInt64ExW(jitter): - shlwapi_StrToInt64Ex(jitter, whoami(), jitter.get_str_unic) - - -def user32_IsCharAlpha(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["c"]) - try: - c = int_to_byte(args.c) - except: - log.error('bad char %r', args.c) - c = "\x00" - if c.isalpha(jitter): - ret = 1 - else: - ret = 0 - jitter.func_ret_stdcall(ret_ad, ret) - - -def user32_IsCharAlphaA(jitter): - user32_IsCharAlpha(jitter, whoami(), jitter.get_str_ansi) - - -def user32_IsCharAlphaW(jitter): - user32_IsCharAlpha(jitter, whoami(), jitter.get_str_unic) - - -def user32_IsCharAlphaNumericA(jitter): - ret_ad, args = jitter.func_args_stdcall(["c"]) - c = int_to_byte(args.c) - if c.isalnum(jitter): - ret = 1 - else: - ret = 0 - jitter.func_ret_stdcall(ret_ad, ret) - -def get_fmt_args(jitter, fmt, cur_arg, get_str): - return _get_fmt_args(fmt, cur_arg, get_str, jitter.get_arg_n_cdecl) - -def msvcrt_sprintf_str(jitter, get_str): - ret_ad, args = jitter.func_args_cdecl(['string', 'fmt']) - cur_arg, fmt = 2, args.fmt - return ret_ad, args, get_fmt_args(jitter, fmt, cur_arg, get_str) - -def msvcrt_sprintf(jitter): - ret_ad, args, output = msvcrt_sprintf_str(jitter, jitter.get_str_ansi) - ret = len(output) - log.info("sprintf() = '%s'" % (output)) - jitter.vm.set_mem(args.string, output + b'\x00') - return jitter.func_ret_cdecl(ret_ad, ret) - -def msvcrt_swprintf(jitter): - ret_ad, args = jitter.func_args_cdecl(['string', 'fmt']) - cur_arg, fmt = 2, args.fmt - output = get_fmt_args(jitter, fmt, cur_arg, jitter.get_str_unic) - ret = len(output) - log.info("swprintf('%s') = '%s'" % 
(jitter.get_str_unic(args.fmt), output)) - jitter.vm.set_mem(args.string, output.encode("utf-16le") + b'\x00\x00') - return jitter.func_ret_cdecl(ret_ad, ret) - -def msvcrt_fprintf(jitter): - ret_addr, args = jitter.func_args_cdecl(['file', 'fmt']) - cur_arg, fmt = 2, args.fmt - output = get_fmt_args(jitter, fmt, cur_arg, jitter.get_str_ansi) - ret = len(output) - log.info("fprintf(%x, '%s') = '%s'" % (args.file, jitter.get_str_ansi(args.fmt), output)) - - fd = jitter.vm.get_u32(args.file + 0x10) - if not fd in winobjs.handle_pool: - raise NotImplementedError("Untested case") - winobjs.handle_pool[fd].info.write(output) - - return jitter.func_ret_cdecl(ret_addr, ret) - -def shlwapi_StrCmpNIA(jitter): - ret_ad, args = jitter.func_args_stdcall(["ptr_str1", "ptr_str2", - "nchar"]) - s1 = jitter.get_str_ansi(args.ptr_str1).lower() - s2 = jitter.get_str_ansi(args.ptr_str2).lower() - s1 = s1[:args.nchar] - s2 = s2[:args.nchar] - jitter.func_ret_stdcall(ret_ad, cmp(s1, s2)) - - -def advapi32_RegCreateKeyW(jitter): - ret_ad, args = jitter.func_args_stdcall(["hkey", "subkey", - "phandle"]) - s_subkey = jitter.get_str_unic(args.subkey).lower() if args.subkey else "" - - ret_hkey = 0 - ret = 2 - if args.hkey in winobjs.hkey_handles: - ret = 0 - if s_subkey: - ret_hkey = hash(s_subkey) & 0xffffffff - winobjs.hkey_handles[ret_hkey] = s_subkey - else: - ret_hkey = args.hkey - - log.info("RegCreateKeyW(%x, '%s') = (%x,%d)" % (args.hkey, s_subkey, ret_hkey, ret)) - jitter.vm.set_u32(args.phandle, ret_hkey) - - jitter.func_ret_stdcall(ret_ad, ret) - -def kernel32_GetCurrentDirectoryA(jitter): - ret_ad, args = jitter.func_args_stdcall(["size","buf"]) - dir_ = winobjs.cur_dir - log.debug("GetCurrentDirectory() = '%s'" % dir_) - jitter.vm.set_mem(args.buf, dir_[:args.size-1] + b"\x00") - ret = len(dir_) - if args.size <= len(dir_): - ret += 1 - jitter.func_ret_stdcall(ret_ad, ret) - -def advapi32_RegOpenKeyEx(jitter, funcname, get_str): - ret_ad, args = 
jitter.func_args_stdcall(["hkey", "subkey", - "reserved", "access", - "phandle"]) - s_subkey = get_str(args.subkey).lower() if args.subkey else "" - - ret_hkey = 0 - ret = 2 - if args.hkey in winobjs.hkey_handles: - if s_subkey: - h = hash(s_subkey) & 0xffffffff - if h in winobjs.hkey_handles: - ret_hkey = h - ret = 0 - else: - log.error('unknown skey') - - jitter.vm.set_u32(args.phandle, ret_hkey) - - jitter.func_ret_stdcall(ret_ad, ret) - - -def advapi32_RegOpenKeyExA(jitter): - advapi32_RegOpenKeyEx(jitter, whoami(), jitter.get_str_ansi) - - -def advapi32_RegOpenKeyExW(jitter): - advapi32_RegOpenKeyEx(jitter, whoami(), jitter.get_str_unic) - - -def advapi32_RegSetValue(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["hkey", "psubkey", - "valuetype", "pvalue", - "vlen"]) - if args.psubkey: - log.info("Subkey %s", get_str(args.psubkey)) - if args.pvalue: - log.info("Value %s", get_str(args.pvalue)) - jitter.func_ret_stdcall(ret_ad, 0) - -def advapi32_RegSetValueEx(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["hkey", "lpvaluename", - "reserved", "dwtype", - "lpdata", "cbData"]) - hkey = winobjs.hkey_handles.get(args.hkey, "unknown HKEY") - value_name = get_str(args.lpvaluename) if args.lpvaluename else "" - data = get_str(args.lpdata) if args.lpdata else "" - log.info("%s('%s','%s'='%s',%x)" % (funcname, hkey, value_name, data, args.dwtype)) - jitter.func_ret_stdcall(ret_ad, 0) - -def advapi32_RegCloseKey(jitter): - ret_ad, args = jitter.func_args_stdcall(["hkey"]) - del winobjs.hkey_handles[args.hkey] - log.info("RegCloseKey(%x)" % args.hkey) - jitter.func_ret_stdcall(ret_ad, 0) - -def advapi32_RegSetValueExA(jitter): - advapi32_RegSetValueEx(jitter, whoami(), jitter.get_str_ansi) - - -def advapi32_RegSetValueExW(jitter): - advapi32_RegOpenKeyEx(jitter, whoami(), jitter.get_str_unic) - - -def advapi32_RegSetValueA(jitter): - advapi32_RegSetValue(jitter, whoami(), jitter.get_str_ansi) - - -def 
advapi32_RegSetValueW(jitter): - advapi32_RegSetValue(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_GetThreadLocale(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, 0x40c) - -def kernel32_SetCurrentDirectory(jitter, get_str): - ret_ad, args = jitter.func_args_stdcall(['dir']) - dir_ = get_str(args.dir) - log.debug("SetCurrentDirectory('%s') = 1" % dir_) - winobjs.cur_dir = dir_ - jitter.func_ret_stdcall(ret_ad, 1) - -def kernel32_SetCurrentDirectoryW(jitter): - return kernel32_SetCurrentDirectory(jitter, jitter.get_str_unic) - -def kernel32_SetCurrentDirectoryA(jitter): - return kernel32_SetCurrentDirectory(jitter, jitter.get_str_ansi) - -def msvcrt_wcscat(jitter): - ret_ad, args = jitter.func_args_cdecl(['ptr_str1', 'ptr_str2']) - s1 = jitter.get_str_unic(args.ptr_str1) - s2 = jitter.get_str_unic(args.ptr_str2) - log.info("strcat('%s','%s')" % (s1,s2)) - jitter.vm.set_mem(args.ptr_str1, (s1 + s2).encode("utf-16le") + "\x00\x00") - jitter.func_ret_cdecl(ret_ad, args.ptr_str1) - - -def kernel32_GetLocaleInfo(jitter, funcname, set_str): - ret_ad, args = jitter.func_args_stdcall(["localeid", "lctype", - "lplcdata", "cchdata"]) - - buf = None - ret = 0 - if args.localeid == 0x40c: - if args.lctype == 0x3: - buf = "ENGLISH" - buf = buf[:args.cchdata - 1] - set_str(args.lplcdata, buf) - ret = len(buf) - else: - raise ValueError('unimpl localeid') - - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetLocaleInfoA(jitter): - kernel32_GetLocaleInfo(jitter, whoami(), jitter.set_str_ansi) - - -def kernel32_GetLocaleInfoW(jitter): - kernel32_GetLocaleInfo(jitter, whoami(), jitter.set_str_unic) - - -def kernel32_TlsAlloc(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - winobjs.tls_index += 1 - jitter.func_ret_stdcall(ret_ad, winobjs.tls_index) - - -def kernel32_TlsFree(jitter): - ret_ad, _ = jitter.func_args_stdcall(["tlsindex"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_TlsSetValue(jitter): - ret_ad, args = 
jitter.func_args_stdcall(["tlsindex", "tlsvalue"]) - winobjs.tls_values[args.tlsindex] = args.tlsvalue - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_TlsGetValue(jitter): - ret_ad, args = jitter.func_args_stdcall(["tlsindex"]) - if not args.tlsindex in winobjs.tls_values: - raise ValueError("unknown tls val", repr(args.tlsindex)) - jitter.func_ret_stdcall(ret_ad, winobjs.tls_values[args.tlsindex]) - - -def user32_GetKeyboardType(jitter): - ret_ad, args = jitter.func_args_stdcall(["typeflag"]) - - ret = 0 - if args.typeflag == 0: - ret = 4 - else: - raise ValueError('unimpl keyboard type') - - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetStartupInfo(jitter, funcname, set_str): - ret_ad, args = jitter.func_args_stdcall(["ptr"]) - - s = "\x00" * 0x2c + "\x81\x00\x00\x00" + "\x0a" - - jitter.vm.set_mem(args.ptr, s) - jitter.func_ret_stdcall(ret_ad, args.ptr) - - -def kernel32_GetStartupInfoA(jitter): - kernel32_GetStartupInfo(jitter, whoami(), jitter.set_str_ansi) - - -def kernel32_GetStartupInfoW(jitter): - kernel32_GetStartupInfo(jitter, whoami(), jitter.set_str_unic) - - -def kernel32_GetCurrentThreadId(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, 0x113377) - - -def kernel32_InitializeCriticalSection(jitter): - ret_ad, _ = jitter.func_args_stdcall(["lpcritic"]) - jitter.func_ret_stdcall(ret_ad, 0) - - -def user32_GetSystemMetrics(jitter): - ret_ad, args = jitter.func_args_stdcall(["nindex"]) - - ret = 0 - if args.nindex in [0x2a, 0x4a]: - ret = 0 - else: - raise ValueError('unimpl index') - jitter.func_ret_stdcall(ret_ad, ret) - - -def wsock32_WSAStartup(jitter): - ret_ad, args = jitter.func_args_stdcall(["version", "pwsadata"]) - jitter.vm.set_mem(args.pwsadata, "\x01\x01\x02\x02WinSock 2.0\x00") - jitter.func_ret_stdcall(ret_ad, 0) - - -def get_current_filetime(): - """ - Get current filetime - https://msdn.microsoft.com/en-us/library/ms724228 - """ - curtime = winobjs.current_datetime - unixtime = 
int(time.mktime(curtime.timetuple())) - filetime = (int(unixtime * 1000000 + curtime.microsecond) * 10 + - DATE_1601_TO_1970) - return filetime - - -def unixtime_to_filetime(unixtime): - """ - Convert unixtime to filetime - https://msdn.microsoft.com/en-us/library/ms724228 - """ - return (unixtime * 10000000) + DATE_1601_TO_1970 - - -def filetime_to_unixtime(filetime): - """ - Convert filetime to unixtime - # https://msdn.microsoft.com/en-us/library/ms724228 - """ - return int((filetime - DATE_1601_TO_1970) // 10000000) - - -def datetime_to_systemtime(curtime): - - s = struct.pack('HHHHHHHH', - curtime.year, # year - curtime.month, # month - curtime.weekday(), # dayofweek - curtime.day, # day - curtime.hour, # hour - curtime.minute , # minutes - curtime.second, # seconds - int(curtime.microsecond // 1000), # millisec - ) - return s - - -def kernel32_GetSystemTimeAsFileTime(jitter): - ret_ad, args = jitter.func_args_stdcall(["lpSystemTimeAsFileTime"]) - - current_filetime = get_current_filetime() - filetime = struct.pack('II', - current_filetime & 0xffffffff, - (current_filetime>>32) & 0xffffffff) - - jitter.vm.set_mem(args.lpSystemTimeAsFileTime, filetime) - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_GetLocalTime(jitter): - ret_ad, args = jitter.func_args_stdcall(["lpsystemtime"]) - systemtime = datetime_to_systemtime(winobjs.current_datetime) - jitter.vm.set_mem(args.lpsystemtime, systemtime) - jitter.func_ret_stdcall(ret_ad, args.lpsystemtime) - - -def kernel32_GetSystemTime(jitter): - ret_ad, args = jitter.func_args_stdcall(["lpsystemtime"]) - systemtime = datetime_to_systemtime(winobjs.current_datetime) - jitter.vm.set_mem(args.lpsystemtime, systemtime) - jitter.func_ret_stdcall(ret_ad, args.lpsystemtime) - - -def kernel32_CreateFileMapping(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["hfile", "lpattr", "flprotect", - "dwmaximumsizehigh", - "dwmaximumsizelow", "lpname"]) - - if args.hfile == 0xffffffff: - # Create null 
mapping - if args.dwmaximumsizehigh: - raise NotImplementedError("Untested case") - hmap = StringIO("\x00" * args.dwmaximumsizelow) - hmap_handle = winobjs.handle_pool.add('filemem', hmap) - - ret = winobjs.handle_pool.add('filemapping', hmap_handle) - else: - if not args.hfile in winobjs.handle_pool: - raise ValueError('unknown handle') - ret = winobjs.handle_pool.add('filemapping', args.hfile) - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_CreateFileMappingA(jitter): - kernel32_CreateFileMapping(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_CreateFileMappingW(jitter): - kernel32_CreateFileMapping(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_MapViewOfFile(jitter): - ret_ad, args = jitter.func_args_stdcall(["hfile", "flprotect", - "dwfileoffsethigh", - "dwfileoffsetlow", - "length"]) - - if not args.hfile in winobjs.handle_pool: - raise ValueError('unknown handle') - hmap = winobjs.handle_pool[args.hfile] - if not hmap.info in winobjs.handle_pool: - raise ValueError('unknown file handle') - - hfile_o = winobjs.handle_pool[hmap.info] - fd = hfile_o.info - fd.seek((args.dwfileoffsethigh << 32) | args.dwfileoffsetlow) - data = fd.read(args.length) if args.length else fd.read() - length = len(data) - - log.debug('MapViewOfFile len: %x', len(data)) - - if not args.flprotect in ACCESS_DICT: - raise ValueError('unknown access dw!') - - alloc_addr = winobjs.heap.alloc(jitter, len(data)) - jitter.vm.set_mem(alloc_addr, data) - - winobjs.handle_mapped[alloc_addr] = (hfile_o, args.dwfileoffsethigh, - args.dwfileoffsetlow, length) - - jitter.func_ret_stdcall(ret_ad, alloc_addr) - - -def kernel32_UnmapViewOfFile(jitter): - ret_ad, args = jitter.func_args_stdcall(['ad']) - - if not args.ad in winobjs.handle_mapped: - raise NotImplementedError("Untested case") - """ - hfile_o, dwfileoffsethigh, dwfileoffsetlow, length = winobjs.handle_mapped[ad] - off = (dwfileoffsethigh<<32) | dwfileoffsetlow - s = jitter.vm.get_mem(ad, length) - 
hfile_o.info.seek(off) - hfile_o.info.write(s) - hfile_o.info.close() - """ - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_GetDriveType(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(['pathname']) - - p = get_str(args.pathname) - p = p.upper() - - log.debug('Drive: %r', p) - - ret = 0 - if p[0] == "C": - ret = 3 - - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetDriveTypeA(jitter): - kernel32_GetDriveType(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_GetDriveTypeW(jitter): - kernel32_GetDriveType(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_GetDiskFreeSpace(jitter, funcname, get_str): - ret_ad, args = jitter.func_args_stdcall(["lprootpathname", - "lpsectorpercluster", - "lpbytespersector", - "lpnumberoffreeclusters", - "lptotalnumberofclusters"]) - jitter.vm.set_u32(args.lpsectorpercluster, 8) - jitter.vm.set_u32(args.lpbytespersector, 0x200) - jitter.vm.set_u32(args.lpnumberoffreeclusters, 0x222222) - jitter.vm.set_u32(args.lptotalnumberofclusters, 0x333333) - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_GetDiskFreeSpaceA(jitter): - kernel32_GetDiskFreeSpace(jitter, whoami(), jitter.get_str_ansi) - - -def kernel32_GetDiskFreeSpaceW(jitter): - kernel32_GetDiskFreeSpace(jitter, whoami(), jitter.get_str_unic) - - -def kernel32_VirtualQuery(jitter): - ret_ad, args = jitter.func_args_stdcall(["ad", "lpbuffer", "dwl"]) - - all_mem = jitter.vm.get_all_memory() - found = None - for basead, m in viewitems(all_mem): - if basead <= args.ad < basead + m['size']: - found = args.ad, m - break - if not found: - raise ValueError('cannot find mem', hex(args.ad)) - - if args.dwl != 0x1c: - raise ValueError('strange mem len', hex(args.dwl)) - s = struct.pack('IIIIIII', - args.ad, - basead, - ACCESS_DICT_INV[m['access']], - m['size'], - 0x1000, - ACCESS_DICT_INV[m['access']], - 0x01000000) - jitter.vm.set_mem(args.lpbuffer, s) - jitter.func_ret_stdcall(ret_ad, args.dwl) - - -def 
kernel32_GetProcessAffinityMask(jitter): - ret_ad, args = jitter.func_args_stdcall(["hprocess", - "procaffmask", - "systemaffmask"]) - jitter.vm.set_u32(args.procaffmask, 1) - jitter.vm.set_u32(args.systemaffmask, 1) - jitter.func_ret_stdcall(ret_ad, 1) - - -def msvcrt_rand(jitter): - ret_ad, _ = jitter.func_args_cdecl(0) - jitter.func_ret_stdcall(ret_ad, 0x666) - -def msvcrt_srand(jitter): - ret_ad, _ = jitter.func_args_cdecl(['seed']) - jitter.func_ret_stdcall(ret_ad, 0) - -def msvcrt_wcslen(jitter): - ret_ad, args = jitter.func_args_cdecl(["pwstr"]) - s = jitter.get_str_unic(args.pwstr) - jitter.func_ret_cdecl(ret_ad, len(s)) - -def kernel32_SetFilePointer(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "dinstance", - "p_dinstance_high", - "movemethod"]) - - if args.hwnd == winobjs.module_cur_hwnd: - pass - elif args.hwnd in winobjs.handle_pool: - pass - else: - raise ValueError('unknown hwnd!') - - # data = None - if args.hwnd in winobjs.files_hwnd: - winobjs.files_hwnd[winobjs.module_cur_hwnd].seek(args.dinstance, args.movemethod) - elif args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - wh.info.seek(args.dinstance, args.movemethod) - else: - raise ValueError('unknown filename') - jitter.func_ret_stdcall(ret_ad, args.dinstance) - - -def kernel32_SetFilePointerEx(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "dinstance_l", - "dinstance_h", - "pnewfileptr", - "movemethod"]) - dinstance = args.dinstance_l | (args.dinstance_h << 32) - if dinstance: - raise ValueError('Not implemented') - if args.pnewfileptr: - raise ValueError('Not implemented') - if args.hwnd == winobjs.module_cur_hwnd: - pass - elif args.hwnd in winobjs.handle_pool: - pass - else: - raise ValueError('unknown hwnd!') - - # data = None - if args.hwnd in winobjs.files_hwnd: - winobjs.files_hwnd[winobjs.module_cur_hwnd].seek(dinstance, args.movemethod) - elif args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - 
wh.info.seek(dinstance, args.movemethod) - else: - raise ValueError('unknown filename') - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_SetEndOfFile(jitter): - ret_ad, args = jitter.func_args_stdcall(['hwnd']) - if args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - wh.info.seek(0, 2) - else: - raise ValueError('unknown filename') - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_FlushFileBuffers(jitter): - ret_ad, args = jitter.func_args_stdcall(['hwnd']) - if args.hwnd in winobjs.handle_pool: - pass - else: - raise ValueError('unknown filename') - jitter.func_ret_stdcall(ret_ad, 1) - - -def kernel32_WriteFile(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "lpbuffer", - "nnumberofbytestowrite", - "lpnumberofbyteswrite", - "lpoverlapped"]) - data = jitter.vm.get_mem(args.lpbuffer, args.nnumberofbytestowrite) - - if args.hwnd == winobjs.module_cur_hwnd: - pass - elif args.hwnd in winobjs.handle_pool: - pass - else: - raise ValueError('unknown hwnd!') - - if args.hwnd in winobjs.files_hwnd: - winobjs.files_hwnd[winobjs.module_cur_hwnd].write(data) - elif args.hwnd in winobjs.handle_pool: - wh = winobjs.handle_pool[args.hwnd] - wh.info.write(data) - else: - raise ValueError('unknown filename') - - if (args.lpnumberofbyteswrite): - jitter.vm.set_u32(args.lpnumberofbyteswrite, len(data)) - - jitter.func_ret_stdcall(ret_ad, 1) - - -def user32_IsCharUpperA(jitter): - ret_ad, args = jitter.func_args_stdcall(["c"]) - ret = 0 if args.c & 0x20 else 1 - jitter.func_ret_stdcall(ret_ad, ret) - - -def user32_IsCharLowerA(jitter): - ret_ad, args = jitter.func_args_stdcall(["c"]) - ret = 1 if args.c & 0x20 else 0 - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetSystemDefaultLangID(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - jitter.func_ret_stdcall(ret_ad, 0x409) # encglish - - -def msvcrt_malloc(jitter): - ret_ad, args = jitter.func_args_cdecl(["msize"]) - addr = winobjs.heap.alloc(jitter, args.msize) - 
jitter.func_ret_cdecl(ret_ad, addr) - - -def msvcrt_free(jitter): - ret_ad, _ = jitter.func_args_cdecl(["ptr"]) - jitter.func_ret_cdecl(ret_ad, 0) - - -def msvcrt_fseek(jitter): - ret_ad, args = jitter.func_args_cdecl(['stream', 'offset', 'orig']) - fd = jitter.vm.get_u32(args.stream + 0x10) - - if not fd in winobjs.handle_pool: - raise NotImplementedError("Untested case") - o = winobjs.handle_pool[fd] - o.info.seek(args.offset, args.orig) - jitter.func_ret_cdecl(ret_ad, 0) - - -def msvcrt_ftell(jitter): - ret_ad, args = jitter.func_args_cdecl(["stream"]) - fd = jitter.vm.get_u32(args.stream + 0x10) - - if not fd in winobjs.handle_pool: - raise NotImplementedError("Untested case") - o = winobjs.handle_pool[fd] - off = o.info.tell() - jitter.func_ret_cdecl(ret_ad, off) - - -def msvcrt_rewind(jitter): - ret_ad, args = jitter.func_args_cdecl(["stream"]) - fd = jitter.vm.get_u32(args.stream + 0x10) - if not fd in winobjs.handle_pool: - raise NotImplementedError("Untested case") - o = winobjs.handle_pool[fd] - # off = o.info.seek(0, 0) - jitter.func_ret_cdecl(ret_ad, 0) - - -def msvcrt_fread(jitter): - ret_ad, args = jitter.func_args_cdecl(["buf", "size", "nmemb", "stream"]) - fd = jitter.vm.get_u32(args.stream + 0x10) - if not fd in winobjs.handle_pool: - raise NotImplementedError("Untested case") - - data = winobjs.handle_pool[fd].info.read(args.size * args.nmemb) - jitter.vm.set_mem(args.buf, data) - jitter.func_ret_cdecl(ret_ad, args.nmemb) - - -def msvcrt_fwrite(jitter): - ret_ad, args = jitter.func_args_cdecl(["buf", "size", "nmemb", "stream"]) - fd = jitter.vm.get_u32(args.stream + 0x10) - if not fd in winobjs.handle_pool: - raise NotImplementedError("Unknown file handle!") - - data = jitter.vm.get_mem(args.buf, args.size*args.nmemb) - winobjs.handle_pool[fd].info.write(data) - jitter.func_ret_cdecl(ret_ad, args.nmemb) - - -def msvcrt_fclose(jitter): - ret_ad, args = jitter.func_args_cdecl(['stream']) - fd = jitter.vm.get_u32(args.stream + 0x10) - - if not fd in 
winobjs.handle_pool: - raise NotImplementedError("Untested case") - o = winobjs.handle_pool[fd] - # off = o.info.close() - jitter.func_ret_cdecl(ret_ad, 0) - - -def msvcrt_atexit(jitter): - ret_ad, _ = jitter.func_args_cdecl(["func"]) - jitter.func_ret_cdecl(ret_ad, 0) - - -def user32_MessageBoxA(jitter): - ret_ad, args = jitter.func_args_stdcall(["hwnd", "lptext", - "lpcaption", "utype"]) - - text = jitter.get_str_ansi(args.lptext) - caption = jitter.get_str_ansi(args.lpcaption) - - log.info('Caption: %r Text: %r', caption, text) - - jitter.func_ret_stdcall(ret_ad, 0) - - -def kernel32_myGetTempPath(jitter, set_str): - ret_ad, args = jitter.func_args_stdcall(["l", "buf"]) - l = 'c:\\temp\\' - if len(l) < args.l: - set_str(args.buf, l) - jitter.func_ret_stdcall(ret_ad, len(l)) - - -def kernel32_GetTempPathA(jitter): - kernel32_myGetTempPath(jitter, jitter.set_str_ansi) - - -def kernel32_GetTempPathW(jitter): - kernel32_myGetTempPath(jitter, jitter.set_str_unic) - - -temp_num = 0 - - -def kernel32_GetTempFileNameA(jitter): - global temp_num - ret_ad, args = jitter.func_args_stdcall(["path", "ext", "unique", "buf"]) - - temp_num += 1 - ext = jitter.get_str_ansi(args.ext) if args.ext else 'tmp' - path = jitter.get_str_ansi(args.path) if args.path else "xxx" - fname = path + "\\" + "temp%.4d" % temp_num + "." 
+ ext - jitter.vm.set_mem(args.buf, fname) - - jitter.func_ret_stdcall(ret_ad, 0) - - -class win32_find_data(object): - fileattrib = 0 - creationtime = 0 - lastaccesstime = 0 - lastwritetime = 0 - filesizehigh = 0 - filesizelow = 0 - dwreserved0 = 0 - dwreserved1 = 0x1337beef - cfilename = "" - alternamefilename = "" - - def __init__(self, **kargs): - for k, v in viewitems(kargs): - setattr(self, k, v) - - def toStruct(self): - s = struct.pack('=IQQQIIII', - self.fileattrib, - self.creationtime, - self.lastaccesstime, - self.lastwritetime, - self.filesizehigh, - self.filesizelow, - self.dwreserved0, - self.dwreserved1) - fname = self.cfilename + '\x00' * MAX_PATH - fname = fname[:MAX_PATH] - s += fname - fname = self.alternamefilename + '\x00' * 14 - fname = fname[:14] - s += fname - return s - - -class find_data_mngr(object): - - def __init__(self): - self.patterns = {} - self.flist = [] - # handle number -> (flist index, current index in list) - self.handles = {} - - def add_list(self, pattern, flist): - index = len(self.flist) - self.flist.append(flist) - - self.patterns[pattern] = index - - def findfirst(self, pattern): - assert(pattern in self.patterns) - findex = self.patterns[pattern] - h = len(self.handles) + 1 - self.handles[h] = [findex, 0] - return h - - def findnext(self, h): - assert(h in self.handles) - findex, index = self.handles[h] - if index >= len(self.flist[findex]): - return None - fname = self.flist[findex][index] - self.handles[h][1] += 1 - - return fname - - -def kernel32_FindFirstFileA(jitter): - ret_ad, args = jitter.func_args_stdcall(["pfilepattern", "pfindfiledata"]) - - filepattern = jitter.get_str_ansi(args.pfilepattern) - h = winobjs.find_data.findfirst(filepattern) - - fname = winobjs.find_data.findnext(h) - fdata = win32_find_data(cfilename=fname) - - jitter.vm.set_mem(args.pfindfiledata, fdata.toStruct()) - jitter.func_ret_stdcall(ret_ad, h) - - -def kernel32_FindNextFileA(jitter): - ret_ad, args = 
jitter.func_args_stdcall(["handle", "pfindfiledata"]) - - fname = winobjs.find_data.findnext(args.handle) - if fname is None: - ret = 0 - else: - ret = 1 - fdata = win32_find_data(cfilename=fname) - jitter.vm.set_mem(args.pfindfiledata, fdata.toStruct()) - - jitter.func_ret_stdcall(ret_ad, ret) - - -def kernel32_GetNativeSystemInfo(jitter): - ret_ad, args = jitter.func_args_stdcall(["sys_ptr"]) - sysinfo = systeminfo() - jitter.vm.set_mem(args.sys_ptr, sysinfo.pack()) - jitter.func_ret_stdcall(ret_ad, 0) - - -def raw2guid(r): - o = struct.unpack('IHHHBBBBBB', r) - return '{%.8X-%.4X-%.4X-%.4X-%.2X%.2X%.2X%.2X%.2X%.2X}' % o - - -digs = string.digits + string.ascii_lowercase - - -def int2base(x, base): - if x < 0: - sign = -1 - elif x == 0: - return '0' - else: - sign = 1 - x *= sign - digits = [] - while x: - digits.append(digs[x % base]) - x /= base - if sign < 0: - digits.append('-') - digits.reverse() - return ''.join(digits) - - -def msvcrt__ultow(jitter): - ret_ad, args = jitter.func_args_cdecl(["value", "p", "radix"]) - - value = args.value & 0xFFFFFFFF - if not args.radix in [10, 16, 20]: - raise ValueError("Not tested") - s = int2base(value, args.radix) - jitter.vm.set_mem(args.p, jitter.set_str_unic(s + "\x00")) - jitter.func_ret_cdecl(ret_ad, args.p) - - -def msvcrt_myfopen(jitter, get_str): - ret_ad, args = jitter.func_args_cdecl(["pfname", "pmode"]) - - fname = get_str(args.pfname) - rw = get_str(args.pmode) - log.info("fopen %r, %r", fname, rw) - - if rw in ['r', 'rb', 'wb+','wb','wt']: - sb_fname = windows_to_sbpath(fname) - h = open(sb_fname, rw) - eax = winobjs.handle_pool.add(sb_fname, h) - dwsize = 0x20 - alloc_addr = winobjs.heap.alloc(jitter, dwsize) - pp = pck32(0x11112222) + pck32(0) + pck32(0) + pck32(0) + pck32(eax) - jitter.vm.set_mem(alloc_addr, pp) - - else: - raise ValueError('unknown access mode %s' % rw) - - jitter.func_ret_cdecl(ret_ad, alloc_addr) - - -def msvcrt__wfopen(jitter): - msvcrt_myfopen(jitter, jitter.get_str_unic) - - -def 
msvcrt_fopen(jitter): - msvcrt_myfopen(jitter, jitter.get_str_ansi) - - -def msvcrt_strlen(jitter): - ret_ad, args = jitter.func_args_cdecl(["src"]) - - s = jitter.get_str_ansi(args.src) - jitter.func_ret_cdecl(ret_ad, len(s)) diff --git a/miasm2/os_dep/win_api_x86_32_seh.py b/miasm2/os_dep/win_api_x86_32_seh.py deleted file mode 100644 index 27808d83..00000000 --- a/miasm2/os_dep/win_api_x86_32_seh.py +++ /dev/null @@ -1,695 +0,0 @@ -#-*- coding:utf-8 -*- - -# -# Copyright (C) 2011 EADS France, Fabrice Desclaux -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -import logging -import os -import struct - -from future.utils import viewitems - -from elfesteem import pe_init - -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.core.utils import pck32 -import miasm2.arch.x86.regs as x86_regs - -from miasm2.os_dep.win_32_structs import LdrDataEntry, ListEntry, \ - TEB, NT_TIB, PEB, PEB_LDR_DATA, ContextException, \ - EXCEPTION_REGISTRATION_RECORD, EXCEPTION_RECORD - -# Constants Windows -EXCEPTION_BREAKPOINT = 0x80000003 -EXCEPTION_ACCESS_VIOLATION = 0xc0000005 -EXCEPTION_INT_DIVIDE_BY_ZERO = 0xc0000094 -EXCEPTION_PRIV_INSTRUCTION = 0xc0000096 -EXCEPTION_ILLEGAL_INSTRUCTION = 0xc000001d - - -log = logging.getLogger("seh_helper") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.INFO) - -# fs:[0] Page (TIB) -tib_address = 0x7ff70000 -PEB_AD = 0x7ffdf000 -LDR_AD = 0x340000 -DEFAULT_SEH = 0x7ffff000 - -MAX_MODULES = 0x40 - -peb_address = PEB_AD -peb_ldr_data_offset = 0x1ea0 -peb_ldr_data_address = LDR_AD + peb_ldr_data_offset - - -modules_list_offset = 0x1f00 - -InInitializationOrderModuleList_offset = 0x1ee0 -InInitializationOrderModuleList_address = LDR_AD + \ - InInitializationOrderModuleList_offset - -InLoadOrderModuleList_offset = 0x1ee0 + \ - MAX_MODULES * 0x1000 -InLoadOrderModuleList_address = LDR_AD + \ - InLoadOrderModuleList_offset - - -process_environment_address = 0x10000 -process_parameters_address = 0x200000 - -return_from_exception = 0x6eadbeef - - -name2module = [] -main_pe = None -main_pe_name = b"c:\\xxx\\toto.exe" - -MAX_SEH = 5 - - -def build_teb(jitter, teb_address): - """ - Build TEB information using following structure: - - @jitter: jitter instance - @teb_address: the TEB address - """ - - # Only allocate space for ExceptionList/ProcessEnvironmentBlock/Self - jitter.vm.add_memory_page( - teb_address, - PAGE_READ | PAGE_WRITE, - b"\x00" * 
NT_TIB.get_offset("StackBase"), - "TEB.NtTib.ExceptionList" - ) - jitter.vm.add_memory_page( - teb_address + NT_TIB.get_offset("Self"), - PAGE_READ | PAGE_WRITE, - b"\x00" * (NT_TIB.sizeof() - NT_TIB.get_offset("Self")), - "TEB.NtTib.Self" - ) - jitter.vm.add_memory_page( - teb_address + TEB.get_offset("ProcessEnvironmentBlock"), - PAGE_READ | PAGE_WRITE, - b"\x00" * ( - TEB.get_offset("LastErrorValue") - - TEB.get_offset("ProcessEnvironmentBlock") - ), - "TEB.ProcessEnvironmentBlock" - ) - Teb = TEB(jitter.vm, teb_address) - Teb.NtTib.ExceptionList = DEFAULT_SEH - Teb.NtTib.Self = teb_address - Teb.ProcessEnvironmentBlock = peb_address - -def build_peb(jitter, peb_address): - """ - Build PEB information using following structure: - - @jitter: jitter instance - @peb_address: the PEB address - """ - - if main_pe: - offset, length = peb_address + 8, 4 - else: - offset, length = peb_address + 0xC, 0 - length += 4 - - jitter.vm.add_memory_page( - offset, - PAGE_READ | PAGE_WRITE, - b"\x00" * length, - "PEB" - ) - - Peb = PEB(jitter.vm, peb_address) - if main_pe: - Peb.ImageBaseAddress = main_pe.NThdr.ImageBase - Peb.Ldr = peb_ldr_data_address - - -def build_ldr_data(jitter, modules_info): - """ - Build Loader information using following structure: - - +0x000 Length : Uint4B - +0x004 Initialized : UChar - +0x008 SsHandle : Ptr32 Void - +0x00c InLoadOrderModuleList : _LIST_ENTRY - +0x014 InMemoryOrderModuleList : _LIST_ENTRY - +0x01C InInitializationOrderModuleList : _LIST_ENTRY - # dummy dll base - +0x024 DllBase : Ptr32 Void - - @jitter: jitter instance - @modules_info: LoadedModules instance - - """ - # ldr offset pad - offset = 0xC - addr = LDR_AD + peb_ldr_data_offset - ldrdata = PEB_LDR_DATA(jitter.vm, addr) - - main_pe = modules_info.name2module.get(main_pe_name, None) - ntdll_pe = modules_info.name2module.get("ntdll.dll", None) - - - size = 0 - if main_pe: - size += ListEntry.sizeof() * 2 - main_addr_entry = modules_info.module2entry[main_pe] - if ntdll_pe: - 
size += ListEntry.sizeof() - ntdll_addr_entry = modules_info.module2entry[ntdll_pe] - - jitter.vm.add_memory_page( - addr + offset, - PAGE_READ | PAGE_WRITE, - b"\x00" * size, - "Loader struct" - ) # (ldrdata.get_size() - offset)) - - if main_pe: - ldrdata.InLoadOrderModuleList.flink = main_addr_entry - ldrdata.InLoadOrderModuleList.blink = 0 - - ldrdata.InMemoryOrderModuleList.flink = main_addr_entry + \ - LdrDataEntry.get_type().get_offset("InMemoryOrderLinks") - ldrdata.InMemoryOrderModuleList.blink = 0 - - if ntdll_pe: - ldrdata.InInitializationOrderModuleList.flink = ntdll_addr_entry + \ - LdrDataEntry.get_type().get_offset("InInitializationOrderLinks") - ldrdata.InInitializationOrderModuleList.blink = 0 - - # Add dummy dll base - jitter.vm.add_memory_page(peb_ldr_data_address + 0x24, - PAGE_READ | PAGE_WRITE, pck32(0), - "Loader struct dummy dllbase") - - -class LoadedModules(object): - - """Class representing modules in memory""" - - def __init__(self): - self.modules = [] - self.name2module = {} - self.module2entry = {} - self.module2name = {} - - def add(self, name, module, module_entry): - """Track a new module - @name: module name (with extension) - @module: module object - @module_entry: address of the module entry - """ - - self.modules.append(module) - self.name2module[name] = module - self.module2entry[module] = module_entry - self.module2name[module] = name - - def __repr__(self): - return "\n".join(str(x) for x in viewitems(self.name2module)) - - -def create_modules_chain(jitter, name2module): - """ - Create the modules entries. Those modules are not linked in this function. 
- - @jitter: jitter instance - @name2module: dict containing association between name and its pe instance - """ - - modules_info = LoadedModules() - base_addr = LDR_AD + modules_list_offset # XXXX - offset_name = 0x500 - offset_path = 0x600 - - out = "" - for i, (fname, pe_obj) in enumerate(viewitems(name2module), 1): - if pe_obj is None: - log.warning("Unknown module: omitted from link list (%r)", - fname) - continue - addr = base_addr + i * 0x1000 - bpath = fname.replace('/', '\\') - bname_str = os.path.split(fname)[1].lower() - bname_unicode = bname_str.encode("utf-16le") - log.info("Add module %x %r", pe_obj.NThdr.ImageBase, bname_str) - - modules_info.add(bname_str, pe_obj, addr) - - # Allocate a partial LdrDataEntry (0-Flags) - jitter.vm.add_memory_page( - addr, - PAGE_READ | PAGE_WRITE, - b"\x00" * LdrDataEntry.get_offset("Flags"), - "Module info %r" % bname_str - ) - - LdrEntry = LdrDataEntry(jitter.vm, addr) - - LdrEntry.DllBase = pe_obj.NThdr.ImageBase - LdrEntry.EntryPoint = pe_obj.Opthdr.AddressOfEntryPoint - LdrEntry.SizeOfImage = pe_obj.NThdr.sizeofimage - LdrEntry.FullDllName.length = len(bname_unicode) - LdrEntry.FullDllName.maxlength = len(bname_unicode) + 2 - LdrEntry.FullDllName.data = addr + offset_path - LdrEntry.BaseDllName.length = len(bname_unicode) - LdrEntry.BaseDllName.maxlength = len(bname_unicode) + 2 - LdrEntry.BaseDllName.data = addr + offset_name - - jitter.vm.add_memory_page( - addr + offset_name, - PAGE_READ | PAGE_WRITE, - bname_unicode + b"\x00" * 2, - "Module name %r" % bname_str - ) - - bpath_unicode = bpath.encode('utf-16le') - jitter.vm.add_memory_page( - addr + offset_path, - PAGE_READ | PAGE_WRITE, - bpath_unicode + b"\x00" * 2, - "Module path %r" % bname_str - ) - - return modules_info - - -def set_link_list_entry(jitter, loaded_modules, modules_info, offset): - for i, module in enumerate(loaded_modules): - cur_module_entry = modules_info.module2entry[module] - prev_module = loaded_modules[(i - 1) % len(loaded_modules)] - 
next_module = loaded_modules[(i + 1) % len(loaded_modules)] - prev_module_entry = modules_info.module2entry[prev_module] - next_module_entry = modules_info.module2entry[next_module] - if i == 0: - prev_module_entry = peb_ldr_data_address + 0xC - if i == len(loaded_modules) - 1: - next_module_entry = peb_ldr_data_address + 0xC - jitter.vm.set_mem(cur_module_entry + offset, - (pck32(next_module_entry + offset) + - pck32(prev_module_entry + offset))) - - -def fix_InLoadOrderModuleList(jitter, modules_info): - """Fix InLoadOrderModuleList double link list. First module is the main pe, - then ntdll, kernel32. - - @jitter: the jitter instance - @modules_info: the LoadedModules instance - """ - - log.debug("Fix InLoadOrderModuleList") - main_pe = modules_info.name2module.get(main_pe_name, None) - kernel32_pe = modules_info.name2module.get("kernel32.dll", None) - ntdll_pe = modules_info.name2module.get("ntdll.dll", None) - special_modules = [main_pe, kernel32_pe, ntdll_pe] - if not all(special_modules): - log.warn( - 'No main pe, ldr data will be unconsistant %r', special_modules) - loaded_modules = modules_info.modules - else: - loaded_modules = [module for module in modules_info.modules - if module not in special_modules] - loaded_modules[0:0] = [main_pe] - loaded_modules[1:1] = [ntdll_pe] - loaded_modules[2:2] = [kernel32_pe] - - set_link_list_entry(jitter, loaded_modules, modules_info, 0x0) - - -def fix_InMemoryOrderModuleList(jitter, modules_info): - """Fix InMemoryOrderLinks double link list. First module is the main pe, - then ntdll, kernel32. 
- - @jitter: the jitter instance - @modules_info: the LoadedModules instance - """ - - log.debug("Fix InMemoryOrderModuleList") - main_pe = modules_info.name2module.get(main_pe_name, None) - kernel32_pe = modules_info.name2module.get("kernel32.dll", None) - ntdll_pe = modules_info.name2module.get("ntdll.dll", None) - special_modules = [main_pe, kernel32_pe, ntdll_pe] - if not all(special_modules): - log.warn('No main pe, ldr data will be unconsistant') - loaded_modules = modules_info.modules - else: - loaded_modules = [module for module in modules_info.modules - if module not in special_modules] - loaded_modules[0:0] = [main_pe] - loaded_modules[1:1] = [ntdll_pe] - loaded_modules[2:2] = [kernel32_pe] - - set_link_list_entry(jitter, loaded_modules, modules_info, 0x8) - - -def fix_InInitializationOrderModuleList(jitter, modules_info): - """Fix InInitializationOrderModuleList double link list. First module is the - ntdll, then kernel32. - - @jitter: the jitter instance - @modules_info: the LoadedModules instance - - """ - - log.debug("Fix InInitializationOrderModuleList") - main_pe = modules_info.name2module.get(main_pe_name, None) - kernel32_pe = modules_info.name2module.get("kernel32.dll", None) - ntdll_pe = modules_info.name2module.get("ntdll.dll", None) - special_modules = [main_pe, kernel32_pe, ntdll_pe] - if not all(special_modules): - log.warn('No main pe, ldr data will be unconsistant') - loaded_modules = modules_info.modules - else: - loaded_modules = [module for module in modules_info.modules - if module not in special_modules] - loaded_modules[0:0] = [ntdll_pe] - loaded_modules[1:1] = [kernel32_pe] - - set_link_list_entry(jitter, loaded_modules, modules_info, 0x10) - - -def add_process_env(jitter): - """ - Build a process environment structure - @jitter: jitter instance - """ - - env_unicode = 'ALLUSEESPROFILE=C:\\Documents and Settings\\All Users\x00'.encode('utf-16le') - env_unicode += b"\x00" * 0x10 - jitter.vm.add_memory_page( - 
process_environment_address, - PAGE_READ | PAGE_WRITE, - env_unicode, - "Process environment" - ) - jitter.vm.set_mem(process_environment_address, env_unicode) - - -def add_process_parameters(jitter): - """ - Build a process parameters structure - @jitter: jitter instance - """ - - o = b"" - o += pck32(0x1000) # size - o += b"E" * (0x48 - len(o)) - o += pck32(process_environment_address) - jitter.vm.add_memory_page( - process_parameters_address, - PAGE_READ | PAGE_WRITE, - o, "Process parameters" - ) - - -# http://blog.fireeye.com/research/2010/08/download_exec_notes.html -seh_count = 0 - - -def init_seh(jitter): - """ - Build the modules entries and create double links - @jitter: jitter instance - """ - - global seh_count - seh_count = 0 - tib_ad = jitter.cpu.get_segm_base(jitter.cpu.FS) - build_teb(jitter, tib_ad) - build_peb(jitter, peb_address) - - modules_info = create_modules_chain(jitter, name2module) - fix_InLoadOrderModuleList(jitter, modules_info) - fix_InMemoryOrderModuleList(jitter, modules_info) - fix_InInitializationOrderModuleList(jitter, modules_info) - - build_ldr_data(jitter, modules_info) - add_process_env(jitter) - add_process_parameters(jitter) - - - -def regs2ctxt(jitter, context_address): - """ - Build x86_32 cpu context for exception handling - @jitter: jitload instance - """ - - ctxt = ContextException(jitter.vm, context_address) - ctxt.memset(b"\x00") - # ContextFlags - # XXX - - # DRX - ctxt.dr0 = 0 - ctxt.dr1 = 0 - ctxt.dr2 = 0 - ctxt.dr3 = 0 - ctxt.dr4 = 0 - ctxt.dr5 = 0 - - # Float context - # XXX - - # Segment selectors - ctxt.gs = jitter.cpu.GS - ctxt.fs = jitter.cpu.FS - ctxt.es = jitter.cpu.ES - ctxt.ds = jitter.cpu.DS - - # Gpregs - ctxt.edi = jitter.cpu.EDI - ctxt.esi = jitter.cpu.ESI - ctxt.ebx = jitter.cpu.EBX - ctxt.edx = jitter.cpu.EDX - ctxt.ecx = jitter.cpu.ECX - ctxt.eax = jitter.cpu.EAX - ctxt.ebp = jitter.cpu.EBP - ctxt.eip = jitter.cpu.EIP - - # CS - ctxt.cs = jitter.cpu.CS - - # Eflags - # XXX TODO real eflag - - # ESP 
- ctxt.esp = jitter.cpu.ESP - - # SS - ctxt.ss = jitter.cpu.SS - - -def ctxt2regs(jitter, ctxt_ptr): - """ - Restore x86_32 registers from an exception context - @ctxt: the serialized context - @jitter: jitload instance - """ - - ctxt = ContextException(jitter.vm, ctxt_ptr) - - # Selectors - jitter.cpu.GS = ctxt.gs - jitter.cpu.FS = ctxt.fs - jitter.cpu.ES = ctxt.es - jitter.cpu.DS = ctxt.ds - - # Gpregs - jitter.cpu.EDI = ctxt.edi - jitter.cpu.ESI = ctxt.esi - jitter.cpu.EBX = ctxt.ebx - jitter.cpu.EDX = ctxt.edx - jitter.cpu.ECX = ctxt.ecx - jitter.cpu.EAX = ctxt.eax - jitter.cpu.EBP = ctxt.ebp - jitter.cpu.EIP = ctxt.eip - - # CS - jitter.cpu.CS = ctxt.cs - - # Eflag - # XXX TODO - - # ESP - jitter.cpu.ESP = ctxt.esp - # SS - jitter.cpu.SS = ctxt.ss - - -def fake_seh_handler(jitter, except_code, previous_seh=None): - """ - Create an exception context - @jitter: jitter instance - @except_code: x86 exception code - @previous_seh: (optional) last SEH address when multiple SEH are used - """ - global seh_count - log.warning('Exception at %x %r', jitter.cpu.EIP, seh_count) - seh_count += 1 - - # Get space on stack for exception handling - new_ESP = jitter.cpu.ESP - 0x3c8 - exception_base_address = new_ESP - exception_record_address = exception_base_address + 0xe8 - context_address = exception_base_address + 0xfc - fake_seh_address = exception_base_address + 0x14 - - # Save a CONTEXT - regs2ctxt(jitter, context_address) - jitter.cpu.ESP = new_ESP - - # Get current seh (fs:[0]) - tib = NT_TIB(jitter.vm, tib_address) - seh = tib.ExceptionList.deref - if previous_seh: - # Recursive SEH - while seh.get_addr() != previous_seh: - seh = seh.Next.deref - seh = seh.Next.deref - - log.info( - 'seh_ptr %x { old_seh %r eh %r} ctx_addr %x', - seh.get_addr(), - seh.Next, - seh.Handler, - context_address - ) - - # Write exception_record - except_record = EXCEPTION_RECORD(jitter.vm, exception_record_address) - except_record.memset(b"\x00") - except_record.ExceptionCode = except_code 
- except_record.ExceptionAddress = jitter.cpu.EIP - - # Prepare the stack - jitter.push_uint32_t(context_address) # Context - jitter.push_uint32_t(seh.get_addr()) # SEH - jitter.push_uint32_t(except_record.get_addr()) # ExceptRecords - jitter.push_uint32_t(return_from_exception) # Ret address - - # Set fake new current seh for exception - log.info("Fake seh ad %x", fake_seh_address) - fake_seh = EXCEPTION_REGISTRATION_RECORD(jitter.vm, fake_seh_address) - fake_seh.Next.val = tib.ExceptionList.val - fake_seh.Handler = 0xaaaaaaaa - tib.ExceptionList.val = fake_seh.get_addr() - dump_seh(jitter) - - # Remove exceptions - jitter.vm.set_exception(0) - jitter.cpu.set_exception(0) - - # XXX set ebx to nul? - jitter.cpu.EBX = 0 - - log.info('Jumping at %r', seh.Handler) - return seh.Handler.val - - -def dump_seh(jitter): - """ - Walk and dump the SEH entries - @jitter: jitter instance - """ - log.info('Dump_seh. Tib_address: %x', tib_address) - cur_seh_ptr = NT_TIB(jitter.vm, tib_address).ExceptionList - loop = 0 - while cur_seh_ptr and jitter.vm.is_mapped(cur_seh_ptr.val, - len(cur_seh_ptr)): - if loop > MAX_SEH: - log.warn("Too many seh, quit") - return - err = cur_seh_ptr.deref - log.info('\t' * (loop + 1) + 'seh_ptr: %x { prev_seh: %r eh %r }', - err.get_addr(), err.Next, err.Handler) - cur_seh_ptr = err.Next - loop += 1 - - -def set_win_fs_0(jitter, fs=4): - """ - Set FS segment selector and create its corresponding segment - @jitter: jitter instance - @fs: segment selector value - """ - jitter.cpu.FS = fs - jitter.cpu.set_segm_base(fs, tib_address) - segm_to_do = set([x86_regs.FS]) - return segm_to_do - - -def return_from_seh(jitter): - """Handle the return from an exception handler - @jitter: jitter instance""" - - # Get object addresses - seh_address = jitter.vm.get_u32(jitter.cpu.ESP + 0x4) - context_address = jitter.vm.get_u32(jitter.cpu.ESP + 0x8) - - # Get registers changes - log.info('Context address: %x', context_address) - status = jitter.cpu.EAX - 
ctxt2regs(jitter, context_address) - - # Rebuild SEH (remove fake SEH) - tib = NT_TIB(jitter.vm, tib_address) - seh = tib.ExceptionList.deref - log.info('Old seh: %x New seh: %x', seh.get_addr(), seh.Next.val) - tib.ExceptionList.val = seh.Next.val - dump_seh(jitter) - - # Handle returned values - if status == 0x0: - # ExceptionContinueExecution - log.info('SEH continue') - jitter.pc = jitter.cpu.EIP - log.info('Context::Eip: %x', jitter.pc) - - elif status == 1: - # ExceptionContinueSearch - log.info("Delegate to the next SEH handler") - # exception_base_address: context_address - 0xfc - # -> exception_record_address: exception_base_address + 0xe8 - exception_record = EXCEPTION_RECORD(jitter.vm, - context_address - 0xfc + 0xe8) - - pc = fake_seh_handler(jitter, exception_record.ExceptionCode, - seh_address) - jitter.pc = pc - - else: - # https://msdn.microsoft.com/en-us/library/aa260344%28v=vs.60%29.aspx - # But the type _EXCEPTION_DISPOSITION may take 2 others values: - # - ExceptionNestedException = 2 - # - ExceptionCollidedUnwind = 3 - raise ValueError("Valid values are ExceptionContinueExecution and " - "ExceptionContinueSearch") - - # Jitter's breakpoint compliant - return True diff --git a/setup.py b/setup.py index 83e4608e..c52e1f9e 100755 --- a/setup.py +++ b/setup.py @@ -13,117 +13,117 @@ is_win = platform.system() == "Windows" def buil_all(): packages=[ - "miasm2", - "miasm2/arch", - "miasm2/arch/x86", - "miasm2/arch/arm", - "miasm2/arch/aarch64", - "miasm2/arch/msp430", - "miasm2/arch/mep", - "miasm2/arch/sh4", - "miasm2/arch/mips32", - "miasm2/arch/ppc", - "miasm2/core", - "miasm2/expression", - "miasm2/ir", - "miasm2/ir/translators", - "miasm2/analysis", - "miasm2/os_dep", - "miasm2/os_dep/linux", - "miasm2/jitter", - "miasm2/jitter/arch", - "miasm2/jitter/loader", + "miasm", + "miasm/arch", + "miasm/arch/x86", + "miasm/arch/arm", + "miasm/arch/aarch64", + "miasm/arch/msp430", + "miasm/arch/mep", + "miasm/arch/sh4", + "miasm/arch/mips32", + 
"miasm/arch/ppc", + "miasm/core", + "miasm/expression", + "miasm/ir", + "miasm/ir/translators", + "miasm/analysis", + "miasm/os_dep", + "miasm/os_dep/linux", + "miasm/jitter", + "miasm/jitter/arch", + "miasm/jitter/loader", ] ext_modules_all = [ Extension( - "miasm2.jitter.VmMngr", + "miasm.jitter.VmMngr", [ - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/vm_mngr_py.c", - "miasm2/jitter/bn.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/vm_mngr_py.c", + "miasm/jitter/bn.c", ] ), Extension( - "miasm2.jitter.arch.JitCore_x86", + "miasm.jitter.arch.JitCore_x86", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/op_semantics.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_x86.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/op_semantics.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_x86.c" ] ), Extension( - "miasm2.jitter.arch.JitCore_arm", + "miasm.jitter.arch.JitCore_arm", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/op_semantics.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_arm.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/op_semantics.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_arm.c" ] ), Extension( - "miasm2.jitter.arch.JitCore_aarch64", + "miasm.jitter.arch.JitCore_aarch64", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/op_semantics.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_aarch64.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/op_semantics.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_aarch64.c" ] ), Extension( - "miasm2.jitter.arch.JitCore_msp430", + "miasm.jitter.arch.JitCore_msp430", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/op_semantics.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_msp430.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + 
"miasm/jitter/op_semantics.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_msp430.c" ] ), Extension( - "miasm2.jitter.arch.JitCore_mep", + "miasm.jitter.arch.JitCore_mep", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_mep.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_mep.c" ] ), Extension( - "miasm2.jitter.arch.JitCore_mips32", + "miasm.jitter.arch.JitCore_mips32", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/op_semantics.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_mips32.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/op_semantics.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_mips32.c" ] ), Extension( - "miasm2.jitter.arch.JitCore_ppc32", + "miasm.jitter.arch.JitCore_ppc32", [ - "miasm2/jitter/JitCore.c", - "miasm2/jitter/vm_mngr.c", - "miasm2/jitter/op_semantics.c", - "miasm2/jitter/bn.c", - "miasm2/jitter/arch/JitCore_ppc32.c" + "miasm/jitter/JitCore.c", + "miasm/jitter/vm_mngr.c", + "miasm/jitter/op_semantics.c", + "miasm/jitter/bn.c", + "miasm/jitter/arch/JitCore_ppc32.c" ], depends=[ - "miasm2/jitter/arch/JitCore_ppc32.h", - "miasm2/jitter/arch/JitCore_ppc32_regs.h", - "miasm2/jitter/bn.h", + "miasm/jitter/arch/JitCore_ppc32.h", + "miasm/jitter/arch/JitCore_ppc32_regs.h", + "miasm/jitter/bn.h", ] ), - Extension("miasm2.jitter.Jitllvm", - ["miasm2/jitter/Jitllvm.c", - "miasm2/jitter/bn.c", + Extension("miasm.jitter.Jitllvm", + ["miasm/jitter/Jitllvm.c", + "miasm/jitter/bn.c", ]), - Extension("miasm2.jitter.Jitgcc", - ["miasm2/jitter/Jitgcc.c", - "miasm2/jitter/bn.c", + Extension("miasm.jitter.Jitgcc", + ["miasm/jitter/Jitgcc.c", + "miasm/jitter/bn.c", ]), ] @@ -143,7 +143,7 @@ def buil_all(): version = "2.0", packages = packages, package_data = { - "miasm2":[ + "miasm":[ "jitter/*.h", "jitter/arch/*.h", ] @@ -199,7 +199,7 @@ def buil_all(): 
jitters = [] for lib in libs: filename = os.path.basename(lib) - dst = os.path.join(build_base, lib_dirname, "miasm2", "jitter") + dst = os.path.join(build_base, lib_dirname, "miasm", "jitter") if filename not in ["VmMngr.lib", "Jitgcc.lib", "Jitllvm.lib"]: dst = os.path.join(dst, "arch") dst = os.path.join(dst, filename) diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py index 288f4bd6..ecca3eac 100644 --- a/test/analysis/data_flow.py +++ b/test/analysis/data_flow.py @@ -3,11 +3,11 @@ from __future__ import print_function from future.utils import viewitems -from miasm2.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem -from miasm2.core.locationdb import LocationDB -from miasm2.analysis.data_flow import * -from miasm2.ir.analysis import ira -from miasm2.ir.ir import IRBlock, AssignBlock +from miasm.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem +from miasm.core.locationdb import LocationDB +from miasm.analysis.data_flow import * +from miasm.ir.analysis import ira +from miasm.ir.ir import IRBlock, AssignBlock loc_db = LocationDB() diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index c229caf2..345f979a 100644 --- a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -3,13 +3,13 @@ from __future__ import print_function from future.utils import viewitems -from miasm2.expression.expression import ExprId, ExprInt, ExprAssign, \ +from miasm.expression.expression import ExprId, ExprInt, ExprAssign, \ ExprCond, ExprLoc, LocKey -from miasm2.core.locationdb import LocationDB -from miasm2.ir.analysis import ira -from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.core.graph import DiGraph -from miasm2.analysis.depgraph import DependencyNode, DependencyGraph +from miasm.core.locationdb import LocationDB +from miasm.ir.analysis import ira +from miasm.ir.ir import IRBlock, AssignBlock +from miasm.core.graph import DiGraph +from miasm.analysis.depgraph import DependencyNode, DependencyGraph 
from itertools import count from pdb import pm import re diff --git a/test/analysis/dse.py b/test/analysis/dse.py index 82668ea8..8691551c 100644 --- a/test/analysis/dse.py +++ b/test/analysis/dse.py @@ -4,12 +4,12 @@ from pdb import pm from future.utils import viewitems from elfesteem.strpatchwork import StrPatchwork -from miasm2.core import parse_asm -from miasm2.expression.expression import ExprCompose, ExprOp, ExprInt, ExprId -from miasm2.core.asmblock import asm_resolve_final -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.dse import DSEEngine +from miasm.core import parse_asm +from miasm.expression.expression import ExprCompose, ExprOp, ExprInt, ExprId +from miasm.core.asmblock import asm_resolve_final +from miasm.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.dse import DSEEngine class DSETest(object): diff --git a/test/analysis/modularintervals.py b/test/analysis/modularintervals.py index cf286e3a..2d877567 100644 --- a/test/analysis/modularintervals.py +++ b/test/analysis/modularintervals.py @@ -1,10 +1,10 @@ from builtins import range from random import shuffle, seed -from miasm2.core.interval import interval -from miasm2.analysis.modularintervals import ModularIntervals -from miasm2.expression.expression import * -from miasm2.expression.simplifications import expr_simp +from miasm.core.interval import interval +from miasm.analysis.modularintervals import ModularIntervals +from miasm.expression.expression import * +from miasm.expression.simplifications import expr_simp def gen_all_intervals(size): diff --git a/test/analysis/range.py b/test/analysis/range.py index 946d0116..6dc109ac 100644 --- a/test/analysis/range.py +++ b/test/analysis/range.py @@ -1,7 +1,7 @@ from __future__ import print_function -from miasm2.expression.expression import * -from miasm2.analysis.expression_range import expr_range -from miasm2.ir.translators 
import Translator +from miasm.expression.expression import * +from miasm.analysis.expression_range import expr_range +from miasm.ir.translators import Translator import z3 trans = Translator.to_language("z3") diff --git a/test/analysis/unssa.py b/test/analysis/unssa.py index 42e56246..55ceac9e 100644 --- a/test/analysis/unssa.py +++ b/test/analysis/unssa.py @@ -1,12 +1,12 @@ """ Test cases for dead code elimination""" from future.utils import viewvalues -from miasm2.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem, \ +from miasm.expression.expression import ExprId, ExprInt, ExprAssign, ExprMem, \ ExprCond, ExprLoc -from miasm2.core.locationdb import LocationDB -from miasm2.analysis.simplifier import IRCFGSimplifierSSA -from miasm2.ir.analysis import ira -from miasm2.ir.ir import IRCFG, IRBlock, AssignBlock +from miasm.core.locationdb import LocationDB +from miasm.analysis.simplifier import IRCFGSimplifierSSA +from miasm.ir.analysis import ira +from miasm.ir.ir import IRCFG, IRBlock, AssignBlock loc_db = LocationDB() diff --git a/test/arch/aarch64/arch.py b/test/arch/aarch64/arch.py index 948ee489..70a44053 100644 --- a/test/arch/aarch64/arch.py +++ b/test/arch/aarch64/arch.py @@ -2,9 +2,9 @@ from __future__ import print_function import sys import time from pdb import pm -from miasm2.core.utils import decode_hex -from miasm2.arch.aarch64.arch import * -from miasm2.core.locationdb import LocationDB +from miasm.core.utils import decode_hex +from miasm.arch.aarch64.arch import * +from miasm.core.locationdb import LocationDB loc_db = LocationDB() diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index e49a2a62..65a537a0 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -3,13 +3,13 @@ import os from future.utils import viewitems -from miasm2.arch.aarch64.arch import mn_aarch64, base_expr, variable -from miasm2.core import parse_asm -from miasm2.expression.expression import * 
-from miasm2.core import asmblock +from miasm.arch.aarch64.arch import mn_aarch64, base_expr, variable +from miasm.core import parse_asm +from miasm.expression.expression import * +from miasm.core import asmblock from elfesteem.strpatchwork import StrPatchwork -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import * +from miasm.analysis.machine import Machine +from miasm.jitter.csts import * reg_and_id = dict(mn_aarch64.regs.all_regs_ids_byname) diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index bf2b1a02..c8f2d433 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -1,9 +1,9 @@ from __future__ import print_function import time -from miasm2.core.utils import decode_hex, encode_hex -from miasm2.arch.arm.arch import * -from miasm2.core.locationdb import LocationDB +from miasm.core.utils import decode_hex, encode_hex +from miasm.arch.arm.arch import * +from miasm.core.locationdb import LocationDB from pdb import pm diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index 9c19431b..f38d48e9 100755 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -7,12 +7,12 @@ import logging from future.utils import viewitems -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.arch.arm.arch import mn_arm as mn -from miasm2.arch.arm.sem import ir_arml as ir_arch -from miasm2.arch.arm.regs import * -from miasm2.expression.expression import * -from miasm2.core.locationdb import LocationDB +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.arch.arm.arch import mn_arm as mn +from miasm.arch.arm.sem import ir_arml as ir_arch +from miasm.arch.arm.regs import * +from miasm.expression.expression import * +from miasm.core.locationdb import LocationDB from pdb import pm logging.getLogger('cpuhelper').setLevel(logging.ERROR) diff --git a/test/arch/mep/asm/test_asm.py b/test/arch/mep/asm/test_asm.py index 217def86..7762669a 100644 --- a/test/arch/mep/asm/test_asm.py +++ b/test/arch/mep/asm/test_asm.py @@ 
-2,8 +2,8 @@ # Guillaume Valadon from __future__ import print_function -from miasm2.core.utils import decode_hex, encode_hex -from miasm2.arch.mep.arch import mn_mep +from miasm.core.utils import decode_hex, encode_hex +from miasm.arch.mep.arch import mn_mep class TestMisc(object): diff --git a/test/arch/mep/asm/ut_helpers_asm.py b/test/arch/mep/asm/ut_helpers_asm.py index 26520787..9f6dc5c2 100644 --- a/test/arch/mep/asm/ut_helpers_asm.py +++ b/test/arch/mep/asm/ut_helpers_asm.py @@ -5,11 +5,11 @@ from __future__ import print_function from builtins import range -from miasm2.core.utils import decode_hex, encode_hex -from miasm2.arch.mep.arch import mn_mep -from miasm2.core.cpu import Disasm_Exception -from miasm2.core.locationdb import LocationDB -from miasm2.expression.expression import ExprId, ExprInt, ExprLoc +from miasm.core.utils import decode_hex, encode_hex +from miasm.arch.mep.arch import mn_mep +from miasm.core.cpu import Disasm_Exception +from miasm.core.locationdb import LocationDB +from miasm.expression.expression import ExprId, ExprInt, ExprLoc import re diff --git a/test/arch/mep/ir/test_arithmetic.py b/test/arch/mep/ir/test_arithmetic.py index 2e0dbf32..d404f51c 100644 --- a/test/arch/mep/ir/test_arithmetic.py +++ b/test/arch/mep/ir/test_arithmetic.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestArithmetic(object): diff --git a/test/arch/mep/ir/test_bitmanipulation.py b/test/arch/mep/ir/test_bitmanipulation.py index f4ea2f29..6ec200c5 100644 --- a/test/arch/mep/ir/test_bitmanipulation.py +++ b/test/arch/mep/ir/test_bitmanipulation.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprMem +from miasm.expression.expression import ExprId, ExprInt, ExprMem class TestBitManipulation(object): diff --git 
a/test/arch/mep/ir/test_branchjump.py b/test/arch/mep/ir/test_branchjump.py index 7e0953fd..828b172f 100644 --- a/test/arch/mep/ir/test_branchjump.py +++ b/test/arch/mep/ir/test_branchjump.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt +from miasm.expression.expression import ExprId, ExprInt class TestBranchJump(object): diff --git a/test/arch/mep/ir/test_control.py b/test/arch/mep/ir/test_control.py index 92dcb371..04c8b4d0 100644 --- a/test/arch/mep/ir/test_control.py +++ b/test/arch/mep/ir/test_control.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestControl(object): diff --git a/test/arch/mep/ir/test_coprocessor.py b/test/arch/mep/ir/test_coprocessor.py index e9829c08..bd8fd39c 100644 --- a/test/arch/mep/ir/test_coprocessor.py +++ b/test/arch/mep/ir/test_coprocessor.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprMem, ExprInt +from miasm.expression.expression import ExprId, ExprMem, ExprInt class TestCoprocessor(object): diff --git a/test/arch/mep/ir/test_debug.py b/test/arch/mep/ir/test_debug.py index b25e3a19..0c1026de 100644 --- a/test/arch/mep/ir/test_debug.py +++ b/test/arch/mep/ir/test_debug.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestDebug(object): diff --git a/test/arch/mep/ir/test_divide.py b/test/arch/mep/ir/test_divide.py index e3e4cb99..424a9876 100644 --- a/test/arch/mep/ir/test_divide.py +++ b/test/arch/mep/ir/test_divide.py @@ -3,8 +3,8 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp -from 
miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.jitter.csts import EXCEPT_DIV_BY_ZERO class TestDivide(object): diff --git a/test/arch/mep/ir/test_extension.py b/test/arch/mep/ir/test_extension.py index 10f16ebf..72ad8c22 100644 --- a/test/arch/mep/ir/test_extension.py +++ b/test/arch/mep/ir/test_extension.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprMem, ExprInt +from miasm.expression.expression import ExprId, ExprMem, ExprInt class TestExtension(object): diff --git a/test/arch/mep/ir/test_ir.py b/test/arch/mep/ir/test_ir.py index be717db8..be8e24e1 100644 --- a/test/arch/mep/ir/test_ir.py +++ b/test/arch/mep/ir/test_ir.py @@ -3,13 +3,13 @@ from __future__ import print_function -from miasm2.core.utils import decode_hex -from miasm2.arch.mep.arch import mn_mep -from miasm2.arch.mep.regs import regs_init -from miasm2.arch.mep.ira import ir_mepb, ir_a_mepb -from miasm2.expression.expression import ExprId, ExprInt, ExprMem -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.core.locationdb import LocationDB +from miasm.core.utils import decode_hex +from miasm.arch.mep.arch import mn_mep +from miasm.arch.mep.regs import regs_init +from miasm.arch.mep.ira import ir_mepb, ir_a_mepb +from miasm.expression.expression import ExprId, ExprInt, ExprMem +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.core.locationdb import LocationDB class TestMisc(object): diff --git a/test/arch/mep/ir/test_ldz.py b/test/arch/mep/ir/test_ldz.py index 668030c8..f14172b2 100644 --- a/test/arch/mep/ir/test_ldz.py +++ b/test/arch/mep/ir/test_ldz.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestLdz(object): diff --git 
a/test/arch/mep/ir/test_loadstore.py b/test/arch/mep/ir/test_loadstore.py index 22cb4304..87343fcb 100644 --- a/test/arch/mep/ir/test_loadstore.py +++ b/test/arch/mep/ir/test_loadstore.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprMem, ExprInt +from miasm.expression.expression import ExprId, ExprMem, ExprInt class TestLoadStore(object): diff --git a/test/arch/mep/ir/test_logical.py b/test/arch/mep/ir/test_logical.py index e78b5488..0e5aef76 100644 --- a/test/arch/mep/ir/test_logical.py +++ b/test/arch/mep/ir/test_logical.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestLogical(object): diff --git a/test/arch/mep/ir/test_move.py b/test/arch/mep/ir/test_move.py index 8da7a18a..30af1a74 100644 --- a/test/arch/mep/ir/test_move.py +++ b/test/arch/mep/ir/test_move.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprMem, ExprInt +from miasm.expression.expression import ExprId, ExprMem, ExprInt class TestMove(object): diff --git a/test/arch/mep/ir/test_multiply.py b/test/arch/mep/ir/test_multiply.py index 5673994c..065f1a59 100644 --- a/test/arch/mep/ir/test_multiply.py +++ b/test/arch/mep/ir/test_multiply.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestMultiply(object): diff --git a/test/arch/mep/ir/test_repeat.py b/test/arch/mep/ir/test_repeat.py index 1e0e2f86..e684ef87 100644 --- a/test/arch/mep/ir/test_repeat.py +++ b/test/arch/mep/ir/test_repeat.py @@ -3,7 +3,7 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from 
miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp class TestRepeat(object): diff --git a/test/arch/mep/ir/test_shift.py b/test/arch/mep/ir/test_shift.py index 99755ba5..cac48660 100644 --- a/test/arch/mep/ir/test_shift.py +++ b/test/arch/mep/ir/test_shift.py @@ -3,8 +3,8 @@ from ut_helpers_ir import exec_instruction -from miasm2.expression.expression import ExprId, ExprInt, ExprCond, ExprOp -from miasm2.core.cpu import sign_ext +from miasm.expression.expression import ExprId, ExprInt, ExprCond, ExprOp +from miasm.core.cpu import sign_ext class TestShift(object): diff --git a/test/arch/mep/ir/ut_helpers_ir.py b/test/arch/mep/ir/ut_helpers_ir.py index 26eebeda..c5bf36b9 100644 --- a/test/arch/mep/ir/ut_helpers_ir.py +++ b/test/arch/mep/ir/ut_helpers_ir.py @@ -3,16 +3,16 @@ from __future__ import print_function -from miasm2.arch.mep.arch import mn_mep -from miasm2.arch.mep.sem import ir_mepb -from miasm2.arch.mep.regs import regs_init - -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.core.locationdb import LocationDB -from miasm2.core.utils import Disasm_Exception -from miasm2.ir.ir import AssignBlock -from miasm2.arch.mep.ira import ir_a_mepb -from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprMem, \ +from miasm.arch.mep.arch import mn_mep +from miasm.arch.mep.sem import ir_mepb +from miasm.arch.mep.regs import regs_init + +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.core.locationdb import LocationDB +from miasm.core.utils import Disasm_Exception +from miasm.ir.ir import AssignBlock +from miasm.arch.mep.ira import ir_a_mepb +from miasm.expression.expression import ExprId, ExprInt, ExprOp, ExprMem, \ ExprAssign, ExprLoc diff --git a/test/arch/mep/jit/ut_helpers_jit.py b/test/arch/mep/jit/ut_helpers_jit.py index 999ead42..0c756e39 100644 --- a/test/arch/mep/jit/ut_helpers_jit.py +++ b/test/arch/mep/jit/ut_helpers_jit.py @@ -3,14 +3,14 @@ from __future__ import print_function -from 
miasm2.analysis.machine import Machine -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE def jit_instructions(mn_str): """JIT instructions and return the jitter object.""" - # Get the miasm2 Machine + # Get the miasm Machine machine = Machine("mepb") mn_mep = machine.mn() diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index f71d3ee8..e5e8cff6 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -2,9 +2,9 @@ from __future__ import print_function import time from pdb import pm -from miasm2.core.utils import decode_hex, encode_hex -from miasm2.core.locationdb import LocationDB -from miasm2.arch.mips32.arch import * +from miasm.core.utils import decode_hex, encode_hex +from miasm.core.locationdb import LocationDB +from miasm.arch.mips32.arch import * loc_db = LocationDB() diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index 7a50b38e..38a2d928 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -3,13 +3,13 @@ import os from future.utils import viewitems -from miasm2.arch.mips32.arch import mn_mips32 -from miasm2.core import parse_asm -from miasm2.expression.expression import * -from miasm2.core import asmblock +from miasm.arch.mips32.arch import mn_mips32 +from miasm.core import parse_asm +from miasm.expression.expression import * +from miasm.core import asmblock from elfesteem.strpatchwork import StrPatchwork -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import * +from miasm.analysis.machine import Machine +from miasm.jitter.csts import * reg_and_id = dict(mn_mips32.regs.all_regs_ids_byname) diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py index eea87091..bc38c363 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -2,9 +2,9 @@ from __future__ import print_function import time from pdb import pm -from 
miasm2.core.utils import decode_hex, encode_hex -from miasm2.arch.msp430.arch import * -from miasm2.core.locationdb import LocationDB +from miasm.core.utils import decode_hex, encode_hex +from miasm.arch.msp430.arch import * +from miasm.core.locationdb import LocationDB loc_db = LocationDB() diff --git a/test/arch/msp430/sem.py b/test/arch/msp430/sem.py index 88aa990d..2aca66ed 100755 --- a/test/arch/msp430/sem.py +++ b/test/arch/msp430/sem.py @@ -7,12 +7,12 @@ import logging from future.utils import viewitems -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.arch.msp430.arch import mn_msp430 as mn, mode_msp430 as mode -from miasm2.arch.msp430.sem import ir_msp430 as ir_arch -from miasm2.arch.msp430.regs import * -from miasm2.expression.expression import * -from miasm2.core.locationdb import LocationDB +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.arch.msp430.arch import mn_msp430 as mn, mode_msp430 as mode +from miasm.arch.msp430.sem import ir_msp430 as ir_arch +from miasm.arch.msp430.regs import * +from miasm.expression.expression import * +from miasm.core.locationdb import LocationDB logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([res, ir_arch().IRDst]) diff --git a/test/arch/sh4/arch.py b/test/arch/sh4/arch.py index f52ed070..0fbc6ba2 100644 --- a/test/arch/sh4/arch.py +++ b/test/arch/sh4/arch.py @@ -2,9 +2,9 @@ from __future__ import print_function import time from pdb import pm from sys import stderr -from miasm2.core.utils import decode_hex, encode_hex -from miasm2.arch.sh4.arch import * -from miasm2.core.locationdb import LocationDB +from miasm.core.utils import decode_hex, encode_hex +from miasm.arch.sh4.arch import * +from miasm.core.locationdb import LocationDB loc_db = LocationDB() diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index b4cebd28..202ecac5 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -2,13 +2,13 @@ from __future__ import print_function import 
time from pdb import pm -from miasm2.core.utils import decode_hex, encode_hex -import miasm2.expression.expression as m2_expr -from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, \ +from miasm.core.utils import decode_hex, encode_hex +import miasm.expression.expression as m2_expr +from miasm.arch.x86.arch import mn_x86, deref_mem_ad, \ base_expr, rmarg, print_size -from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 -from miasm2.core.bin_stream import bin_stream_str -from miasm2.core.locationdb import LocationDB +from miasm.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 +from miasm.core.bin_stream import bin_stream_str +from miasm.core.locationdb import LocationDB loc_db = LocationDB() diff --git a/test/arch/x86/qemu/testqemu.py b/test/arch/x86/qemu/testqemu.py index 264a84b9..99d6e6c1 100644 --- a/test/arch/x86/qemu/testqemu.py +++ b/test/arch/x86/qemu/testqemu.py @@ -10,9 +10,9 @@ try: except AttributeError: pass -from miasm2.analysis.sandbox import Sandbox_Linux_x86_32 -from miasm2.jitter.jitload import log_func -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.sandbox import Sandbox_Linux_x86_32 +from miasm.jitter.jitload import log_func +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE # Utils def parse_fmt(s): diff --git a/test/arch/x86/qemu/testqemu64.py b/test/arch/x86/qemu/testqemu64.py index 4fe51992..24193d40 100644 --- a/test/arch/x86/qemu/testqemu64.py +++ b/test/arch/x86/qemu/testqemu64.py @@ -10,9 +10,9 @@ try: except AttributeError: pass -from miasm2.analysis.sandbox import Sandbox_Linux_x86_64 -from miasm2.jitter.jitload import log_func -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.sandbox import Sandbox_Linux_x86_64 +from miasm.jitter.jitload import log_func +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE # Utils def parse_fmt(s): diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index c0cfc8f2..5109d2b4 100755 --- a/test/arch/x86/sem.py +++ 
b/test/arch/x86/sem.py @@ -12,14 +12,14 @@ import unittest import logging import copy -from miasm2.ir.symbexec import SymbolicExecutionEngine -from miasm2.arch.x86.arch import mn_x86 as mn -from miasm2.arch.x86.sem import ir_x86_32 as ir_32, ir_x86_64 as ir_64 -from miasm2.arch.x86.regs import * -from miasm2.expression.expression import * -from miasm2.expression.simplifications import expr_simp -from miasm2.core import parse_asm, asmblock -from miasm2.core.locationdb import LocationDB +from miasm.ir.symbexec import SymbolicExecutionEngine +from miasm.arch.x86.arch import mn_x86 as mn +from miasm.arch.x86.sem import ir_x86_32 as ir_32, ir_x86_64 as ir_64 +from miasm.arch.x86.regs import * +from miasm.expression.expression import * +from miasm.expression.simplifications import expr_simp +from miasm.core import parse_asm, asmblock +from miasm.core.locationdb import LocationDB logging.getLogger('cpuhelper').setLevel(logging.ERROR) diff --git a/test/arch/x86/unit/access_xmm.py b/test/arch/x86/unit/access_xmm.py index 8354c30f..65248b2e 100644 --- a/test/arch/x86/unit/access_xmm.py +++ b/test/arch/x86/unit/access_xmm.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python2 """Test getter and setter for XMM registers (128 bits)""" -from miasm2.analysis.machine import Machine +from miasm.analysis.machine import Machine # Jitter engine doesn't matter, use the always available 'python' one myjit = Machine("x86_32").jitter("python") diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index a87fe278..0059f511 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -5,13 +5,13 @@ import os from future.utils import viewitems -from miasm2.arch.x86.arch import mn_x86, base_expr, variable -from miasm2.core import parse_asm -from miasm2.expression.expression import * -from miasm2.core import asmblock +from miasm.arch.x86.arch import mn_x86, base_expr, variable +from miasm.core import parse_asm +from miasm.expression.expression import * +from miasm.core import asmblock from elfesteem.strpatchwork import StrPatchwork -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import * +from miasm.analysis.machine import Machine +from miasm.jitter.csts import * reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) diff --git a/test/arch/x86/unit/mn_cdq.py b/test/arch/x86/unit/mn_cdq.py index 947b40bb..d015ede9 100644 --- a/test/arch/x86/unit/mn_cdq.py +++ b/test/arch/x86/unit/mn_cdq.py @@ -3,7 +3,7 @@ import sys from asm_test import Asm_Test_16, Asm_Test_32, Asm_Test_64 -from miasm2.core.utils import pck16, pck32 +from miasm.core.utils import pck16, pck32 class Test_CBW_16(Asm_Test_16): diff --git a/test/arch/x86/unit/mn_int.py b/test/arch/x86/unit/mn_int.py index efacb105..8cb8080f 100755 --- a/test/arch/x86/unit/mn_int.py +++ b/test/arch/x86/unit/mn_int.py @@ -1,7 +1,7 @@ #! 
/usr/bin/env python2 import sys -from miasm2.jitter.csts import EXCEPT_INT_XX +from miasm.jitter.csts import EXCEPT_INT_XX from asm_test import Asm_Test_32 diff --git a/test/arch/x86/unit/mn_pushpop.py b/test/arch/x86/unit/mn_pushpop.py index fedd197b..6dc37b74 100755 --- a/test/arch/x86/unit/mn_pushpop.py +++ b/test/arch/x86/unit/mn_pushpop.py @@ -3,7 +3,7 @@ import sys from asm_test import Asm_Test_16, Asm_Test_32 -from miasm2.core.utils import pck16, pck32 +from miasm.core.utils import pck16, pck32 def init_regs(test): diff --git a/test/arch/x86/unit/mn_seh.py b/test/arch/x86/unit/mn_seh.py index 1fa0900e..8575cc46 100755 --- a/test/arch/x86/unit/mn_seh.py +++ b/test/arch/x86/unit/mn_seh.py @@ -2,10 +2,10 @@ from __future__ import print_function import sys -from miasm2.os_dep.win_api_x86_32_seh import fake_seh_handler, build_teb, \ +from miasm.os_dep.win_api_x86_32_seh import fake_seh_handler, build_teb, \ set_win_fs_0, return_from_exception, EXCEPTION_PRIV_INSTRUCTION, \ return_from_seh, DEFAULT_SEH -from miasm2.os_dep.win_32_structs import ContextException +from miasm.os_dep.win_32_structs import ContextException from asm_test import Asm_Test_32 diff --git a/test/arch/x86/unit/test_asm_x86_64.py b/test/arch/x86/unit/test_asm_x86_64.py index 4e600846..e23f9a19 100644 --- a/test/arch/x86/unit/test_asm_x86_64.py +++ b/test/arch/x86/unit/test_asm_x86_64.py @@ -1,7 +1,7 @@ -from miasm2.core import asmblock -from miasm2.arch.x86 import arch -from miasm2.core import parse_asm -from miasm2.core.interval import interval +from miasm.core import asmblock +from miasm.arch.x86 import arch +from miasm.core import parse_asm +from miasm.core.interval import interval my_mn = arch.mn_x86 diff --git a/test/core/asmblock.py b/test/core/asmblock.py index 48e81e78..e5ccf252 100644 --- a/test/core/asmblock.py +++ b/test/core/asmblock.py @@ -4,14 +4,14 @@ from pdb import pm from future.utils import viewitems -from miasm2.core.utils import decode_hex -from miasm2.analysis.machine 
import Machine -from miasm2.analysis.binary import Container -from miasm2.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \ +from miasm.core.utils import decode_hex +from miasm.analysis.machine import Machine +from miasm.analysis.binary import Container +from miasm.core.asmblock import AsmCFG, AsmConstraint, AsmBlock, \ AsmBlockBad, AsmConstraintTo, AsmConstraintNext, \ bbl_simplifier -from miasm2.core.graph import DiGraphSimplifier, MatchGraphJoker -from miasm2.expression.expression import ExprId +from miasm.core.graph import DiGraphSimplifier, MatchGraphJoker +from miasm.expression.expression import ExprId # Initial data: from 'samples/simple_test.bin' data = decode_hex("5589e583ec10837d08007509c745fc01100000eb73837d08017709c745fc02100000eb64837d08057709c745fc03100000eb55837d080774138b450801c083f80e7509c745fc04100000eb3c8b450801c083f80e7509c745fc05100000eb298b450883e03085c07409c745fc06100000eb16837d08427509c745fc07100000eb07c745fc081000008b45fcc9c3") diff --git a/test/core/graph.py b/test/core/graph.py index 484591b7..3db5e523 100644 --- a/test/core/graph.py +++ b/test/core/graph.py @@ -1,5 +1,5 @@ from __future__ import print_function -from miasm2.core.graph import * +from miasm.core.graph import * g = DiGraph() g.add_node('a') diff --git a/test/core/interval.py b/test/core/interval.py index 76c95d66..61f33178 100755 --- a/test/core/interval.py +++ b/test/core/interval.py @@ -2,7 +2,7 @@ #-*- coding:utf-8 -*- from builtins import range -from miasm2.core.interval import * +from miasm.core.interval import * from random import randint from pdb import pm diff --git a/test/core/locationdb.py b/test/core/locationdb.py index 3db760d8..b6d7d8e0 100644 --- a/test/core/locationdb.py +++ b/test/core/locationdb.py @@ -1,5 +1,5 @@ from builtins import str -from miasm2.core.locationdb import LocationDB +from miasm.core.locationdb import LocationDB # Basic tests (LocationDB description) diff --git a/test/core/parse_asm.py b/test/core/parse_asm.py index ade9040d..ab2bca4a 
100755 --- a/test/core/parse_asm.py +++ b/test/core/parse_asm.py @@ -8,8 +8,8 @@ import unittest class TestParseAsm(unittest.TestCase): def test_ParseTxt(self): - from miasm2.arch.x86.arch import mn_x86 - from miasm2.core.parse_asm import parse_txt + from miasm.arch.x86.arch import mn_x86 + from miasm.core.parse_asm import parse_txt ASM0 = ''' ; @@ -37,9 +37,9 @@ class TestParseAsm(unittest.TestCase): self.assertRaises(ValueError, parse_txt, mn_x86, 32, ASM1) def test_DirectiveDontSplit(self): - from miasm2.arch.x86.arch import mn_x86 - from miasm2.core.parse_asm import parse_txt - from miasm2.core.asmblock import asm_resolve_final + from miasm.arch.x86.arch import mn_x86 + from miasm.core.parse_asm import parse_txt + from miasm.core.asmblock import asm_resolve_final ASM0 = ''' lbl0: @@ -85,8 +85,8 @@ class TestParseAsm(unittest.TestCase): assert(lbls[5] == lbl2block[lbls[4]].get_next()) def test_DirectiveSplit(self): - from miasm2.arch.x86.arch import mn_x86 - from miasm2.core.parse_asm import parse_txt + from miasm.arch.x86.arch import mn_x86 + from miasm.core.parse_asm import parse_txt ASM0 = ''' lbl0: diff --git a/test/core/sembuilder.py b/test/core/sembuilder.py index 53e9e60e..7c3d578e 100644 --- a/test/core/sembuilder.py +++ b/test/core/sembuilder.py @@ -2,9 +2,9 @@ from __future__ import print_function import inspect from pdb import pm -from miasm2.core.sembuilder import SemBuilder -from miasm2.core.locationdb import LocationDB -import miasm2.expression.expression as m2_expr +from miasm.core.sembuilder import SemBuilder +from miasm.core.locationdb import LocationDB +import miasm.expression.expression as m2_expr diff --git a/test/core/test_types.py b/test/core/test_types.py index e3914185..1b15630c 100755 --- a/test/core/test_types.py +++ b/test/core/test_types.py @@ -1,19 +1,19 @@ #! 
/usr/bin/env python2 -# miasm2.core.types tests +# miasm.core.types tests from __future__ import print_function from builtins import range import struct -from miasm2.core.utils import int_to_byte -from miasm2.analysis.machine import Machine -from miasm2.core.types import MemStruct, Num, Ptr, Str, \ +from miasm.core.utils import int_to_byte +from miasm.analysis.machine import Machine +from miasm.core.types import MemStruct, Num, Ptr, Str, \ Array, RawStruct, Union, \ BitField, Self, Void, Bits, \ set_allocator, MemUnion, Struct -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.os_dep.common import heap +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.os_dep.common import heap # Two structures with some fields class OtherStruct(MemStruct): diff --git a/test/core/utils.py b/test/core/utils.py index 6f69fdf1..0ca64f4e 100755 --- a/test/core/utils.py +++ b/test/core/utils.py @@ -10,7 +10,7 @@ import unittest class TestUtils(unittest.TestCase): def test_boundedDict(self): - from miasm2.core.utils import BoundedDict + from miasm.core.utils import BoundedDict # Use a callback def logger(key): diff --git a/test/expr_type/test_chandler.py b/test/expr_type/test_chandler.py index 92ebd0f2..e6566636 100644 --- a/test/expr_type/test_chandler.py +++ b/test/expr_type/test_chandler.py @@ -9,18 +9,18 @@ from __future__ import print_function from future.utils import viewitems from past.builtins import cmp from builtins import str -from miasm2.expression.expression import ExprInt, ExprId, ExprMem -from miasm2.expression.simplifications import expr_simp +from miasm.expression.expression import ExprInt, ExprId, ExprMem +from miasm.expression.simplifications import expr_simp -from miasm2.core.objc import parse_access -from miasm2.core.objc import ast_get_c_access_expr -from miasm2.core.objc import ExprCToExpr, ExprToAccessC, CHandler +from miasm.core.objc import parse_access +from miasm.core.objc import ast_get_c_access_expr +from miasm.core.objc import 
ExprCToExpr, ExprToAccessC, CHandler -from miasm2.core.ctypesmngr import CTypeStruct, CTypeUnion, CAstTypes, CTypePtr, CTypeId -from miasm2.core.objc import CTypesManagerNotPacked +from miasm.core.ctypesmngr import CTypeStruct, CTypeUnion, CAstTypes, CTypePtr, CTypeId +from miasm.core.objc import CTypesManagerNotPacked -from miasm2.arch.x86.ctype import CTypeAMD64_unk +from miasm.arch.x86.ctype import CTypeAMD64_unk text_1 = """ diff --git a/test/expression/expr_cmp.py b/test/expression/expr_cmp.py index b238151d..671085f1 100644 --- a/test/expression/expr_cmp.py +++ b/test/expression/expr_cmp.py @@ -2,12 +2,12 @@ # Expression comparison regression tests # # from pdb import pm -from miasm2.expression.expression import ExprInt, expr_is_unsigned_greater,\ +from miasm.expression.expression import ExprInt, expr_is_unsigned_greater,\ expr_is_unsigned_greater_or_equal, expr_is_unsigned_lower,\ expr_is_unsigned_lower_or_equal, expr_is_signed_greater,\ expr_is_signed_greater_or_equal, expr_is_signed_lower, \ expr_is_signed_lower_or_equal, expr_is_equal, expr_is_not_equal -from miasm2.expression.simplifications import expr_simp +from miasm.expression.simplifications import expr_simp int_0 = ExprInt(0, 32) int_1 = ExprInt(1, 32) diff --git a/test/expression/expr_pickle.py b/test/expression/expr_pickle.py index 16b87db7..70778d38 100644 --- a/test/expression/expr_pickle.py +++ b/test/expression/expr_pickle.py @@ -1,6 +1,6 @@ from __future__ import print_function import pickle -from miasm2.expression.expression import ExprInt, ExprAssign, ExprId, \ +from miasm.expression.expression import ExprInt, ExprAssign, ExprId, \ Expr, ExprCompose, ExprMem diff --git a/test/expression/expression.py b/test/expression/expression.py index b8a2642a..3597eae8 100644 --- a/test/expression/expression.py +++ b/test/expression/expression.py @@ -3,8 +3,8 @@ from __future__ import print_function # Expression regression tests # # from pdb import pm -from miasm2.expression.expression import * -from 
miasm2.expression.expression_helper import * +from miasm.expression.expression import * +from miasm.expression.expression_helper import * # Expression comparison assert(ExprInt(-1, 64) != ExprInt(-2, 64)) diff --git a/test/expression/expression_helper.py b/test/expression/expression_helper.py index 6c6fb2a9..c188215e 100755 --- a/test/expression/expression_helper.py +++ b/test/expression/expression_helper.py @@ -10,8 +10,8 @@ import unittest class TestExpressionExpressionHelper(unittest.TestCase): def test_Variables_Identifier(self): - import miasm2.expression.expression as m2_expr - from miasm2.expression.expression_helper import Variables_Identifier + import miasm.expression.expression as m2_expr + from miasm.expression.expression_helper import Variables_Identifier # Build a complex expression cst = m2_expr.ExprInt(0x100, 16) diff --git a/test/expression/modint.py b/test/expression/modint.py index a833ee80..af80b284 100644 --- a/test/expression/modint.py +++ b/test/expression/modint.py @@ -1,5 +1,6 @@ from __future__ import print_function -from miasm2.expression.modint import * + +from miasm.expression.modint import * a = uint8(0x42) b = uint8(0xFF) diff --git a/test/expression/parser.py b/test/expression/parser.py index d05f8262..48c63753 100644 --- a/test/expression/parser.py +++ b/test/expression/parser.py @@ -1,6 +1,6 @@ from __future__ import print_function -from miasm2.expression.parser import str_to_expr -from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ +from miasm.expression.parser import str_to_expr +from miasm.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ ExprCond, ExprCompose, ExprOp, ExprAssign, ExprLoc, LocKey for expr_test in [ExprInt(0x12, 32), diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py index ae9eb1c0..1a22c43d 100644 --- a/test/expression/simplifications.py +++ b/test/expression/simplifications.py @@ -6,11 +6,11 @@ from pdb import pm from argparse import 
ArgumentParser import logging -from miasm2.expression.expression import * -from miasm2.expression.simplifications import expr_simp, expr_simp_explicit, \ +from miasm.expression.expression import * +from miasm.expression.simplifications import expr_simp, expr_simp_explicit, \ ExpressionSimplifier, log_exprsimp -from miasm2.expression.simplifications_cond import ExprOp_inf_signed, ExprOp_inf_unsigned, ExprOp_equal +from miasm.expression.simplifications_cond import ExprOp_inf_signed, ExprOp_inf_unsigned, ExprOp_equal parser = ArgumentParser("Expression simplification regression tests") parser.add_argument("--z3", action="store_true", help="Enable check against z3") @@ -24,7 +24,7 @@ if args.verbose: # Additional imports and definitions if args.z3: import z3 - from miasm2.ir.translators import Translator + from miasm.ir.translators import Translator trans = Translator.to_language("z3") def check(expr_in, expr_out): diff --git a/test/expression/stp.py b/test/expression/stp.py index 7650bf45..b97b0754 100755 --- a/test/expression/stp.py +++ b/test/expression/stp.py @@ -8,8 +8,8 @@ import unittest class TestIrIr2STP(unittest.TestCase): def test_ExprOp_strcst(self): - from miasm2.expression.expression import ExprInt, ExprOp - from miasm2.ir.translators.translator import Translator + from miasm.expression.expression import ExprInt, ExprOp + from miasm.ir.translators.translator import Translator translator_smt2 = Translator.to_language("smt2") args = [ExprInt(i, 32) for i in range(9)] @@ -23,8 +23,8 @@ class TestIrIr2STP(unittest.TestCase): self.assertRaises(NotImplementedError, translator_smt2.from_expr, ExprOp('X', *args[:1])) def test_ExprSlice_strcst(self): - from miasm2.expression.expression import ExprInt, ExprOp - from miasm2.ir.translators.translator import Translator + from miasm.expression.expression import ExprInt, ExprOp + from miasm.ir.translators.translator import Translator translator_smt2 = Translator.to_language("smt2") args = [ExprInt(i, 32) for i in 
range(9)] diff --git a/test/ir/ir.py b/test/ir/ir.py index 3dd95c3e..a959a7a7 100644 --- a/test/ir/ir.py +++ b/test/ir/ir.py @@ -1,8 +1,8 @@ from future.utils import viewitems -from miasm2.expression.expression import * -from miasm2.ir.ir import AssignBlock -from miasm2.expression.simplifications import expr_simp +from miasm.expression.expression import * +from miasm.ir.ir import AssignBlock +from miasm.expression.simplifications import expr_simp id_a = ExprId("a", 32) id_b = ExprId("b", 32) diff --git a/test/ir/ir2C.py b/test/ir/ir2C.py index 26683468..c41c98c9 100755 --- a/test/ir/ir2C.py +++ b/test/ir/ir2C.py @@ -3,19 +3,19 @@ from builtins import range import unittest -from miasm2.expression.expression import TOK_EQUAL +from miasm.expression.expression import TOK_EQUAL class TestIrIr2C(unittest.TestCase): def translationTest(self, expr, expected): - from miasm2.ir.translators import Translator + from miasm.ir.translators import Translator translator = Translator.to_language("C") self.assertEqual(translator.from_expr(expr), expected) def test_ExprOp_toC(self): - from miasm2.expression.expression import ExprInt, ExprOp - from miasm2.ir.translators.C import Translator + from miasm.expression.expression import ExprInt, ExprOp + from miasm.ir.translators.C import Translator args = [ExprInt(i, 32) for i in range(9)] translator = Translator.to_language("C") diff --git a/test/ir/reduce_graph.py b/test/ir/reduce_graph.py index f6ebad24..8835b4aa 100644 --- a/test/ir/reduce_graph.py +++ b/test/ir/reduce_graph.py @@ -5,13 +5,13 @@ from pdb import pm from future.utils import viewitems -from miasm2.expression.expression import ExprId, ExprInt, ExprAssign, ExprCond, \ +from miasm.expression.expression import ExprId, ExprInt, ExprAssign, ExprCond, \ ExprLoc, LocKey -from miasm2.core.locationdb import LocationDB -from miasm2.ir.analysis import ira -from miasm2.ir.ir import IRBlock, AssignBlock, IRCFG -from miasm2.analysis.data_flow import merge_blocks +from 
miasm.core.locationdb import LocationDB +from miasm.ir.analysis import ira +from miasm.ir.ir import IRBlock, AssignBlock, IRCFG +from miasm.analysis.data_flow import merge_blocks loc_db = LocationDB() diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index 3ab99c91..d627f4b9 100755 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -11,12 +11,12 @@ import unittest class TestSymbExec(unittest.TestCase): def test_ClassDef(self): - from miasm2.expression.expression import ExprInt, ExprId, ExprMem, \ + from miasm.expression.expression import ExprInt, ExprId, ExprMem, \ ExprCompose, ExprAssign - from miasm2.arch.x86.sem import ir_x86_32 - from miasm2.core.locationdb import LocationDB - from miasm2.ir.symbexec import SymbolicExecutionEngine - from miasm2.ir.ir import AssignBlock + from miasm.arch.x86.sem import ir_x86_32 + from miasm.core.locationdb import LocationDB + from miasm.ir.symbexec import SymbolicExecutionEngine + from miasm.ir.ir import AssignBlock loc_db = LocationDB() diff --git a/test/ir/translators/smt2.py b/test/ir/translators/smt2.py index 78472d0a..81f63b45 100644 --- a/test/ir/translators/smt2.py +++ b/test/ir/translators/smt2.py @@ -1,7 +1,7 @@ from z3 import Solver, unsat, parse_smt2_string -from miasm2.expression.expression import * -from miasm2.ir.translators.smt2 import TranslatorSMT2 -from miasm2.ir.translators.z3_ir import TranslatorZ3 +from miasm.expression.expression import * +from miasm.ir.translators.smt2 import TranslatorSMT2 +from miasm.ir.translators.z3_ir import TranslatorZ3 # create nested expression a = ExprId("a", 64) diff --git a/test/ir/translators/z3_ir.py b/test/ir/translators/z3_ir.py index 68421bea..b28269fb 100644 --- a/test/ir/translators/z3_ir.py +++ b/test/ir/translators/z3_ir.py @@ -1,9 +1,9 @@ from __future__ import print_function import z3 -from miasm2.core.locationdb import LocationDB -from miasm2.expression.expression import * -from miasm2.ir.translators.z3_ir import Z3Mem, TranslatorZ3 +from 
miasm.core.locationdb import LocationDB +from miasm.expression.expression import * +from miasm.ir.translators.z3_ir import Z3Mem, TranslatorZ3 # Some examples of use/unit tests. diff --git a/test/jitter/bad_block.py b/test/jitter/bad_block.py index 0756dfd5..256d2388 100644 --- a/test/jitter/bad_block.py +++ b/test/jitter/bad_block.py @@ -1,7 +1,7 @@ import sys -from miasm2.core.utils import decode_hex -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_UNK_MNEMO -from miasm2.analysis.machine import Machine +from miasm.core.utils import decode_hex +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_UNK_MNEMO +from miasm.analysis.machine import Machine def code_sentinelle(jitter): jitter.run = False diff --git a/test/jitter/jit_options.py b/test/jitter/jit_options.py index 91d59edd..74808330 100644 --- a/test/jitter/jit_options.py +++ b/test/jitter/jit_options.py @@ -2,9 +2,9 @@ from __future__ import print_function import os import sys -from miasm2.core.utils import decode_hex -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.machine import Machine +from miasm.core.utils import decode_hex +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine from pdb import pm # Shellcode diff --git a/test/jitter/jitload.py b/test/jitter/jitload.py index 5473b7d2..8b758a89 100644 --- a/test/jitter/jitload.py +++ b/test/jitter/jitload.py @@ -1,10 +1,10 @@ import sys from pdb import pm -from miasm2.core.utils import decode_hex, encode_hex -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.machine import Machine -from miasm2.expression.expression import ExprId, ExprAssign, ExprInt, ExprMem +from miasm.core.utils import decode_hex, encode_hex +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine +from miasm.expression.expression import ExprId, ExprAssign, ExprInt, ExprMem # Initial data: from 'example/samples/x86_32_sc.bin' data = 
decode_hex("8d49048d5b0180f90174058d5bffeb038d5b0189d8c3") diff --git a/test/jitter/jmp_out_mem.py b/test/jitter/jmp_out_mem.py index ff137b84..2b064f73 100644 --- a/test/jitter/jmp_out_mem.py +++ b/test/jitter/jmp_out_mem.py @@ -1,7 +1,7 @@ import sys -from miasm2.core.utils import decode_hex -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_ACCESS_VIOL -from miasm2.analysis.machine import Machine +from miasm.core.utils import decode_hex +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE, EXCEPT_ACCESS_VIOL +from miasm.analysis.machine import Machine def code_sentinelle(jitter): jitter.run = False diff --git a/test/jitter/test_post_instr.py b/test/jitter/test_post_instr.py index ab8f8a74..52274a46 100644 --- a/test/jitter/test_post_instr.py +++ b/test/jitter/test_post_instr.py @@ -1,9 +1,9 @@ from __future__ import print_function import sys -from miasm2.core.utils import decode_hex -from miasm2.analysis.machine import Machine -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE, \ +from miasm.core.utils import decode_hex +from miasm.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE, \ EXCEPT_BREAKPOINT_MEMORY, EXCEPT_ACCESS_VIOL machine = Machine("x86_32") diff --git a/test/jitter/vm_mngr.py b/test/jitter/vm_mngr.py index 3aa4105e..468fb347 100644 --- a/test/jitter/vm_mngr.py +++ b/test/jitter/vm_mngr.py @@ -1,6 +1,6 @@ import sys -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.analysis.machine import Machine +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine myjit = Machine("x86_32").jitter(sys.argv[1]) diff --git a/test/os_dep/common.py b/test/os_dep/common.py index 52512075..749d71a7 100755 --- a/test/os_dep/common.py +++ b/test/os_dep/common.py @@ -4,10 +4,10 @@ from builtins import range import unittest import logging -from miasm2.analysis.machine import Machine -import miasm2.os_dep.common as commonapi -from miasm2.core.utils import pck32 -from 
miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine +import miasm.os_dep.common as commonapi +from miasm.core.utils import pck32 +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE machine = Machine("x86_32") diff --git a/test/os_dep/linux/stdlib.py b/test/os_dep/linux/stdlib.py index 80b99969..a205002b 100755 --- a/test/os_dep/linux/stdlib.py +++ b/test/os_dep/linux/stdlib.py @@ -3,10 +3,10 @@ import unittest import logging -from miasm2.analysis.machine import Machine -import miasm2.os_dep.linux_stdlib as stdlib -from miasm2.core.utils import pck32 -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine +import miasm.os_dep.linux_stdlib as stdlib +from miasm.core.utils import pck32 +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE machine = Machine("x86_32") diff --git a/test/os_dep/linux/test_env.py b/test/os_dep/linux/test_env.py index 1e8e7678..13351a84 100644 --- a/test/os_dep/linux/test_env.py +++ b/test/os_dep/linux/test_env.py @@ -2,8 +2,8 @@ from __future__ import print_function import os import sys from pdb import pm -from miasm2.analysis.binary import Container -from miasm2.analysis.sandbox import Sandbox_Linux_x86_32, Sandbox_Linux_x86_64,\ +from miasm.analysis.binary import Container +from miasm.analysis.sandbox import Sandbox_Linux_x86_32, Sandbox_Linux_x86_64,\ Sandbox_Linux_arml, Sandbox_Linux_aarch64l if len(sys.argv) < 2: diff --git a/test/os_dep/win_api_x86_32.py b/test/os_dep/win_api_x86_32.py index 2dcac61d..a7d88f90 100755 --- a/test/os_dep/win_api_x86_32.py +++ b/test/os_dep/win_api_x86_32.py @@ -4,10 +4,10 @@ from builtins import range import unittest import logging -from miasm2.analysis.machine import Machine -import miasm2.os_dep.win_api_x86_32 as winapi -from miasm2.core.utils import pck32 -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +from miasm.analysis.machine import Machine +import miasm.os_dep.win_api_x86_32 as winapi +from miasm.core.utils 
import pck32 +from miasm.jitter.csts import PAGE_READ, PAGE_WRITE machine = Machine("x86_32") -- cgit 1.4.1 From d3a42fa5dcdb0c467586eb6b92271fa4dbeb648b Mon Sep 17 00:00:00 2001 From: Pierre LALET Date: Wed, 27 Feb 2019 20:29:44 +0100 Subject: Include elfesteem fork in miasm --- .codespell_ignore | 1 + README.md | 13 +- example/asm/shellcode.py | 4 +- example/elfesteem/minidump_to_pe.py | 48 + example/elfesteem/test_pe.py | 31 + example/jitter/arm_sc.py | 2 +- example/jitter/run_with_linuxenv.py | 2 +- example/jitter/unpack_upx.py | 2 +- miasm/analysis/binary.py | 4 +- miasm/elfesteem/__init__.py | 3 + miasm/elfesteem/cstruct.py | 154 ++++ miasm/elfesteem/elf.py | 1538 ++++++++++++++++++++++++++++++++ miasm/elfesteem/elf_init.py | 878 ++++++++++++++++++ miasm/elfesteem/minidump.py | 545 ++++++++++++ miasm/elfesteem/minidump_init.py | 194 ++++ miasm/elfesteem/new_cstruct.py | 265 ++++++ miasm/elfesteem/pe.py | 1668 +++++++++++++++++++++++++++++++++++ miasm/elfesteem/pe_init.py | 603 +++++++++++++ miasm/elfesteem/strpatchwork.py | 106 +++ miasm/jitter/loader/elf.py | 12 +- miasm/jitter/loader/pe.py | 6 +- miasm/os_dep/win_api_x86_32_seh.py | 2 +- requirements.txt | 1 - setup.py | 1 + test/analysis/dse.py | 2 +- test/arch/aarch64/unit/asm_test.py | 2 +- test/arch/mips32/unit/asm_test.py | 2 +- test/arch/x86/unit/asm_test.py | 2 +- 28 files changed, 6058 insertions(+), 33 deletions(-) create mode 100644 example/elfesteem/minidump_to_pe.py create mode 100644 example/elfesteem/test_pe.py create mode 100644 miasm/elfesteem/__init__.py create mode 100644 miasm/elfesteem/cstruct.py create mode 100644 miasm/elfesteem/elf.py create mode 100644 miasm/elfesteem/elf_init.py create mode 100644 miasm/elfesteem/minidump.py create mode 100644 miasm/elfesteem/minidump_init.py create mode 100644 miasm/elfesteem/new_cstruct.py create mode 100644 miasm/elfesteem/pe.py create mode 100644 miasm/elfesteem/pe_init.py create mode 100644 miasm/elfesteem/strpatchwork.py (limited to 
'example/asm/shellcode.py') diff --git a/.codespell_ignore b/.codespell_ignore index 3724a1f4..8eab9f6f 100644 --- a/.codespell_ignore +++ b/.codespell_ignore @@ -4,3 +4,4 @@ uint mye iff nto +rela diff --git a/README.md b/README.md index 010f75d6..0d1fdf8a 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Miasm is a free and open source (GPLv2) reverse engineering framework. Miasm aims to analyze / modify / generate binary programs. Here is a non exhaustive list of features: -* Opening / modifying / generating PE / ELF 32 / 64 LE / BE using Elfesteem +* Opening / modifying / generating PE / ELF 32 / 64 LE / BE * Assembling / Disassembling X86 / ARM / MIPS / SH4 / MSP430 * Representing assembly semantic using intermediate language * Emulating using JIT (dynamic code analysis, unpacking, ...) @@ -525,7 +525,6 @@ Miasm uses: * python-pyparsing * python-dev -* elfesteem from [Elfesteem](https://github.com/serpilliere/elfesteem.git) * optionally python-pycparser (version >= 2.17) To enable code JIT, one of the following module is mandatory: @@ -539,14 +538,6 @@ To enable code JIT, one of the following module is mandatory: Configuration ------------- -* Install elfesteem -```pycon -git clone https://github.com/serpilliere/elfesteem.git elfesteem -cd elfesteem -python setup.py build -sudo python setup.py install -``` - To use the jitter, GCC or LLVM is recommended * GCC (any version) * Clang (any version) @@ -570,7 +561,7 @@ Windows & IDA Most of Miasm's IDA plugins use a subset of Miasm functionality. A quick way to have them working is to add: -* `elfesteem` directory and `pyparsing.py` to `C:\...\IDA\python\` or `pip install pyparsing elfesteem` +* `pyparsing.py` to `C:\...\IDA\python\` or `pip install pyparsing` * `miasm/miasm` directory to `C:\...\IDA\python\` All features excepting JITter related ones will be available. For a more complete installation, please refer to above paragraphs. 
diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index ed489bbd..59ea3a94 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -4,8 +4,8 @@ from argparse import ArgumentParser from pdb import pm from future.utils import viewitems -from elfesteem import pe_init -from elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem import pe_init +from miasm.elfesteem.strpatchwork import StrPatchwork from miasm.core import parse_asm, asmblock from miasm.analysis.machine import Machine diff --git a/example/elfesteem/minidump_to_pe.py b/example/elfesteem/minidump_to_pe.py new file mode 100644 index 00000000..8aff3e62 --- /dev/null +++ b/example/elfesteem/minidump_to_pe.py @@ -0,0 +1,48 @@ +#! /usr/bin/env python +"""Minidump to PE example""" + +import sys + +from future.utils import viewvalues + +from miasm.elfesteem.minidump_init import Minidump +from miasm.elfesteem.pe_init import PE + +minidump = Minidump(open(sys.argv[1], 'rb').read()) + +pe = PE() +for i, memory in enumerate(sorted(viewvalues(minidump.memory), + key=lambda x:x.address)): + # Get section name + name = str(memory.name) + if not name: + name = "s_%02d" % i + else: + name = name.split('\\')[-1] + + # Get section protection + protect = memory.pretty_protect + protect_mask = 0x20 + if protect == "UNKNOWN": + protect_mask |= 0xe0000000 + else: + if "EXECUTE" in protect: + protect_mask |= 1 << 29 + if "READ" in protect: + protect_mask |= 1 << 30 + if "WRITE" in protect: + protect_mask |= 1 << 31 + + # Add the section + pe.SHList.add_section(name=name, addr=memory.address, rawsize=memory.size, + data=memory.content, flags=protect_mask) + +# Find entry point +try: + entry_point = minidump.threads.Threads[0].ThreadContext.Eip[0] +except AttributeError: + entry_point = minidump.threads.Threads[0].ThreadContext.Rip[0] + +pe.Opthdr.AddressOfEntryPoint = entry_point + +open("out_pe.bin", "wb").write(bytes(pe)) diff --git a/example/elfesteem/test_pe.py 
b/example/elfesteem/test_pe.py new file mode 100644 index 00000000..e9cff0b4 --- /dev/null +++ b/example/elfesteem/test_pe.py @@ -0,0 +1,31 @@ +#! /usr/bin/env python + +import miasm.elfesteem.pe as pe +from miasm.elfesteem.pe_init import PE +import rlcompleter +import readline +import pdb +import sys +from pprint import pprint as pp +readline.parse_and_bind("tab: complete") + + +e_ = PE() +mysh = b"\xc3" +s_text = e_.SHList.add_section( + name="text", addr=0x1000, rawsize=0x1000, data=mysh) +e_.Opthdr.AddressOfEntryPoint = s_text.addr +new_dll = [({"name": "kernel32.dll", + "firstthunk": s_text.addr + 0x100}, + ["CreateFileA", "SetFilePointer", "WriteFile", "CloseHandle"] + ), + ({"name": "USER32.dll", + "firstthunk": None}, + ["SetDlgItemInt", "GetMenu", "HideCaret"] + ) + ] +e_.DirImport.add_dlldesc(new_dll) + +s_myimp = e_.SHList.add_section(name="myimp", rawsize=0x1000) +e_.DirImport.set_rva(s_myimp.addr) +open('uu.bin', 'wb').write(bytes(e_)) diff --git a/example/jitter/arm_sc.py b/example/jitter/arm_sc.py index 8d5b5677..ddadbf29 100755 --- a/example/jitter/arm_sc.py +++ b/example/jitter/arm_sc.py @@ -3,7 +3,7 @@ from miasm.core.utils import int_to_byte from miasm.analysis.sandbox import Sandbox_Linux_armb_str from miasm.analysis.sandbox import Sandbox_Linux_arml_str -from elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem.strpatchwork import StrPatchwork from pdb import pm diff --git a/example/jitter/run_with_linuxenv.py b/example/jitter/run_with_linuxenv.py index e2869699..0237cc94 100644 --- a/example/jitter/run_with_linuxenv.py +++ b/example/jitter/run_with_linuxenv.py @@ -2,7 +2,7 @@ from argparse import ArgumentParser import logging import re -from elfesteem import elf as elf_csts +from miasm.elfesteem import elf as elf_csts from miasm.os_dep.linux import environment, syscall from miasm.analysis.machine import Machine diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index 0a41d038..05d28b16 100644 --- 
a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -2,7 +2,7 @@ from __future__ import print_function import os import logging from pdb import pm -from elfesteem import pe +from miasm.elfesteem import pe from miasm.analysis.sandbox import Sandbox_Win_x86_32 # User defined methods diff --git a/miasm/analysis/binary.py b/miasm/analysis/binary.py index 82f83112..6dc095cf 100644 --- a/miasm/analysis/binary.py +++ b/miasm/analysis/binary.py @@ -131,7 +131,7 @@ class ContainerPE(Container): def parse(self, data, vm=None, **kwargs): from miasm.jitter.loader.pe import vm_load_pe, guess_arch - from elfesteem import pe_init + from miasm.elfesteem import pe_init # Parse signature if not data.startswith(b'MZ'): @@ -178,7 +178,7 @@ class ContainerELF(Container): """ from miasm.jitter.loader.elf import vm_load_elf, guess_arch, \ fill_loc_db_with_symbols - from elfesteem import elf_init + from miasm.elfesteem import elf_init # Parse signature if not data.startswith(b'\x7fELF'): diff --git a/miasm/elfesteem/__init__.py b/miasm/elfesteem/__init__.py new file mode 100644 index 00000000..1a602f38 --- /dev/null +++ b/miasm/elfesteem/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python + +__all__ = ['pe_init', 'elf_init', 'strpatchwork'] diff --git a/miasm/elfesteem/cstruct.py b/miasm/elfesteem/cstruct.py new file mode 100644 index 00000000..06d2e002 --- /dev/null +++ b/miasm/elfesteem/cstruct.py @@ -0,0 +1,154 @@ +#! 
/usr/bin/env python + +from __future__ import print_function +from builtins import zip +from functools import reduce +import struct + +from future.utils import PY3 + +type_size = {} +size2type = {} +for t in 'B', 'H', 'I', 'Q': + s = struct.calcsize(t) + type_size[t] = s * 8 + size2type[s * 8] = t + +type_size['u08'] = size2type[8] +type_size['u16'] = size2type[16] +type_size['u32'] = size2type[32] +type_size['u64'] = size2type[64] + + +def fix_size(fields, wsize): + out = [] + for name, v in fields: + if v.endswith("s"): + pass + elif v == "ptr": + v = size2type[wsize] + elif not v in type_size: + raise ValueError("unknown Cstruct type", v) + else: + v = type_size[v] + out.append((name, v)) + fields = out + return fields + + +class Cstruct_Metaclass(type): + + def __new__(cls, name, bases, dct): + o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct) + o._packstring = o._packformat + \ + "".join(x[1] for x in o._fields) + o._size = struct.calcsize(o._packstring) + return o + + +class CStruct(object): + #__metaclass__ = Cstruct_Metaclass + _packformat = "" + _fields = [] + + @classmethod + def _from_file(cls, f): + return cls(f.read(cls._size)) + + def __init__(self, sex, wsize, *args, **kargs): + if sex == 1: + sex = '<' + else: + sex = '>' + # packformat enforce sex + if self._packformat: + sex = "" + pstr = fix_size(self._fields, wsize) + self._packstring = sex + self._packformat + \ + "".join(x[1] for x in pstr) + self._size = struct.calcsize(self._packstring) + + self._names = [x[0] for x in self._fields] + if kargs: + self.__dict__.update(kargs) + else: + if args: + s = args[0] + else: + s = b"" + s += b"\x00" * self._size + s = s[:self._size] + self._unpack(s) + + def _unpack(self, s): + disas = struct.unpack(self._packstring, s) + for n, v in zip(self._names, disas): + setattr(self, n, v) + + def _pack(self): + return struct.pack(self._packstring, + *(getattr(self, x) for x in self._names)) + + def _spack(self, superstruct, shift=0): + attr = [] 
+ for name in self._names: + s = getattr(self, name) + if isinstance(s, CStruct): + if s in superstruct: + s = reduce(lambda x, y: x + len(y), + superstruct[:superstruct.index(s)], + 0) + s += shift + else: + raise Exception("%r is not a superstructure" % s) + attr.append(s) + return struct.pack(self._packstring, *attr) + + def _copy(self): + return self.__class__(**self.__dict__) + + def __len__(self): + return self._size + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __bytes__(self): + return self._pack() + + def __repr__(self): + return "<%s=%s>" % (self.__class__.__name__, "/".join(repr( + getattr(self, x[0])) for x in self._fields + )) + + def __getitem__(self, item): # to work with format strings + return getattr(self, item) + + def _show(self): + print("##%s:" % self.__class__.__name__) + fmt = "%%-%is = %%r" % max(len(x[0]) for x in self._fields) + for fn, ft in self._fields: + print(fmt % (fn, getattr(self, fn))) + + +class CStructStruct(object): + + def __init__(self, lst, shift=0): + self._lst = lst + self._shift = shift + + def __getattr__(self, attr): + return getattr(self._lst, attr) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __bytes__(self): + return b"".join( + a if isinstance(a, bytes) else a._spack(self._lst, self._shift) + for a in self._lst + ) diff --git a/miasm/elfesteem/elf.py b/miasm/elfesteem/elf.py new file mode 100644 index 00000000..bdd088b8 --- /dev/null +++ b/miasm/elfesteem/elf.py @@ -0,0 +1,1538 @@ +#! 
/usr/bin/env python + +from miasm.elfesteem.cstruct import CStruct + +class Ehdr(CStruct): + _fields = [ ("ident","16s"), + ("type","u16"), + ("machine","u16"), + ("version","u32"), + ("entry","ptr"), + ("phoff","ptr"), + ("shoff","ptr"), + ("flags","u32"), + ("ehsize","u16"), + ("phentsize","u16"), + ("phnum","u16"), + ("shentsize","u16"), + ("shnum","u16"), + ("shstrndx","u16") ] + + +class Shdr(CStruct): + _fields = [ ("name","u32"), + ("type","u32"), + ("flags","ptr"), + ("addr","ptr"), + ("offset","ptr"), + ("size","ptr"), + ("link","u32"), + ("info","u32"), + ("addralign","ptr"), + ("entsize","ptr") ] + +class Phdr(CStruct): + _fields = [ ("type","u32"), + ("offset","u32"), + ("vaddr","u32"), + ("paddr","u32"), + ("filesz","u32"), + ("memsz","u32"), + ("flags","u32"), + ("align","u32") ] + +class Phdr64(CStruct): + _fields = [ ("type","u32"), + ("flags","u32"), + ("offset","ptr"), + ("vaddr","ptr"), + ("paddr","ptr"), + ("filesz","ptr"), + ("memsz","ptr"), + ("align","ptr") ] + +class Nhdr(CStruct): + _fields = [ ("namesz","u32"), + ("descsz","u32"), + ("type", "u32") ] + + +class Sym32(CStruct): + _fields = [ ("name","u32"), + ("value","u32"), + ("size","u32"), + ("info","u08"), + ("other","u08"), + ("shndx","u16") ] + +class Sym64(CStruct): + _fields = [ ("name","u32"), + ("info","u08"), + ("other","u08"), + ("shndx","u16"), + ("value","u64"), + ("size","u64") ] + +class Dym(CStruct): + _fields = [ ("tag","u32"), + ("val","u32") ] + +class Rel32(CStruct): + _fields = [ ("offset","ptr"), + ("info","u32") ] + +class Rel64(CStruct): + _fields = [ ("offset","ptr"), + ("info","u64") ] + +class Rela32(CStruct): + _fields = [ ("offset","ptr"), + ("info","u32"), + ("addend","ptr") ] + +class Rela64(CStruct): + _fields = [ ("offset","ptr"), + ("info","u64"), + ("addend","ptr") ] + +class Dynamic(CStruct): + _fields = [ ("type","ptr"), + ("name","ptr") ] + + +# Legal values for e_ident (identification indexes) + +EI_MAG0 = 0 # File identification +EI_MAG1 = 1 # File 
identification +EI_MAG2 = 2 # File identification +EI_MAG3 = 3 # File identification +EI_CLASS = 4 # File class +EI_DATA = 5 # Data encoding +EI_VERSION = 6 # File version +EI_OSABI = 7 # Operating system/ABI identification +EI_ABIVERSION = 8 # ABI version +EI_PAD = 9 # Start of padding bytes +EI_NIDENT = 16 # Size of e_ident[] + +# Legal values for e_ident[EI_CLASS] + +ELFCLASSNONE = 0 # Invalid class +ELFCLASS32 = 1 # 32-bit objects +ELFCLASS64 = 2 # 64-bit objects + +# Legal values for e_ident[EI_DATA] + +ELFDATANONE = 0 # Invalid data encoding +ELFDATA2LSB = 1 # Least significant byte at lowest address +ELFDATA2MSB = 2 # Most significant byte at lowest address + +# Legal values for e_type (object file type). + +ET_NONE = 0 # No file type +ET_REL = 1 # Relocatable file +ET_EXEC = 2 # Executable file +ET_DYN = 3 # Shared object file +ET_CORE = 4 # Core file +ET_NUM = 5 # Number of defined types +ET_LOOS = 0xfe00 # OS-specific range start +ET_HIOS = 0xfeff # OS-specific range end +ET_LOPROC = 0xff00 # Processor-specific range start +ET_HIPROC = 0xffff # Processor-specific range end + +# Legal values for e_machine (architecture). 
+ +EM_NONE = 0 # No machine +EM_M32 = 1 # AT&T WE 32100 +EM_SPARC = 2 # SUN SPARC +EM_386 = 3 # Intel 80386 +EM_68K = 4 # Motorola m68k family +EM_88K = 5 # Motorola m88k family +EM_486 = 6 # Intel 80486 +EM_860 = 7 # Intel 80860 +EM_MIPS = 8 # MIPS R3000 big-endian +EM_S370 = 9 # IBM System/370 +EM_MIPS_RS3_LE = 10 # MIPS R3000 little-endian + +EM_PARISC = 15 # HPPA +EM_VPP500 = 17 # Fujitsu VPP500 +EM_SPARC32PLUS = 18 # Sun's "v8plus" +EM_960 = 19 # Intel 80960 +EM_PPC = 20 # PowerPC +EM_PPC64 = 21 # PowerPC 64-bit +EM_S390 = 22 # IBM S390 + +EM_V800 = 36 # NEC V800 series +EM_FR20 = 37 # Fujitsu FR20 +EM_RH32 = 38 # TRW RH-32 +EM_RCE = 39 # Motorola RCE +EM_ARM = 40 # ARM +EM_FAKE_ALPHA = 41 # Digital Alpha +EM_SH = 42 # Hitachi SH +EM_SPARCV9 = 43 # SPARC v9 64-bit +EM_TRICORE = 44 # Siemens Tricore +EM_ARC = 45 # Argonaut RISC Core +EM_H8_300 = 46 # Hitachi H8/300 +EM_H8_300H = 47 # Hitachi H8/300H +EM_H8S = 48 # Hitachi H8S +EM_H8_500 = 49 # Hitachi H8/500 +EM_IA_64 = 50 # Intel Merced +EM_MIPS_X = 51 # Stanford MIPS-X +EM_COLDFIRE = 52 # Motorola Coldfire +EM_68HC12 = 53 # Motorola M68HC12 +EM_MMA = 54 # Fujitsu MMA Multimedia Accelerator +EM_PCP = 55 # Siemens PCP +EM_NCPU = 56 # Sony nCPU embedded RISC +EM_NDR1 = 57 # Denso NDR1 microprocessor +EM_STARCORE = 58 # Motorola Start*Core processor +EM_ME16 = 59 # Toyota ME16 processor +EM_ST100 = 60 # STMicroelectronic ST100 processor +EM_TINYJ = 61 # Advanced Logic Corp.
Tinyj emb.fam*/ +EM_X86_64 = 62 # AMD x86-64 architecture +EM_AARCH64 = 183 # Aarch64 architecture +EM_PDSP = 63 # Sony DSP Processor + +EM_FX66 = 66 # Siemens FX66 microcontroller +EM_ST9PLUS = 67 # STMicroelectronics ST9+ 8/16 mc +EM_ST7 = 68 # STmicroelectronics ST7 8 bit mc +EM_68HC16 = 69 # Motorola MC68HC16 microcontroller +EM_68HC11 = 70 # Motorola MC68HC11 microcontroller +EM_68HC08 = 71 # Motorola MC68HC08 microcontroller +EM_68HC05 = 72 # Motorola MC68HC05 microcontroller +EM_SVX = 73 # Silicon Graphics SVx +EM_ST19 = 74 # STMicroelectronics ST19 8 bit mc +EM_VAX = 75 # Digital VAX +EM_CRIS = 76 # Axis Communications 32-bit embedded processor +EM_JAVELIN = 77 # Infineon Technologies 32-bit embedded processor +EM_FIREPATH = 78 # Element 14 64-bit DSP Processor +EM_ZSP = 79 # LSI Logic 16-bit DSP Processor +EM_MMIX = 80 # Donald Knuth's educational 64-bit processor +EM_HUANY = 81 # Harvard University machine-independent object files +EM_PRISM = 82 # SiTera Prism +EM_AVR = 83 # Atmel AVR 8-bit microcontroller +EM_FR30 = 84 # Fujitsu FR30 +EM_D10V = 85 # Mitsubishi D10V +EM_D30V = 86 # Mitsubishi D30V +EM_V850 = 87 # NEC v850 +EM_M32R = 88 # Mitsubishi M32R +EM_MN10300 = 89 # Matsushita MN10300 +EM_MN10200 = 90 # Matsushita MN10200 +EM_PJ = 91 # picoJava +EM_OPENRISC = 92 # OpenRISC 32-bit embedded processor +EM_ARC_A5 = 93 # ARC Cores Tangent-A5 +EM_XTENSA = 94 # Tensilica Xtensa Architecture + +EM_ALPHA = 0x9026 + +# Legal values for sh_type (section type). 
+ +SHT_NULL = 0 # Section header table entry unused +SHT_PROGBITS = 1 # Program data +SHT_SYMTAB = 2 # Symbol table +SHT_STRTAB = 3 # String table +SHT_RELA = 4 # Relocation entries with addends +SHT_HASH = 5 # Symbol hash table +SHT_DYNAMIC = 6 # Dynamic linking information +SHT_NOTE = 7 # Notes +SHT_NOBITS = 8 # Program space with no data (bss) +SHT_REL = 9 # Relocation entries, no addends +SHT_SHLIB = 10 # Reserved +SHT_DYNSYM = 11 # Dynamic linker symbol table +SHT_INIT_ARRAY = 14 # Array of constructors +SHT_FINI_ARRAY = 15 # Array of destructors +SHT_PREINIT_ARRAY = 16 # Array of pre-constructors +SHT_GROUP = 17 # Section group +SHT_SYMTAB_SHNDX = 18 # Extended section indices +SHT_NUM = 19 # Number of defined types. +SHT_LOOS = 0x60000000 # Start OS-specific +SHT_GNU_LIBLIST = 0x6ffffff7 # Prelink library list +SHT_CHECKSUM = 0x6ffffff8 # Checksum for DSO content. +SHT_LOSUNW = 0x6ffffffa # Sun-specific low bound. +SHT_SUNW_move = 0x6ffffffa +SHT_SUNW_COMDAT = 0x6ffffffb +SHT_SUNW_syminfo = 0x6ffffffc +SHT_GNU_verdef = 0x6ffffffd # Version definition section. +SHT_GNU_verneed = 0x6ffffffe # Version needs section. +SHT_GNU_versym = 0x6fffffff # Version symbol table. +SHT_HISUNW = 0x6fffffff # Sun-specific high bound. +SHT_HIOS = 0x6fffffff # End OS-specific type +SHT_LOPROC = 0x70000000 # Start of processor-specific +SHT_HIPROC = 0x7fffffff # End of processor-specific +SHT_LOUSER = 0x80000000 # Start of application-specific +SHT_HIUSER = 0x8fffffff # End of application-specific + +# Legal values for sh_flags (section flags). 
+ +SHF_WRITE = (1 << 0) # Writable +SHF_ALLOC = (1 << 1) # Occupies memory during execution +SHF_EXECINSTR = (1 << 2) # Executable +SHF_MERGE = (1 << 4) # Might be merged +SHF_STRINGS = (1 << 5) # Contains nul-terminated strings +SHF_INFO_LINK = (1 << 6) # `sh_info' contains SHT index +SHF_LINK_ORDER = (1 << 7) # Preserve order after combining +SHF_OS_NONCONFORMING = (1 << 8) # Non-standard OS specific handling required +SHF_GROUP = (1 << 9) # Section is member of a group. +SHF_TLS = (1 << 10) # Section hold thread-local data. +SHF_MASKOS = 0x0ff00000 # OS-specific. +SHF_MASKPROC = 0xf0000000 # Processor-specific + +# Section group handling. + +GRP_COMDAT = 0x1 # Mark group as COMDAT. + +# Legal values for p_type (segment type). + +PT_NULL = 0 # Program header table entry unused +PT_LOAD = 1 # Loadable program segment +PT_DYNAMIC = 2 # Dynamic linking information +PT_INTERP = 3 # Program interpreter +PT_NOTE = 4 # Auxiliary information +PT_SHLIB = 5 # Reserved +PT_PHDR = 6 # Entry for header table itself +PT_TLS = 7 # Thread-local storage segment +PT_NUM = 8 # Number of defined types +PT_LOOS = 0x60000000 # Start of OS-specific +PT_GNU_EH_FRAME = 0x6474e550 # GCC .eh_frame_hdr segment +PT_GNU_STACK = 0x6474e551 # Indicates stack executability +PT_LOSUNW = 0x6ffffffa +PT_SUNWBSS = 0x6ffffffa # Sun Specific segment +PT_SUNWSTACK = 0x6ffffffb # Stack segment +PT_HISUNW = 0x6fffffff +PT_HIOS = 0x6fffffff # End of OS-specific +PT_LOPROC = 0x70000000 # Start of processor-specific +PT_HIPROC = 0x7fffffff # End of processor-specific + +# Legal values for p_flags (segment flags). + +PF_X = (1 << 0) # Segment is executable +PF_W = (1 << 1) # Segment is writable +PF_R = (1 << 2) # Segment is readable +PF_MASKOS = 0x0ff00000 # OS-specific +PF_MASKPROC = 0xf0000000 # Processor-specific + +# Legal values for note segment descriptor types for core files. 
+ +NT_PRSTATUS = 1 # Contains copy of prstatus struct +NT_FPREGSET = 2 # Contains copy of fpregset struct +NT_PRPSINFO = 3 # Contains copy of prpsinfo struct +NT_PRXREG = 4 # Contains copy of prxregset struct +NT_TASKSTRUCT = 4 # Contains copy of task structure +NT_PLATFORM = 5 # String from sysinfo(SI_PLATFORM) +NT_AUXV = 6 # Contains copy of auxv array +NT_GWINDOWS = 7 # Contains copy of gwindows struct +NT_ASRS = 8 # Contains copy of asrset struct +NT_PSTATUS = 10 # Contains copy of pstatus struct +NT_PSINFO = 13 # Contains copy of psinfo struct +NT_PRCRED = 14 # Contains copy of prcred struct +NT_UTSNAME = 15 # Contains copy of utsname struct +NT_LWPSTATUS = 16 # Contains copy of lwpstatus struct +NT_LWPSINFO = 17 # Contains copy of lwpinfo struct +NT_PRFPXREG = 20 # Contains copy of fprxregset struct + +# Legal values for the note segment descriptor types for object files. + +NT_VERSION = 1 # Contains a version string. + +# Legal values for ST_BIND subfield of st_info (symbol binding). +# bind = Sym.info >> 4 +# val = Sym.info & 0xf + +STB_LOCAL = 0 # Local symbol +STB_GLOBAL = 1 # Global symbol +STB_WEAK = 2 # Weak symbol +STB_NUM = 3 # Number of defined types. +STB_LOOS = 10 # Start of OS-specific +STB_HIOS = 12 # End of OS-specific +STB_LOPROC = 13 # Start of processor-specific +STB_HIPROC = 15 # End of processor-specific + +# Legal values for ST_TYPE subfield of st_info (symbol type). + +STT_NOTYPE = 0 # Symbol type is unspecified +STT_OBJECT = 1 # Symbol is a data object +STT_FUNC = 2 # Symbol is a code object +STT_SECTION = 3 # Symbol associated with a section +STT_FILE = 4 # Symbol's name is file name +STT_COMMON = 5 # Symbol is a common data object +STT_TLS = 6 # Symbol is thread-local data object +STT_NUM = 7 # Number of defined types.
+STT_LOOS = 10 # Start of OS-specific +STT_GNU_IFUNC = 10 # Symbol is indirect code object +STT_HIOS = 12 # End of OS-specific +STT_LOPROC = 13 # Start of processor-specific +STT_HIPROC = 15 # End of processor-specific + +# Legal values for d_tag (dynamic entry type). + +DT_NULL = 0 # Marks end of dynamic section +DT_NEEDED = 1 # Name of needed library +DT_PLTRELSZ = 2 # Size in bytes of PLT relocs +DT_PLTGOT = 3 # Processor defined value +DT_HASH = 4 # Address of symbol hash table +DT_STRTAB = 5 # Address of string table +DT_SYMTAB = 6 # Address of symbol table +DT_RELA = 7 # Address of Rela relocs +DT_RELASZ = 8 # Total size of Rela relocs +DT_RELAENT = 9 # Size of one Rela reloc +DT_STRSZ = 10 # Size of string table +DT_SYMENT = 11 # Size of one symbol table entry +DT_INIT = 12 # Address of init function +DT_FINI = 13 # Address of termination function +DT_SONAME = 14 # Name of shared object +DT_RPATH = 15 # Library search path (deprecated) +DT_SYMBOLIC = 16 # Start symbol search here +DT_REL = 17 # Address of Rel relocs +DT_RELSZ = 18 # Total size of Rel relocs +DT_RELENT = 19 # Size of one Rel reloc +DT_PLTREL = 20 # Type of reloc in PLT +DT_DEBUG = 21 # For debugging; unspecified +DT_TEXTREL = 22 # Reloc might modify .text +DT_JMPREL = 23 # Address of PLT relocs +DT_BIND_NOW = 24 # Process relocations of object +DT_INIT_ARRAY = 25 # Array with addresses of init fct +DT_FINI_ARRAY = 26 # Array with addresses of fini fct +DT_INIT_ARRAYSZ = 27 # Size in bytes of DT_INIT_ARRAY +DT_FINI_ARRAYSZ = 28 # Size in bytes of DT_FINI_ARRAY +DT_RUNPATH = 29 # Library search path +DT_FLAGS = 30 # Flags for the object being loaded +DT_ENCODING = 32 # Start of encoded range +DT_PREINIT_ARRAY = 32 # Array with addresses of preinit fct +DT_PREINIT_ARRAYSZ = 33 # size in bytes of DT_PREINIT_ARRAY +DT_NUM = 34 # Number used +DT_LOOS = 0x6000000d # Start of OS-specific +DT_HIOS = 0x6ffff000 # End of OS-specific +DT_LOPROC = 0x70000000 # Start of processor-specific +DT_HIPROC = 
0x7fffffff # End of processor-specific +#DT_PROCNUM = DT_MIPS_NUM # Most used by any processor + +# DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the +# Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's +# approach. +DT_VALRNGLO = 0x6ffffd00 +DT_GNU_PRELINKED = 0x6ffffdf5 # Prelinking timestamp +DT_GNU_CONFLICTSZ = 0x6ffffdf6 # Size of conflict section +DT_GNU_LIBLISTSZ = 0x6ffffdf7 # Size of library list +DT_CHECKSUM = 0x6ffffdf8 +DT_PLTPADSZ = 0x6ffffdf9 +DT_MOVEENT = 0x6ffffdfa +DT_MOVESZ = 0x6ffffdfb +DT_FEATURE_1 = 0x6ffffdfc # Feature selection (DTF_*). +DT_POSFLAG_1 = 0x6ffffdfd # Flags for DT_* entries, effecting the following DT_* entry. +DT_SYMINSZ = 0x6ffffdfe # Size of syminfo table (in bytes) +DT_SYMINENT = 0x6ffffdff # Entry size of syminfo +DT_VALRNGHI = 0x6ffffdff +DT_VALNUM = 12 + +# DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the +# Dyn.d_un.d_ptr field of the Elf*_Dyn structure. +# +# If any adjustment is made to the ELF object after it has been +# built these entries will need to be adjusted. +DT_ADDRRNGLO = 0x6ffffe00 +DT_GNU_CONFLICT = 0x6ffffef8 # Start of conflict section +DT_GNU_LIBLIST = 0x6ffffef9 # Library list +DT_CONFIG = 0x6ffffefa # Configuration information. +DT_DEPAUDIT = 0x6ffffefb # Dependency auditing. +DT_AUDIT = 0x6ffffefc # Object auditing. +DT_PLTPAD = 0x6ffffefd # PLT padding. +DT_MOVETAB = 0x6ffffefe # Move table. +DT_SYMINFO = 0x6ffffeff # Syminfo table. +DT_ADDRRNGHI = 0x6ffffeff +DT_ADDRNUM = 10 + +# The versioning entry types. The next are defined as part of the +# GNU extension. +DT_VERSYM = 0x6ffffff0 + +DT_RELACOUNT = 0x6ffffff9 +DT_RELCOUNT = 0x6ffffffa + +# These were chosen by Sun. +DT_FLAGS_1 = 0x6ffffffb # State flags, see DF_1_* below. 
+DT_VERDEF = 0x6ffffffc # Address of version definition table +DT_VERDEFNUM = 0x6ffffffd # Number of version definitions +DT_VERNEED = 0x6ffffffe # Address of table with needed versions +DT_VERNEEDNUM = 0x6fffffff # Number of needed versions +DT_VERSIONTAGNUM = 16 + +# Sun added these machine-independent extensions in the "processor-specific" +# range. Be compatible. +DT_AUXILIARY = 0x7ffffffd # Shared object to load before self +DT_FILTER = 0x7fffffff # Shared object to get values from +DT_EXTRANUM = 3 + +# Values of `d_un.d_val' in the DT_FLAGS entry. +DF_ORIGIN = 0x00000001 # Object may use DF_ORIGIN +DF_SYMBOLIC = 0x00000002 # Symbol resolutions starts here +DF_TEXTREL = 0x00000004 # Object contains text relocations +DF_BIND_NOW = 0x00000008 # No lazy binding for this object +DF_STATIC_TLS = 0x00000010 # Module uses the static TLS model + +# State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 +# entry in the dynamic section. +DF_1_NOW = 0x00000001 # Set RTLD_NOW for this object. +DF_1_GLOBAL = 0x00000002 # Set RTLD_GLOBAL for this object. +DF_1_GROUP = 0x00000004 # Set RTLD_GROUP for this object. +DF_1_NODELETE = 0x00000008 # Set RTLD_NODELETE for this object. +DF_1_LOADFLTR = 0x00000010 # Trigger filtee loading at runtime. +DF_1_INITFIRST = 0x00000020 # Set RTLD_INITFIRST for this object +DF_1_NOOPEN = 0x00000040 # Set RTLD_NOOPEN for this object. +DF_1_ORIGIN = 0x00000080 # $ORIGIN must be handled. +DF_1_DIRECT = 0x00000100 # Direct binding enabled. +DF_1_TRANS = 0x00000200 +DF_1_INTERPOSE = 0x00000400 # Object is used to interpose. +DF_1_NODEFLIB = 0x00000800 # Ignore default lib search path. +DF_1_NODUMP = 0x00001000 # Object can't be dldump'ed. +DF_1_CONFALT = 0x00002000 # Configuration alternative created. +DF_1_ENDFILTEE = 0x00004000 # Filtee terminates filters search. +DF_1_DISPRELDNE = 0x00008000 # Disp reloc applied at build time. +DF_1_DISPRELPND = 0x00010000 # Disp reloc applied at run-time. 
+ +# Flags for the feature selection in DT_FEATURE_1. +DTF_1_PARINIT = 0x00000001 +DTF_1_CONFEXP = 0x00000002 + +# Flags in the DT_POSFLAG_1 entry effecting only the next DT_* entry. +DF_P1_LAZYLOAD = 0x00000001 # Lazyload following object. +DF_P1_GROUPPERM = 0x00000002 # Symbols from next object are not generally available. + +# GNU Versioning +VER_FLG_BASE = 1 # Version of the file itself, must not be used to match symbols +VER_FLG_WEAK = 2 # Reference to this version is weak +VER_NEED_CURRENT = 1 # Versioning implementation number + +# Relocs + +# Motorola 68k relocations + +R_68K_NONE = 0 # No reloc +R_68K_32 = 1 # Direct 32 bit +R_68K_16 = 2 # Direct 16 bit +R_68K_8 = 3 # Direct 8 bit +R_68K_PC32 = 4 # PC relative 32 bit +R_68K_PC16 = 5 # PC relative 16 bit +R_68K_PC8 = 6 # PC relative 8 bit +R_68K_GOT32 = 7 # 32 bit PC relative GOT entry +R_68K_GOT16 = 8 # 16 bit PC relative GOT entry +R_68K_GOT8 = 9 # 8 bit PC relative GOT entry +R_68K_GOT32O = 10 # 32 bit GOT offset +R_68K_GOT16O = 11 # 16 bit GOT offset +R_68K_GOT8O = 12 # 8 bit GOT offset +R_68K_PLT32 = 13 # 32 bit PC relative PLT address +R_68K_PLT16 = 14 # 16 bit PC relative PLT address +R_68K_PLT8 = 15 # 8 bit PC relative PLT address +R_68K_PLT32O = 16 # 32 bit PLT offset +R_68K_PLT16O = 17 # 16 bit PLT offset +R_68K_PLT8O = 18 # 8 bit PLT offset +R_68K_COPY = 19 # Copy symbol at runtime +R_68K_GLOB_DAT = 20 # Create GOT entry +R_68K_JMP_SLOT = 21 # Create PLT entry +R_68K_RELATIVE = 22 # Adjust by program base +R_68K_TLS_GD32 = 25 # 32 bit GOT offset for GD +R_68K_TLS_GD16 = 26 # 16 bit GOT offset for GD +R_68K_TLS_GD8 = 27 # 8 bit GOT offset for GD +R_68K_TLS_LDM32 = 28 # 32 bit GOT offset for LDM +R_68K_TLS_LDM16 = 29 # 16 bit GOT offset for LDM +R_68K_TLS_LDM8 = 30 # 8 bit GOT offset for LDM +R_68K_TLS_LDO32 = 31 # 32 bit module-relative offset +R_68K_TLS_LDO16 = 32 # 16 bit module-relative offset +R_68K_TLS_LDO8 = 33 # 8 bit module-relative offset +R_68K_TLS_IE32 = 34 # 32 bit GOT offset for IE 
+R_68K_TLS_IE16 = 35 # 16 bit GOT offset for IE +R_68K_TLS_IE8 = 36 # 8 bit GOT offset for IE +R_68K_TLS_LE32 = 37 # 32 bit offset relative to static TLS block +R_68K_TLS_LE16 = 38 # 16 bit offset relative to static TLS block +R_68K_TLS_LE8 = 39 # 8 bit offset relative to static TLS block +R_68K_TLS_DTPMOD32 = 40 # 32 bit module number +R_68K_TLS_DTPREL32 = 41 # 32 bit module-relative offset +R_68K_TLS_TPREL32 = 42 # 32 bit TP-relative offset +# Keep this the last entry. +R_68K_NUM = 43 + +# Intel 80386 relocations + +R_386_NONE = 0 # No reloc +R_386_32 = 1 # Direct 32 bit +R_386_PC32 = 2 # PC relative 32 bit +R_386_GOT32 = 3 # 32 bit GOT entry +R_386_PLT32 = 4 # 32 bit PLT address +R_386_COPY = 5 # Copy symbol at runtime +R_386_GLOB_DAT = 6 # Create GOT entry +R_386_JMP_SLOT = 7 # Create PLT entry +R_386_RELATIVE = 8 # Adjust by program base +R_386_GOTOFF = 9 # 32 bit offset to GOT +R_386_GOTPC = 10 # 32 bit PC relative offset to GOT +R_386_32PLT = 11 +R_386_TLS_TPOFF = 14 # Offset in static TLS block +R_386_TLS_IE = 15 # Address of GOT entry for static TLS block offset +R_386_TLS_GOTIE = 16 # GOT entry for static TLS block offset +R_386_TLS_LE = 17 # Offset relative to static TLS block +R_386_TLS_GD = 18 # Direct 32 bit for GNU version of general dynamic thread local data +R_386_TLS_LDM = 19 # Direct 32 bit for GNU version of local dynamic thread local data in LE code +R_386_16 = 20 +R_386_PC16 = 21 +R_386_8 = 22 +R_386_PC8 = 23 +R_386_TLS_GD_32 = 24 # Direct 32 bit for general dynamic thread local data +R_386_TLS_GD_PUSH = 25 # Tag for pushl in GD TLS code +R_386_TLS_GD_CALL = 26 # Relocation for call to __tls_get_addr() +R_386_TLS_GD_POP = 27 # Tag for popl in GD TLS code +R_386_TLS_LDM_32 = 28 # Direct 32 bit for local dynamic thread local data in LE code +R_386_TLS_LDM_PUSH = 29 # Tag for pushl in LDM TLS code +R_386_TLS_LDM_CALL = 30 # Relocation for call to __tls_get_addr() in LDM code +R_386_TLS_LDM_POP = 31 # Tag for popl in LDM TLS code +R_386_TLS_LDO_32 
= 32 # Offset relative to TLS block +R_386_TLS_IE_32 = 33 # GOT entry for negated static TLS block offset +R_386_TLS_LE_32 = 34 # Negated offset relative to static TLS block +R_386_TLS_DTPMOD32 = 35 # ID of module containing symbol +R_386_TLS_DTPOFF32 = 36 # Offset in TLS block +R_386_TLS_TPOFF32 = 37 # Negated offset in static TLS block +# 38? +R_386_TLS_GOTDESC = 39 # GOT offset for TLS descriptor. +R_386_TLS_DESC_CALL = 40 # Marker of call through TLS descriptor for relaxation. +R_386_TLS_DESC = 41 # TLS descriptor containing pointer to code and to argument, returning the TLS offset for the symbol. +R_386_IRELATIVE = 42 # Adjust indirectly by program base +# Keep this the last entry. +R_386_NUM = 43 + +# SUN SPARC relocations + +R_SPARC_NONE = 0 # No reloc +R_SPARC_8 = 1 # Direct 8 bit +R_SPARC_16 = 2 # Direct 16 bit +R_SPARC_32 = 3 # Direct 32 bit +R_SPARC_DISP8 = 4 # PC relative 8 bit +R_SPARC_DISP16 = 5 # PC relative 16 bit +R_SPARC_DISP32 = 6 # PC relative 32 bit +R_SPARC_WDISP30 = 7 # PC relative 30 bit shifted +R_SPARC_WDISP22 = 8 # PC relative 22 bit shifted +R_SPARC_HI22 = 9 # High 22 bit +R_SPARC_22 = 10 # Direct 22 bit +R_SPARC_13 = 11 # Direct 13 bit +R_SPARC_LO10 = 12 # Truncated 10 bit +R_SPARC_GOT10 = 13 # Truncated 10 bit GOT entry +R_SPARC_GOT13 = 14 # 13 bit GOT entry +R_SPARC_GOT22 = 15 # 22 bit GOT entry shifted +R_SPARC_PC10 = 16 # PC relative 10 bit truncated +R_SPARC_PC22 = 17 # PC relative 22 bit shifted +R_SPARC_WPLT30 = 18 # 30 bit PC relative PLT address +R_SPARC_COPY = 19 # Copy symbol at runtime +R_SPARC_GLOB_DAT = 20 # Create GOT entry +R_SPARC_JMP_SLOT = 21 # Create PLT entry +R_SPARC_RELATIVE = 22 # Adjust by program base +R_SPARC_UA32 = 23 # Direct 32 bit unaligned + +# Additional Sparc64 relocs. 
+ +R_SPARC_PLT32 = 24 # Direct 32 bit ref to PLT entry +R_SPARC_HIPLT22 = 25 # High 22 bit PLT entry +R_SPARC_LOPLT10 = 26 # Truncated 10 bit PLT entry +R_SPARC_PCPLT32 = 27 # PC rel 32 bit ref to PLT entry +R_SPARC_PCPLT22 = 28 # PC rel high 22 bit PLT entry +R_SPARC_PCPLT10 = 29 # PC rel trunc 10 bit PLT entry +R_SPARC_10 = 30 # Direct 10 bit +R_SPARC_11 = 31 # Direct 11 bit +R_SPARC_64 = 32 # Direct 64 bit +R_SPARC_OLO10 = 33 # 10bit with secondary 13bit addend +R_SPARC_HH22 = 34 # Top 22 bits of direct 64 bit +R_SPARC_HM10 = 35 # High middle 10 bits of ... +R_SPARC_LM22 = 36 # Low middle 22 bits of ... +R_SPARC_PC_HH22 = 37 # Top 22 bits of pc rel 64 bit +R_SPARC_PC_HM10 = 38 # High middle 10 bit of ... +R_SPARC_PC_LM22 = 39 # Low middle 22 bits of ... +R_SPARC_WDISP16 = 40 # PC relative 16 bit shifted +R_SPARC_WDISP19 = 41 # PC relative 19 bit shifted +R_SPARC_GLOB_JMP = 42 # was part of v9 ABI but was removed +R_SPARC_7 = 43 # Direct 7 bit +R_SPARC_5 = 44 # Direct 5 bit +R_SPARC_6 = 45 # Direct 6 bit +R_SPARC_DISP64 = 46 # PC relative 64 bit +R_SPARC_PLT64 = 47 # Direct 64 bit ref to PLT entry +R_SPARC_HIX22 = 48 # High 22 bit complemented +R_SPARC_LOX10 = 49 # Truncated 11 bit complemented +R_SPARC_H44 = 50 # Direct high 12 of 44 bit +R_SPARC_M44 = 51 # Direct mid 22 of 44 bit +R_SPARC_L44 = 52 # Direct low 10 of 44 bit +R_SPARC_REGISTER = 53 # Global register usage +R_SPARC_UA64 = 54 # Direct 64 bit unaligned +R_SPARC_UA16 = 55 # Direct 16 bit unaligned +R_SPARC_TLS_GD_HI22 = 56 +R_SPARC_TLS_GD_LO10 = 57 +R_SPARC_TLS_GD_ADD = 58 +R_SPARC_TLS_GD_CALL = 59 +R_SPARC_TLS_LDM_HI22 = 60 +R_SPARC_TLS_LDM_LO10 = 61 +R_SPARC_TLS_LDM_ADD = 62 +R_SPARC_TLS_LDM_CALL = 63 +R_SPARC_TLS_LDO_HIX22 = 64 +R_SPARC_TLS_LDO_LOX10 = 65 +R_SPARC_TLS_LDO_ADD = 66 +R_SPARC_TLS_IE_HI22 = 67 +R_SPARC_TLS_IE_LO10 = 68 +R_SPARC_TLS_IE_LD = 69 +R_SPARC_TLS_IE_LDX = 70 +R_SPARC_TLS_IE_ADD = 71 +R_SPARC_TLS_LE_HIX22 = 72 +R_SPARC_TLS_LE_LOX10 = 73 +R_SPARC_TLS_DTPMOD32 = 74 
+R_SPARC_TLS_DTPMOD64 = 75 +R_SPARC_TLS_DTPOFF32 = 76 +R_SPARC_TLS_DTPOFF64 = 77 +R_SPARC_TLS_TPOFF32 = 78 +R_SPARC_TLS_TPOFF64 = 79 +R_SPARC_GOTDATA_HIX22 = 80 +R_SPARC_GOTDATA_LOX10 = 81 +R_SPARC_GOTDATA_OP_HIX22 = 82 +R_SPARC_GOTDATA_OP_LOX10 = 83 +R_SPARC_GOTDATA_OP = 84 +R_SPARC_H34 = 85 +R_SPARC_SIZE32 = 86 +R_SPARC_SIZE64 = 87 +R_SPARC_JMP_IREL = 248 +R_SPARC_IRELATIVE = 249 +R_SPARC_GNU_VTINHERIT = 250 +R_SPARC_GNU_VTENTRY = 251 +R_SPARC_REV32 = 252 +# Keep this the last entry. +R_SPARC_NUM = 253 + +# MIPS R3000 relocations + +R_MIPS_NONE = 0 # No reloc +R_MIPS_16 = 1 # Direct 16 bit +R_MIPS_32 = 2 # Direct 32 bit +R_MIPS_REL32 = 3 # PC relative 32 bit +R_MIPS_26 = 4 # Direct 26 bit shifted +R_MIPS_HI16 = 5 # High 16 bit +R_MIPS_LO16 = 6 # Low 16 bit +R_MIPS_GPREL16 = 7 # GP relative 16 bit +R_MIPS_LITERAL = 8 # 16 bit literal entry +R_MIPS_GOT16 = 9 # 16 bit GOT entry +R_MIPS_PC16 = 10 # PC relative 16 bit +R_MIPS_CALL16 = 11 # 16 bit GOT entry for function +R_MIPS_GPREL32 = 12 # GP relative 32 bit + +R_MIPS_SHIFT5 = 16 +R_MIPS_SHIFT6 = 17 +R_MIPS_64 = 18 +R_MIPS_GOT_DISP = 19 +R_MIPS_GOT_PAGE = 20 +R_MIPS_GOT_OFST = 21 +R_MIPS_GOT_HI16 = 22 +R_MIPS_GOT_LO16 = 23 +R_MIPS_SUB = 24 +R_MIPS_INSERT_A = 25 +R_MIPS_INSERT_B = 26 +R_MIPS_DELETE = 27 +R_MIPS_HIGHER = 28 +R_MIPS_HIGHEST = 29 +R_MIPS_CALL_HI16 = 30 +R_MIPS_CALL_LO16 = 31 +R_MIPS_SCN_DISP = 32 +R_MIPS_REL16 = 33 +R_MIPS_ADD_IMMEDIATE = 34 +R_MIPS_PJUMP = 35 +R_MIPS_RELGOT = 36 +R_MIPS_JALR = 37 +R_MIPS_TLS_DTPMOD32 = 38 # Module number 32 bit +R_MIPS_TLS_DTPREL32 = 39 # Module-relative offset 32 bit +R_MIPS_TLS_DTPMOD64 = 40 # Module number 64 bit +R_MIPS_TLS_DTPREL64 = 41 # Module-relative offset 64 bit +R_MIPS_TLS_GD = 42 # 16 bit GOT offset for GD +R_MIPS_TLS_LDM = 43 # 16 bit GOT offset for LDM +R_MIPS_TLS_DTPREL_HI16 = 44 # Module-relative offset, high 16 bits +R_MIPS_TLS_DTPREL_LO16 = 45 # Module-relative offset, low 16 bits +R_MIPS_TLS_GOTTPREL = 46 # 16 bit GOT offset for IE 
+R_MIPS_TLS_TPREL32 = 47 # TP-relative offset, 32 bit +R_MIPS_TLS_TPREL64 = 48 # TP-relative offset, 64 bit +R_MIPS_TLS_TPREL_HI16 = 49 # TP-relative offset, high 16 bits +R_MIPS_TLS_TPREL_LO16 = 50 # TP-relative offset, low 16 bits +R_MIPS_GLOB_DAT = 51 +R_MIPS_COPY = 126 +R_MIPS_JUMP_SLOT = 127 +# Keep this the last entry. +R_MIPS_NUM = 128 + +# HPPA relocations + +R_PARISC_NONE = 0 # No reloc. +R_PARISC_DIR32 = 1 # Direct 32-bit reference. +R_PARISC_DIR21L = 2 # Left 21 bits of eff. address. +R_PARISC_DIR17R = 3 # Right 17 bits of eff. address. +R_PARISC_DIR17F = 4 # 17 bits of eff. address. +R_PARISC_DIR14R = 6 # Right 14 bits of eff. address. +R_PARISC_PCREL32 = 9 # 32-bit rel. address. +R_PARISC_PCREL21L = 10 # Left 21 bits of rel. address. +R_PARISC_PCREL17R = 11 # Right 17 bits of rel. address. +R_PARISC_PCREL17F = 12 # 17 bits of rel. address. +R_PARISC_PCREL14R = 14 # Right 14 bits of rel. address. +R_PARISC_DPREL21L = 18 # Left 21 bits of rel. address. +R_PARISC_DPREL14R = 22 # Right 14 bits of rel. address. +R_PARISC_GPREL21L = 26 # GP-relative, left 21 bits. +R_PARISC_GPREL14R = 30 # GP-relative, right 14 bits. +R_PARISC_LTOFF21L = 34 # LT-relative, left 21 bits. +R_PARISC_LTOFF14R = 38 # LT-relative, right 14 bits. +R_PARISC_SECREL32 = 41 # 32 bits section rel. address. +R_PARISC_SEGBASE = 48 # No relocation, set segment base. +R_PARISC_SEGREL32 = 49 # 32 bits segment rel. address. +R_PARISC_PLTOFF21L = 50 # PLT rel. address, left 21 bits. +R_PARISC_PLTOFF14R = 54 # PLT rel. address, right 14 bits. +R_PARISC_LTOFF_FPTR32 = 57 # 32 bits LT-rel. function pointer. +R_PARISC_LTOFF_FPTR21L = 58 # LT-rel. fct ptr, left 21 bits. +R_PARISC_LTOFF_FPTR14R = 62 # LT-rel. fct ptr, right 14 bits. +R_PARISC_FPTR64 = 64 # 64 bits function address. +R_PARISC_PLABEL32 = 65 # 32 bits function address. +R_PARISC_PLABEL21L = 66 # Left 21 bits of fdesc address. +R_PARISC_PLABEL14R = 70 # Right 14 bits of fdesc address. +R_PARISC_PCREL64 = 72 # 64 bits PC-rel. address. 
+R_PARISC_PCREL22F = 74 # 22 bits PC-rel. address. +R_PARISC_PCREL14WR = 75 # PC-rel. address, right 14 bits. +R_PARISC_PCREL14DR = 76 # PC rel. address, right 14 bits. +R_PARISC_PCREL16F = 77 # 16 bits PC-rel. address. +R_PARISC_PCREL16WF = 78 # 16 bits PC-rel. address. +R_PARISC_PCREL16DF = 79 # 16 bits PC-rel. address. +R_PARISC_DIR64 = 80 # 64 bits of eff. address. +R_PARISC_DIR14WR = 83 # 14 bits of eff. address. +R_PARISC_DIR14DR = 84 # 14 bits of eff. address. +R_PARISC_DIR16F = 85 # 16 bits of eff. address. +R_PARISC_DIR16WF = 86 # 16 bits of eff. address. +R_PARISC_DIR16DF = 87 # 16 bits of eff. address. +R_PARISC_GPREL64 = 88 # 64 bits of GP-rel. address. +R_PARISC_GPREL14WR = 91 # GP-rel. address, right 14 bits. +R_PARISC_GPREL14DR = 92 # GP-rel. address, right 14 bits. +R_PARISC_GPREL16F = 93 # 16 bits GP-rel. address. +R_PARISC_GPREL16WF = 94 # 16 bits GP-rel. address. +R_PARISC_GPREL16DF = 95 # 16 bits GP-rel. address. +R_PARISC_LTOFF64 = 96 # 64 bits LT-rel. address. +R_PARISC_LTOFF14WR = 99 # LT-rel. address, right 14 bits. +R_PARISC_LTOFF14DR = 100 # LT-rel. address, right 14 bits. +R_PARISC_LTOFF16F = 101 # 16 bits LT-rel. address. +R_PARISC_LTOFF16WF = 102 # 16 bits LT-rel. address. +R_PARISC_LTOFF16DF = 103 # 16 bits LT-rel. address. +R_PARISC_SECREL64 = 104 # 64 bits section rel. address. +R_PARISC_SEGREL64 = 112 # 64 bits segment rel. address. +R_PARISC_PLTOFF14WR = 115 # PLT-rel. address, right 14 bits. +R_PARISC_PLTOFF14DR = 116 # PLT-rel. address, right 14 bits. +R_PARISC_PLTOFF16F = 117 # 16 bits LT-rel. address. +R_PARISC_PLTOFF16WF = 118 # 16 bits PLT-rel. address. +R_PARISC_PLTOFF16DF = 119 # 16 bits PLT-rel. address. +R_PARISC_LTOFF_FPTR64 = 120 # 64 bits LT-rel. function ptr. +R_PARISC_LTOFF_FPTR14WR = 123 # LT-rel. fct. ptr., right 14 bits. +R_PARISC_LTOFF_FPTR14DR = 124 # LT-rel. fct. ptr., right 14 bits. +R_PARISC_LTOFF_FPTR16F = 125 # 16 bits LT-rel. function ptr. +R_PARISC_LTOFF_FPTR16WF = 126 # 16 bits LT-rel. function ptr. 
+R_PARISC_LTOFF_FPTR16DF = 127 # 16 bits LT-rel. function ptr. +R_PARISC_LORESERVE = 128 +R_PARISC_COPY = 128 # Copy relocation. +R_PARISC_IPLT = 129 # Dynamic reloc, imported PLT +R_PARISC_EPLT = 130 # Dynamic reloc, exported PLT +R_PARISC_TPREL32 = 153 # 32 bits TP-rel. address. +R_PARISC_TPREL21L = 154 # TP-rel. address, left 21 bits. +R_PARISC_TPREL14R = 158 # TP-rel. address, right 14 bits. +R_PARISC_LTOFF_TP21L = 162 # LT-TP-rel. address, left 21 bits. +R_PARISC_LTOFF_TP14R = 166 # LT-TP-rel. address, right 14 bits. +R_PARISC_LTOFF_TP14F = 167 # 14 bits LT-TP-rel. address. +R_PARISC_TPREL64 = 216 # 64 bits TP-rel. address. +R_PARISC_TPREL14WR = 219 # TP-rel. address, right 14 bits. +R_PARISC_TPREL14DR = 220 # TP-rel. address, right 14 bits. +R_PARISC_TPREL16F = 221 # 16 bits TP-rel. address. +R_PARISC_TPREL16WF = 222 # 16 bits TP-rel. address. +R_PARISC_TPREL16DF = 223 # 16 bits TP-rel. address. +R_PARISC_LTOFF_TP64 = 224 # 64 bits LT-TP-rel. address. +R_PARISC_LTOFF_TP14WR = 227 # LT-TP-rel. address, right 14 bits. +R_PARISC_LTOFF_TP14DR = 228 # LT-TP-rel. address, right 14 bits. +R_PARISC_LTOFF_TP16F = 229 # 16 bits LT-TP-rel. address. +R_PARISC_LTOFF_TP16WF = 230 # 16 bits LT-TP-rel. address. +R_PARISC_LTOFF_TP16DF = 231 # 16 bits LT-TP-rel. address. +R_PARISC_GNU_VTENTRY = 232 +R_PARISC_GNU_VTINHERIT = 233 +R_PARISC_TLS_GD21L = 234 # GD 21-bit left. +R_PARISC_TLS_GD14R = 235 # GD 14-bit right. +R_PARISC_TLS_GDCALL = 236 # GD call to __t_g_a. +R_PARISC_TLS_LDM21L = 237 # LD module 21-bit left. +R_PARISC_TLS_LDM14R = 238 # LD module 14-bit right. +R_PARISC_TLS_LDMCALL = 239 # LD module call to __t_g_a. +R_PARISC_TLS_LDO21L = 240 # LD offset 21-bit left. +R_PARISC_TLS_LDO14R = 241 # LD offset 14-bit right. +R_PARISC_TLS_DTPMOD32 = 242 # DTP module 32-bit. +R_PARISC_TLS_DTPMOD64 = 243 # DTP module 64-bit. +R_PARISC_TLS_DTPOFF32 = 244 # DTP offset 32-bit. +R_PARISC_TLS_DTPOFF64 = 245 # DTP offset 64-bit. 
+R_PARISC_TLS_LE21L = R_PARISC_TPREL21L +R_PARISC_TLS_LE14R = R_PARISC_TPREL14R +R_PARISC_TLS_IE21L = R_PARISC_LTOFF_TP21L +R_PARISC_TLS_IE14R = R_PARISC_LTOFF_TP14R +R_PARISC_TLS_TPREL32 = R_PARISC_TPREL32 +R_PARISC_TLS_TPREL64 = R_PARISC_TPREL64 +R_PARISC_HIRESERVE = 255 + +# Alpha relocations + +R_ALPHA_NONE = 0 # No reloc +R_ALPHA_REFLONG = 1 # Direct 32 bit +R_ALPHA_REFQUAD = 2 # Direct 64 bit +R_ALPHA_GPREL32 = 3 # GP relative 32 bit +R_ALPHA_LITERAL = 4 # GP relative 16 bit w/optimization +R_ALPHA_LITUSE = 5 # Optimization hint for LITERAL +R_ALPHA_GPDISP = 6 # Add displacement to GP +R_ALPHA_BRADDR = 7 # PC+4 relative 23 bit shifted +R_ALPHA_HINT = 8 # PC+4 relative 16 bit shifted +R_ALPHA_SREL16 = 9 # PC relative 16 bit +R_ALPHA_SREL32 = 10 # PC relative 32 bit +R_ALPHA_SREL64 = 11 # PC relative 64 bit +R_ALPHA_GPRELHIGH = 17 # GP relative 32 bit, high 16 bits +R_ALPHA_GPRELLOW = 18 # GP relative 32 bit, low 16 bits +R_ALPHA_GPREL16 = 19 # GP relative 16 bit +R_ALPHA_COPY = 24 # Copy symbol at runtime +R_ALPHA_GLOB_DAT = 25 # Create GOT entry +R_ALPHA_JMP_SLOT = 26 # Create PLT entry +R_ALPHA_RELATIVE = 27 # Adjust by program base +R_ALPHA_TLS_GD_HI = 28 +R_ALPHA_TLSGD = 29 +R_ALPHA_TLS_LDM = 30 +R_ALPHA_DTPMOD64 = 31 +R_ALPHA_GOTDTPREL = 32 +R_ALPHA_DTPREL64 = 33 +R_ALPHA_DTPRELHI = 34 +R_ALPHA_DTPRELLO = 35 +R_ALPHA_DTPREL16 = 36 +R_ALPHA_GOTTPREL = 37 +R_ALPHA_TPREL64 = 38 +R_ALPHA_TPRELHI = 39 +R_ALPHA_TPRELLO = 40 +R_ALPHA_TPREL16 = 41 +# Keep this the last entry. +R_ALPHA_NUM = 46 + +# PowerPC relocations + +R_PPC_NONE = 0 +R_PPC_ADDR32 = 1 # 32bit absolute address +R_PPC_ADDR24 = 2 # 26bit address, 2 bits ignored. 
+R_PPC_ADDR16 = 3 # 16bit absolute address +R_PPC_ADDR16_LO = 4 # lower 16bit of absolute address +R_PPC_ADDR16_HI = 5 # high 16bit of absolute address +R_PPC_ADDR16_HA = 6 # adjusted high 16bit +R_PPC_ADDR14 = 7 # 16bit address, 2 bits ignored +R_PPC_ADDR14_BRTAKEN = 8 +R_PPC_ADDR14_BRNTAKEN = 9 +R_PPC_REL24 = 10 # PC relative 26 bit +R_PPC_REL14 = 11 # PC relative 16 bit +R_PPC_REL14_BRTAKEN = 12 +R_PPC_REL14_BRNTAKEN = 13 +R_PPC_GOT16 = 14 +R_PPC_GOT16_LO = 15 +R_PPC_GOT16_HI = 16 +R_PPC_GOT16_HA = 17 +R_PPC_PLTREL24 = 18 +R_PPC_COPY = 19 +R_PPC_GLOB_DAT = 20 +R_PPC_JMP_SLOT = 21 +R_PPC_RELATIVE = 22 +R_PPC_LOCAL24PC = 23 +R_PPC_UADDR32 = 24 +R_PPC_UADDR16 = 25 +R_PPC_REL32 = 26 +R_PPC_PLT32 = 27 +R_PPC_PLTREL32 = 28 +R_PPC_PLT16_LO = 29 +R_PPC_PLT16_HI = 30 +R_PPC_PLT16_HA = 31 +R_PPC_SDAREL16 = 32 +R_PPC_SECTOFF = 33 +R_PPC_SECTOFF_LO = 34 +R_PPC_SECTOFF_HI = 35 +R_PPC_SECTOFF_HA = 36 + +# PowerPC relocations defined for the TLS access ABI. +R_PPC_TLS = 67 # none (sym+add)@tls +R_PPC_DTPMOD32 = 68 # word32 (sym+add)@dtpmod +R_PPC_TPREL16 = 69 # half16* (sym+add)@tprel +R_PPC_TPREL16_LO = 70 # half16 (sym+add)@tprel@l +R_PPC_TPREL16_HI = 71 # half16 (sym+add)@tprel@h +R_PPC_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha +R_PPC_TPREL32 = 73 # word32 (sym+add)@tprel +R_PPC_DTPREL16 = 74 # half16* (sym+add)@dtprel +R_PPC_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l +R_PPC_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h +R_PPC_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha +R_PPC_DTPREL32 = 78 # word32 (sym+add)@dtprel +R_PPC_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd +R_PPC_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l +R_PPC_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h +R_PPC_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha +R_PPC_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld +R_PPC_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l +R_PPC_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h +R_PPC_GOT_TLSLD16_HA = 86 # half16 (sym+add)@got@tlsld@ha 
+R_PPC_GOT_TPREL16 = 87 # half16* (sym+add)@got@tprel +R_PPC_GOT_TPREL16_LO = 88 # half16 (sym+add)@got@tprel@l +R_PPC_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h +R_PPC_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha +R_PPC_GOT_DTPREL16 = 91 # half16* (sym+add)@got@dtprel +R_PPC_GOT_DTPREL16_LO = 92 # half16* (sym+add)@got@dtprel@l +R_PPC_GOT_DTPREL16_HI = 93 # half16* (sym+add)@got@dtprel@h +R_PPC_GOT_DTPREL16_HA = 94 # half16* (sym+add)@got@dtprel@ha + +# The remaining relocs are from the Embedded ELF ABI, and are not in the SVR4 ELF ABI. +R_PPC_EMB_NADDR32 = 101 +R_PPC_EMB_NADDR16 = 102 +R_PPC_EMB_NADDR16_LO = 103 +R_PPC_EMB_NADDR16_HI = 104 +R_PPC_EMB_NADDR16_HA = 105 +R_PPC_EMB_SDAI16 = 106 +R_PPC_EMB_SDA2I16 = 107 +R_PPC_EMB_SDA2REL = 108 +R_PPC_EMB_SDA21 = 109 # 16 bit offset in SDA +R_PPC_EMB_MRKREF = 110 +R_PPC_EMB_RELSEC16 = 111 +R_PPC_EMB_RELST_LO = 112 +R_PPC_EMB_RELST_HI = 113 +R_PPC_EMB_RELST_HA = 114 +R_PPC_EMB_BIT_FLD = 115 +R_PPC_EMB_RELSDA = 116 # 16 bit relative offset in SDA + +# Diab tool relocations. +R_PPC_DIAB_SDA21_LO = 180 # like EMB_SDA21, but lower 16 bit +R_PPC_DIAB_SDA21_HI = 181 # like EMB_SDA21, but high 16 bit +R_PPC_DIAB_SDA21_HA = 182 # like EMB_SDA21, adjusted high 16 +R_PPC_DIAB_RELSDA_LO = 183 # like EMB_RELSDA, but lower 16 bit +R_PPC_DIAB_RELSDA_HI = 184 # like EMB_RELSDA, but high 16 bit +R_PPC_DIAB_RELSDA_HA = 185 # like EMB_RELSDA, adjusted high 16 + +# GNU extension to support local ifunc. +R_PPC_IRELATIVE = 248 + +# GNU relocs used in PIC code sequences. +R_PPC_REL16 = 249 # half16 (sym+add-.) +R_PPC_REL16_LO = 250 # half16 (sym+add-.)@l +R_PPC_REL16_HI = 251 # half16 (sym+add-.)@h +R_PPC_REL16_HA = 252 # half16 (sym+add-.)@ha + +# This is a phony reloc to handle any old fashioned TOC16 references that may still be in object files. 
+R_PPC_TOC16 = 255 + +# PowerPC64 relocations defined by the ABIs +R_PPC64_NONE = R_PPC_NONE +R_PPC64_ADDR32 = R_PPC_ADDR32 # 32bit absolute address +R_PPC64_ADDR24 = R_PPC_ADDR24 # 26bit address, word aligned +R_PPC64_ADDR16 = R_PPC_ADDR16 # 16bit absolute address +R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO # lower 16bits of address +R_PPC64_ADDR16_HI = R_PPC_ADDR16_HI # high 16bits of address. +R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA # adjusted high 16bits. +R_PPC64_ADDR14 = R_PPC_ADDR14 # 16bit address, word aligned +R_PPC64_ADDR14_BRTAKEN = R_PPC_ADDR14_BRTAKEN +R_PPC64_ADDR14_BRNTAKEN = R_PPC_ADDR14_BRNTAKEN +R_PPC64_REL24 = R_PPC_REL24 # PC-rel. 26 bit, word aligned +R_PPC64_REL14 = R_PPC_REL14 # PC relative 16 bit +R_PPC64_REL14_BRTAKEN = R_PPC_REL14_BRTAKEN +R_PPC64_REL14_BRNTAKEN = R_PPC_REL14_BRNTAKEN +R_PPC64_GOT16 = R_PPC_GOT16 +R_PPC64_GOT16_LO = R_PPC_GOT16_LO +R_PPC64_GOT16_HI = R_PPC_GOT16_HI +R_PPC64_GOT16_HA = R_PPC_GOT16_HA + +R_PPC64_COPY = R_PPC_COPY +R_PPC64_GLOB_DAT = R_PPC_GLOB_DAT +R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT +R_PPC64_RELATIVE = R_PPC_RELATIVE + +R_PPC64_UADDR32 = R_PPC_UADDR32 +R_PPC64_UADDR16 = R_PPC_UADDR16 +R_PPC64_REL32 = R_PPC_REL32 +R_PPC64_PLT32 = R_PPC_PLT32 +R_PPC64_PLTREL32 = R_PPC_PLTREL32 +R_PPC64_PLT16_LO = R_PPC_PLT16_LO +R_PPC64_PLT16_HI = R_PPC_PLT16_HI +R_PPC64_PLT16_HA = R_PPC_PLT16_HA + +R_PPC64_SECTOFF = R_PPC_SECTOFF +R_PPC64_SECTOFF_LO = R_PPC_SECTOFF_LO +R_PPC64_SECTOFF_HI = R_PPC_SECTOFF_HI +R_PPC64_SECTOFF_HA = R_PPC_SECTOFF_HA +R_PPC64_ADDR30 = 37 # word30 (S + A - P) >> 2 +R_PPC64_ADDR64 = 38 # doubleword64 S + A +R_PPC64_ADDR16_HIGHER = 39 # half16 #higher(S + A) +R_PPC64_ADDR16_HIGHERA = 40 # half16 #highera(S + A) +R_PPC64_ADDR16_HIGHEST = 41 # half16 #highest(S + A) +R_PPC64_ADDR16_HIGHESTA = 42 # half16 #highesta(S + A) +R_PPC64_UADDR64 = 43 # doubleword64 S + A +R_PPC64_REL64 = 44 # doubleword64 S + A - P +R_PPC64_PLT64 = 45 # doubleword64 L + A +R_PPC64_PLTREL64 = 46 # doubleword64 L + A - P +R_PPC64_TOC16 = 47 
# half16* S + A - .TOC +R_PPC64_TOC16_LO = 48 # half16 #lo(S + A - .TOC.) +R_PPC64_TOC16_HI = 49 # half16 #hi(S + A - .TOC.) +R_PPC64_TOC16_HA = 50 # half16 #ha(S + A - .TOC.) +R_PPC64_TOC = 51 # doubleword64 .TOC +R_PPC64_PLTGOT16 = 52 # half16* M + A +R_PPC64_PLTGOT16_LO = 53 # half16 #lo(M + A) +R_PPC64_PLTGOT16_HI = 54 # half16 #hi(M + A) +R_PPC64_PLTGOT16_HA = 55 # half16 #ha(M + A) + +R_PPC64_ADDR16_DS = 56 # half16ds* (S + A) >> 2 +R_PPC64_ADDR16_LO_DS = 57 # half16ds #lo(S + A) >> 2 +R_PPC64_GOT16_DS = 58 # half16ds* (G + A) >> 2 +R_PPC64_GOT16_LO_DS = 59 # half16ds #lo(G + A) >> 2 +R_PPC64_PLT16_LO_DS = 60 # half16ds #lo(L + A) >> 2 +R_PPC64_SECTOFF_DS = 61 # half16ds* (R + A) >> 2 +R_PPC64_SECTOFF_LO_DS = 62 # half16ds #lo(R + A) >> 2 +R_PPC64_TOC16_DS = 63 # half16ds* (S + A - .TOC.) >> 2 +R_PPC64_TOC16_LO_DS = 64 # half16ds #lo(S + A - .TOC.) >> 2 +R_PPC64_PLTGOT16_DS = 65 # half16ds* (M + A) >> 2 +R_PPC64_PLTGOT16_LO_DS = 66 # half16ds #lo(M + A) >> 2 + +# PowerPC64 relocations defined for the TLS access ABI. 
+R_PPC64_TLS = 67 # none (sym+add)@tls +R_PPC64_DTPMOD64 = 68 # doubleword64 (sym+add)@dtpmod +R_PPC64_TPREL16 = 69 # half16* (sym+add)@tprel +R_PPC64_TPREL16_LO = 70 # half16 (sym+add)@tprel@l +R_PPC64_TPREL16_HI = 71 # half16 (sym+add)@tprel@h +R_PPC64_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha +R_PPC64_TPREL64 = 73 # doubleword64 (sym+add)@tprel +R_PPC64_DTPREL16 = 74 # half16* (sym+add)@dtprel +R_PPC64_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l +R_PPC64_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h +R_PPC64_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha +R_PPC64_DTPREL64 = 78 # doubleword64 (sym+add)@dtprel +R_PPC64_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd +R_PPC64_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l +R_PPC64_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h +R_PPC64_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha +R_PPC64_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld +R_PPC64_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l +R_PPC64_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h +R_PPC64_GOT_TLSLD16_HA = 86 # half16 (sym+add)@got@tlsld@ha +R_PPC64_GOT_TPREL16_DS = 87 # half16ds* (sym+add)@got@tprel +R_PPC64_GOT_TPREL16_LO_DS = 88 # half16ds (sym+add)@got@tprel@l +R_PPC64_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h +R_PPC64_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha +R_PPC64_GOT_DTPREL16_DS = 91 # half16ds* (sym+add)@got@dtprel +R_PPC64_GOT_DTPREL16_LO_DS = 92 # half16ds (sym+add)@got@dtprel@l +R_PPC64_GOT_DTPREL16_HI = 93 # half16 (sym+add)@got@dtprel@h +R_PPC64_GOT_DTPREL16_HA = 94 # half16 (sym+add)@got@dtprel@ha +R_PPC64_TPREL16_DS = 95 # half16ds* (sym+add)@tprel +R_PPC64_TPREL16_LO_DS = 96 # half16ds (sym+add)@tprel@l +R_PPC64_TPREL16_HIGHER = 97 # half16 (sym+add)@tprel@higher +R_PPC64_TPREL16_HIGHERA = 98 # half16 (sym+add)@tprel@highera +R_PPC64_TPREL16_HIGHEST = 99 # half16 (sym+add)@tprel@highest +R_PPC64_TPREL16_HIGHESTA = 100 # half16 (sym+add)@tprel@highesta +R_PPC64_DTPREL16_DS = 101 # half16ds* 
(sym+add)@dtprel +R_PPC64_DTPREL16_LO_DS = 102 # half16ds (sym+add)@dtprel@l +R_PPC64_DTPREL16_HIGHER = 103 # half16 (sym+add)@dtprel@higher +R_PPC64_DTPREL16_HIGHERA = 104 # half16 (sym+add)@dtprel@highera +R_PPC64_DTPREL16_HIGHEST = 105 # half16 (sym+add)@dtprel@highest +R_PPC64_DTPREL16_HIGHESTA = 106 # half16 (sym+add)@dtprel@highesta + +# GNU extension to support local ifunc. +R_PPC64_JMP_IREL = 247 +R_PPC64_IRELATIVE = 248 +R_PPC64_REL16 = 249 # half16 (sym+add-.) +R_PPC64_REL16_LO = 250 # half16 (sym+add-.)@l +R_PPC64_REL16_HI = 251 # half16 (sym+add-.)@h +R_PPC64_REL16_HA = 252 # half16 (sym+add-.)@ha + +# PowerPC64 specific values for the Dyn d_tag field. +DT_PPC64_GLINK = (DT_LOPROC + 0) +DT_PPC64_OPD = (DT_LOPROC + 1) +DT_PPC64_OPDSZ = (DT_LOPROC + 2) +DT_PPC64_NUM = 3 + +# ARM relocations + +R_ARM_NONE = 0 # No reloc +R_ARM_PC24 = 1 # PC relative 26 bit branch +R_ARM_ABS32 = 2 # Direct 32 bit +R_ARM_REL32 = 3 # PC relative 32 bit +R_ARM_PC13 = 4 +R_ARM_ABS16 = 5 # Direct 16 bit +R_ARM_ABS12 = 6 # Direct 12 bit +R_ARM_THM_ABS5 = 7 +R_ARM_ABS8 = 8 # Direct 8 bit +R_ARM_SBREL32 = 9 +R_ARM_THM_PC22 = 10 +R_ARM_THM_PC8 = 11 +R_ARM_AMP_VCALL9 = 12 +R_ARM_SWI24 = 13 # Obsolete static relocation. +R_ARM_TLS_DESC = 13 # Dynamic relocation. 
+R_ARM_THM_SWI8 = 14 +R_ARM_XPC25 = 15 +R_ARM_THM_XPC22 = 16 +R_ARM_TLS_DTPMOD32 = 17 # ID of module containing symbol +R_ARM_TLS_DTPOFF32 = 18 # Offset in TLS block +R_ARM_TLS_TPOFF32 = 19 # Offset in static TLS block +R_ARM_COPY = 20 # Copy symbol at runtime +R_ARM_GLOB_DAT = 21 # Create GOT entry +R_ARM_JUMP_SLOT = 22 # Create PLT entry +R_ARM_RELATIVE = 23 # Adjust by program base +R_ARM_GOTOFF = 24 # 32 bit offset to GOT +R_ARM_GOTPC = 25 # 32 bit PC relative offset to GOT +R_ARM_GOT32 = 26 # 32 bit GOT entry +R_ARM_PLT32 = 27 # 32 bit PLT address +R_ARM_ALU_PCREL_7_0 = 32 +R_ARM_ALU_PCREL_15_8 = 33 +R_ARM_ALU_PCREL_23_15 = 34 +R_ARM_LDR_SBREL_11_0 = 35 +R_ARM_ALU_SBREL_19_12 = 36 +R_ARM_ALU_SBREL_27_20 = 37 +R_ARM_TLS_GOTDESC = 90 +R_ARM_TLS_CALL = 91 +R_ARM_TLS_DESCSEQ = 92 +R_ARM_THM_TLS_CALL = 93 +R_ARM_GNU_VTENTRY = 100 +R_ARM_GNU_VTINHERIT = 101 +R_ARM_THM_PC11 = 102 # thumb unconditional branch +R_ARM_THM_PC9 = 103 # thumb conditional branch +R_ARM_TLS_GD32 = 104 # PC-rel 32 bit for global dynamic thread local data +R_ARM_TLS_LDM32 = 105 # PC-rel 32 bit for local dynamic thread local data +R_ARM_TLS_LDO32 = 106 # 32 bit offset relative to TLS block +R_ARM_TLS_IE32 = 107 # PC-rel 32 bit for GOT entry of static TLS block offset +R_ARM_TLS_LE32 = 108 # 32 bit offset relative to static TLS block +R_ARM_THM_TLS_DESCSEQ = 129 +R_ARM_IRELATIVE = 160 +R_ARM_RXPC25 = 249 +R_ARM_RSBREL32 = 250 +R_ARM_THM_RPC22 = 251 +R_ARM_RREL32 = 252 +R_ARM_RABS22 = 253 +R_ARM_RPC24 = 254 +R_ARM_RBASE = 255 +# Keep this the last entry. 
+R_ARM_NUM = 256 + +# IA-64 relocations + +R_IA64_NONE = 0x00 # none +R_IA64_IMM14 = 0x21 # symbol + addend, add imm14 +R_IA64_IMM22 = 0x22 # symbol + addend, add imm22 +R_IA64_IMM64 = 0x23 # symbol + addend, mov imm64 +R_IA64_DIR32MSB = 0x24 # symbol + addend, data4 MSB +R_IA64_DIR32LSB = 0x25 # symbol + addend, data4 LSB +R_IA64_DIR64MSB = 0x26 # symbol + addend, data8 MSB +R_IA64_DIR64LSB = 0x27 # symbol + addend, data8 LSB +R_IA64_GPREL22 = 0x2a # @gprel(sym + add), add imm22 +R_IA64_GPREL64I = 0x2b # @gprel(sym + add), mov imm64 +R_IA64_GPREL32MSB = 0x2c # @gprel(sym + add), data4 MSB +R_IA64_GPREL32LSB = 0x2d # @gprel(sym + add), data4 LSB +R_IA64_GPREL64MSB = 0x2e # @gprel(sym + add), data8 MSB +R_IA64_GPREL64LSB = 0x2f # @gprel(sym + add), data8 LSB +R_IA64_LTOFF22 = 0x32 # @ltoff(sym + add), add imm22 +R_IA64_LTOFF64I = 0x33 # @ltoff(sym + add), mov imm64 +R_IA64_PLTOFF22 = 0x3a # @pltoff(sym + add), add imm22 +R_IA64_PLTOFF64I = 0x3b # @pltoff(sym + add), mov imm64 +R_IA64_PLTOFF64MSB = 0x3e # @pltoff(sym + add), data8 MSB +R_IA64_PLTOFF64LSB = 0x3f # @pltoff(sym + add), data8 LSB +R_IA64_FPTR64I = 0x43 # @fptr(sym + add), mov imm64 +R_IA64_FPTR32MSB = 0x44 # @fptr(sym + add), data4 MSB +R_IA64_FPTR32LSB = 0x45 # @fptr(sym + add), data4 LSB +R_IA64_FPTR64MSB = 0x46 # @fptr(sym + add), data8 MSB +R_IA64_FPTR64LSB = 0x47 # @fptr(sym + add), data8 LSB +R_IA64_PCREL60B = 0x48 # @pcrel(sym + add), brl +R_IA64_PCREL21B = 0x49 # @pcrel(sym + add), ptb, call +R_IA64_PCREL21M = 0x4a # @pcrel(sym + add), chk.s +R_IA64_PCREL21F = 0x4b # @pcrel(sym + add), fchkf +R_IA64_PCREL32MSB = 0x4c # @pcrel(sym + add), data4 MSB +R_IA64_PCREL32LSB = 0x4d # @pcrel(sym + add), data4 LSB +R_IA64_PCREL64MSB = 0x4e # @pcrel(sym + add), data8 MSB +R_IA64_PCREL64LSB = 0x4f # @pcrel(sym + add), data8 LSB +R_IA64_LTOFF_FPTR22 = 0x52 # @ltoff(@fptr(s+a)), imm22 +R_IA64_LTOFF_FPTR64I = 0x53 # @ltoff(@fptr(s+a)), imm64 +R_IA64_LTOFF_FPTR32MSB = 0x54 # @ltoff(@fptr(s+a)), data4 MSB 
+R_IA64_LTOFF_FPTR32LSB = 0x55 # @ltoff(@fptr(s+a)), data4 LSB +R_IA64_LTOFF_FPTR64MSB = 0x56 # @ltoff(@fptr(s+a)), data8 MSB +R_IA64_LTOFF_FPTR64LSB = 0x57 # @ltoff(@fptr(s+a)), data8 LSB +R_IA64_SEGREL32MSB = 0x5c # @segrel(sym + add), data4 MSB +R_IA64_SEGREL32LSB = 0x5d # @segrel(sym + add), data4 LSB +R_IA64_SEGREL64MSB = 0x5e # @segrel(sym + add), data8 MSB +R_IA64_SEGREL64LSB = 0x5f # @segrel(sym + add), data8 LSB +R_IA64_SECREL32MSB = 0x64 # @secrel(sym + add), data4 MSB +R_IA64_SECREL32LSB = 0x65 # @secrel(sym + add), data4 LSB +R_IA64_SECREL64MSB = 0x66 # @secrel(sym + add), data8 MSB +R_IA64_SECREL64LSB = 0x67 # @secrel(sym + add), data8 LSB +R_IA64_REL32MSB = 0x6c # data 4 + REL +R_IA64_REL32LSB = 0x6d # data 4 + REL +R_IA64_REL64MSB = 0x6e # data 8 + REL +R_IA64_REL64LSB = 0x6f # data 8 + REL +R_IA64_LTV32MSB = 0x74 # symbol + addend, data4 MSB +R_IA64_LTV32LSB = 0x75 # symbol + addend, data4 LSB +R_IA64_LTV64MSB = 0x76 # symbol + addend, data8 MSB +R_IA64_LTV64LSB = 0x77 # symbol + addend, data8 LSB +R_IA64_PCREL21BI = 0x79 # @pcrel(sym + add), 21bit inst +R_IA64_PCREL22 = 0x7a # @pcrel(sym + add), 22bit inst +R_IA64_PCREL64I = 0x7b # @pcrel(sym + add), 64bit inst +R_IA64_IPLTMSB = 0x80 # dynamic reloc, imported PLT, MSB +R_IA64_IPLTLSB = 0x81 # dynamic reloc, imported PLT, LSB +R_IA64_COPY = 0x84 # copy relocation +R_IA64_SUB = 0x85 # Addend and symbol difference +R_IA64_LTOFF22X = 0x86 # LTOFF22, relaxable. +R_IA64_LDXMOV = 0x87 # Use of LTOFF22X. 
+R_IA64_TPREL14 = 0x91 # @tprel(sym + add), imm14 +R_IA64_TPREL22 = 0x92 # @tprel(sym + add), imm22 +R_IA64_TPREL64I = 0x93 # @tprel(sym + add), imm64 +R_IA64_TPREL64MSB = 0x96 # @tprel(sym + add), data8 MSB +R_IA64_TPREL64LSB = 0x97 # @tprel(sym + add), data8 LSB +R_IA64_LTOFF_TPREL22 = 0x9a # @ltoff(@tprel(s+a)), imm2 +R_IA64_DTPMOD64MSB = 0xa6 # @dtpmod(sym + add), data8 MSB +R_IA64_DTPMOD64LSB = 0xa7 # @dtpmod(sym + add), data8 LSB +R_IA64_LTOFF_DTPMOD22 = 0xaa # @ltoff(@dtpmod(sym + add)), imm22 +R_IA64_DTPREL14 = 0xb1 # @dtprel(sym + add), imm14 +R_IA64_DTPREL22 = 0xb2 # @dtprel(sym + add), imm22 +R_IA64_DTPREL64I = 0xb3 # @dtprel(sym + add), imm64 +R_IA64_DTPREL32MSB = 0xb4 # @dtprel(sym + add), data4 MSB +R_IA64_DTPREL32LSB = 0xb5 # @dtprel(sym + add), data4 LSB +R_IA64_DTPREL64MSB = 0xb6 # @dtprel(sym + add), data8 MSB +R_IA64_DTPREL64LSB = 0xb7 # @dtprel(sym + add), data8 LSB +R_IA64_LTOFF_DTPREL22 = 0xba # @ltoff(@dtprel(s+a)), imm22 + +# SH relocations + +R_SH_NONE = 0 +R_SH_DIR32 = 1 +R_SH_REL32 = 2 +R_SH_DIR8WPN = 3 +R_SH_IND12W = 4 +R_SH_DIR8WPL = 5 +R_SH_DIR8WPZ = 6 +R_SH_DIR8BP = 7 +R_SH_DIR8W = 8 +R_SH_DIR8L = 9 +R_SH_SWITCH16 = 25 +R_SH_SWITCH32 = 26 +R_SH_USES = 27 +R_SH_COUNT = 28 +R_SH_ALIGN = 29 +R_SH_CODE = 30 +R_SH_DATA = 31 +R_SH_LABEL = 32 +R_SH_SWITCH8 = 33 +R_SH_GNU_VTINHERIT = 34 +R_SH_GNU_VTENTRY = 35 +R_SH_TLS_GD_32 = 144 +R_SH_TLS_LD_32 = 145 +R_SH_TLS_LDO_32 = 146 +R_SH_TLS_IE_32 = 147 +R_SH_TLS_LE_32 = 148 +R_SH_TLS_DTPMOD32 = 149 +R_SH_TLS_DTPOFF32 = 150 +R_SH_TLS_TPOFF32 = 151 +R_SH_GOT32 = 160 +R_SH_PLT32 = 161 +R_SH_COPY = 162 +R_SH_GLOB_DAT = 163 +R_SH_JMP_SLOT = 164 +R_SH_RELATIVE = 165 +R_SH_GOTOFF = 166 +R_SH_GOTPC = 167 +# Keep this the last entry. +R_SH_NUM = 256 + +# S/390 relocations + +R_390_NONE = 0 # No reloc. +R_390_8 = 1 # Direct 8 bit. +R_390_12 = 2 # Direct 12 bit. +R_390_16 = 3 # Direct 16 bit. +R_390_32 = 4 # Direct 32 bit. +R_390_PC32 = 5 # PC relative 32 bit. +R_390_GOT12 = 6 # 12 bit GOT offset. 
+R_390_GOT32 = 7 # 32 bit GOT offset. +R_390_PLT32 = 8 # 32 bit PC relative PLT address. +R_390_COPY = 9 # Copy symbol at runtime. +R_390_GLOB_DAT = 10 # Create GOT entry. +R_390_JMP_SLOT = 11 # Create PLT entry. +R_390_RELATIVE = 12 # Adjust by program base. +R_390_GOTOFF32 = 13 # 32 bit offset to GOT. +R_390_GOTPC = 14 # 32 bit PC relative offset to GOT. +R_390_GOT16 = 15 # 16 bit GOT offset. +R_390_PC16 = 16 # PC relative 16 bit. +R_390_PC16DBL = 17 # PC relative 16 bit shifted by 1. +R_390_PLT16DBL = 18 # 16 bit PC rel. PLT shifted by 1. +R_390_PC32DBL = 19 # PC relative 32 bit shifted by 1. +R_390_PLT32DBL = 20 # 32 bit PC rel. PLT shifted by 1. +R_390_GOTPCDBL = 21 # 32 bit PC rel. GOT shifted by 1. +R_390_64 = 22 # Direct 64 bit. +R_390_PC64 = 23 # PC relative 64 bit. +R_390_GOT64 = 24 # 64 bit GOT offset. +R_390_PLT64 = 25 # 64 bit PC relative PLT address. +R_390_GOTENT = 26 # 32 bit PC rel. to GOT entry >> 1. +R_390_GOTOFF16 = 27 # 16 bit offset to GOT. +R_390_GOTOFF64 = 28 # 64 bit offset to GOT. +R_390_GOTPLT12 = 29 # 12 bit offset to jump slot. +R_390_GOTPLT16 = 30 # 16 bit offset to jump slot. +R_390_GOTPLT32 = 31 # 32 bit offset to jump slot. +R_390_GOTPLT64 = 32 # 64 bit offset to jump slot. +R_390_GOTPLTENT = 33 # 32 bit rel. offset to jump slot. +R_390_PLTOFF16 = 34 # 16 bit offset from GOT to PLT. +R_390_PLTOFF32 = 35 # 32 bit offset from GOT to PLT. +R_390_PLTOFF64 = 36 # 16 bit offset from GOT to PLT. +R_390_TLS_LOAD = 37 # Tag for load insn in TLS code. +R_390_TLS_GDCALL = 38 # Tag for function call in general dynamic TLS code. +R_390_TLS_LDCALL = 39 # Tag for function call in local dynamic TLS code. +R_390_TLS_GD32 = 40 # Direct 32 bit for general dynamic thread local data. +R_390_TLS_GD64 = 41 # Direct 64 bit for general dynamic thread local data. +R_390_TLS_GOTIE12 = 42 # 12 bit GOT offset for static TLS block offset. +R_390_TLS_GOTIE32 = 43 # 32 bit GOT offset for static TLS block offset. 
+R_390_TLS_GOTIE64 = 44 # 64 bit GOT offset for static TLS block offset. +R_390_TLS_LDM32 = 45 # Direct 32 bit for local dynamic thread local data in LE code. +R_390_TLS_LDM64 = 46 # Direct 64 bit for local dynamic thread local data in LE code. +R_390_TLS_IE32 = 47 # 32 bit address of GOT entry for negated static TLS block offset. +R_390_TLS_IE64 = 48 # 64 bit address of GOT entry for negated static TLS block offset. +R_390_TLS_IEENT = 49 # 32 bit rel. offset to GOT entry for negated static TLS block offset. +R_390_TLS_LE32 = 50 # 32 bit negated offset relative to static TLS block. +R_390_TLS_LE64 = 51 # 64 bit negated offset relative to static TLS block. +R_390_TLS_LDO32 = 52 # 32 bit offset relative to TLS block. +R_390_TLS_LDO64 = 53 # 64 bit offset relative to TLS block. +R_390_TLS_DTPMOD = 54 # ID of module containing symbol. +R_390_TLS_DTPOFF = 55 # Offset in TLS block. +R_390_TLS_TPOFF = 56 # Negated offset in static TLS block. +R_390_20 = 57 # Direct 20 bit. +R_390_GOT20 = 58 # 20 bit GOT offset. +R_390_GOTPLT20 = 59 # 20 bit offset to jump slot. +R_390_TLS_GOTIE20 = 60 # 20 bit GOT offset for static TLS block offset. +# Keep this the last entry. +R_390_NUM = 61 + + +# CRIS relocations. +R_CRIS_NONE = 0 +R_CRIS_8 = 1 +R_CRIS_16 = 2 +R_CRIS_32 = 3 +R_CRIS_8_PCREL = 4 +R_CRIS_16_PCREL = 5 +R_CRIS_32_PCREL = 6 +R_CRIS_GNU_VTINHERIT = 7 +R_CRIS_GNU_VTENTRY = 8 +R_CRIS_COPY = 9 +R_CRIS_GLOB_DAT = 10 +R_CRIS_JUMP_SLOT = 11 +R_CRIS_RELATIVE = 12 +R_CRIS_16_GOT = 13 +R_CRIS_32_GOT = 14 +R_CRIS_16_GOTPLT = 15 +R_CRIS_32_GOTPLT = 16 +R_CRIS_32_GOTREL = 17 +R_CRIS_32_PLT_GOTREL = 18 +R_CRIS_32_PLT_PCREL = 19 + +R_CRIS_NUM = 20 + + +# AMD x86-64 relocations. 
+R_X86_64_NONE = 0 # No reloc +R_X86_64_64 = 1 # Direct 64 bit +R_X86_64_PC32 = 2 # PC relative 32 bit signed +R_X86_64_GOT32 = 3 # 32 bit GOT entry +R_X86_64_PLT32 = 4 # 32 bit PLT address +R_X86_64_COPY = 5 # Copy symbol at runtime +R_X86_64_GLOB_DAT = 6 # Create GOT entry +R_X86_64_JUMP_SLOT = 7 # Create PLT entry +R_X86_64_RELATIVE = 8 # Adjust by program base +R_X86_64_GOTPCREL = 9 # 32 bit signed PC relative offset to GOT +R_X86_64_32 = 10 # Direct 32 bit zero extended +R_X86_64_32S = 11 # Direct 32 bit sign extended +R_X86_64_16 = 12 # Direct 16 bit zero extended +R_X86_64_PC16 = 13 # 16 bit sign extended pc relative +R_X86_64_8 = 14 # Direct 8 bit sign extended +R_X86_64_PC8 = 15 # 8 bit sign extended pc relative +R_X86_64_DTPMOD64 = 16 # ID of module containing symbol +R_X86_64_DTPOFF64 = 17 # Offset in module's TLS block +R_X86_64_TPOFF64 = 18 # Offset in initial TLS block +R_X86_64_TLSGD = 19 # 32 bit signed PC relative offset to two GOT entries for GD symbol +R_X86_64_TLSLD = 20 # 32 bit signed PC relative offset to two GOT entries for LD symbol +R_X86_64_DTPOFF32 = 21 # Offset in TLS block +R_X86_64_GOTTPOFF = 22 # 32 bit signed PC relative offset to GOT entry for IE symbol +R_X86_64_TPOFF32 = 23 # Offset in initial TLS block +R_X86_64_PC64 = 24 # PC relative 64 bit +R_X86_64_GOTOFF64 = 25 # 64 bit offset to GOT +R_X86_64_GOTPC32 = 26 # 32 bit signed pc relative offset to GOT +R_X86_64_GOT64 = 27 # 64-bit GOT entry offset +R_X86_64_GOTPCREL64 = 28 # 64-bit PC relative offset to GOT entry +R_X86_64_GOTPC64 = 29 # 64-bit PC relative offset to GOT +R_X86_64_GOTPLT64 = 30 # like GOT64, says PLT entry needed +R_X86_64_PLTOFF64 = 31 # 64-bit GOT relative offset to PLT entry +R_X86_64_SIZE32 = 32 # Size of symbol plus 32-bit addend +R_X86_64_SIZE64 = 33 # Size of symbol plus 64-bit addend +R_X86_64_GOTPC32_TLSDESC = 34 # GOT offset for TLS descriptor. +R_X86_64_TLSDESC_CALL = 35 # Marker for call through TLS descriptor. 
+R_X86_64_TLSDESC = 36 # TLS descriptor. +R_X86_64_IRELATIVE = 37 # Adjust indirectly by program base + +R_X86_64_NUM = 38 + + +# AM33 relocations. +R_MN10300_NONE = 0 # No reloc. +R_MN10300_32 = 1 # Direct 32 bit. +R_MN10300_16 = 2 # Direct 16 bit. +R_MN10300_8 = 3 # Direct 8 bit. +R_MN10300_PCREL32 = 4 # PC-relative 32-bit. +R_MN10300_PCREL16 = 5 # PC-relative 16-bit signed. +R_MN10300_PCREL8 = 6 # PC-relative 8-bit signed. +R_MN10300_GNU_VTINHERIT = 7 # Ancient C++ vtable garbage... +R_MN10300_GNU_VTENTRY = 8 # ... collection annotation. +R_MN10300_24 = 9 # Direct 24 bit. +R_MN10300_GOTPC32 = 10 # 32-bit PCrel offset to GOT. +R_MN10300_GOTPC16 = 11 # 16-bit PCrel offset to GOT. +R_MN10300_GOTOFF32 = 12 # 32-bit offset from GOT. +R_MN10300_GOTOFF24 = 13 # 24-bit offset from GOT. +R_MN10300_GOTOFF16 = 14 # 16-bit offset from GOT. +R_MN10300_PLT32 = 15 # 32-bit PCrel to PLT entry. +R_MN10300_PLT16 = 16 # 16-bit PCrel to PLT entry. +R_MN10300_GOT32 = 17 # 32-bit offset to GOT entry. +R_MN10300_GOT24 = 18 # 24-bit offset to GOT entry. +R_MN10300_GOT16 = 19 # 16-bit offset to GOT entry. +R_MN10300_COPY = 20 # Copy symbol at runtime. +R_MN10300_GLOB_DAT = 21 # Create GOT entry. +R_MN10300_JMP_SLOT = 22 # Create PLT entry. +R_MN10300_RELATIVE = 23 # Adjust by program base. + +R_MN10300_NUM = 24 + + +# M32R relocs. +R_M32R_NONE = 0 # No reloc. +R_M32R_16 = 1 # Direct 16 bit. +R_M32R_32 = 2 # Direct 32 bit. +R_M32R_24 = 3 # Direct 24 bit. +R_M32R_10_PCREL = 4 # PC relative 10 bit shifted. +R_M32R_18_PCREL = 5 # PC relative 18 bit shifted. +R_M32R_26_PCREL = 6 # PC relative 26 bit shifted. +R_M32R_HI16_ULO = 7 # High 16 bit with unsigned low. +R_M32R_HI16_SLO = 8 # High 16 bit with signed low. +R_M32R_LO16 = 9 # Low 16 bit. +R_M32R_SDA16 = 10 # 16 bit offset in SDA. +R_M32R_GNU_VTINHERIT = 11 +R_M32R_GNU_VTENTRY = 12 +# M32R relocs use SHT_RELA. +R_M32R_16_RELA = 33 # Direct 16 bit. +R_M32R_32_RELA = 34 # Direct 32 bit. +R_M32R_24_RELA = 35 # Direct 24 bit. 
+R_M32R_10_PCREL_RELA = 36 # PC relative 10 bit shifted. +R_M32R_18_PCREL_RELA = 37 # PC relative 18 bit shifted. +R_M32R_26_PCREL_RELA = 38 # PC relative 26 bit shifted. +R_M32R_HI16_ULO_RELA = 39 # High 16 bit with unsigned low +R_M32R_HI16_SLO_RELA = 40 # High 16 bit with signed low +R_M32R_LO16_RELA = 41 # Low 16 bit +R_M32R_SDA16_RELA = 42 # 16 bit offset in SDA +R_M32R_RELA_GNU_VTINHERIT = 43 +R_M32R_RELA_GNU_VTENTRY = 44 +R_M32R_REL32 = 45 # PC relative 32 bit. + +R_M32R_GOT24 = 48 # 24 bit GOT entry +R_M32R_26_PLTREL = 49 # 26 bit PC relative to PLT shifted +R_M32R_COPY = 50 # Copy symbol at runtime +R_M32R_GLOB_DAT = 51 # Create GOT entry +R_M32R_JMP_SLOT = 52 # Create PLT entry +R_M32R_RELATIVE = 53 # Adjust by program base +R_M32R_GOTOFF = 54 # 24 bit offset to GOT +R_M32R_GOTPC24 = 55 # 24 bit PC relative offset to GOT +R_M32R_GOT16_HI_ULO = 56 # High 16 bit GOT entry with unsigned low +R_M32R_GOT16_HI_SLO = 57 # High 16 bit GOT entry with signed low +R_M32R_GOT16_LO = 58 # Low 16 bit GOT entry +R_M32R_GOTPC_HI_ULO = 59 # High 16 bit PC relative offset to GOT with unsigned low +R_M32R_GOTPC_HI_SLO = 60 # High 16 bit PC relative offset to GOT with signed low +R_M32R_GOTPC_LO = 61 # Low 16 bit PC relative offset to GOT +R_M32R_GOTOFF_HI_ULO = 62 # High 16 bit offset to GOT with unsigned low +R_M32R_GOTOFF_HI_SLO = 63 # High 16 bit offset to GOT with signed low +R_M32R_GOTOFF_LO = 64 # Low 16 bit offset to GOT +R_M32R_NUM = 256 # Keep this the last entry. diff --git a/miasm/elfesteem/elf_init.py b/miasm/elfesteem/elf_init.py new file mode 100644 index 00000000..14a37eb5 --- /dev/null +++ b/miasm/elfesteem/elf_init.py @@ -0,0 +1,878 @@ +#! 
/usr/bin/env python + +from __future__ import print_function +from builtins import range +import logging +import struct + +from future.utils import PY3, with_metaclass + +from miasm.core.utils import force_bytes +from miasm.elfesteem import cstruct +from miasm.elfesteem import elf +from miasm.elfesteem.strpatchwork import StrPatchwork + +log = logging.getLogger("elfparse") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +def printable(string): + if isinstance(string, bytes): + return "".join( + c.decode() if b" " <= c < b"~" else "." + for c in (string[i:i+1] for i in range(len(string))) + ) + return string + + +class StructWrapper_metaclass(type): + + def __new__(cls, name, bases, dct): + wrapped = dct["wrapped"] + if wrapped is not None: # XXX: make dct lookup look into base classes + for fname, v in wrapped._fields: + dct[fname] = property(dct.pop("get_" + fname, + lambda self, fname=fname: getattr( + self.cstr, fname)), + dct.pop("set_" + fname, + lambda self, v, fname=fname: setattr( + self.cstr, fname, v)), + dct.pop("del_" + fname, None)) + return type.__new__(cls, name, bases, dct) + + +class StructWrapper(with_metaclass(StructWrapper_metaclass, object)): + + wrapped = None + + def __init__(self, parent, sex, size, *args, **kargs): + self.cstr = self.wrapped(sex, size, *args, **kargs) + self.parent = parent + + def __getitem__(self, item): + return getattr(self, item) + + def __repr__(self): + return "> 8].name + + def get_type(self): + return self.cstr.info & 0xff + + +class WRel64(StructWrapper): + wrapped = elf.Rel64 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u32")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 32].name + + def get_type(self): + return self.cstr.info & 0xffffffff + + +class WRela32(WRel32): + wrapped = elf.Rela32 + 
wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u08")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 8].name + + def get_type(self): + return self.cstr.info & 0xff + + +class WRela64(WRel64): + wrapped = elf.Rela64 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u32")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 32].name + + def get_type(self): + return self.cstr.info & 0xffffffff + + +class WShdr(StructWrapper): + wrapped = elf.Shdr + + def get_name(self): + return self.parent.parent._shstr.get_name(self.cstr.name) + + +class WDynamic(StructWrapper): + wrapped = elf.Dynamic + + def get_name(self): + if self.type == elf.DT_NEEDED: + return self.parent.linksection.get_name(self.cstr.name) + return self.cstr.name + + +class WPhdr(StructWrapper): + wrapped = elf.Phdr + + +class WPhdr64(StructWrapper): + wrapped = elf.Phdr64 + + +class WNhdr(StructWrapper): + wrapped = elf.Nhdr + + +class ContentManager(object): + + def __get__(self, owner, x): + if hasattr(owner, '_content'): + return owner._content + + def __set__(self, owner, new_content): + owner.resize(len(owner._content), len(new_content)) + owner._content = StrPatchwork(new_content) + owner.parse_content(owner.sex, owner.size) + + def __delete__(self, owner): + self.__set__(owner, None) + + +# Sections + +class Section_metaclass(type): + + def __new__(cls, name, bases, dct): + o = type.__new__(cls, name, bases, dct) + if name != "Section": + Section.register(o) + return o + + def register(cls, o): + if o.sht is not None: + cls.sectypes[o.sht] = o + + def __call__(cls, parent, sex, size, shstr=None): + sh = None + if shstr is not None: + sh = WShdr(None, sex, size, shstr) + if sh.type in Section.sectypes: + cls = Section.sectypes[sh.type] + i = cls.__new__(cls, cls.__name__, cls.__bases__, cls.__dict__) + if sh is not None: + sh.parent = i + i.__init__(parent, sh) + return i + + +class 
Section(with_metaclass(Section_metaclass, object)): + + sectypes = {} + content = ContentManager() + + def resize(self, old, new): + self.sh.size += new - old + self.parent.resize(self, new - old) + if self.phparent: + self.phparent.resize(self, new - old) + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + pass + + def get_linksection(self): + return self.parent[self.sh.link] + + def set_linksection(self, val): + if isinstance(val, Section): + val = self.parent.shlist.find(val) + if type(val) is int: + self.sh.link = val + linksection = property(get_linksection, set_linksection) + + def get_infosection(self): + # XXX info may not be in sh list ?!? + if not self.sh.info in self.parent: + return None + return self.parent[self.sh.info] + + def set_infosection(self, val): + if isinstance(val, Section): + val = self.parent.shlist.find(val) + if type(val) is int: + self.sh.info = val + infosection = property(get_infosection, set_infosection) + + def __init__(self, parent, sh=None): + self.parent = parent + self.phparent = None + self.sh = sh + self._content = b"" + + def __repr__(self): + r = "{%(name)s ofs=%(offset)#x sz=%(size)#x addr=%(addr)#010x}" % self.sh + return r + + +class NullSection(Section): + sht = elf.SHT_NULL + + def get_name(self, ofs): + # XXX check this + return b"" + + +class ProgBits(Section): + sht = elf.SHT_PROGBITS + + +class HashSection(Section): + sht = elf.SHT_HASH + + +class NoBitsSection(Section): + sht = elf.SHT_NOBITS + + +class ShLibSection(Section): + sht = elf.SHT_SHLIB + + +class InitArray(Section): + sht = elf.SHT_INIT_ARRAY + + +class FiniArray(Section): + sht = elf.SHT_FINI_ARRAY + + +class GroupSection(Section): + sht = elf.SHT_GROUP + + +class SymTabSHIndeces(Section): + sht = elf.SHT_SYMTAB_SHNDX + + +class GNUVerSym(Section): + sht = elf.SHT_GNU_versym + + +class GNUVerNeed(Section): + sht = elf.SHT_GNU_verneed + + +class GNUVerDef(Section): + sht = elf.SHT_GNU_verdef + + +class GNULibLIst(Section): + sht 
= elf.SHT_GNU_LIBLIST + + +class CheckSumSection(Section): + sht = elf.SHT_CHECKSUM + + +class NoteSection(Section): + sht = elf.SHT_NOTE + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + hsz = 12 + self.notes = [] + # XXX: c may not be aligned? + while len(c) > hsz: + note = WNhdr(self, sex, size, c) + namesz, descsz = note.namesz, note.descsz + name = c[hsz:hsz + namesz] + desc = c[hsz + namesz:hsz + namesz + descsz] + c = c[hsz + namesz + descsz:] + self.notes.append((note.type, name, desc)) + + +class Dynamic(Section): + sht = elf.SHT_DYNAMIC + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + self.dyntab = [] + self.dynamic = {} + sz = self.sh.entsize + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + dyn = WDynamic(self, sex, size, s) + self.dyntab.append(dyn) + if isinstance(dyn.name, str): + self[dyn.name] = dyn + + def __setitem__(self, item, value): + if isinstance(item, bytes): + self.dynamic[item] = value + return + if isinstance(item, str): + self.symbols[item.encode()] = value + return + self.dyntab[item] = value + + def __getitem__(self, item): + if isinstance(item, bytes): + return self.dynamic[item] + if isinstance(item, str): + return self.dynamic[item.encode()] + return self.dyntab[item] + + +class StrTable(Section): + sht = elf.SHT_STRTAB + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + self.res = {} + c = self.content + q = 0 + index = 0 + l = len(c) + while index < l: + p = c.find(b"\x00", index) + if p < 0: + log.warning("Missing trailing 0 for string [%s]" % c) # XXX + p = len(c) - index + self.res[index] = c[index:p] + # print q, c[:p] + index = p + 1 + # q += p+1 + # c = c[p+1:] + + def get_name(self, ofs): + return self.content[ofs:self.content.find(b'\x00', start=ofs)] + + def add_name(self, name): + name = force_bytes(name) + name = name + b"\x00" + if name in self.content: + return 
self.content.find(name) + n = len(self.content) + self.content = bytes(self.content) + name + return n + + def mod_name(self, name, new_name): + s = bytes(self.content) + name_b = b'\x00%s\x00' % name.encode() + if not name_b in s: + raise ValueError('Unknown name %r' % name) + self.content = s.replace( + name_b, + b'\x00%s\x00' % new_name.encode() + ) + return len(self.content) + + +class SymTable(Section): + sht = elf.SHT_SYMTAB + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + self.symtab = [] + self.symbols = {} + sz = self.sh.entsize + index = 0 + l = len(c) + if size == 32: + WSym = WSym32 + elif size == 64: + WSym = WSym64 + else: + ValueError('unknown size') + while index < l: + s = c[index:index + sz] + index += sz + sym = WSym(self, sex, size, s) + self.symtab.append(sym) + self[sym.name] = sym + + def __getitem__(self, item): + if isinstance(item, bytes): + return self.symbols[item] + if isinstance(item, str): + return self.symbols[item.encode()] + return self.symtab[item] + + def __setitem__(self, item, value): + if isinstance(item, bytes): + self.symbols[item] = value + return + if isinstance(item, str): + self.symbols[item.encode()] = value + return + self.symtab[item] = value + + +class DynSymTable(SymTable): + sht = elf.SHT_DYNSYM + + +class RelTable(Section): + sht = elf.SHT_REL + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + if size == 32: + WRel = WRel32 + elif size == 64: + WRel = WRel64 + else: + ValueError('unknown size') + c = self.content + self.reltab = [] + self.rel = {} + sz = self.sh.entsize + + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + rel = WRel(self, sex, size, s) + self.reltab.append(rel) + if rel.parent.linksection != self.parent.shlist[0]: + self.rel[rel.sym] = rel + + +class RelATable(RelTable): + sht = elf.SHT_RELA + +# Section List + + +class SHList(object): + + def __init__(self, parent, sex, size): + self.parent = parent + 
self.shlist = [] + ehdr = self.parent.Ehdr + of1 = ehdr.shoff + if not of1: # No SH table + return + for i in range(ehdr.shnum): + of2 = of1 + ehdr.shentsize + shstr = parent[of1:of2] + self.shlist.append(Section(self, sex, size, shstr=shstr)) + of1 = of2 + self._shstr = self.shlist[ehdr.shstrndx] + + for s in self.shlist: + if not isinstance(s, NoBitsSection): + s._content = StrPatchwork( + parent[s.sh.offset: s.sh.offset + s.sh.size] + ) + # Follow dependencies when initializing sections + zero = self.shlist[0] + todo = self.shlist[1:] + done = [] + while todo: + s = todo.pop(0) + if ((s.linksection == zero or s.linksection in done) and + (s.infosection in [zero, None] or s.infosection in done)): + done.append(s) + s.parse_content(sex, size) + else: + todo.append(s) + for s in self.shlist: + self.do_add_section(s) + + def do_add_section(self, section): + n = section.sh.name + if n.startswith(b"."): + n = n[1:] + n = printable(n).replace(".", "_").replace("-", "_") + setattr(self, n, section) # xxx + + def append(self, item): + self.do_add_section(item) + self.shlist.append(item) + + def __getitem__(self, item): + return self.shlist[item] + + def __repr__(self): + rep = ["# section offset size addr flags"] + for i, s in enumerate(self.shlist): + l = "%(name)-15s %(offset)08x %(size)06x %(addr)08x %(flags)x " % s.sh + l = ("%2i " % i) + l + s.__class__.__name__ + rep.append(l) + return "\n".join(rep) + + def __bytes__(self): + return b"".join( + bytes(s.sh) for s in self.shlist + ) + + def __str__(self): + if PY3: + return repr(self) + return bytes(self) + + def resize(self, sec, diff): + for s in self.shlist: + if s.sh.offset > sec.sh.offset: + s.sh.offset += diff + if self.parent.Ehdr.shoff > sec.sh.offset: + self.parent.Ehdr.shoff += diff + if self.parent.Ehdr.phoff > sec.sh.offset: + self.parent.Ehdr.phoff += diff + +# Program Header List + + +class ProgramHeader(object): + + def __init__(self, parent, sex, size, phstr): + self.parent = parent + self.ph = 
WPhdr(self, sex, size, phstr) + self.shlist = [] + for s in self.parent.parent.sh: + if isinstance(s, NullSection): + continue + if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) + or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): + s.phparent = self + self.shlist.append(s) + + def resize(self, sec, diff): + self.ph.filesz += diff + self.ph.memsz += diff + self.parent.resize(sec, diff) + + +class ProgramHeader64(object): + + def __init__(self, parent, sex, size, phstr): + self.parent = parent + self.ph = WPhdr64(self, sex, size, phstr) + self.shlist = [] + for s in self.parent.parent.sh: + if isinstance(s, NullSection): + continue + if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) + or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): + s.phparent = self + self.shlist.append(s) + + def resize(self, sec, diff): + self.ph.filesz += diff + self.ph.memsz += diff + self.parent.resize(sec, diff) + + +class PHList(object): + + def __init__(self, parent, sex, size): + self.parent = parent + self.phlist = [] + ehdr = self.parent.Ehdr + of1 = ehdr.phoff + for i in range(ehdr.phnum): + of2 = of1 + ehdr.phentsize + phstr = parent[of1:of2] + if size == 32: + self.phlist.append(ProgramHeader(self, sex, size, phstr)) + else: + self.phlist.append(ProgramHeader64(self, sex, size, phstr)) + of1 = of2 + + def __getitem__(self, item): + return self.phlist[item] + + def __repr__(self): + r = [" offset filesz vaddr memsz"] + for i, p in enumerate(self.phlist): + l = "%(offset)07x %(filesz)06x %(vaddr)08x %(memsz)07x %(type)02x %(flags)01x" % p.ph + l = ("%2i " % i) + l + r.append(l) + r.append(" " + " ".join(printable(s.sh.name) for s in p.shlist)) + return "\n".join(r) + + def __bytes__(self): + return b"".join( + bytes(p.ph) for p in self.phlist + ) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__(self) + + def resize(self, sec, diff): + for p in 
self.phlist: + if p.ph.offset > sec.sh.offset: + p.ph.offset += diff + if p.ph.vaddr > sec.phparent.ph.vaddr + sec.sh.offset: + p.ph.vaddr += diff + if p.ph.paddr > sec.phparent.ph.paddr + sec.sh.offset: + p.ph.paddr += diff + + +class virt(object): + + def __init__(self, x): + self.parent = x + + def get_rvaitem(self, start, stop=None): + if stop == None: + s = self.parent.getsectionbyvad(start) + if s: + start = start - s.sh.addr + else: + s = self.parent.getphbyvad(start) + if s: + start = start - s.ph.vaddr + if not s: + return [(None, start)] + return [(s, start)] + total_len = stop - start + + virt_item = [] + while total_len: + s = self.parent.getsectionbyvad(start) + if not s: + s = self.parent.getphbyvad(start) + if not s: + raise ValueError('unknown rva address! %x' % start) + if isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64): + s_max = s.ph.filesz + s_start = start - s.ph.vaddr + s_stop = stop - s.ph.vaddr + else: + s_max = s.sh.size + s_start = start - s.sh.addr + s_stop = stop - s.sh.addr + if s_stop > s_max: + s_stop = s_max + + s_len = s_stop - s_start + if s_len == 0: + raise ValueError('empty section! 
%x' % start) + total_len -= s_len + start += s_len + n_item = slice(s_start, s_stop) + virt_item.append((s, n_item)) + return virt_item + + def item2virtitem(self, item): + if not type(item) is slice: # integer + return self.get_rvaitem(item) + start = item.start + stop = item.stop + assert(item.step is None) + return self.get_rvaitem(start, stop) + + def get(self, ad_start, ad_stop=None): + rva_items = self.get_rvaitem(ad_start, ad_stop) + data_out = b"" + for s, n_item in rva_items: + if not (isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64)): + data_out += s.content.__getitem__(n_item) + continue + if not type(n_item) is slice: + n_item = slice(n_item, n_item + 1, 1) + start = n_item.start + s.ph.offset + stop = n_item.stop + s.ph.offset + if n_item.step != None: + step = n_item.step + s.ph.offset + else: + step = None + n_item = slice(start, stop, step) + # data_out += self.parent.content.__s.content.__getitem__(n_item) + data_out += self.parent.content.__getitem__(n_item) + + return data_out + + def set(self, item, data): + if not type(item) is slice: + item = slice(item, item + len(data), None) + virt_item = self.item2virtitem(item) + if not virt_item: + return + off = 0 + for s, n_item in virt_item: + if isinstance(s, ProgBits): + i = slice(off, n_item.stop + off - n_item.start, n_item.step) + + data_slice = data.__getitem__(i) + s.content.__setitem__(n_item, data_slice) + off = i.stop + else: + raise ValueError('TODO XXX') + + return + + def __getitem__(self, item): + if isinstance(item, slice): + assert(item.step is None) + return self.get(item.start, item.stop) + else: + return self.get(item) + + def __setitem__(self, item, data): + if isinstance(item, slice): + rva = item.start + else: + rva = item + self.set(rva, data) + + def max_addr(self): + # the maximum virtual address is found by retrieving the maximum + # possible virtual address, either from the program entries, and + # section entries. if there is no such object, raise an error. 
+ l = 0 + if self.parent.ph.phlist: + for phdr in self.parent.ph.phlist: + l = max(l, phdr.ph.vaddr + phdr.ph.memsz) + if self.parent.sh.shlist: + for shdr in self.parent.sh.shlist: + l = max(l, shdr.sh.addr + shdr.sh.size) + if not l: + raise ValueError('maximum virtual address not found !') + return l + + def is_addr_in(self, ad): + return self.parent.is_in_virt_address(ad) + + def find(self, pattern, start=0): + sections = [] + offset = start + for s in self.parent.ph: + s_max = s.ph.memsz # max(s.ph.filesz, s.ph.memsz) + if offset < s.ph.vaddr + s_max: + sections.append(s) + + if not sections: + return -1 + offset -= sections[0].ph.vaddr + if offset < 0: + offset = 0 + for s in sections: + data = self.parent.content[s.ph.offset:s.ph.offset + s.ph.filesz] + ret = data.find(pattern, offset) + if ret != -1: + return ret + s.ph.vaddr # self.parent.rva2virt(s.addr + ret) + offset = 0 + return -1 + +# ELF object + + +class ELF(object): + + def __init__(self, elfstr): + self._content = elfstr + self.parse_content() + + self._virt = virt(self) + + def get_virt(self): + return self._virt + virt = property(get_virt) + + content = ContentManager() + + def parse_content(self): + h = self.content[:8] + self.size = struct.unpack('B', h[4:5])[0] * 32 + self.sex = struct.unpack('B', h[5:6])[0] + self.Ehdr = WEhdr(self, self.sex, self.size, self.content) + self.sh = SHList(self, self.sex, self.size) + self.ph = PHList(self, self.sex, self.size) + + def resize(self, old, new): + pass + + def __getitem__(self, item): + return self.content[item] + + def build_content(self): + c = StrPatchwork() + c[0] = bytes(self.Ehdr) + c[self.Ehdr.phoff] = bytes(self.ph) + for s in self.sh: + c[s.sh.offset] = bytes(s.content) + c[self.Ehdr.shoff] = bytes(self.sh) + return bytes(c) + + def __bytes__(self): + return self.build_content() + + def __str__(self): + if PY3: + return repr(self) + return bytes(self) + + def getphbyvad(self, ad): + for s in self.ph: + if s.ph.vaddr <= ad < s.ph.vaddr + 
s.ph.memsz: + return s + + def getsectionbyvad(self, ad): + for s in self.sh: + if s.sh.addr <= ad < s.sh.addr + s.sh.size: + return s + + def getsectionbyname(self, name): + name = force_bytes(name) + for s in self.sh: + try: + if s.sh.name.strip(b'\x00') == name: + return s + except UnicodeDecodeError: + pass + return None + + def is_in_virt_address(self, ad): + for s in self.sh: + if s.sh.addr <= ad < s.sh.addr + s.sh.size: + return True + return False diff --git a/miasm/elfesteem/minidump.py b/miasm/elfesteem/minidump.py new file mode 100644 index 00000000..ee2be8a0 --- /dev/null +++ b/miasm/elfesteem/minidump.py @@ -0,0 +1,545 @@ +"""Constants and structures associated to Minidump format +Based on: http://amnesia.gtisc.gatech.edu/~moyix/minidump.py +""" +from future.utils import viewitems + +from future.builtins import int as int_types +from miasm.elfesteem.new_cstruct import CStruct + +class Enumeration(object): + """Stand for an enumeration type""" + + def __init__(self, enum_info): + """enum_info: {name: value}""" + self._enum_info = enum_info + self._inv_info = dict((v, k) for k, v in viewitems(enum_info)) + + def __getitem__(self, key): + """Helper: assume that string is for key, integer is for value""" + if isinstance(key, int_types): + return self._inv_info[key] + return self._enum_info[key] + + def __getattr__(self, key): + if key in self._enum_info: + return self._enum_info[key] + raise AttributeError + + def from_value(self, value): + return self._inv_info[value] + + +class Rva(CStruct): + """Relative Virtual Address + Note: RVA in Minidump means "file offset" + """ + _fields = [("rva", "u32"), + ] + + +minidumpType = Enumeration({ + # MINIDUMP_TYPE + # https://msdn.microsoft.com/en-us/library/ms680519(v=vs.85).aspx + "MiniDumpNormal" : 0x00000000, + "MiniDumpWithDataSegs" : 0x00000001, + "MiniDumpWithFullMemory" : 0x00000002, + "MiniDumpWithHandleData" : 0x00000004, + "MiniDumpFilterMemory" : 0x00000008, + "MiniDumpScanMemory" : 0x00000010, + 
"MiniDumpWithUnloadedModules" : 0x00000020, + "MiniDumpWithIndirectlyReferencedMemory" : 0x00000040, + "MiniDumpFilterModulePaths" : 0x00000080, + "MiniDumpWithProcessThreadData" : 0x00000100, + "MiniDumpWithPrivateReadWriteMemory" : 0x00000200, + "MiniDumpWithoutOptionalData" : 0x00000400, + "MiniDumpWithFullMemoryInfo" : 0x00000800, + "MiniDumpWithThreadInfo" : 0x00001000, + "MiniDumpWithCodeSegs" : 0x00002000, + "MiniDumpWithoutAuxiliaryState" : 0x00004000, + "MiniDumpWithFullAuxiliaryState" : 0x00008000, + "MiniDumpWithPrivateWriteCopyMemory" : 0x00010000, + "MiniDumpIgnoreInaccessibleMemory" : 0x00020000, + "MiniDumpWithTokenInformation" : 0x00040000, + "MiniDumpWithModuleHeaders" : 0x00080000, + "MiniDumpFilterTriage" : 0x00100000, + "MiniDumpValidTypeFlags" : 0x001fffff, +}) + +class MinidumpHDR(CStruct): + """MINIDUMP_HEADER + https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx + """ + _fields = [("Magic", "u32"), # MDMP + ("Version", "u16"), + ("ImplementationVersion", "u16"), + ("NumberOfStreams", "u32"), + ("StreamDirectoryRva", "Rva"), + ("Checksum", "u32"), + ("TimeDateStamp", "u32"), + ("Flags", "u32") + ] + +class LocationDescriptor(CStruct): + """MINIDUMP_LOCATION_DESCRIPTOR + https://msdn.microsoft.com/en-us/library/ms680383(v=vs.85).aspx + """ + _fields = [("DataSize", "u32"), + ("Rva", "Rva"), + ] + + +streamType = Enumeration({ + # MINIDUMP_STREAM_TYPE + # https://msdn.microsoft.com/en-us/library/ms680394(v=vs.85).aspx + "UnusedStream" : 0, + "ReservedStream0" : 1, + "ReservedStream1" : 2, + "ThreadListStream" : 3, + "ModuleListStream" : 4, + "MemoryListStream" : 5, + "ExceptionStream" : 6, + "SystemInfoStream" : 7, + "ThreadExListStream" : 8, + "Memory64ListStream" : 9, + "CommentStreamA" : 10, + "CommentStreamW" : 11, + "HandleDataStream" : 12, + "FunctionTableStream" : 13, + "UnloadedModuleListStream" : 14, + "MiscInfoStream" : 15, + "MemoryInfoListStream" : 16, + "ThreadInfoListStream" : 17, + "HandleOperationListStream" : 18, + 
"LastReservedStream" : 0xffff, +}) + +class StreamDirectory(CStruct): + """MINIDUMP_DIRECTORY + https://msdn.microsoft.com/en-us/library/ms680365(VS.85).aspx + """ + _fields = [("StreamType", "u32"), + ("Location", "LocationDescriptor"), + ] + + @property + def pretty_name(self): + return streamType[self.StreamType] + + +class FixedFileInfo(CStruct): + """VS_FIXEDFILEINFO + https://msdn.microsoft.com/en-us/library/ms646997(v=vs.85).aspx + """ + _fields = [("dwSignature", "u32"), + ("dwStrucVersion", "u32"), + ("dwFileVersionMS", "u32"), + ("dwFileVersionLS", "u32"), + ("dwProductVersionMS", "u32"), + ("dwProductVersionLS", "u32"), + ("dwFileFlagsMask", "u32"), + ("dwFileFlags", "u32"), + ("dwFileOS", "u32"), + ("dwFileType", "u32"), + ("dwFileSubtype", "u32"), + ("dwFileDateMS", "u32"), + ("dwFileDateLS", "u32"), + ] + +class MinidumpString(CStruct): + """MINIDUMP_STRING + https://msdn.microsoft.com/en-us/library/ms680395(v=vs.85).aspx + """ + _fields = [("Length", "u32"), + ("Buffer", "u08", lambda string:string.Length), + ] + +class Module(CStruct): + """MINIDUMP_MODULE + https://msdn.microsoft.com/en-us/library/ms680392(v=vs.85).aspx + """ + _fields = [("BaseOfImage", "u64"), + ("SizeOfImage", "u32"), + ("CheckSum", "u32"), + ("TimeDateStamp", "u32"), + ("ModuleNameRva", "Rva"), + ("VersionInfo", "FixedFileInfo"), + ("CvRecord", "LocationDescriptor"), + ("MiscRecord", "LocationDescriptor"), + ("Reserved0", "u64"), + ("Reserved1", "u64"), + ] + + +class ModuleList(CStruct): + """MINIDUMP_MODULE_LIST + https://msdn.microsoft.com/en-us/library/ms680391(v=vs.85).aspx + """ + _fields = [("NumberOfModules", "u32"), + ("Modules", "Module", lambda mlist:mlist.NumberOfModules), + ] + + +class MemoryDescriptor64(CStruct): + """MINIDUMP_MEMORY_DESCRIPTOR64 + https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx + """ + _fields = [("StartOfMemoryRange", "u64"), + ("DataSize", "u64") + ] + + +class Memory64List(CStruct): + """MINIDUMP_MEMORY64_LIST + 
https://msdn.microsoft.com/en-us/library/ms680387(v=vs.85).aspx + """ + _fields = [("NumberOfMemoryRanges", "u64"), + ("BaseRva", "u64"), + ("MemoryRanges", "MemoryDescriptor64", + lambda mlist:mlist.NumberOfMemoryRanges), + ] + +class MemoryDescriptor(CStruct): + """MINIDUMP_MEMORY_DESCRIPTOR + https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx + """ + _fields = [("StartOfMemoryRange", "u64"), + ("Memory", "LocationDescriptor"), + ] + +class MemoryList(CStruct): + """MINIDUMP_MEMORY_LIST + https://msdn.microsoft.com/en-us/library/ms680387(v=vs.85).aspx + """ + _fields = [("NumberOfMemoryRanges", "u32"), + ("MemoryRanges", "MemoryDescriptor", + lambda mlist:mlist.NumberOfMemoryRanges), + ] + +memProtect = Enumeration({ + # MEM PROTECT + # https://msdn.microsoft.com/en-us/library/aa366786(v=vs.85).aspx + "PAGE_NOACCESS" : 0x0001, + "PAGE_READONLY" : 0x0002, + "PAGE_READWRITE" : 0x0004, + "PAGE_WRITECOPY" : 0x0008, + "PAGE_EXECUTE" : 0x0010, + "PAGE_EXECUTE_READ" : 0x0020, + "PAGE_EXECUTE_READWRITE" : 0x0040, + "PAGE_EXECUTE_WRITECOPY" : 0x0080, + "PAGE_GUARD" : 0x0100, + "PAGE_NOCACHE" : 0x0200, + "PAGE_WRITECOMBINE" : 0x0400, +}) + +class MemoryInfo(CStruct): + """MINIDUMP_MEMORY_INFO + https://msdn.microsoft.com/en-us/library/ms680386(v=vs.85).aspx + """ + _fields = [("BaseAddress", "u64"), + ("AllocationBase", "u64"), + ("AllocationProtect", "u32"), + ("__alignment1", "u32"), + ("RegionSize", "u64"), + ("State", "u32"), + ("Protect", "u32"), + ("Type", "u32"), + ("__alignment2", "u32"), + ] + +class MemoryInfoList(CStruct): + """MINIDUMP_MEMORY_INFO_LIST + https://msdn.microsoft.com/en-us/library/ms680385(v=vs.85).aspx + """ + _fields = [("SizeOfHeader", "u32"), + ("SizeOfEntry", "u32"), + ("NumberOfEntries", "u64"), + # Fake field, for easy access to MemoryInfo elements + ("MemoryInfos", "MemoryInfo", + lambda mlist: mlist.NumberOfEntries), + ] + + +contextFlags_x86 = Enumeration({ + "CONTEXT_i386" : 0x00010000, + "CONTEXT_CONTROL" : 0x00010001, + 
"CONTEXT_INTEGER" : 0x00010002, + "CONTEXT_SEGMENTS" : 0x00010004, + "CONTEXT_FLOATING_POINT" : 0x00010008, + "CONTEXT_DEBUG_REGISTERS" : 0x00010010, + "CONTEXT_EXTENDED_REGISTERS" : 0x00010020, +}) + +class FloatingSaveArea(CStruct): + """FLOATING_SAVE_AREA + http://terminus.rewolf.pl/terminus/structures/ntdll/_FLOATING_SAVE_AREA_x86.html + """ + _fields = [("ControlWord", "u32"), + ("StatusWord", "u32"), + ("TagWord", "u32"), + ("ErrorOffset", "u32"), + ("ErrorSelector", "u32"), + ("DataOffset", "u32"), + ("DataSelector", "u32"), + ("RegisterArea", "80s"), + ("Cr0NpxState", "u32"), + ] + +class Context_x86(CStruct): + """CONTEXT x86 + https://msdn.microsoft.com/en-us/en-en/library/ms679284(v=vs.85).aspx + http://terminus.rewolf.pl/terminus/structures/ntdll/_CONTEXT_x86.html + """ + + MAXIMUM_SUPPORTED_EXTENSION = 512 + + def is_activated(flag): + mask = contextFlags_x86[flag] + def check_context(ctx): + if (ctx.ContextFlags & mask == mask): + return 1 + return 0 + return check_context + + _fields = [("ContextFlags", "u32"), + # DebugRegisters + ("Dr0", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr1", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr2", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr3", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr6", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr7", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + + ("FloatSave", "FloatingSaveArea", + is_activated("CONTEXT_FLOATING_POINT")), + + # SegmentRegisters + ("SegGs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegFs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegEs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegDs", "u32", is_activated("CONTEXT_SEGMENTS")), + # IntegerRegisters + ("Edi", "u32", is_activated("CONTEXT_INTEGER")), + ("Esi", "u32", is_activated("CONTEXT_INTEGER")), + ("Ebx", "u32", is_activated("CONTEXT_INTEGER")), + ("Edx", "u32", is_activated("CONTEXT_INTEGER")), + ("Ecx", "u32", 
is_activated("CONTEXT_INTEGER")), + ("Eax", "u32", is_activated("CONTEXT_INTEGER")), + # ControlRegisters + ("Ebp", "u32", is_activated("CONTEXT_CONTROL")), + ("Eip", "u32", is_activated("CONTEXT_CONTROL")), + ("SegCs", "u32", is_activated("CONTEXT_CONTROL")), + ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), + ("Esp", "u32", is_activated("CONTEXT_CONTROL")), + ("SegSs", "u32", is_activated("CONTEXT_CONTROL")), + + ("ExtendedRegisters", "%ds" % MAXIMUM_SUPPORTED_EXTENSION, + is_activated("CONTEXT_EXTENDED_REGISTERS")), + ] + + +contextFlags_AMD64 = Enumeration({ + "CONTEXT_AMD64" : 0x00100000, + "CONTEXT_CONTROL" : 0x00100001, + "CONTEXT_INTEGER" : 0x00100002, + "CONTEXT_SEGMENTS" : 0x00100004, + "CONTEXT_FLOATING_POINT" : 0x00100008, + "CONTEXT_DEBUG_REGISTERS" : 0x00100010, + "CONTEXT_XSTATE" : 0x00100020, + "CONTEXT_EXCEPTION_ACTIVE" : 0x08000000, + "CONTEXT_SERVICE_ACTIVE" : 0x10000000, + "CONTEXT_EXCEPTION_REQUEST" : 0x40000000, + "CONTEXT_EXCEPTION_REPORTING" : 0x80000000, +}) + + +class M128A(CStruct): + """M128A + http://terminus.rewolf.pl/terminus/structures/ntdll/_M128A_x64.html + """ + _fields = [("Low", "u64"), + ("High", "u64"), + ] + +class Context_AMD64(CStruct): + """CONTEXT AMD64 + https://github.com/duarten/Threadjack/blob/master/WinNT.h + """ + + def is_activated(flag): + mask = contextFlags_AMD64[flag] + def check_context(ctx): + if (ctx.ContextFlags & mask == mask): + return 1 + return 0 + return check_context + + _fields = [ + + # Only used for Convenience + ("P1Home", "u64"), + ("P2Home", "u64"), + ("P3Home", "u64"), + ("P4Home", "u64"), + ("P5Home", "u64"), + ("P6Home", "u64"), + + # Control + ("ContextFlags", "u32"), + ("MxCsr", "u32"), + + # Segment & processor + # /!\ activation depends on multiple flags + ("SegCs", "u16", is_activated("CONTEXT_CONTROL")), + ("SegDs", "u16", is_activated("CONTEXT_SEGMENTS")), + ("SegEs", "u16", is_activated("CONTEXT_SEGMENTS")), + ("SegFs", "u16", is_activated("CONTEXT_SEGMENTS")), + ("SegGs", "u16", 
is_activated("CONTEXT_SEGMENTS")), + ("SegSs", "u16", is_activated("CONTEXT_CONTROL")), + ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), + + # Debug registers + ("Dr0", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr1", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr2", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr3", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr6", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr7", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + + # Integer registers + # /!\ activation depends on multiple flags + ("Rax", "u64", is_activated("CONTEXT_INTEGER")), + ("Rcx", "u64", is_activated("CONTEXT_INTEGER")), + ("Rdx", "u64", is_activated("CONTEXT_INTEGER")), + ("Rbx", "u64", is_activated("CONTEXT_INTEGER")), + ("Rsp", "u64", is_activated("CONTEXT_CONTROL")), + ("Rbp", "u64", is_activated("CONTEXT_INTEGER")), + ("Rsi", "u64", is_activated("CONTEXT_INTEGER")), + ("Rdi", "u64", is_activated("CONTEXT_INTEGER")), + ("R8", "u64", is_activated("CONTEXT_INTEGER")), + ("R9", "u64", is_activated("CONTEXT_INTEGER")), + ("R10", "u64", is_activated("CONTEXT_INTEGER")), + ("R11", "u64", is_activated("CONTEXT_INTEGER")), + ("R12", "u64", is_activated("CONTEXT_INTEGER")), + ("R13", "u64", is_activated("CONTEXT_INTEGER")), + ("R14", "u64", is_activated("CONTEXT_INTEGER")), + ("R15", "u64", is_activated("CONTEXT_INTEGER")), + ("Rip", "u64", is_activated("CONTEXT_CONTROL")), + + # Floating point + ("Header", "M128A", lambda ctx: 2), + ("Legacy", "M128A", lambda ctx: 8), + ("Xmm0", "M128A"), + ("Xmm1", "M128A"), + ("Xmm2", "M128A"), + ("Xmm3", "M128A"), + ("Xmm4", "M128A"), + ("Xmm5", "M128A"), + ("Xmm6", "M128A"), + ("Xmm7", "M128A"), + ("Xmm8", "M128A"), + ("Xmm9", "M128A"), + ("Xmm10", "M128A"), + ("Xmm11", "M128A"), + ("Xmm12", "M128A"), + ("Xmm13", "M128A"), + ("Xmm14", "M128A"), + ("Xmm15", "M128A"), + + + # Vector registers + ("VectorRegister", "M128A", lambda ctx: 16), + ("VectorControl", "u64"), + + # 
Special debug control regs + ("DebugControl", "u64"), + ("LastBranchToRip", "u64"), + ("LastBranchFromRip", "u64"), + ("LastExceptionToRip", "u64"), + ("LastExceptionFromRip", "u64"), + ] + +processorArchitecture = Enumeration({ + "PROCESSOR_ARCHITECTURE_X86" : 0, + "PROCESSOR_ARCHITECTURE_MIPS" : 1, + "PROCESSOR_ARCHITECTURE_ALPHA" : 2, + "PROCESSOR_ARCHITECTURE_PPC" : 3, + "PROCESSOR_ARCHITECTURE_SHX" : 4, + "PROCESSOR_ARCHITECTURE_ARM" : 5, + "PROCESSOR_ARCHITECTURE_IA64" : 6, + "PROCESSOR_ARCHITECTURE_ALPHA64" : 7, + "PROCESSOR_ARCHITECTURE_MSIL" : 8, + "PROCESSOR_ARCHITECTURE_AMD64" : 9, + "PROCESSOR_ARCHITECTURE_X86_WIN64" : 10, + "PROCESSOR_ARCHITECTURE_UNKNOWN" : 0xffff, +}) + +class Thread(CStruct): + """MINIDUMP_THREAD + https://msdn.microsoft.com/en-us/library/ms680517(v=vs.85).aspx + """ + + arch2context_cls = { + processorArchitecture.PROCESSOR_ARCHITECTURE_X86: Context_x86, + processorArchitecture.PROCESSOR_ARCHITECTURE_AMD64: Context_AMD64, + } + + def parse_context(self, content, offset): + loc_desc = LocationDescriptor.unpack(content, offset, self.parent_head) + + # Use the correct context depending on architecture + systeminfo = self.parent_head.systeminfo + context_cls = self.arch2context_cls.get(systeminfo.ProcessorArchitecture, + None) + if context_cls is None: + raise ValueError("Unsupported architecture: %s" % systeminfo.pretty_processor_architecture) + + ctxt = context_cls.unpack(content, loc_desc.Rva.rva, self.parent_head) + fake_loc_descriptor = LocationDescriptor(DataSize=0, Rva=Rva(rva=0)) + return ctxt, offset + len(fake_loc_descriptor) + + _fields = [("ThreadId", "u32"), + ("SuspendCount", "u32"), + ("PriorityClass", "u32"), + ("Priority", "u32"), + ("Teb", "u64"), + ("Stack", "MemoryDescriptor"), + ("ThreadContext", (parse_context, + lambda thread, value: NotImplemented)), + ] + +class ThreadList(CStruct): + """MINIDUMP_THREAD_LIST + https://msdn.microsoft.com/en-us/library/ms680515(v=vs.85).aspx + """ + _fields = [("NumberOfThreads", 
"u32"), + ("Threads", "Thread", + lambda mlist: mlist.NumberOfThreads), + ] + + +class SystemInfo(CStruct): + """MINIDUMP_SYSTEM_INFO + https://msdn.microsoft.com/en-us/library/ms680396(v=vs.85).aspx + """ + _fields = [("ProcessorArchitecture", "u16"), + ("ProcessorLevel", "u16"), + ("ProcessorRevision", "u16"), + ("NumberOfProcessors", "u08"), + ("ProductType", "u08"), + ("MajorVersion", "u32"), + ("MinorVersion", "u32"), + ("BuildNumber", "u32"), + ("PlatformId", "u32"), + ("CSDVersionRva", "Rva"), + ("SuiteMask", "u16"), + ("Reserved2", "u16"), + ("VendorId", "u32", lambda sinfo: 3), + ("VersionInformation", "u32"), + ("FeatureInformation", "u32"), + ("AMDExtendedCpuFeatures", "u32"), + ] + + @property + def pretty_processor_architecture(self): + return processorArchitecture[self.ProcessorArchitecture] + diff --git a/miasm/elfesteem/minidump_init.py b/miasm/elfesteem/minidump_init.py new file mode 100644 index 00000000..0a9022b9 --- /dev/null +++ b/miasm/elfesteem/minidump_init.py @@ -0,0 +1,194 @@ +""" +High-level abstraction of Minidump file +""" +from builtins import range +import struct + +from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem import minidump as mp + + +class MemorySegment(object): + """Stand for a segment in memory with additional information""" + + def __init__(self, offset, memory_desc, module=None, memory_info=None): + self.offset = offset + self.memory_desc = memory_desc + self.module = module + self.memory_info = memory_info + self.minidump = self.memory_desc.parent_head + + @property + def address(self): + return self.memory_desc.StartOfMemoryRange + + @property + def size(self): + if isinstance(self.memory_desc, mp.MemoryDescriptor64): + return self.memory_desc.DataSize + elif isinstance(self.memory_desc, mp.MemoryDescriptor): + return self.memory_desc.Memory.DataSize + raise TypeError + + @property + def name(self): + if not self.module: + return "" + name = mp.MinidumpString.unpack(self.minidump._content, + 
self.module.ModuleNameRva.rva, + self.minidump) + return b"".join( + struct.pack("B", x) for x in name.Buffer + ).decode("utf-16") + + @property + def content(self): + return self.minidump._content[self.offset:self.offset + self.size] + + @property + def protect(self): + if self.memory_info: + return self.memory_info.Protect + return None + + @property + def pretty_protect(self): + if self.protect is None: + return "UNKNOWN" + return mp.memProtect[self.protect] + + +class Minidump(object): + """Stand for a Minidump file + + Here is a few limitation: + - only < 4GB Minidump are supported (LocationDescriptor handling) + - only Stream relative to memory mapping are implemented + + Official description is available on MSDN: + https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx + """ + + _sex = 0 + _wsize = 32 + + def __init__(self, minidump_str): + self._content = StrPatchwork(minidump_str) + + # Specific streams + self.modulelist = None + self.memory64list = None + self.memorylist = None + self.memoryinfolist = None + self.systeminfo = None + + # Get information + self.streams = [] + self.threads = None + self.parse_content() + + # Memory information + self.memory = {} # base address (virtual) -> Memory information + self.build_memory() + + def parse_content(self): + """Build structures corresponding to current content""" + + # Header + offset = 0 + self.minidumpHDR = mp.MinidumpHDR.unpack(self._content, offset, self) + assert self.minidumpHDR.Magic == 0x504d444d + + # Streams + base_offset = self.minidumpHDR.StreamDirectoryRva.rva + empty_stream = mp.StreamDirectory( + StreamType=0, + Location=mp.LocationDescriptor( + DataSize=0, + Rva=mp.Rva(rva=0) + ) + ) + streamdir_size = len(empty_stream) + for i in range(self.minidumpHDR.NumberOfStreams): + stream_offset = base_offset + i * streamdir_size + stream = mp.StreamDirectory.unpack(self._content, stream_offset, self) + self.streams.append(stream) + + # Launch specific action depending on the stream + 
datasize = stream.Location.DataSize + offset = stream.Location.Rva.rva + if stream.StreamType == mp.streamType.ModuleListStream: + self.modulelist = mp.ModuleList.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.MemoryListStream: + self.memorylist = mp.MemoryList.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.Memory64ListStream: + self.memory64list = mp.Memory64List.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.MemoryInfoListStream: + self.memoryinfolist = mp.MemoryInfoList.unpack(self._content, offset, self) + elif stream.StreamType == mp.streamType.SystemInfoStream: + self.systeminfo = mp.SystemInfo.unpack(self._content, offset, self) + + # Some streams need the SystemInfo stream to work + for stream in self.streams: + datasize = stream.Location.DataSize + offset = stream.Location.Rva.rva + if (self.systeminfo is not None and + stream.StreamType == mp.streamType.ThreadListStream): + self.threads = mp.ThreadList.unpack(self._content, offset, self) + + + def build_memory(self): + """Build an easier to use memory view based on ModuleList and + Memory64List streams""" + + addr2module = dict((module.BaseOfImage, module) + for module in (self.modulelist.Modules if + self.modulelist else [])) + addr2meminfo = dict((memory.BaseAddress, memory) + for memory in (self.memoryinfolist.MemoryInfos if + self.memoryinfolist else [])) + + mode64 = self.minidumpHDR.Flags & mp.minidumpType.MiniDumpWithFullMemory + + if mode64: + offset = self.memory64list.BaseRva + memranges = self.memory64list.MemoryRanges + else: + memranges = self.memorylist.MemoryRanges + + for memory in memranges: + if not mode64: + offset = memory.Memory.Rva.rva + + # Create a MemorySegment with augmented information + base_address = memory.StartOfMemoryRange + module = addr2module.get(base_address, None) + meminfo = addr2meminfo.get(base_address, None) + self.memory[base_address] = MemorySegment(offset, memory, + 
module, meminfo) + + if mode64: + offset += memory.DataSize + + # Sanity check + if mode64: + assert all(addr in self.memory for addr in addr2module) + + def get(self, virt_start, virt_stop): + """Return the content at the (virtual addresses) + [virt_start:virt_stop]""" + + # Find the corresponding memory segment + for addr in self.memory: + if virt_start <= addr <= virt_stop: + break + else: + return b"" + + memory = self.memory[addr] + shift = addr - virt_start + last = virt_stop - addr + if last > memory.size: + raise RuntimeError("Multi-page not implemented") + + return self._content[memory.offset + shift:memory.offset + last] diff --git a/miasm/elfesteem/new_cstruct.py b/miasm/elfesteem/new_cstruct.py new file mode 100644 index 00000000..ec591aa8 --- /dev/null +++ b/miasm/elfesteem/new_cstruct.py @@ -0,0 +1,265 @@ +#! /usr/bin/env python + +from __future__ import print_function +import re +import struct + +from future.utils import PY3, viewitems, with_metaclass + +type2realtype = {} +size2type = {} +size2type_s = {} + +for t in 'B', 'H', 'I', 'Q': + s = struct.calcsize(t) + type2realtype[t] = s * 8 + size2type[s * 8] = t + +for t in 'b', 'h', 'i', 'q': + s = struct.calcsize(t) + type2realtype[t] = s * 8 + size2type_s[s * 8] = t + +type2realtype['u08'] = size2type[8] +type2realtype['u16'] = size2type[16] +type2realtype['u32'] = size2type[32] +type2realtype['u64'] = size2type[64] + +type2realtype['s08'] = size2type_s[8] +type2realtype['s16'] = size2type_s[16] +type2realtype['s32'] = size2type_s[32] +type2realtype['s64'] = size2type_s[64] + +type2realtype['d'] = 'd' +type2realtype['f'] = 'f' +type2realtype['q'] = 'q' +type2realtype['ptr'] = 'ptr' + +sex_types = {0: '<', 1: '>'} + + +def fix_size(fields, wsize): + out = [] + for name, v in fields: + if v.endswith("s"): + pass + elif v == "ptr": + v = size2type[wsize] + elif not v in type2realtype: + raise ValueError("unknown Cstruct type", v) + else: + v = type2realtype[v] + out.append((name, v)) + fields = out + 
return fields + + +def real_fmt(fmt, wsize): + if fmt == "ptr": + v = size2type[wsize] + elif fmt in type2realtype: + v = type2realtype[fmt] + else: + v = fmt + return v + +all_cstructs = {} + + +class Cstruct_Metaclass(type): + field_suffix = "_value" + + def __new__(cls, name, bases, dct): + for fields in dct['_fields']: + fname = fields[0] + if fname in ['parent', 'parent_head']: + raise ValueError('field name will confuse internal structs', + repr(fname)) + dct[fname] = property(dct.pop("get_" + fname, + lambda self, fname=fname: getattr( + self, fname + self.__class__.field_suffix)), + dct.pop("set_" + fname, + lambda self, v, fname=fname: setattr( + self, fname + self.__class__.field_suffix, v)), + dct.pop("del_" + fname, None)) + + o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct) + if name != "CStruct": + all_cstructs[name] = o + return o + + def unpack_l(cls, s, off=0, parent_head=None, _sex=None, _wsize=None): + if _sex is None and _wsize is None: + # get sex and size from parent + if parent_head is not None: + _sex = parent_head._sex + _wsize = parent_head._wsize + else: + _sex = 0 + _wsize = 32 + c = cls(_sex=_sex, _wsize=_wsize) + if parent_head is None: + parent_head = c + c.parent_head = parent_head + + of1 = off + for field in c._fields: + cpt = None + if len(field) == 2: + fname, ffmt = field + elif len(field) == 3: + fname, ffmt, cpt = field + if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): + # basic types + if cpt: + value = [] + i = 0 + while i < cpt(c): + fmt = real_fmt(ffmt, _wsize) + of2 = of1 + struct.calcsize(fmt) + value.append(struct.unpack(c.sex + fmt, s[of1:of2])[0]) + of1 = of2 + i += 1 + else: + fmt = real_fmt(ffmt, _wsize) + of2 = of1 + struct.calcsize(fmt) + if not (0 <= of1 < len(s) and 0 <= of2 < len(s)): + raise RuntimeError("not enough data") + value = struct.unpack(c.sex + fmt, s[of1:of2])[0] + elif ffmt == "sz": # null terminated special case + of2 = s.find(b'\x00', of1) + if of2 
== -1: + raise ValueError('no null char in string!') + of2 += 1 + value = s[of1:of2 - 1] + elif ffmt in all_cstructs: + of2 = of1 + # sub structures + if cpt: + value = [] + i = 0 + while i < cpt(c): + v, l = all_cstructs[ffmt].unpack_l( + s, of1, parent_head, _sex, _wsize) + v.parent = c + value.append(v) + of2 = of1 + l + of1 = of2 + i += 1 + else: + value, l = all_cstructs[ffmt].unpack_l( + s, of1, parent_head, _sex, _wsize) + value.parent = c + of2 = of1 + l + elif isinstance(ffmt, tuple): + f_get, f_set = ffmt + value, of2 = f_get(c, s, of1) + else: + raise ValueError('unknown class', ffmt) + of1 = of2 + setattr(c, fname + c.__class__.field_suffix, value) + + return c, of2 - off + + def unpack(cls, s, off=0, parent_head=None, _sex=None, _wsize=None): + c, l = cls.unpack_l(s, off=off, + parent_head=parent_head, _sex=_sex, _wsize=_wsize) + return c + + +class CStruct(with_metaclass(Cstruct_Metaclass, object)): + _packformat = "" + _fields = [] + + def __init__(self, parent_head=None, _sex=None, _wsize=None, **kargs): + self.parent_head = parent_head + self._size = None + kargs = dict(kargs) + # if not sex or size: get the one of the parent + if _sex == None and _wsize == None: + if parent_head: + _sex = parent_head._sex + _wsize = parent_head._wsize + else: + # else default sex & size + _sex = 0 + _wsize = 32 + # _sex is 0 or 1, sex is '<' or '>' + self._sex = _sex + self._wsize = _wsize + if self._packformat: + self.sex = self._packformat + else: + self.sex = sex_types[_sex] + for f in self._fields: + setattr(self, f[0] + self.__class__.field_suffix, None) + if kargs: + for k, v in viewitems(kargs): + self.__dict__[k + self.__class__.field_suffix] = v + + def pack(self): + out = b'' + for field in self._fields: + cpt = None + if len(field) == 2: + fname, ffmt = field + elif len(field) == 3: + fname, ffmt, cpt = field + + value = getattr(self, fname + self.__class__.field_suffix) + if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): 
+ # basic types + fmt = real_fmt(ffmt, self._wsize) + if cpt == None: + if value == None: + o = struct.calcsize(fmt) * b"\x00" + else: + if isinstance(value, str): + value = value.encode() + o = struct.pack(self.sex + fmt, value) + else: + o = b"" + for v in value: + if value == None: + o += struct.calcsize(fmt) * b"\x00" + else: + o += struct.pack(self.sex + fmt, v) + + elif ffmt == "sz": # null terminated special case + o = value + b'\x00' + elif ffmt in all_cstructs: + # sub structures + if cpt == None: + o = bytes(value) + else: + o = b"" + for v in value: + o += bytes(v) + elif isinstance(ffmt, tuple): + f_get, f_set = ffmt + o = f_set(self, value) + + else: + raise ValueError('unknown class', ffmt) + out += o + + return out + + def __bytes__(self): + return self.pack() + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __len__(self): + return len(self.pack()) + + def __repr__(self): + return "<%s=%s>" % (self.__class__.__name__, "/".join( + repr(getattr(self, x[0])) for x in self._fields) + ) + + def __getitem__(self, item): # to work with format strings + return getattr(self, item) diff --git a/miasm/elfesteem/pe.py b/miasm/elfesteem/pe.py new file mode 100644 index 00000000..56bffbaa --- /dev/null +++ b/miasm/elfesteem/pe.py @@ -0,0 +1,1668 @@ +#! 
/usr/bin/env python + +from __future__ import print_function +from builtins import range, str +from collections import defaultdict +import logging +import struct + +from future.builtins import int as int_types +from future.utils import PY3 + +from miasm.core.utils import force_bytes +from miasm.elfesteem.new_cstruct import CStruct +from miasm.elfesteem.strpatchwork import StrPatchwork + +log = logging.getLogger("pepy") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +class InvalidOffset(Exception): + pass + + +class Doshdr(CStruct): + _fields = [("magic", "u16"), + ("cblp", "u16"), + ("cp", "u16"), + ("crlc", "u16"), + ("cparhdr", "u16"), + ("minalloc", "u16"), + ("maxalloc", "u16"), + ("ss", "u16"), + ("sp", "u16"), + ("csum", "u16"), + ("ip", "u16"), + ("cs", "u16"), + ("lfarlc", "u16"), + ("ovno", "u16"), + ("res", "8s"), + ("oemid", "u16"), + ("oeminfo", "u16"), + ("res2", "20s"), + ("lfanew", "u32")] + + +class NTsig(CStruct): + _fields = [("signature", "u32"), + ] + + +class Coffhdr(CStruct): + _fields = [("machine", "u16"), + ("numberofsections", "u16"), + ("timedatestamp", "u32"), + ("pointertosymboltable", "u32"), + ("numberofsymbols", "u32"), + ("sizeofoptionalheader", "u16"), + ("characteristics", "u16")] + + +class Optehdr(CStruct): + _fields = [("rva", "u32"), + ("size", "u32")] + + +def get_optehdr_num(nthdr): + numberofrva = nthdr.numberofrvaandsizes + parent = nthdr.parent_head + entry_size = 8 + if parent.Coffhdr.sizeofoptionalheader < numberofrva * entry_size + len(parent.Opthdr): + numberofrva = (parent.Coffhdr.sizeofoptionalheader - len(parent.Opthdr)) // entry_size + log.warn('Bad number of rva.. 
using default %d' % numberofrva) + numberofrva = 0x10 + return numberofrva + + +class Opthdr32(CStruct): + _fields = [("magic", "u16"), + ("majorlinkerversion", "u08"), + ("minorlinkerversion", "u08"), + ("SizeOfCode", "u32"), + ("sizeofinitializeddata", "u32"), + ("sizeofuninitializeddata", "u32"), + ("AddressOfEntryPoint", "u32"), + ("BaseOfCode", "u32"), + ("BaseOfData", "u32"), + ] + + +class Opthdr64(CStruct): + _fields = [("magic", "u16"), + ("majorlinkerversion", "u08"), + ("minorlinkerversion", "u08"), + ("SizeOfCode", "u32"), + ("sizeofinitializeddata", "u32"), + ("sizeofuninitializeddata", "u32"), + ("AddressOfEntryPoint", "u32"), + ("BaseOfCode", "u32"), + ] + + +class NThdr(CStruct): + _fields = [("ImageBase", "ptr"), + ("sectionalignment", "u32"), + ("filealignment", "u32"), + ("majoroperatingsystemversion", "u16"), + ("minoroperatingsystemversion", "u16"), + ("MajorImageVersion", "u16"), + ("MinorImageVersion", "u16"), + ("majorsubsystemversion", "u16"), + ("minorsubsystemversion", "u16"), + ("Reserved1", "u32"), + ("sizeofimage", "u32"), + ("sizeofheaders", "u32"), + ("CheckSum", "u32"), + ("subsystem", "u16"), + ("dllcharacteristics", "u16"), + ("sizeofstackreserve", "ptr"), + ("sizeofstackcommit", "ptr"), + ("sizeofheapreserve", "ptr"), + ("sizeofheapcommit", "ptr"), + ("loaderflags", "u32"), + ("numberofrvaandsizes", "u32"), + ("optentries", "Optehdr", lambda c:get_optehdr_num(c)) + ] + + +class Shdr(CStruct): + _fields = [("name", "8s"), + ("size", "u32"), + ("addr", "u32"), + ("rawsize", "u32"), + ("offset", "u32"), + ("pointertorelocations", "u32"), + ("pointertolinenumbers", "u32"), + ("numberofrelocations", "u16"), + ("numberoflinenumbers", "u16"), + ("flags", "u32")] + + + def get_data(self): + parent = self.parent_head + data = parent.img_rva[self.addr:self.addr + self.size] + return data + + def set_data(self, data): + parent = self.parent_head + parent.img_rva[self.addr] = data + + + data = property(get_data, set_data) + +class 
SHList(CStruct): + _fields = [ + ("shlist", "Shdr", lambda c:c.parent_head.Coffhdr.numberofsections)] + + def add_section(self, name="default", data=b"", **args): + s_align = self.parent_head.NThdr.sectionalignment + s_align = max(0x1000, s_align) + + f_align = self.parent_head.NThdr.filealignment + f_align = max(0x200, f_align) + size = len(data) + rawsize = len(data) + if len(self): + addr = self[-1].addr + self[-1].size + s_last = self[0] + for section in self: + if s_last.offset + s_last.rawsize < section.offset + section.rawsize: + s_last = section + offset = s_last.offset + s_last.rawsize + else: + s_null = bytes(Shdr.unpack(b"\x00" * 0x100)) + offset = self.parent_head.Doshdr.lfanew + len(self.parent_head.NTsig) + len( + self.parent_head.Coffhdr) + self.parent_head.Coffhdr.sizeofoptionalheader + len(bytes(self.parent_head.SHList) + s_null) + addr = 0x2000 + # round addr + addr = (addr + (s_align - 1)) & ~(s_align - 1) + offset = (offset + (f_align - 1)) & ~(f_align - 1) + + attrs = {"name": name, "size": size, + "addr": addr, "rawsize": rawsize, + "offset": offset, + "pointertorelocations": 0, + "pointertolinenumbers": 0, + "numberofrelocations": 0, + "numberoflinenumbers": 0, + "flags": 0xE0000020, + "data": data + } + attrs.update(args) + section = Shdr(self.parent_head, _sex=self.parent_head._sex, + _wsize=self.parent_head._wsize, **attrs) + section.data = data + + if section.rawsize > len(data): + section.data = section.data + b'\x00' * (section.rawsize - len(data)) + section.size = section.rawsize + section.data = bytes(StrPatchwork(section.data)) + section.size = max(s_align, section.size) + + self.append(section) + self.parent_head.Coffhdr.numberofsections = len(self) + + length = (section.addr + section.size + (s_align - 1)) & ~(s_align - 1) + self.parent_head.NThdr.sizeofimage = length + return section + + def align_sections(self, f_align=None, s_align=None): + if f_align == None: + f_align = self.parent_head.NThdr.filealignment + f_align = 
max(0x200, f_align) + if s_align == None: + s_align = self.parent_head.NThdr.sectionalignment + s_align = max(0x1000, s_align) + + if self is None: + return + + addr = self[0].offset + for section in self: + raw_off = f_align * ((addr + f_align - 1) // f_align) + section.offset = raw_off + section.rawsize = len(section.data) + addr = raw_off + section.rawsize + + def __repr__(self): + rep = ["# section offset size addr flags rawsize "] + for i, section in enumerate(self): + name = force_bytes(section.name) + out = "%-15s" % name.strip(b'\x00').decode() + out += "%(offset)08x %(size)06x %(addr)08x %(flags)08x %(rawsize)08x" % section + out = ("%2i " % i) + out + rep.append(out) + return "\n".join(rep) + + def __getitem__(self, item): + return self.shlist[item] + + def __len__(self): + return len(self.shlist) + + def append(self, section): + self.shlist.append(section) + + +class Rva(CStruct): + _fields = [("rva", "ptr"), + ] + + +class Rva32(CStruct): + _fields = [("rva", "u32"), + ] + + +class DescName(CStruct): + _fields = [("name", (lambda c, raw, off: c.gets(raw, off), + lambda c, value: c.sets(value))) + ] + + def gets(self, raw, off): + name = raw[off:raw.find(b'\x00', off)] + return name, off + len(name) + 1 + + def sets(self, value): + return bytes(value) + b"\x00" + + +class ImportByName(CStruct): + _fields = [("hint", "u16"), + ("name", "sz") + ] + + +class ImpDesc_e(CStruct): + _fields = [("originalfirstthunk", "u32"), + ("timestamp", "u32"), + ("forwarderchain", "u32"), + ("name", "u32"), + ("firstthunk", "u32") + ] + + +class struct_array(object): + + def __init__(self, target_class, raw, off, cstr, num=None): + self.l = [] + self.cls = target_class + self.end = None + i = 0 + if not raw: + return + + while (num == None) or (num and i < num): + entry, length = cstr.unpack_l(raw, off, + target_class.parent_head, + target_class.parent_head._sex, + target_class.parent_head._wsize) + if num == None: + if raw[off:off + length] == b'\x00' * length: + self.end 
= b'\x00' * length + break + self.l.append(entry) + off += length + i += 1 + + def __bytes__(self): + out = b"".join(bytes(x) for x in self.l) + if self.end is not None: + out += self.end + return out + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __getitem__(self, item): + return self.l.__getitem__(item) + + def __len__(self): + return len(self.l) + + def append(self, entry): + self.l.append(entry) + + def insert(self, index, entry): + self.l.insert(index, entry) + + +class DirImport(CStruct): + _fields = [("impdesc", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + def gete(self, raw, off): + if not off: + return None, off + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 + + ofend = off + \ + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT].size + out = [] + while off < ofend: + if not 0 <= off < len(self.parent_head.img_rva): + break + imp, length = ImpDesc_e.unpack_l(raw, off) + if (raw[off:off+length] == b'\x00' * length or + imp.name == 0): + # Special case + break + if not (imp.originalfirstthunk or imp.firstthunk): + log.warning("no thunk!!") + break + + out.append(imp) + off += length + imp.dlldescname = DescName.unpack(raw, imp.name, self.parent_head) + if imp.originalfirstthunk and imp.originalfirstthunk < len(self.parent_head.img_rva): + imp.originalfirstthunks = struct_array(self, raw, + imp.originalfirstthunk, + Rva) + else: + imp.originalfirstthunks = None + + if imp.firstthunk and imp.firstthunk < len(self.parent_head.img_rva): + imp.firstthunks = struct_array(self, raw, + imp.firstthunk, + Rva) + else: + imp.firstthunks = None + imp.impbynames = [] + if imp.originalfirstthunk and imp.originalfirstthunk < len(self.parent_head.img_rva): + tmp_thunk = imp.originalfirstthunks + elif imp.firstthunk: + tmp_thunk = imp.firstthunks + for i in range(len(tmp_thunk)): + if tmp_thunk[i].rva & mask_ptr == 
0: + try: + entry = ImportByName.unpack(raw, + tmp_thunk[i].rva, + self.parent_head) + except: + log.warning( + 'cannot import from add %s' % tmp_thunk[i].rva + ) + entry = 0 + imp.impbynames.append(entry) + else: + imp.impbynames.append(tmp_thunk[i].rva & (mask_ptr - 1)) + return out, off + + def sete(self, entries): + return b"".join(bytes(entry) for entry in entries) + b"\x00" * (4 * 5) + + def __len__(self): + length = (len(self.impdesc) + 1) * (5 * 4) # ImpDesc_e size + rva_size = self.parent_head._wsize // 8 + for entry in self.impdesc: + length += len(entry.dlldescname) + if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): + length += (len(entry.originalfirstthunks) + 1) * rva_size + if entry.firstthunk: + length += (len(entry.firstthunks) + 1) * rva_size + for imp in entry.impbynames: + if isinstance(imp, ImportByName): + length += len(imp) + return length + + def set_rva(self, rva, size=None): + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT].rva = rva + rva_size = self.parent_head._wsize // 8 + if not size: + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_IMPORT].size = len(self) + else: + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_IMPORT].size = size + rva += (len(self.impdesc) + 1) * 5 * 4 # ImpDesc size + for entry in self.impdesc: + entry.name = rva + rva += len(entry.dlldescname) + if entry.originalfirstthunk: # and self.parent_head.rva2off(entry.originalfirstthunk): + entry.originalfirstthunk = rva + rva += (len(entry.originalfirstthunks) + 1) * rva_size + # XXX rva fthunk not patched => keep original func addr + # if entry.firstthunk: + # entry.firstthunk = rva + # rva+=(len(entry.firstthunks)+1)*self.parent_head._wsize//8 # Rva size + if entry.originalfirstthunk and entry.firstthunk: + if isinstance(entry.originalfirstthunk, struct_array): + tmp_thunk = entry.originalfirstthunks + elif isinstance(entry.firstthunks, struct_array): + tmp_thunk = entry.firstthunks + else: + raise 
RuntimeError("No thunk!") + elif entry.originalfirstthunk: # and self.parent_head.rva2off(entry.originalfirstthunk): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + raise RuntimeError("No thunk!") + + if tmp_thunk == entry.originalfirstthunks: + entry.firstthunks = tmp_thunk + else: + entry.originalfirstthunks = tmp_thunk + for i, imp in enumerate(entry.impbynames): + if isinstance(imp, ImportByName): + tmp_thunk[i].rva = rva + rva += len(imp) + + def build_content(self, raw): + dirimp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT] + of1 = dirimp.rva + if not of1: # No Import + return + raw[self.parent_head.rva2off(of1)] = bytes(self) + for entry in self.impdesc: + raw[self.parent_head.rva2off(entry.name)] = bytes(entry.dlldescname) + if (entry.originalfirstthunk and + self.parent_head.rva2off(entry.originalfirstthunk)): + # Add thunks list and terminating null entry + off = self.parent_head.rva2off(entry.originalfirstthunk) + raw[off] = bytes(entry.originalfirstthunks) + if entry.firstthunk: + # Add thunks list and terminating null entry + off = self.parent_head.rva2off(entry.firstthunk) + raw[off] = bytes(entry.firstthunks) + if (entry.originalfirstthunk and + self.parent_head.rva2off(entry.originalfirstthunk)): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + raise RuntimeError("No thunk!") + for j, imp in enumerate(entry.impbynames): + if isinstance(imp, ImportByName): + raw[self.parent_head.rva2off(tmp_thunk[j].rva)] = bytes(imp) + + def get_dlldesc(self): + out = [] + for impdesc in self.impdesc: + dllname = impdesc.dlldescname.name + funcs = [] + for imp in impdesc.impbynames: + if isinstance(imp, ImportByName): + funcs.append(imp.name) + else: + funcs.append(imp) + entry = ({"name": dllname, "firstthunk": impdesc.firstthunk}, funcs) + out.append(entry) + return out + + def __repr__(self): + rep = ["<%s>" % 
self.__class__.__name__] + for i, entry in enumerate(self.impdesc): + out = "%2d %-25s %s" % (i, repr(entry.dlldescname), repr(entry)) + rep.append(out) + for index, imp in enumerate(entry.impbynames): + out = " %2d %-16s" % (index, repr(imp)) + rep.append(out) + return "\n".join(rep) + + def add_dlldesc(self, new_dll): + rva_size = self.parent_head._wsize // 8 + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 + new_impdesc = [] + of1 = None + for import_descriptor, new_functions in new_dll: + if isinstance(import_descriptor.get("name"), str): + import_descriptor["name"] = import_descriptor["name"].encode() + new_functions = [ + funcname.encode() if isinstance(funcname, str) else funcname + for funcname in new_functions + ] + for attr in ["timestamp", "forwarderchain", "originalfirstthunk"]: + if attr not in import_descriptor: + import_descriptor[attr] = 0 + entry = ImpDesc_e(self.parent_head, **import_descriptor) + if entry.firstthunk != None: + of1 = entry.firstthunk + elif of1 == None: + raise RuntimeError("set fthunk") + else: + entry.firstthunk = of1 + entry.dlldescname = DescName(self.parent_head, name=entry.name) + entry.originalfirstthunk = 0 + entry.originalfirstthunks = struct_array(self, None, + None, + Rva) + entry.firstthunks = struct_array(self, None, + None, + Rva) + + impbynames = [] + for new_function in new_functions: + rva_ofirstt = Rva(self.parent_head) + if isinstance(new_function, int_types): + rva_ofirstt.rva = mask_ptr + new_function + ibn = new_function + elif isinstance(new_function, bytes): + rva_ofirstt.rva = True + ibn = ImportByName(self.parent_head) + ibn.name = new_function + ibn.hint = 0 + else: + raise RuntimeError('unknown func type %s' % new_function) + impbynames.append(ibn) + entry.originalfirstthunks.append(rva_ofirstt) + rva_func = Rva(self.parent_head) + if isinstance(ibn, ImportByName): + rva_func.rva = 0xDEADBEEF # default func addr + else: + # 
ord ?XXX? + rva_func.rva = rva_ofirstt.rva + entry.firstthunks.append(rva_func) + of1 += rva_size + # for null thunk + of1 += rva_size + entry.impbynames = impbynames + new_impdesc.append(entry) + if self.impdesc is None: + self.impdesc = struct_array(self, None, + None, + ImpDesc_e) + self.impdesc.l = new_impdesc + else: + for entry in new_impdesc: + self.impdesc.append(entry) + + def get_funcrva(self, dllname, funcname): + dllname = force_bytes(dllname) + funcname = force_bytes(funcname) + + rva_size = self.parent_head._wsize // 8 + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 - 1 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 - 1 + + for entry in self.impdesc: + if entry.dlldescname.name.lower() != dllname.lower(): + continue + if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + raise RuntimeError("No thunk!") + if isinstance(funcname, bytes): + for j, imp in enumerate(entry.impbynames): + if isinstance(imp, ImportByName): + if funcname == imp.name: + return entry.firstthunk + j * rva_size + elif isinstance(funcname, int_types): + for j, imp in enumerate(entry.impbynames): + if not isinstance(imp, ImportByName): + if tmp_thunk[j].rva & mask_ptr == funcname: + return entry.firstthunk + j * rva_size + else: + raise ValueError('Unknown: %s %s' % (dllname, funcname)) + + def get_funcvirt(self, dllname, funcname): + rva = self.get_funcrva(dllname, funcname) + if rva == None: + return + return self.parent_head.rva2virt(rva) + + +class ExpDesc_e(CStruct): + _fields = [("characteristics", "u32"), + ("timestamp", "u32"), + ("majorv", "u16"), + ("minorv", "u16"), + ("name", "u32"), + ("base", "u32"), + ("numberoffunctions", "u32"), + ("numberofnames", "u32"), + ("addressoffunctions", "u32"), + ("addressofnames", "u32"), + ("addressofordinals", "u32"), + ] + + +class DirExport(CStruct): + _fields = 
[("expdesc", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + def gete(self, raw, off): + off_o = off + if not off: + return None, off + off_sav = off + if off >= len(raw): + log.warn("export dir malformed!") + return None, off_o + expdesc = ExpDesc_e.unpack(raw, + off, + self.parent_head) + if self.parent_head.rva2off(expdesc.addressoffunctions) == None or \ + self.parent_head.rva2off(expdesc.addressofnames) == None or \ + self.parent_head.rva2off(expdesc.addressofordinals) == None: + log.warn("export dir malformed!") + return None, off_o + self.dlldescname = DescName.unpack(raw, expdesc.name, self.parent_head) + try: + self.f_address = struct_array(self, raw, + expdesc.addressoffunctions, + Rva32, expdesc.numberoffunctions) + self.f_names = struct_array(self, raw, + expdesc.addressofnames, + Rva32, expdesc.numberofnames) + self.f_nameordinals = struct_array(self, raw, + expdesc.addressofordinals, + Ordinal, expdesc.numberofnames) + except RuntimeError: + log.warn("export dir malformed!") + return None, off_o + for func in self.f_names: + func.name = DescName.unpack(raw, func.rva, self.parent_head) + return expdesc, off_sav + + def sete(self, _): + return bytes(self.expdesc) + + def build_content(self, raw): + direxp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT] + of1 = direxp.rva + if self.expdesc is None: # No Export + return + raw[self.parent_head.rva2off(of1)] = bytes(self.expdesc) + raw[self.parent_head.rva2off(self.expdesc.name)] = bytes(self.dlldescname) + raw[self.parent_head.rva2off(self.expdesc.addressoffunctions)] = bytes(self.f_address) + if self.expdesc.addressofnames != 0: + raw[self.parent_head.rva2off(self.expdesc.addressofnames)] = bytes(self.f_names) + if self.expdesc.addressofordinals != 0: + raw[self.parent_head.rva2off(self.expdesc.addressofordinals)] = bytes(self.f_nameordinals) + for func in self.f_names: + raw[self.parent_head.rva2off(func.rva)] = bytes(func.name) + + # XXX BUG names must be 
alphanumeric ordered + names = [func.name for func in self.f_names] + names_ = names[:] + if names != names_: + log.warn("unsorted export names, may bug") + + def set_rva(self, rva, size=None): + rva_size = self.parent_head._wsize // 8 + if self.expdesc is None: + return + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT].rva = rva + if not size: + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_EXPORT].size = len(self) + else: + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_EXPORT].size = size + rva += len(self.expdesc) + self.expdesc.name = rva + rva += len(self.dlldescname) + self.expdesc.addressoffunctions = rva + rva += len(self.f_address) * rva_size + self.expdesc.addressofnames = rva + rva += len(self.f_names) * rva_size + self.expdesc.addressofordinals = rva + rva += len(self.f_nameordinals) * 2 # Ordinal size + for func in self.f_names: + func.rva = rva + rva += len(func.name) + + def __len__(self): + rva_size = self.parent_head._wsize // 8 + length = 0 + if self.expdesc is None: + return length + length += len(self.expdesc) + length += len(self.dlldescname) + length += len(self.f_address) * rva_size + length += len(self.f_names) * rva_size + length += len(self.f_nameordinals) * 2 # Ordinal size + for entry in self.f_names: + length += len(entry.name) + return length + + def __repr__(self): + rep = ["<%s>" % self.__class__.__name__] + if self.expdesc is None: + return "\n".join(rep) + + rep = ["<%s %d (%s) %s>" % (self.__class__.__name__, + self.expdesc.numberoffunctions, self.dlldescname, repr(self.expdesc))] + tmp_names = [[] for _ in range(self.expdesc.numberoffunctions)] + for i, entry in enumerate(self.f_names): + tmp_names[self.f_nameordinals[i].ordinal].append(entry.name) + for i, entry in enumerate(self.f_address): + tmpn = [] + if not entry.rva: + continue + out = "%2d %.8X %s" % (i + self.expdesc.base, entry.rva, repr(tmp_names[i])) + rep.append(out) + return "\n".join(rep) + + def create(self, name='default.dll'): + 
self.expdesc = ExpDesc_e(self.parent_head) + for attr in ["characteristics", + "timestamp", + "majorv", + "minorv", + "name", + "base", + "numberoffunctions", + "numberofnames", + "addressoffunctions", + "addressofnames", + "addressofordinals", + ]: + setattr(self.expdesc, attr, 0) + + self.dlldescname = DescName(self.parent_head) + self.dlldescname.name = name + self.f_address = struct_array(self, None, + None, + Rva) + self.f_names = struct_array(self, None, + None, + Rva) + self.f_nameordinals = struct_array(self, None, + None, + Ordinal) + self.expdesc.base = 1 + + def add_name(self, name, rva=0xdeadc0fe): + if self.expdesc is None: + return + names = [func.name.name for func in self.f_names] + names_s = names[:] + names_s.sort() + if names_s != names: + log.warn('tab names was not sorted may bug') + names.append(name) + names.sort() + index = names.index(name) + descname = DescName(self.parent_head) + + descname.name = name + wname = Rva(self.parent_head) + + wname.name = descname + woffset = Rva(self.parent_head) + woffset.rva = rva + wordinal = Ordinal(self.parent_head) + # func is append to list + wordinal.ordinal = len(self.f_address) + self.f_address.append(woffset) + # self.f_names.insert(index, wname) + # self.f_nameordinals.insert(index, wordinal) + self.f_names.insert(index, wname) + self.f_nameordinals.insert(index, wordinal) + self.expdesc.numberofnames += 1 + self.expdesc.numberoffunctions += 1 + + def get_funcrva(self, f_str): + if self.expdesc is None: + return None + for i, entry in enumerate(self.f_names): + if f_str != entry.name.name: + continue + ordinal = self.f_nameordinals[i].ordinal + rva = self.f_address[ordinal].rva + return rva + return None + + def get_funcvirt(self, addr): + rva = self.get_funcrva(addr) + if rva == None: + return + return self.parent_head.rva2virt(rva) + + +class Delaydesc_e(CStruct): + _fields = [("attrs", "u32"), + ("name", "u32"), + ("hmod", "u32"), + ("firstthunk", "u32"), + ("originalfirstthunk", "u32"), + 
("boundiat", "u32"), + ("unloadiat", "u32"), + ("timestamp", "u32"), + ] + + +class DirDelay(CStruct): + _fields = [("delaydesc", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + def gete(self, raw, off): + if not off: + return None, off + + ofend = off + \ + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size + out = [] + while off < ofend: + if off >= len(raw): + log.warn('warning bad reloc offset') + break + + delaydesc, length = Delaydesc_e.unpack_l(raw, + off, + self.parent_head) + if raw[off:off+length] == b'\x00' * length: + # Special case + break + off += length + out.append(delaydesc) + + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 + + parent = self.parent_head + for entry in out: + isfromva = (entry.attrs & 1) == 0 + if isfromva: + isfromva = lambda x: parent.virt2rva(x) + else: + isfromva = lambda x: x + entry.dlldescname = DescName.unpack(raw, isfromva(entry.name), + self.parent_head) + if entry.originalfirstthunk: + addr = isfromva(entry.originalfirstthunk) + if not 0 <= addr < len(raw): + log.warning("Bad delay") + break + entry.originalfirstthunks = struct_array(self, raw, + addr, + Rva) + else: + entry.originalfirstthunks + + if entry.firstthunk: + entry.firstthunks = struct_array(self, raw, + isfromva(entry.firstthunk), + Rva) + else: + entry.firstthunk = None + + entry.impbynames = [] + if entry.originalfirstthunk and self.parent_head.rva2off(isfromva(entry.originalfirstthunk)): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + print(ValueError("no thunk in delay dir!! 
")) + return + for i in range(len(tmp_thunk)): + if tmp_thunk[i].rva & mask_ptr == 0: + imp = ImportByName.unpack(raw, + isfromva(tmp_thunk[i].rva), + self.parent_head) + entry.impbynames.append(imp) + else: + entry.impbynames.append( + isfromva(tmp_thunk[i].rva & (mask_ptr - 1))) + # print(repr(entry[-1])) + # raise ValueError('XXX to check') + return out, off + + def sete(self, entries): + return "".join(bytes(entry) for entry in entries) + b"\x00" * (4 * 8) # DelayDesc_e + + def __len__(self): + rva_size = self.parent_head._wsize // 8 + length = (len(self.delaydesc) + 1) * (4 * 8) # DelayDesc_e + for entry in self.delaydesc: + length += len(entry.dlldescname) + if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): + length += (len(entry.originalfirstthunks) + 1) * rva_size + if entry.firstthunk: + length += (len(entry.firstthunks) + 1) * rva_size + for imp in entry.impbynames: + if isinstance(imp, ImportByName): + length += len(imp) + return length + + def set_rva(self, rva, size=None): + rva_size = self.parent_head._wsize // 8 + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].rva = rva + if not size: + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size = len(self) + else: + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size = size + rva += (len(self.delaydesc) + 1) * (4 * 8) # DelayDesc_e + parent = self.parent_head + for entry in self.delaydesc: + isfromva = (entry.attrs & 1) == 0 + if isfromva: + isfromva = lambda x: self.parent_head.rva2virt(x) + else: + isfromva = lambda x: x + + entry.name = isfromva(rva) + rva += len(entry.dlldescname) + if entry.originalfirstthunk: # and self.parent_head.rva2off(entry.originalfirstthunk): + entry.originalfirstthunk = isfromva(rva) + rva += (len(entry.originalfirstthunks) + 1) * rva_size + # XXX rva fthunk not patched => fun addr + # if entry.firstthunk: + # entry.firstthunk = rva + # rva+=(len(entry.firstthunks)+1)*pe.Rva._size + if 
entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + raise RuntimeError("No thunk!") + for i, imp in enumerate(entry.impbynames): + if isinstance(imp, ImportByName): + tmp_thunk[i].rva = isfromva(rva) + rva += len(imp) + + def build_content(self, raw): + if len(self.parent_head.NThdr.optentries) < DIRECTORY_ENTRY_DELAY_IMPORT: + return + dirdelay = self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_DELAY_IMPORT] + of1 = dirdelay.rva + if not of1: # No Delay Import + return + raw[self.parent_head.rva2off(of1)] = bytes(self) + for entry in self.delaydesc: + raw[self.parent_head.rva2off(entry.name)] = bytes(entry.dlldescname) + if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): + raw[self.parent_head.rva2off(entry.originalfirstthunk)] = bytes(entry.originalfirstthunks) + if entry.firstthunk: + raw[self.parent_head.rva2off(entry.firstthunk)] = bytes(entry.firstthunks) + if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + raise RuntimeError("No thunk!") + for j, imp in enumerate(entry.impbynames): + if isinstance(imp, ImportByName): + raw[self.parent_head.rva2off(tmp_thunk[j].rva)] = bytes(imp) + + def __repr__(self): + rep = ["<%s>" % self.__class__.__name__] + for i, entry in enumerate(self.delaydesc): + out = "%2d %-25s %s" % (i, repr(entry.dlldescname), repr(entry)) + rep.append(out) + for index, func in enumerate(entry.impbynames): + out = " %2d %-16s" % (index, repr(func)) + rep.append(out) + return "\n".join(rep) + + def add_dlldesc(self, new_dll): + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 + new_impdesc = [] + of1 = None + new_delaydesc = [] + for import_descriptor, 
new_functions in new_dll: + if isinstance(import_descriptor.get("name"), str): + import_descriptor["name"] = import_descriptor["name"].encode() + new_functions = [ + funcname.encode() if isinstance(funcname, str) else funcname + for funcname in new_functions + ] + for attr in ["attrs", "name", "hmod", "firstthunk", "originalfirstthunk", "boundiat", "unloadiat", "timestamp"]: + if not attr in import_descriptor: + import_descriptor[attr] = 0 + entry = Delaydesc_e(self.parent_head, **import_descriptor) + # entry.cstr.__dict__.update(import_descriptor) + if entry.firstthunk != None: + of1 = entry.firstthunk + elif of1 == None: + raise RuntimeError("set fthunk") + else: + entry.firstthunk = of1 + entry.dlldescname = DescName(self.parent_head, name=entry.name) + entry.originalfirstthunk = 0 + entry.originalfirstthunks = struct_array(self, None, + None, + Rva) + entry.firstthunks = struct_array(self, None, + None, + Rva) + + impbynames = [] + for new_function in new_functions: + rva_ofirstt = Rva(self.parent_head) + if isinstance(new_function, int_types): + rva_ofirstt.rva = mask_ptr + new_function + ibn = None + elif isinstance(new_function, bytes): + rva_ofirstt.rva = True + ibn = ImportByName(self.parent_head) + ibn.name = new_function + ibn.hint = 0 + else: + raise RuntimeError('unknown func type %s' % new_function) + impbynames.append(ibn) + entry.originalfirstthunks.append(rva_ofirstt) + + rva_func = Rva(self.parent_head) + if ibn != None: + rva_func.rva = 0xDEADBEEF # default func addr + else: + # ord ?XXX? 
+ rva_func.rva = rva_ofirstt.rva + entry.firstthunks.append(rva_func) + of1 += 4 + # for null thunk + of1 += 4 + entry.impbynames = impbynames + new_delaydesc.append(entry) + if self.delaydesc is None: + self.delaydesc = struct_array(self, None, + None, + Delaydesc_e) + self.delaydesc.l = new_delaydesc + else: + for entry in new_delaydesc: + self.delaydesc.append(entry) + + def get_funcrva(self, func): + for entry in self.delaydesc: + isfromva = (entry.attrs & 1) == 0 + if isfromva: + isfromva = lambda x: self.parent_head.virt2rva(x) + else: + isfromva = lambda x: x + if entry.originalfirstthunk and self.parent_head.rva2off(isfromva(entry.originalfirstthunk)): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + raise RuntimeError("No thunk!") + if isinstance(func, bytes): + for j, imp in enumerate(entry.impbynames): + if isinstance(imp, ImportByName): + if func == imp.name: + return isfromva(entry.firstthunk) + j * 4 + elif isinstance(func, int_types): + for j, imp in enumerate(entry.impbynames): + if not isinstance(imp, ImportByName): + if isfromva(tmp_thunk[j].rva & 0x7FFFFFFF) == func: + return isfromva(entry.firstthunk) + j * 4 + else: + raise ValueError('unknown func tpye %r' % func) + + def get_funcvirt(self, addr): + rva = self.get_funcrva(addr) + if rva == None: + return + return self.parent_head.rva2virt(rva) + + +class Rel(CStruct): + _fields = [("rva", "u32"), + ("size", "u32") + ] + + +class Reloc(CStruct): + _fields = [("rel", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + def gete(self, raw, off): + rel = struct.unpack('H', raw[off:off + 2])[0] + return (rel >> 12, rel & 0xfff), off + 2 + + def sete(self, value): + return struct.pack('H', (value[0] << 12) | value[1]) + + def __repr__(self): + return '<%d %d>' % (self.rel[0], self.rel[1]) + + +class DirReloc(CStruct): + _fields = [("reldesc", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + 
def gete(self, raw, off): + if not off: + return None, off + + ofend = off + \ + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].size + out = [] + while off < ofend: + if off >= len(raw): + log.warn('warning bad reloc offset') + break + reldesc, length = Rel.unpack_l(raw, + off, + self.parent_head) + if reldesc.size == 0: + log.warn('warning null reldesc') + reldesc.size = length + break + of2 = off + length + if of2 + reldesc.size > len(self.parent_head.img_rva): + log.warn('relocation too big, skipping') + break + reldesc.rels = struct_array(self, raw, + of2, + Reloc, + (reldesc.size - length) // 2) # / Reloc size + reldesc.patchrel = False + out.append(reldesc) + off += reldesc.size + return out, off + + def sete(self, entries): + return b"".join( + bytes(entry) + bytes(entry.rels) + for entry in entries + ) + + def set_rva(self, rva, size=None): + if self.reldesc is None: + return + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].rva = rva + if not size: + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_BASERELOC].size = len(self) + else: + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_BASERELOC].size = size + + def build_content(self, raw): + dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC] + dirrel.size = len(self) + of1 = dirrel.rva + if self.reldesc is None: # No Reloc + return + raw[self.parent_head.rva2off(of1)] = bytes(self) + + def __len__(self): + if self.reldesc is None: + return 0 + length = 0 + for entry in self.reldesc: + length += entry.size + return length + + def __bytes__(self): + return b"".join( + bytes(entry) + bytes(entry.rels) + for entry in self.reldesc + ) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __repr__(self): + rep = ["<%s>" % self.__class__.__name__] + if self.reldesc is None: + return "\n".join(rep) + for i, entry in enumerate(self.reldesc): + out = "%2d %s" % (i, repr(entry)) + rep.append(out) + """ + #display too many lines... 
+ for ii, m in enumerate(entry.rels): + l = "\t%2d %s"%(ii, repr(m) ) + rep.append(l) + """ + out = "\t%2d rels..." % (len(entry.rels)) + rep.append(out) + return "\n".join(rep) + + def add_reloc(self, rels, rtype=3, patchrel=True): + dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC] + if not rels: + return + rels.sort() + all_base_ad = set([x & 0xFFFFF000 for x in rels]) + all_base_ad = list(all_base_ad) + all_base_ad.sort() + rels_by_base = defaultdict(list) + while rels: + reloc = rels.pop() + if reloc >= all_base_ad[-1]: + rels_by_base[all_base_ad[-1]].append(reloc) + else: + all_base_ad.pop() + rels_by_base[all_base_ad[-1]].append(reloc) + rels_by_base = [x for x in list(rels_by_base.items())] + rels_by_base.sort() + for o_init, rels in rels_by_base: + # o_init = rels[0]&0xFFFFF000 + offsets = struct_array(self, None, None, Reloc, 0) + for reloc_value in rels: + if (reloc_value & 0xFFFFF000) != o_init: + raise RuntimeError("relocs must be in same range") + reloc = Reloc(self.parent_head) + reloc.rel = (rtype, reloc_value - o_init) + offsets.append(reloc) + while len(offsets) & 3: + reloc = Reloc(self.parent_head) + reloc.rel = (0, 0) + offsets.append(reloc) + reldesc = Rel(self.parent_head) # Reloc(self.parent_head) + reldesc.rva = o_init + reldesc.size = (len(offsets) * 2 + 8) + reldesc.rels = offsets + reldesc.patchrel = patchrel + # if self.reldesc is None: + # self.reldesc = [] + self.reldesc.append(reldesc) + dirrel.size += reldesc.size + + def del_reloc(self, taboffset): + if self.reldesc is None: + return + for rel in self.reldesc: + of1 = rel.rva + i = 0 + while i < len(rel.rels): + reloc = rel.rels[i] + if reloc.rel[0] != 0 and reloc.rel[1] + of1 in taboffset: + print('del reloc', hex(reloc.rel[1] + of1)) + del rel.rels[i] + rel.size -= Reloc._size + else: + i += 1 + + +class DirRes(CStruct): + _fields = [("resdesc", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + def gete(self, raw, off): + if not off: 
+ return None, off + if off >= len(self.parent_head.img_rva): + log.warning('cannot parse resources, %X' % off) + return None, off + + off_orig = off + ofend = off + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size + + resdesc, length = ResDesc_e.unpack_l(raw, + off, + self.parent_head) + off += length + nbr = resdesc.numberofnamedentries + resdesc.numberofidentries + + out = [] + tmp_off = off + for _ in range(nbr): + if tmp_off >= ofend: + break + if tmp_off + length >= len(raw): + log.warn('warning bad resource offset') + break + try: + entry, length = ResEntry.unpack_l(raw, tmp_off, self.parent_head) + except RuntimeError: + log.warn('bad resentry') + return None, tmp_off + out.append(entry) + tmp_off += length + resdesc.resentries = struct_array(self, raw, + off, + ResEntry, + nbr) + dir_todo = {off_orig: resdesc} + dir_done = {} + while dir_todo: + off, my_dir = dir_todo.popitem() + dir_done[off] = my_dir + for entry in my_dir.resentries: + off = entry.offsettosubdir + if not off: + # data dir + off = entry.offsettodata + if not 0 <= off < len(raw): + log.warn('bad resrouce entry') + continue + data = ResDataEntry.unpack(raw, + off, + self.parent_head) + off = data.offsettodata + data.s = StrPatchwork(raw[off:off + data.size]) + entry.data = data + continue + # subdir + if off in dir_done: + log.warn('warning recusif subdir') + continue + if not 0 <= off < len(self.parent_head.img_rva): + log.warn('bad resrouce entry') + continue + subdir, length = ResDesc_e.unpack_l(raw, + off, + self.parent_head) + nbr = subdir.numberofnamedentries + subdir.numberofidentries + try: + subdir.resentries = struct_array(self, raw, + off + length, + ResEntry, + nbr) + except RuntimeError: + log.warn('bad resrouce entry') + continue + + entry.subdir = subdir + dir_todo[off] = entry.subdir + return resdesc, off + + def build_content(self, raw): + if self.resdesc is None: + return + of1 = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva + 
raw[self.parent_head.rva2off(of1)] = bytes(self.resdesc) + dir_todo = {self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_RESOURCE].rva: self.resdesc} + dir_done = {} + while dir_todo: + of1, my_dir = dir_todo.popitem() + dir_done[of1] = my_dir + raw[self.parent_head.rva2off(of1)] = bytes(my_dir) + of1 += len(my_dir) + of_base = of1 + for entry in my_dir.resentries: + raw[of_base] = bytes(entry) + of_base += len(entry) + if entry.name_s: + raw[self.parent_head.rva2off(entry.name)] = bytes(entry.name_s) + of1 = entry.offsettosubdir + if not of1: + raw[self.parent_head.rva2off(entry.offsettodata)] = bytes(entry.data) + raw[self.parent_head.rva2off(entry.data.offsettodata)] = bytes(entry.data.s) + continue + dir_todo[of1] = entry.subdir + + def __len__(self): + length = 0 + if self.resdesc is None: + return length + dir_todo = [self.resdesc] + dir_done = [] + while dir_todo: + my_dir = dir_todo.pop() + if my_dir in dir_done: + raise ValueError('Recursive directory') + dir_done.append(my_dir) + length += len(my_dir) + length += len(my_dir.resentries) * 8 # ResEntry size + for entry in my_dir.resentries: + if not entry.offsettosubdir: + continue + if not entry.subdir in dir_todo: + dir_todo.append(entry.subdir) + else: + raise RuntimeError("recursive dir") + + dir_todo = dir_done + while dir_todo: + my_dir = dir_todo.pop() + for entry in my_dir.resentries: + if entry.name_s: + length += len(entry.name_s) + of1 = entry.offsettosubdir + if not of1: + length += 4 * 4 # WResDataEntry size + # XXX because rva may be even rounded + length += 1 + length += entry.data.size + continue + return length + + def set_rva(self, rva, size=None): + if self.resdesc is None: + return + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva = rva + if not size: + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = len(self) + else: + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = size + dir_todo = [self.resdesc] + dir_done = {} + while 
dir_todo: + my_dir = dir_todo.pop() + dir_done[rva] = my_dir + rva += len(my_dir) + rva += len(my_dir.resentries) * 8 # ResEntry size + for entry in my_dir.resentries: + if not entry.offsettosubdir: + continue + if not entry.subdir in dir_todo: + dir_todo.append(entry.subdir) + else: + raise RuntimeError("recursive dir") + dir_todo = dir_done + dir_inv = dict([(x[1], x[0]) for x in list(dir_todo.items())]) + while dir_todo: + rva_tmp, my_dir = dir_todo.popitem() + for entry in my_dir.resentries: + if entry.name_s: + entry.name = rva + rva += len(entry.name_s) + of1 = entry.offsettosubdir + if not of1: + entry.offsettodata = rva + rva += 4 * 4 # ResDataEntry size + # XXX menu rsrc must be even aligned? + if rva % 2: + rva += 1 + entry.data.offsettodata = rva + rva += entry.data.size + continue + entry.offsettosubdir = dir_inv[entry.subdir] + + def __repr__(self): + rep = ["<%s>" % (self.__class__.__name__)] + if self.resdesc is None: + return "\n".join(rep) + dir_todo = [self.resdesc] + resources = [] + index = -1 + while dir_todo: + entry = dir_todo.pop(0) + if isinstance(entry, int): + index += entry + elif isinstance(entry, ResDesc_e): + # resources.append((index, repr(entry))) + dir_todo = [1] + entry.resentries.l + [-1] + dir_todo + elif isinstance(entry, ResEntry): + if entry.offsettosubdir: + resources.append((index, repr(entry))) + dir_todo = [entry.subdir] + dir_todo + else: + resources.append((index, repr(entry))) + else: + raise RuntimeError("zarb") + for i, resource in resources: + rep.append(' ' * 4 * i + resource) + return "\n".join(rep) + + +class Ordinal(CStruct): + _fields = [("ordinal", "u16"), + ] + + +class ResDesc_e(CStruct): + _fields = [("characteristics", "u32"), + ("timestamp", "u32"), + ("majorv", "u16"), + ("minorv", "u16"), + ("numberofnamedentries", "u16"), + ("numberofidentries", "u16") + ] + + +class SUnicode(CStruct): + _fields = [("length", "u16"), + ("value", (lambda c, raw, off:c.gets(raw, off), + lambda c, value:c.sets(value))) + 
] + + def gets(self, raw, off): + value = raw[off:off + self.length * 2] + return value, off + self.length + + def sets(self, value): + return self.value + + +class ResEntry(CStruct): + _fields = [("name", (lambda c, raw, off:c._get_name(raw, off), + lambda c, value:c._set_name(value))), + ("offsettodata", (lambda c, raw, off:c._get_offset(raw, off), + lambda c, value:c._set_offset(value))) + ] + + def _get_name(self, raw, off): + self.data = None + # off = self.parent_head.rva2off(off) + name = struct.unpack('I', raw[off:off + 4])[0] + self.name_s = None + if name & 0x80000000: + name = (name & 0x7FFFFFFF) + self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_RESOURCE].rva # XXX res rva?? + name &= 0x7FFFFFFF + if name >= len(raw): + raise RuntimeError("Bad resentry") + self.name_s = SUnicode.unpack(raw, + name, + self.parent_head) + return name, off + 4 + + def _set_name(self, name): + if self.name_s: + rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva + name = (self.name - rva) + 0x80000000 + return struct.pack('I', name) + + def _get_offset(self, raw, off): + self.offsettosubdir = None + rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva + offsettodata_o = struct.unpack('I', raw[off:off + 4])[0] + offsettodata = (offsettodata_o & 0x7FFFFFFF) + rva # XXX res rva?? 
+ if offsettodata_o & 0x80000000: + self.offsettosubdir = offsettodata + return offsettodata, off + 4 + + def _set_offset(self, offset): + rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva + offsettodata = offset - rva + if self.offsettosubdir: + offsettodata = (self.offsettosubdir - rva) + 0x80000000 + return struct.pack('I', offsettodata) + + def __repr__(self): + if self.name_s: + nameid = "%s" % repr(self.name_s) + else: + if self.name in RT: # and not self.offsettosubdir: + nameid = "ID %s" % RT[self.name] + else: + nameid = "ID %d" % self.name + if self.offsettosubdir: + offsettodata = "subdir: %x" % self.offsettosubdir + else: + offsettodata = "data: %x" % self.offsettodata + return "<%s %s>" % (nameid, offsettodata) + + +class ResDataEntry(CStruct): + _fields = [("offsettodata", "u32"), + ("size", "u32"), + ("codepage", "u32"), + ("reserved", "u32"), + ] + + +class Symb(CStruct): + _fields = [("name", "8s"), + ("res1", "u32"), + ("res2", "u32"), + ("res3", "u16")] + + +DIRECTORY_ENTRY_EXPORT = 0 +DIRECTORY_ENTRY_IMPORT = 1 +DIRECTORY_ENTRY_RESOURCE = 2 +DIRECTORY_ENTRY_EXCEPTION = 3 +DIRECTORY_ENTRY_SECURITY = 4 +DIRECTORY_ENTRY_BASERELOC = 5 +DIRECTORY_ENTRY_DEBUG = 6 +DIRECTORY_ENTRY_COPYRIGHT = 7 +DIRECTORY_ENTRY_GLOBALPTR = 8 +DIRECTORY_ENTRY_TLS = 9 +DIRECTORY_ENTRY_LOAD_CONFIG = 10 +DIRECTORY_ENTRY_BOUND_IMPORT = 11 +DIRECTORY_ENTRY_IAT = 12 +DIRECTORY_ENTRY_DELAY_IMPORT = 13 +DIRECTORY_ENTRY_COM_DESCRIPTOR = 14 +DIRECTORY_ENTRY_RESERVED = 15 + + +RT_CURSOR = 1 +RT_BITMAP = 2 +RT_ICON = 3 +RT_MENU = 4 +RT_DIALOG = 5 +RT_STRING = 6 +RT_FONTDIR = 7 +RT_FONT = 8 +RT_ACCELERATOR = 9 +RT_RCDATA = 10 +RT_MESSAGETABLE = 11 +RT_GROUP_CURSOR = 12 +RT_GROUP_ICON = 14 +RT_VERSION = 16 +RT_DLGINCLUDE = 17 +RT_PLUGPLAY = 19 +RT_VXD = 20 +RT_ANICURSOR = 21 +RT_ANIICON = 22 +RT_HTML = 23 +RT_MANIFEST = 24 + + +RT = { + RT_CURSOR: "RT_CURSOR", + RT_BITMAP: "RT_BITMAP", + RT_ICON: "RT_ICON", + RT_MENU: "RT_MENU", + RT_DIALOG: "RT_DIALOG", + 
RT_STRING: "RT_STRING", + RT_FONTDIR: "RT_FONTDIR", + RT_FONT: "RT_FONT", + RT_ACCELERATOR: "RT_ACCELERATOR", + RT_RCDATA: "RT_RCDATA", + RT_MESSAGETABLE: "RT_MESSAGETABLE", + RT_GROUP_CURSOR: "RT_GROUP_CURSOR", + RT_GROUP_ICON: "RT_GROUP_ICON", + RT_VERSION: "RT_VERSION", + RT_DLGINCLUDE: "RT_DLGINCLUDE", + RT_PLUGPLAY: "RT_PLUGPLAY", + RT_VXD: "RT_VXD", + RT_ANICURSOR: "RT_ANICURSOR", + RT_ANIICON: "RT_ANIICON", + RT_HTML: "RT_HTML", + RT_MANIFEST: "RT_MANIFEST", +} diff --git a/miasm/elfesteem/pe_init.py b/miasm/elfesteem/pe_init.py new file mode 100644 index 00000000..e243cecb --- /dev/null +++ b/miasm/elfesteem/pe_init.py @@ -0,0 +1,603 @@ +#! /usr/bin/env python + +from __future__ import print_function + +from builtins import range +import array +from functools import reduce +import logging +import struct + +from future.builtins import int as int_types +from future.utils import PY3 + +from miasm.elfesteem import pe +from miasm.elfesteem.strpatchwork import StrPatchwork + +log = logging.getLogger("peparse") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +class ContentManager(object): + + def __get__(self, owner, _): + if hasattr(owner, '_content'): + return owner._content + + def __set__(self, owner, new_content): + owner.resize(len(owner._content), len(new_content)) + owner._content = new_content + + def __delete__(self, owner): + self.__set__(owner, None) + + +class ContectRva(object): + + def __init__(self, parent): + self.parent = parent + + def get(self, rva_start, rva_stop=None): + """ + Get data in RVA view starting at @rva_start, stopping at @rva_stop + @rva_start: rva start address + @rva_stop: rva stop address + """ + if rva_start < 0: + raise ValueError("Out of range") + if rva_stop is not None: + if rva_stop > len(self.parent.img_rva): + rva_stop = len(self.parent.img_rva) + if rva_start > 
len(self.parent.img_rva): + raise ValueError("Out of range") + return self.parent.img_rva[rva_start:rva_stop] + if rva_start > len(self.parent.img_rva): + raise ValueError("Out of range") + return self.parent.img_rva[rva_start] + + def set(self, rva, data): + """ + Set @data in RVA view starting at @start + @rva: rva start address + @data: data to set + """ + if not isinstance(rva, int_types): + raise ValueError('addr must be int/long') + + if rva < 0: + raise ValueError("Out of range") + + if rva + len(data) > len(self.parent.img_rva): + raise ValueError("Out of range") + self.parent.img_rva[rva] = data + + def __getitem__(self, item): + if isinstance(item, slice): + assert(item.step is None) + return self.get(item.start, item.stop) + return self.get(item) + + def __setitem__(self, item, data): + if isinstance(item, slice): + rva = item.start + else: + rva = item + self.set(rva, data) + + +class ContentVirtual(object): + + def __init__(self, parent): + self.parent = parent + + def __getitem__(self, item): + raise DeprecationWarning("Replace code by virt.get(start, [stop])") + + def __setitem__(self, item, data): + raise DeprecationWarning("Replace code by virt.set(start, data)") + + def __call__(self, ad_start, ad_stop=None, ad_step=None): + raise DeprecationWarning("Replace code by virt.get(start, stop)") + + def get(self, virt_start, virt_stop=None): + """ + Get data in VIRTUAL view starting at @virt_start, stopping at @virt_stop + @virt_start: virt start address + @virt_stop: virt stop address + """ + rva_start = self.parent.virt2rva(virt_start) + if virt_stop != None: + rva_stop = self.parent.virt2rva(virt_stop) + else: + rva_stop = None + return self.parent.rva.get(rva_start, rva_stop) + + def set(self, addr, data): + """ + Set @data in VIRTUAL view starting at @start + @addr: virtual start address + @data: data to set + """ + if not isinstance(addr, int_types): + raise ValueError('addr must be int/long') + self.parent.rva.set(self.parent.virt2rva(addr), 
data) + + def max_addr(self): + section = self.parent.SHList[-1] + length = section.addr + section.size + self.parent.NThdr.ImageBase + return int(length) + + def find(self, pattern, start=0, end=None): + if start != 0: + start = self.parent.virt2rva(start) + if end != None: + end = self.parent.virt2rva(end) + + ret = self.parent.img_rva.find(pattern, start, end) + if ret == -1: + return -1 + return self.parent.rva2virt(ret) + + def rfind(self, pattern, start=0, end=None): + if start != 0: + start = self.parent.virt2rva(start) + if end != None: + end = self.parent.virt2rva(end) + + ret = self.parent.img_rva.rfind(pattern, start, end) + if ret == -1: + return -1 + return self.parent.rva2virt(ret) + + def is_addr_in(self, addr): + return self.parent.is_in_virt_address(addr) + + + +def compute_crc(raw, olds): + out = 0 + data = raw[:] + if len(raw) % 2: + end = struct.unpack('B', data[-1])[0] + data = data[:-1] + if (len(raw) & ~0x1) % 4: + out += struct.unpack('H', data[:2])[0] + data = data[2:] + data = array.array('I', data) + out = reduce(lambda x, y: x + y, data, out) + out -= olds + while out > 0xFFFFFFFF: + out = (out >> 32) + (out & 0xFFFFFFFF) + while out > 0xFFFF: + out = (out & 0xFFFF) + ((out >> 16) & 0xFFFF) + if len(raw) % 2: + out += end + out += len(data) + return out + + + +# PE object +class PE(object): + content = ContentManager() + + def __init__(self, pestr=None, + loadfrommem=False, + parse_resources=True, + parse_delay=True, + parse_reloc=True, + wsize=32): + self._rva = ContectRva(self) + self._virt = ContentVirtual(self) + self.img_rva = StrPatchwork() + if pestr is None: + self._content = StrPatchwork() + self._sex = 0 + self._wsize = wsize + self.Doshdr = pe.Doshdr(self) + self.NTsig = pe.NTsig(self) + self.Coffhdr = pe.Coffhdr(self) + + if self._wsize == 32: + Opthdr = pe.Opthdr32 + else: + Opthdr = pe.Opthdr64 + + self.Opthdr = Opthdr(self) + self.NThdr = pe.NThdr(self) + self.NThdr.optentries = [pe.Optehdr(self) for _ in range(0x10)] + 
self.NThdr.CheckSum = 0 + self.SHList = pe.SHList(self) + self.SHList.shlist = [] + + self.NThdr.sizeofheaders = 0x1000 + + self.DirImport = pe.DirImport(self) + self.DirExport = pe.DirExport(self) + self.DirDelay = pe.DirDelay(self) + self.DirReloc = pe.DirReloc(self) + self.DirRes = pe.DirRes(self) + + self.Doshdr.magic = 0x5a4d + self.Doshdr.lfanew = 0xe0 + + self.NTsig.signature = 0x4550 + if wsize == 32: + self.Opthdr.magic = 0x10b + elif wsize == 64: + self.Opthdr.magic = 0x20b + else: + raise ValueError('unknown pe size %r' % wsize) + self.Opthdr.majorlinkerversion = 0x7 + self.Opthdr.minorlinkerversion = 0x0 + self.NThdr.filealignment = 0x1000 + self.NThdr.sectionalignment = 0x1000 + self.NThdr.majoroperatingsystemversion = 0x5 + self.NThdr.minoroperatingsystemversion = 0x1 + self.NThdr.MajorImageVersion = 0x5 + self.NThdr.MinorImageVersion = 0x1 + self.NThdr.majorsubsystemversion = 0x4 + self.NThdr.minorsubsystemversion = 0x0 + self.NThdr.subsystem = 0x3 + if wsize == 32: + self.NThdr.dllcharacteristics = 0x8000 + else: + self.NThdr.dllcharacteristics = 0x8000 + + # for createthread + self.NThdr.sizeofstackreserve = 0x200000 + self.NThdr.sizeofstackcommit = 0x1000 + self.NThdr.sizeofheapreserve = 0x100000 + self.NThdr.sizeofheapcommit = 0x1000 + + self.NThdr.ImageBase = 0x400000 + self.NThdr.sizeofheaders = 0x1000 + self.NThdr.numberofrvaandsizes = 0x10 + + self.NTsig.signature = 0x4550 + if wsize == 32: + self.Coffhdr.machine = 0x14c + elif wsize == 64: + self.Coffhdr.machine = 0x8664 + else: + raise ValueError('unknown pe size %r' % wsize) + if wsize == 32: + self.Coffhdr.characteristics = 0x10f + self.Coffhdr.sizeofoptionalheader = 0xe0 + else: + self.Coffhdr.characteristics = 0x22 # 0x2f + self.Coffhdr.sizeofoptionalheader = 0xf0 + + else: + self._content = StrPatchwork(pestr) + self.loadfrommem = loadfrommem + self.parse_content(parse_resources=parse_resources, + parse_delay=parse_delay, + parse_reloc=parse_reloc) + + def isPE(self): + if self.NTsig 
is None: + return False + return self.NTsig.signature == 0x4550 + + def parse_content(self, + parse_resources=True, + parse_delay=True, + parse_reloc=True): + off = 0 + self._sex = 0 + self._wsize = 32 + self.Doshdr = pe.Doshdr.unpack(self.content, off, self) + off = self.Doshdr.lfanew + if off > len(self.content): + log.warn('ntsig after eof!') + self.NTsig = None + return + self.NTsig = pe.NTsig.unpack(self.content, + off, self) + self.DirImport = None + self.DirExport = None + self.DirDelay = None + self.DirReloc = None + self.DirRes = None + + if self.NTsig.signature != 0x4550: + log.warn('not a valid pe!') + return + off += len(self.NTsig) + self.Coffhdr, length = pe.Coffhdr.unpack_l(self.content, + off, + self) + + off += length + self._wsize = ord(self.content[off+1]) * 32 + + if self._wsize == 32: + Opthdr = pe.Opthdr32 + else: + Opthdr = pe.Opthdr64 + + if len(self.content) < 0x200: + # Fix for very little PE + self.content += (0x200 - len(self.content)) * b'\x00' + + self.Opthdr, length = Opthdr.unpack_l(self.content, off, self) + self.NThdr = pe.NThdr.unpack(self.content, off + length, self) + self.img_rva[0] = self.content[:self.NThdr.sizeofheaders] + off += self.Coffhdr.sizeofoptionalheader + self.SHList = pe.SHList.unpack(self.content, off, self) + + # load section data + filealignment = self.NThdr.filealignment + sectionalignment = self.NThdr.sectionalignment + for section in self.SHList.shlist: + virt_size = (section.size // sectionalignment + 1) * sectionalignment + if self.loadfrommem: + section.offset = section.addr + if self.NThdr.sectionalignment > 0x1000: + raw_off = 0x200 * (section.offset // 0x200) + else: + raw_off = section.offset + if raw_off != section.offset: + log.warn('unaligned raw section (%x %x)!', raw_off, section.offset) + section.data = StrPatchwork() + + if section.rawsize == 0: + rounded_size = 0 + else: + if section.rawsize % filealignment: + rs = (section.rawsize // filealignment + 1) * filealignment + else: + rs = 
section.rawsize + rounded_size = rs + if rounded_size > virt_size: + rounded_size = min(rounded_size, section.size) + data = self.content[raw_off:raw_off + rounded_size] + section.data = data + # Pad data to page size 0x1000 + length = len(data) + data += b"\x00" * ((((length + 0xfff)) & 0xFFFFF000) - length) + self.img_rva[section.addr] = data + # Fix img_rva + self.img_rva = self.img_rva + + try: + self.DirImport = pe.DirImport.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_IMPORT].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirImport, skipping') + self.DirImport = pe.DirImport(self) + + try: + self.DirExport = pe.DirExport.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_EXPORT].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirExport, skipping') + self.DirExport = pe.DirExport(self) + + if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_DELAY_IMPORT: + self.DirDelay = pe.DirDelay(self) + if parse_delay: + try: + self.DirDelay = pe.DirDelay.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_DELAY_IMPORT].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirDelay, skipping') + if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_BASERELOC: + self.DirReloc = pe.DirReloc(self) + if parse_reloc: + try: + self.DirReloc = pe.DirReloc.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_BASERELOC].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirReloc, skipping') + if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_RESOURCE: + self.DirRes = pe.DirRes(self) + if parse_resources: + self.DirRes = pe.DirRes(self) + try: + self.DirRes = pe.DirRes.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_RESOURCE].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirRes, skipping') + + def resize(self, old, new): + pass + + def __getitem__(self, item): + return self.content[item] + + def 
__setitem__(self, item, data): + self.content.__setitem__(item, data) + return + + def getsectionbyrva(self, rva): + if self.SHList is None: + return None + for section in self.SHList.shlist: + """ + TODO CHECK: + some binaries have import rva outside section, but addresses + seems to be rounded + """ + mask = self.NThdr.sectionalignment - 1 + if section.addr <= rva < (section.addr + section.size + mask) & ~(mask): + return section + return None + + def getsectionbyvad(self, vad): + return self.getsectionbyrva(self.virt2rva(vad)) + + def getsectionbyoff(self, off): + if self.SHList is None: + return None + for section in self.SHList.shlist: + if section.offset <= off < section.offset + section.rawsize: + return section + return None + + def getsectionbyname(self, name): + if self.SHList is None: + return None + for section in self.SHList: + if section.name.strip(b'\x00').decode() == name: + return section + return None + + def is_rva_ok(self, rva): + return self.getsectionbyrva(rva) is not None + + def rva2off(self, rva): + # Special case rva in header + if rva < self.NThdr.sizeofheaders: + return rva + section = self.getsectionbyrva(rva) + if section is None: + raise pe.InvalidOffset('cannot get offset for 0x%X' % rva) + soff = (section.offset // self.NThdr.filealignment) * self.NThdr.filealignment + return rva - section.addr + soff + + def off2rva(self, off): + section = self.getsectionbyoff(off) + if section is None: + return + return off - section.offset + section.addr + + def virt2rva(self, virt): + if virt is None: + return + return virt - self.NThdr.ImageBase + + def rva2virt(self, rva): + if rva is None: + return + return rva + self.NThdr.ImageBase + + def virt2off(self, virt): + return self.rva2off(self.virt2rva(virt)) + + def off2virt(self, off): + return self.rva2virt(self.off2rva(off)) + + def is_in_virt_address(self, addr): + if addr < self.NThdr.ImageBase: + return False + addr = self.virt2rva(addr) + for section in self.SHList.shlist: + if 
section.addr <= addr < section.addr + section.size: + return True + return False + + def get_drva(self): + print('Deprecated: Use PE.rva instead of PE.drva') + return self._rva + + def get_rva(self): + return self._rva + + # TODO XXX remove drva api + drva = property(get_drva) + rva = property(get_rva) + + def get_virt(self): + return self._virt + + virt = property(get_virt) + + def build_content(self): + + content = StrPatchwork() + content[0] = bytes(self.Doshdr) + + for section in self.SHList.shlist: + content[section.offset:section.offset + section.rawsize] = bytes(section.data) + + # fix image size + section_last = self.SHList.shlist[-1] + size = section_last.addr + section_last.size + (self.NThdr.sectionalignment - 1) + size &= ~(self.NThdr.sectionalignment - 1) + self.NThdr.sizeofimage = size + + off = self.Doshdr.lfanew + content[off] = bytes(self.NTsig) + off += len(self.NTsig) + content[off] = bytes(self.Coffhdr) + off += len(self.Coffhdr) + off_shlist = off + self.Coffhdr.sizeofoptionalheader + content[off] = bytes(self.Opthdr) + off += len(self.Opthdr) + content[off] = bytes(self.NThdr) + off += len(self.NThdr) + # content[off] = bytes(self.Optehdr) + + off = off_shlist + content[off] = bytes(self.SHList) + + for section in self.SHList: + if off + len(bytes(self.SHList)) > section.offset: + log.warn("section offset overlap pe hdr 0x%x 0x%x" % + (off + len(bytes(self.SHList)), section.offset)) + self.DirImport.build_content(content) + self.DirExport.build_content(content) + self.DirDelay.build_content(content) + self.DirReloc.build_content(content) + self.DirRes.build_content(content) + + if (self.Doshdr.lfanew + len(self.NTsig) + len(self.Coffhdr)) % 4: + log.warn("non aligned coffhdr, bad crc calculation") + crcs = compute_crc(bytes(content), self.NThdr.CheckSum) + content[self.Doshdr.lfanew + len(self.NTsig) + len(self.Coffhdr) + 64] = struct.pack('I', crcs) + return bytes(content) + + def __bytes__(self): + return self.build_content() + + def 
__str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def export_funcs(self): + if self.DirExport is None: + print('no export dir found') + return None, None + + all_func = {} + for i, export in enumerate(self.DirExport.f_names): + all_func[export.name.name] = self.rva2virt( + self.DirExport.f_address[self.DirExport.f_nameordinals[i].ordinal].rva) + all_func[self.DirExport.f_nameordinals[i].ordinal + self.DirExport.expdesc.base] = self.rva2virt( + self.DirExport.f_address[self.DirExport.f_nameordinals[i].ordinal].rva) + # XXX todo: test if redirected export + return all_func + + def reloc_to(self, imgbase): + offset = imgbase - self.NThdr.ImageBase + if self.DirReloc is None: + log.warn('no relocation found!') + for rel in self.DirReloc.reldesc: + rva = rel.rva + for reloc in rel.rels: + reloc_type, off = reloc.rel + if reloc_type == 0 and off == 0: + continue + if reloc_type != 3: + raise NotImplementedError('Reloc type not supported') + off += rva + value = struct.unpack('I', self.rva.get(off, off + 4))[0] + value += offset + self.rva.set(off, struct.pack('I', value & 0xFFFFFFFF)) + self.NThdr.ImageBase = imgbase diff --git a/miasm/elfesteem/strpatchwork.py b/miasm/elfesteem/strpatchwork.py new file mode 100644 index 00000000..e1a5de91 --- /dev/null +++ b/miasm/elfesteem/strpatchwork.py @@ -0,0 +1,106 @@ +from array import array +import struct +from sys import maxsize + +from future.utils import PY3 + +if PY3: + + def array_frombytes(arr, value): + return arr.frombytes(value) + + def array_tobytes(arr): + return arr.tobytes() + + +else: + + def array_frombytes(arr, value): + return arr.fromstring(value) + + def array_tobytes(arr): + return arr.tostring() + + +class StrPatchwork(object): + + def __init__(self, s=b"", paddingbyte=b"\x00"): + s_raw = bytes(s) + val = array("B") + array_frombytes(val, s_raw) + self.s = val + # cache s to avoid rebuilding str after each find + self.s_cache = s_raw + self.paddingbyte = paddingbyte + + def 
__bytes__(self): + return array_tobytes(self.s) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __getitem__(self, item): + s = self.s + if isinstance(item, slice): + end = item.stop + l = len(s) + if (end is not None and l < end) and end != maxsize: + # XXX hack [x:] give 2GB limit + # This is inefficient but avoids complicated maths if step is + # not 1 + s = s[:] + + tmp = array("B") + array_frombytes(tmp, self.paddingbyte * (end - l)) + s.extend(tmp) + r = s[item] + return array_tobytes(r) + + else: + if item > len(s): + return self.paddingbyte + else: + return struct.pack("B", s[item]) + + def __setitem__(self, item, val): + if val is None: + return + val_array = array("B") + array_frombytes(val_array, bytes(val)) + if type(item) is not slice: + item = slice(item, item + len(val_array)) + end = item.stop + l = len(self.s) + if l < end: + tmp = array("B") + array_frombytes(tmp, self.paddingbyte * (end - l)) + self.s.extend(tmp) + self.s[item] = val_array + self.s_cache = None + + def __repr__(self): + return "" % array_tobytes(self.s) + + def __len__(self): + return len(self.s) + + def __contains__(self, val): + return val in bytes(self) + + def __iadd__(self, other): + tmp = array("B") + array_frombytes(tmp, bytes(other)) + self.s.extend(tmp) + return self + + def find(self, pattern, start=0, end=None): + if not self.s_cache: + self.s_cache = array_tobytes(self.s) + return self.s_cache.find(pattern, start, end) + + def rfind(self, pattern, start=0, end=None): + if not self.s_cache: + self.s_cache = array_tobytes(self.s) + return self.s_cache.rfind(pattern, start, end) diff --git a/miasm/jitter/loader/elf.py b/miasm/jitter/loader/elf.py index b36638f3..4c68fc91 100644 --- a/miasm/jitter/loader/elf.py +++ b/miasm/jitter/loader/elf.py @@ -3,9 +3,9 @@ from collections import defaultdict from future.utils import viewitems -from elfesteem import cstruct -from elfesteem import * -import elfesteem.elf as elf_csts +from 
miasm.elfesteem import cstruct +from miasm.elfesteem import * +import miasm.elfesteem.elf as elf_csts from miasm.jitter.csts import * from miasm.jitter.loader.utils import canon_libname_libfunc, libimp @@ -56,11 +56,11 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): return runtime_lib, dyn_funcs def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): - """Parse the elfesteem's ELF @elf to extract symbols, and fill the LocationDB + """Parse the miasm.elfesteem's ELF @elf to extract symbols, and fill the LocationDB instance @loc_db with parsed symbols. The ELF is considered mapped at @base_addr - @elf: elfesteem's ELF instance + @elf: miasm.elfesteem's ELF instance @loc_db: LocationDB used to retrieve symbols'offset @base_addr: addr to reloc to (if any) """ @@ -163,7 +163,7 @@ def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): def apply_reloc_x86(elf, vm, section, base_addr, loc_db): """Apply relocation for x86 ELF contained in the section @section - @elf: elfesteem's ELF instance + @elf: miasm.elfesteem's ELF instance @vm: VmMngr instance @section: elf's section containing relocation to perform @base_addr: addr to reloc to diff --git a/miasm/jitter/loader/pe.py b/miasm/jitter/loader/pe.py index 7145a817..a2bdd3ac 100644 --- a/miasm/jitter/loader/pe.py +++ b/miasm/jitter/loader/pe.py @@ -6,9 +6,9 @@ from collections import defaultdict from future.utils import viewitems, viewvalues -from elfesteem import pe -from elfesteem import cstruct -from elfesteem import * +from miasm.elfesteem import pe +from miasm.elfesteem import cstruct +from miasm.elfesteem import * from miasm.jitter.csts import * from miasm.jitter.loader.utils import canon_libname_libfunc, libimp diff --git a/miasm/os_dep/win_api_x86_32_seh.py b/miasm/os_dep/win_api_x86_32_seh.py index 90a68eec..5d8ed3d7 100644 --- a/miasm/os_dep/win_api_x86_32_seh.py +++ b/miasm/os_dep/win_api_x86_32_seh.py @@ -23,7 +23,7 @@ import struct from future.utils import viewitems -from elfesteem 
import pe_init +from miasm.elfesteem import pe_init from miasm.jitter.csts import PAGE_READ, PAGE_WRITE from miasm.core.utils import pck32 diff --git a/requirements.txt b/requirements.txt index 84530589..135ca071 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ pyparsing future -git+https://github.com/serpilliere/elfesteem@py23_lalet#egg=elfesteem-0.1 llvmlite==0.26.0 diff --git a/setup.py b/setup.py index c52e1f9e..3aaeaf27 100755 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ def buil_all(): "miasm/analysis", "miasm/os_dep", "miasm/os_dep/linux", + "miasm/elfesteem", "miasm/jitter", "miasm/jitter/arch", "miasm/jitter/loader", diff --git a/test/analysis/dse.py b/test/analysis/dse.py index 8691551c..21225f54 100644 --- a/test/analysis/dse.py +++ b/test/analysis/dse.py @@ -3,7 +3,7 @@ from pdb import pm from future.utils import viewitems -from elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem.strpatchwork import StrPatchwork from miasm.core import parse_asm from miasm.expression.expression import ExprCompose, ExprOp, ExprInt, ExprId from miasm.core.asmblock import asm_resolve_final diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index 65a537a0..9d193b47 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -7,7 +7,7 @@ from miasm.arch.aarch64.arch import mn_aarch64, base_expr, variable from miasm.core import parse_asm from miasm.expression.expression import * from miasm.core import asmblock -from elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem.strpatchwork import StrPatchwork from miasm.analysis.machine import Machine from miasm.jitter.csts import * diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index 38a2d928..dd95c3af 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -7,7 +7,7 @@ from miasm.arch.mips32.arch import mn_mips32 from miasm.core import parse_asm from 
miasm.expression.expression import * from miasm.core import asmblock -from elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem.strpatchwork import StrPatchwork from miasm.analysis.machine import Machine from miasm.jitter.csts import * diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index 0059f511..cf87ac93 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -9,7 +9,7 @@ from miasm.arch.x86.arch import mn_x86, base_expr, variable from miasm.core import parse_asm from miasm.expression.expression import * from miasm.core import asmblock -from elfesteem.strpatchwork import StrPatchwork +from miasm.elfesteem.strpatchwork import StrPatchwork from miasm.analysis.machine import Machine from miasm.jitter.csts import * -- cgit 1.4.1 From 26c1075723a02984da6d3bc7423c5c0c43082dc3 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Mon, 4 Mar 2019 12:22:16 +0100 Subject: Rename elfesteem loader --- example/asm/shellcode.py | 4 +- example/elfesteem/minidump_to_pe.py | 4 +- example/elfesteem/test_pe.py | 4 +- example/jitter/arm_sc.py | 2 +- example/jitter/run_with_linuxenv.py | 2 +- example/jitter/unpack_upx.py | 2 +- miasm/analysis/binary.py | 4 +- miasm/elfesteem/__init__.py | 3 - miasm/elfesteem/cstruct.py | 154 ---- miasm/elfesteem/elf.py | 1538 -------------------------------- miasm/elfesteem/elf_init.py | 878 ------------------ miasm/elfesteem/minidump.py | 545 ------------ miasm/elfesteem/minidump_init.py | 194 ---- miasm/elfesteem/new_cstruct.py | 265 ------ miasm/elfesteem/pe.py | 1668 ----------------------------------- miasm/elfesteem/pe_init.py | 603 ------------- miasm/elfesteem/strpatchwork.py | 106 --- miasm/jitter/loader/elf.py | 12 +- miasm/jitter/loader/pe.py | 6 +- miasm/loader/__init__.py | 3 + miasm/loader/cstruct.py | 154 ++++ miasm/loader/elf.py | 1538 ++++++++++++++++++++++++++++++++ miasm/loader/elf_init.py | 878 ++++++++++++++++++ miasm/loader/minidump.py | 545 ++++++++++++ 
miasm/loader/minidump_init.py | 194 ++++ miasm/loader/new_cstruct.py | 265 ++++++ miasm/loader/pe.py | 1668 +++++++++++++++++++++++++++++++++++ miasm/loader/pe_init.py | 603 +++++++++++++ miasm/loader/strpatchwork.py | 106 +++ miasm/os_dep/win_api_x86_32_seh.py | 2 +- setup.py | 2 +- test/analysis/dse.py | 2 +- test/arch/aarch64/unit/asm_test.py | 2 +- test/arch/mips32/unit/asm_test.py | 2 +- test/arch/x86/unit/asm_test.py | 2 +- 35 files changed, 5980 insertions(+), 5980 deletions(-) delete mode 100644 miasm/elfesteem/__init__.py delete mode 100644 miasm/elfesteem/cstruct.py delete mode 100644 miasm/elfesteem/elf.py delete mode 100644 miasm/elfesteem/elf_init.py delete mode 100644 miasm/elfesteem/minidump.py delete mode 100644 miasm/elfesteem/minidump_init.py delete mode 100644 miasm/elfesteem/new_cstruct.py delete mode 100644 miasm/elfesteem/pe.py delete mode 100644 miasm/elfesteem/pe_init.py delete mode 100644 miasm/elfesteem/strpatchwork.py create mode 100644 miasm/loader/__init__.py create mode 100644 miasm/loader/cstruct.py create mode 100644 miasm/loader/elf.py create mode 100644 miasm/loader/elf_init.py create mode 100644 miasm/loader/minidump.py create mode 100644 miasm/loader/minidump_init.py create mode 100644 miasm/loader/new_cstruct.py create mode 100644 miasm/loader/pe.py create mode 100644 miasm/loader/pe_init.py create mode 100644 miasm/loader/strpatchwork.py (limited to 'example/asm/shellcode.py') diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 59ea3a94..67c882e9 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -4,8 +4,8 @@ from argparse import ArgumentParser from pdb import pm from future.utils import viewitems -from miasm.elfesteem import pe_init -from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.loader import pe_init +from miasm.loader.strpatchwork import StrPatchwork from miasm.core import parse_asm, asmblock from miasm.analysis.machine import Machine diff --git 
a/example/elfesteem/minidump_to_pe.py b/example/elfesteem/minidump_to_pe.py index 8aff3e62..30a95325 100644 --- a/example/elfesteem/minidump_to_pe.py +++ b/example/elfesteem/minidump_to_pe.py @@ -5,8 +5,8 @@ import sys from future.utils import viewvalues -from miasm.elfesteem.minidump_init import Minidump -from miasm.elfesteem.pe_init import PE +from miasm.loader.minidump_init import Minidump +from miasm.loader.pe_init import PE minidump = Minidump(open(sys.argv[1], 'rb').read()) diff --git a/example/elfesteem/test_pe.py b/example/elfesteem/test_pe.py index e9cff0b4..543cbea5 100644 --- a/example/elfesteem/test_pe.py +++ b/example/elfesteem/test_pe.py @@ -1,7 +1,7 @@ #! /usr/bin/env python -import miasm.elfesteem.pe as pe -from miasm.elfesteem.pe_init import PE +import miasm.loader.pe as pe +from miasm.loader.pe_init import PE import rlcompleter import readline import pdb diff --git a/example/jitter/arm_sc.py b/example/jitter/arm_sc.py index ddadbf29..9ff770ff 100755 --- a/example/jitter/arm_sc.py +++ b/example/jitter/arm_sc.py @@ -3,7 +3,7 @@ from miasm.core.utils import int_to_byte from miasm.analysis.sandbox import Sandbox_Linux_armb_str from miasm.analysis.sandbox import Sandbox_Linux_arml_str -from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.loader.strpatchwork import StrPatchwork from pdb import pm diff --git a/example/jitter/run_with_linuxenv.py b/example/jitter/run_with_linuxenv.py index 0237cc94..f981d2dd 100644 --- a/example/jitter/run_with_linuxenv.py +++ b/example/jitter/run_with_linuxenv.py @@ -2,7 +2,7 @@ from argparse import ArgumentParser import logging import re -from miasm.elfesteem import elf as elf_csts +from miasm.loader import elf as elf_csts from miasm.os_dep.linux import environment, syscall from miasm.analysis.machine import Machine diff --git a/example/jitter/unpack_upx.py b/example/jitter/unpack_upx.py index 05d28b16..3b8125f4 100644 --- a/example/jitter/unpack_upx.py +++ b/example/jitter/unpack_upx.py @@ -2,7 +2,7 @@ 
from __future__ import print_function import os import logging from pdb import pm -from miasm.elfesteem import pe +from miasm.loader import pe from miasm.analysis.sandbox import Sandbox_Win_x86_32 # User defined methods diff --git a/miasm/analysis/binary.py b/miasm/analysis/binary.py index 6dc095cf..66244822 100644 --- a/miasm/analysis/binary.py +++ b/miasm/analysis/binary.py @@ -131,7 +131,7 @@ class ContainerPE(Container): def parse(self, data, vm=None, **kwargs): from miasm.jitter.loader.pe import vm_load_pe, guess_arch - from miasm.elfesteem import pe_init + from miasm.loader import pe_init # Parse signature if not data.startswith(b'MZ'): @@ -178,7 +178,7 @@ class ContainerELF(Container): """ from miasm.jitter.loader.elf import vm_load_elf, guess_arch, \ fill_loc_db_with_symbols - from miasm.elfesteem import elf_init + from miasm.loader import elf_init # Parse signature if not data.startswith(b'\x7fELF'): diff --git a/miasm/elfesteem/__init__.py b/miasm/elfesteem/__init__.py deleted file mode 100644 index 1a602f38..00000000 --- a/miasm/elfesteem/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python - -__all__ = ['pe_init', 'elf_init', 'strpatchwork'] diff --git a/miasm/elfesteem/cstruct.py b/miasm/elfesteem/cstruct.py deleted file mode 100644 index 06d2e002..00000000 --- a/miasm/elfesteem/cstruct.py +++ /dev/null @@ -1,154 +0,0 @@ -#! 
/usr/bin/env python - -from __future__ import print_function -from builtins import zip -from functools import reduce -import struct - -from future.utils import PY3 - -type_size = {} -size2type = {} -for t in 'B', 'H', 'I', 'Q': - s = struct.calcsize(t) - type_size[t] = s * 8 - size2type[s * 8] = t - -type_size['u08'] = size2type[8] -type_size['u16'] = size2type[16] -type_size['u32'] = size2type[32] -type_size['u64'] = size2type[64] - - -def fix_size(fields, wsize): - out = [] - for name, v in fields: - if v.endswith("s"): - pass - elif v == "ptr": - v = size2type[wsize] - elif not v in type_size: - raise ValueError("unknown Cstruct type", v) - else: - v = type_size[v] - out.append((name, v)) - fields = out - return fields - - -class Cstruct_Metaclass(type): - - def __new__(cls, name, bases, dct): - o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct) - o._packstring = o._packformat + \ - "".join(x[1] for x in o._fields) - o._size = struct.calcsize(o._packstring) - return o - - -class CStruct(object): - #__metaclass__ = Cstruct_Metaclass - _packformat = "" - _fields = [] - - @classmethod - def _from_file(cls, f): - return cls(f.read(cls._size)) - - def __init__(self, sex, wsize, *args, **kargs): - if sex == 1: - sex = '<' - else: - sex = '>' - # packformat enforce sex - if self._packformat: - sex = "" - pstr = fix_size(self._fields, wsize) - self._packstring = sex + self._packformat + \ - "".join(x[1] for x in pstr) - self._size = struct.calcsize(self._packstring) - - self._names = [x[0] for x in self._fields] - if kargs: - self.__dict__.update(kargs) - else: - if args: - s = args[0] - else: - s = b"" - s += b"\x00" * self._size - s = s[:self._size] - self._unpack(s) - - def _unpack(self, s): - disas = struct.unpack(self._packstring, s) - for n, v in zip(self._names, disas): - setattr(self, n, v) - - def _pack(self): - return struct.pack(self._packstring, - *(getattr(self, x) for x in self._names)) - - def _spack(self, superstruct, shift=0): - attr = [] 
- for name in self._names: - s = getattr(self, name) - if isinstance(s, CStruct): - if s in superstruct: - s = reduce(lambda x, y: x + len(y), - superstruct[:superstruct.index(s)], - 0) - s += shift - else: - raise Exception("%r is not a superstructure" % s) - attr.append(s) - return struct.pack(self._packstring, *attr) - - def _copy(self): - return self.__class__(**self.__dict__) - - def __len__(self): - return self._size - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __bytes__(self): - return self._pack() - - def __repr__(self): - return "<%s=%s>" % (self.__class__.__name__, "/".join(repr( - getattr(self, x[0])) for x in self._fields - )) - - def __getitem__(self, item): # to work with format strings - return getattr(self, item) - - def _show(self): - print("##%s:" % self.__class__.__name__) - fmt = "%%-%is = %%r" % max(len(x[0]) for x in self._fields) - for fn, ft in self._fields: - print(fmt % (fn, getattr(self, fn))) - - -class CStructStruct(object): - - def __init__(self, lst, shift=0): - self._lst = lst - self._shift = shift - - def __getattr__(self, attr): - return getattr(self._lst, attr) - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __bytes__(self): - return b"".join( - a if isinstance(a, bytes) else a._spack(self._lst, self._shift) - for a in self._lst - ) diff --git a/miasm/elfesteem/elf.py b/miasm/elfesteem/elf.py deleted file mode 100644 index bdd088b8..00000000 --- a/miasm/elfesteem/elf.py +++ /dev/null @@ -1,1538 +0,0 @@ -#! 
/usr/bin/env python - -from miasm.elfesteem.cstruct import CStruct - -class Ehdr(CStruct): - _fields = [ ("ident","16s"), - ("type","u16"), - ("machine","u16"), - ("version","u32"), - ("entry","ptr"), - ("phoff","ptr"), - ("shoff","ptr"), - ("flags","u32"), - ("ehsize","u16"), - ("phentsize","u16"), - ("phnum","u16"), - ("shentsize","u16"), - ("shnum","u16"), - ("shstrndx","u16") ] - - -class Shdr(CStruct): - _fields = [ ("name","u32"), - ("type","u32"), - ("flags","ptr"), - ("addr","ptr"), - ("offset","ptr"), - ("size","ptr"), - ("link","u32"), - ("info","u32"), - ("addralign","ptr"), - ("entsize","ptr") ] - -class Phdr(CStruct): - _fields = [ ("type","u32"), - ("offset","u32"), - ("vaddr","u32"), - ("paddr","u32"), - ("filesz","u32"), - ("memsz","u32"), - ("flags","u32"), - ("align","u32") ] - -class Phdr64(CStruct): - _fields = [ ("type","u32"), - ("flags","u32"), - ("offset","ptr"), - ("vaddr","ptr"), - ("paddr","ptr"), - ("filesz","ptr"), - ("memsz","ptr"), - ("align","ptr") ] - -class Nhdr(CStruct): - _fields = [ ("namesz","u32"), - ("descsz","u32"), - ("type", "u32") ] - - -class Sym32(CStruct): - _fields = [ ("name","u32"), - ("value","u32"), - ("size","u32"), - ("info","u08"), - ("other","u08"), - ("shndx","u16") ] - -class Sym64(CStruct): - _fields = [ ("name","u32"), - ("info","u08"), - ("other","u08"), - ("shndx","u16"), - ("value","u64"), - ("size","u64") ] - -class Dym(CStruct): - _fields = [ ("tag","u32"), - ("val","u32") ] - -class Rel32(CStruct): - _fields = [ ("offset","ptr"), - ("info","u32") ] - -class Rel64(CStruct): - _fields = [ ("offset","ptr"), - ("info","u64") ] - -class Rela32(CStruct): - _fields = [ ("offset","ptr"), - ("info","u32"), - ("addend","ptr") ] - -class Rela64(CStruct): - _fields = [ ("offset","ptr"), - ("info","u64"), - ("addend","ptr") ] - -class Dynamic(CStruct): - _fields = [ ("type","ptr"), - ("name","ptr") ] - - -# Legal values for e_ident (identification indexes) - -EI_MAG0 = 0 # File identification -EI_MAG1 = 1 # File 
identification -EI_MAG2 = 2 # File identification -EI_MAG3 = 3 # File identification -EI_CLASS = 4 # File class -EI_DATA = 5 # Data encoding -EI_VERSION = 6 # File version -EI_OSABI = 7 # Operating system/ABI identification -EI_ABIVERSION = 8 # ABI version -EI_PAD = 9 # Start of padding bytes -EI_NIDENT = 16 # Size of e_ident[] - -# Legal values for e_ident[EI_CLASS] - -ELFCLASSNONE = 0 # Invalid class -ELFCLASS32 = 1 # 32-bit objects -ELFCLASS64 = 2 # 64-bit objects - -# Legal values for e_ident[EI_DATA] - -ELFDATANONE = 0 # Invalid data encoding -ELFDATA2LSB = 1 # Least significant byte at lowest address -ELFDATA2MSB = 2 # Most significant byte at lowest address - -# Legal values for e_type (object file type). - -ET_NONE = 0 # No file type -ET_REL = 1 # Relocatable file -ET_EXEC = 2 # Executable file -ET_DYN = 3 # Shared object file -ET_CORE = 4 # Core file -ET_NUM = 5 # Number of defined types -ET_LOOS = 0xfe00 # OS-specific range start -ET_HIOS = 0xfeff # OS-specific range end -ET_LOPROC = 0xff00 # Processor-specific range start -ET_HIPROC = 0xffff # Processor-specific range end - -# Legal values for e_machine (architecture). 
- -EM_NONE = 0 # No machine -EM_M32 = 1 # AT&T WE 32100 -EM_SPARC = 2 # SUN SPARC -EM_386 = 3 # Intel 80386 -EM_68K = 4 # Motorola m68k family -EM_88K = 5 # Motorola m88k family -EM_486 = 6 # Intel 80486 -EM_860 = 7 # Intel 80860 -EM_MIPS = 8 # MIPS R3000 big-endian -EM_S370 = 9 # IBM System/370 -EM_MIPS_RS3_LE = 10 # MIPS R3000 little-endian - -EM_PARISC = 15 # HPPA -EM_VPP500 = 17 # Fujitsu VPP500 -EM_SPARC32PLUS = 18 # Sun's "v8plus" -EM_960 = 19 # Intel 80960 -EM_PPC = 20 # PowerPC -EM_PPC64 = 21 # PowerPC 64-bit -EM_S390 = 22 # IBM S390 - -EM_V800 = 36 # NEC V800 series -EM_FR20 = 37 # Fujitsu FR20 -EM_RH32 = 38 # TRW RH-32 -EM_RCE = 39 # Motorola RCE -EM_ARM = 40 # ARM -EM_FAKE_ALPHA = 41 # Digital Alpha -EM_SH = 42 # Hitachi SH -EM_SPARCV9 = 43 # SPARC v9 64-bit -EM_TRICORE = 44 # Siemens Tricore -EM_ARC = 45 # Argonaut RISC Core -EM_H8_300 = 46 # Hitachi H8/300 -EM_H8_300H = 47 # Hitachi H8/300H -EM_H8S = 48 # Hitachi H8S -EM_H8_500 = 49 # Hitachi H8/500 -EM_IA_64 = 50 # Intel Merced -EM_MIPS_X = 51 # Stanford MIPS-X -EM_COLDFIRE = 52 # Motorola Coldfire -EM_68HC12 = 53 # Motorola M68HC12 -EM_MMA = 54 # Fujitsu MMA Multimedia Accelerator*/ -EM_PCP = 55 # Siemens PCP -EM_NCPU = 56 # Sony nCPU embeeded RISC -EM_NDR1 = 57 # Denso NDR1 microprocessor -EM_STARCORE = 58 # Motorola Start*Core processor -EM_ME16 = 59 # Toyota ME16 processor -EM_ST100 = 60 # STMicroelectronic ST100 processor -EM_TINYJ = 61 # Advanced Logic Corp. 
Tinyj emb.fam*/ -EM_X86_64 = 62 # AMD x86-64 architecture -EM_AARCH64 = 183 # Aarch64 architecture -EM_PDSP = 63 # Sony DSP Processor - -EM_FX66 = 66 # Siemens FX66 microcontroller -EM_ST9PLUS = 67 # STMicroelectronics ST9+ 8/16 mc -EM_ST7 = 68 # STmicroelectronics ST7 8 bit mc -EM_68HC16 = 69 # Motorola MC68HC16 microcontroller -EM_68HC11 = 70 # Motorola MC68HC11 microcontroller -EM_68HC08 = 71 # Motorola MC68HC08 microcontroller -EM_68HC05 = 72 # Motorola MC68HC05 microcontroller -EM_SVX = 73 # Silicon Graphics SVx -EM_ST19 = 74 # STMicroelectronics ST19 8 bit mc -EM_VAX = 75 # Digital VAX -EM_CRIS = 76 # Axis Communications 32-bit embedded processor -EM_JAVELIN = 77 # Infineon Technologies 32-bit embedded processor -EM_FIREPATH = 78 # Element 14 64-bit DSP Processor -EM_ZSP = 79 # LSI Logic 16-bit DSP Processor -EM_MMIX = 80 # Donald Knuth's educational 64-bit processor -EM_HUANY = 81 # Harvard University machine-independent object files -EM_PRISM = 82 # SiTera Prism -EM_AVR = 83 # Atmel AVR 8-bit microcontroller -EM_FR30 = 84 # Fujitsu FR30 -EM_D10V = 85 # Mitsubishi D10V -EM_D30V = 86 # Mitsubishi D30V -EM_V850 = 87 # NEC v850 -EM_M32R = 88 # Mitsubishi M32R -EM_MN10300 = 89 # Matsushita MN10300 -EM_MN10200 = 90 # Matsushita MN10200 -EM_PJ = 91 # picoJava -EM_OPENRISC = 92 # OpenRISC 32-bit embedded processor -EM_ARC_A5 = 93 # ARC Cores Tangent-A5 -EM_XTENSA = 94 # Tensilica Xtensa Architecture - -EM_ALPHA = 0x9026 - -# Legal values for sh_type (section type). 
- -SHT_NULL = 0 # Section header table entry unused -SHT_PROGBITS = 1 # Program data -SHT_SYMTAB = 2 # Symbol table -SHT_STRTAB = 3 # String table -SHT_RELA = 4 # Relocation entries with addends -SHT_HASH = 5 # Symbol hash table -SHT_DYNAMIC = 6 # Dynamic linking information -SHT_NOTE = 7 # Notes -SHT_NOBITS = 8 # Program space with no data (bss) -SHT_REL = 9 # Relocation entries, no addends -SHT_SHLIB = 10 # Reserved -SHT_DYNSYM = 11 # Dynamic linker symbol table -SHT_INIT_ARRAY = 14 # Array of constructors -SHT_FINI_ARRAY = 15 # Array of destructors -SHT_PREINIT_ARRAY = 16 # Array of pre-constructors -SHT_GROUP = 17 # Section group -SHT_SYMTAB_SHNDX = 18 # Extended section indices -SHT_NUM = 19 # Number of defined types. -SHT_LOOS = 0x60000000 # Start OS-specific -SHT_GNU_LIBLIST = 0x6ffffff7 # Prelink library list -SHT_CHECKSUM = 0x6ffffff8 # Checksum for DSO content. -SHT_LOSUNW = 0x6ffffffa # Sun-specific low bound. -SHT_SUNW_move = 0x6ffffffa -SHT_SUNW_COMDAT = 0x6ffffffb -SHT_SUNW_syminfo = 0x6ffffffc -SHT_GNU_verdef = 0x6ffffffd # Version definition section. -SHT_GNU_verneed = 0x6ffffffe # Version needs section. -SHT_GNU_versym = 0x6fffffff # Version symbol table. -SHT_HISUNW = 0x6fffffff # Sun-specific high bound. -SHT_HIOS = 0x6fffffff # End OS-specific type -SHT_LOPROC = 0x70000000 # Start of processor-specific -SHT_HIPROC = 0x7fffffff # End of processor-specific -SHT_LOUSER = 0x80000000 # Start of application-specific -SHT_HIUSER = 0x8fffffff # End of application-specific - -# Legal values for sh_flags (section flags). 
- -SHF_WRITE = (1 << 0) # Writable -SHF_ALLOC = (1 << 1) # Occupies memory during execution -SHF_EXECINSTR = (1 << 2) # Executable -SHF_MERGE = (1 << 4) # Might be merged -SHF_STRINGS = (1 << 5) # Contains nul-terminated strings -SHF_INFO_LINK = (1 << 6) # `sh_info' contains SHT index -SHF_LINK_ORDER = (1 << 7) # Preserve order after combining -SHF_OS_NONCONFORMING = (1 << 8) # Non-standard OS specific handling required -SHF_GROUP = (1 << 9) # Section is member of a group. -SHF_TLS = (1 << 10) # Section hold thread-local data. -SHF_MASKOS = 0x0ff00000 # OS-specific. -SHF_MASKPROC = 0xf0000000 # Processor-specific - -# Section group handling. - -GRP_COMDAT = 0x1 # Mark group as COMDAT. - -# Legal values for p_type (segment type). - -PT_NULL = 0 # Program header table entry unused -PT_LOAD = 1 # Loadable program segment -PT_DYNAMIC = 2 # Dynamic linking information -PT_INTERP = 3 # Program interpreter -PT_NOTE = 4 # Auxiliary information -PT_SHLIB = 5 # Reserved -PT_PHDR = 6 # Entry for header table itself -PT_TLS = 7 # Thread-local storage segment -PT_NUM = 8 # Number of defined types -PT_LOOS = 0x60000000 # Start of OS-specific -PT_GNU_EH_FRAME = 0x6474e550 # GCC .eh_frame_hdr segment -PT_GNU_STACK = 0x6474e551 # Indicates stack executability -PT_LOSUNW = 0x6ffffffa -PT_SUNWBSS = 0x6ffffffa # Sun Specific segment -PT_SUNWSTACK = 0x6ffffffb # Stack segment -PT_HISUNW = 0x6fffffff -PT_HIOS = 0x6fffffff # End of OS-specific -PT_LOPROC = 0x70000000 # Start of processor-specific -PT_HIPROC = 0x7fffffff # End of processor-specific - -# Legal values for p_flags (segment flags). - -PF_X = (1 << 0) # Segment is executable -PF_W = (1 << 1) # Segment is writable -PF_R = (1 << 2) # Segment is readable -PF_MASKOS = 0x0ff00000 # OS-specific -PF_MASKPROC = 0xf0000000 # Processor-specific - -# Legal values for note segment descriptor types for core files. 
- -NT_PRSTATUS = 1 # Contains copy of prstatus struct -NT_FPREGSET = 2 # Contains copy of fpregset struct -NT_PRPSINFO = 3 # Contains copy of prpsinfo struct -NT_PRXREG = 4 # Contains copy of prxregset struct -NT_TASKSTRUCT = 4 # Contains copy of task structure -NT_PLATFORM = 5 # String from sysinfo(SI_PLATFORM) -NT_AUXV = 6 # Contains copy of auxv array -NT_GWINDOWS = 7 # Contains copy of gwindows struct -NT_ASRS = 8 # Contains copy of asrset struct -NT_PSTATUS = 10 # Contains copy of pstatus struct -NT_PSINFO = 13 # Contains copy of psinfo struct -NT_PRCRED = 14 # Contains copy of prcred struct -NT_UTSNAME = 15 # Contains copy of utsname struct -NT_LWPSTATUS = 16 # Contains copy of lwpstatus struct -NT_LWPSINFO = 17 # Contains copy of lwpinfo struct -NT_PRFPXREG = 20 # Contains copy of fprxregset struct - -# Legal values for the note segment descriptor types for object files. - -NT_VERSION = 1 # Contains a version string. - -# Legal values for ST_BIND subfield of st_info (symbol binding). -# bind = Sym.info >> 4 -# val = Sym.info 0xf - -STB_LOCAL = 0 # Local symbol -STB_GLOBAL = 1 # Global symbol -STB_WEAK = 2 # Weak symbol -STB_NUM = 3 # Number of defined types. -STB_LOOS = 10 # Start of OS-specific -STB_HIOS = 12 # End of OS-specific -STB_LOPROC = 13 # Start of processor-specific -STB_HIPROC = 15 # End of processor-specific - -#Legal values for ST_TYPE subfield of st_info (symbol type). - -STT_NOTYPE = 0 # Symbol type is unspecified -STT_OBJECT = 1 # Symbol is a data object -STT_FUNC = 2 # Symbol is a code object -STT_SECTION = 3 # Symbol associated with a section -STT_FILE = 4 # Symbol's name is file name -STT_COMMON = 5 # Symbol is a common data object -STT_TLS = 6 # Symbol is thread-local data object*/ -STT_NUM = 7 # Number of defined types. 
-STT_LOOS = 10 # Start of OS-specific -STT_GNU_IFUNC = 10 # Symbol is indirect code object -STT_HIOS = 12 # End of OS-specific -STT_LOPROC = 13 # Start of processor-specific -STT_HIPROC = 15 # End of processor-specific - -# Legal values for d_tag (dynamic entry type). - -DT_NULL = 0 # Marks end of dynamic section -DT_NEEDED = 1 # Name of needed library -DT_PLTRELSZ = 2 # Size in bytes of PLT relocs -DT_PLTGOT = 3 # Processor defined value -DT_HASH = 4 # Address of symbol hash table -DT_STRTAB = 5 # Address of string table -DT_SYMTAB = 6 # Address of symbol table -DT_RELA = 7 # Address of Rela relocs -DT_RELASZ = 8 # Total size of Rela relocs -DT_RELAENT = 9 # Size of one Rela reloc -DT_STRSZ = 10 # Size of string table -DT_SYMENT = 11 # Size of one symbol table entry -DT_INIT = 12 # Address of init function -DT_FINI = 13 # Address of termination function -DT_SONAME = 14 # Name of shared object -DT_RPATH = 15 # Library search path (deprecated) -DT_SYMBOLIC = 16 # Start symbol search here -DT_REL = 17 # Address of Rel relocs -DT_RELSZ = 18 # Total size of Rel relocs -DT_RELENT = 19 # Size of one Rel reloc -DT_PLTREL = 20 # Type of reloc in PLT -DT_DEBUG = 21 # For debugging; unspecified -DT_TEXTREL = 22 # Reloc might modify .text -DT_JMPREL = 23 # Address of PLT relocs -DT_BIND_NOW = 24 # Process relocations of object -DT_INIT_ARRAY = 25 # Array with addresses of init fct -DT_FINI_ARRAY = 26 # Array with addresses of fini fct -DT_INIT_ARRAYSZ = 27 # Size in bytes of DT_INIT_ARRAY -DT_FINI_ARRAYSZ = 28 # Size in bytes of DT_FINI_ARRAY -DT_RUNPATH = 29 # Library search path -DT_FLAGS = 30 # Flags for the object being loaded -DT_ENCODING = 32 # Start of encoded range -DT_PREINIT_ARRAY = 32 # Array with addresses of preinit fct -DT_PREINIT_ARRAYSZ = 33 # size in bytes of DT_PREINIT_ARRAY -DT_NUM = 34 # Number used -DT_LOOS = 0x6000000d # Start of OS-specific -DT_HIOS = 0x6ffff000 # End of OS-specific -DT_LOPROC = 0x70000000 # Start of processor-specific -DT_HIPROC = 
0x7fffffff # End of processor-specific -#DT_PROCNUM = DT_MIPS_NUM # Most used by any processor - -# DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the -# Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's -# approach. -DT_VALRNGLO = 0x6ffffd00 -DT_GNU_PRELINKED = 0x6ffffdf5 # Prelinking timestamp -DT_GNU_CONFLICTSZ = 0x6ffffdf6 # Size of conflict section -DT_GNU_LIBLISTSZ = 0x6ffffdf7 # Size of library list -DT_CHECKSUM = 0x6ffffdf8 -DT_PLTPADSZ = 0x6ffffdf9 -DT_MOVEENT = 0x6ffffdfa -DT_MOVESZ = 0x6ffffdfb -DT_FEATURE_1 = 0x6ffffdfc # Feature selection (DTF_*). -DT_POSFLAG_1 = 0x6ffffdfd # Flags for DT_* entries, effecting the following DT_* entry. -DT_SYMINSZ = 0x6ffffdfe # Size of syminfo table (in bytes) -DT_SYMINENT = 0x6ffffdff # Entry size of syminfo -DT_VALRNGHI = 0x6ffffdff -DT_VALNUM = 12 - -# DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the -# Dyn.d_un.d_ptr field of the Elf*_Dyn structure. -# -# If any adjustment is made to the ELF object after it has been -# built these entries will need to be adjusted. -DT_ADDRRNGLO = 0x6ffffe00 -DT_GNU_CONFLICT = 0x6ffffef8 # Start of conflict section -DT_GNU_LIBLIST = 0x6ffffef9 # Library list -DT_CONFIG = 0x6ffffefa # Configuration information. -DT_DEPAUDIT = 0x6ffffefb # Dependency auditing. -DT_AUDIT = 0x6ffffefc # Object auditing. -DT_PLTPAD = 0x6ffffefd # PLT padding. -DT_MOVETAB = 0x6ffffefe # Move table. -DT_SYMINFO = 0x6ffffeff # Syminfo table. -DT_ADDRRNGHI = 0x6ffffeff -DT_ADDRNUM = 10 - -# The versioning entry types. The next are defined as part of the -# GNU extension. -DT_VERSYM = 0x6ffffff0 - -DT_RELACOUNT = 0x6ffffff9 -DT_RELCOUNT = 0x6ffffffa - -# These were chosen by Sun. -DT_FLAGS_1 = 0x6ffffffb # State flags, see DF_1_* below. 
-DT_VERDEF = 0x6ffffffc # Address of version definition table -DT_VERDEFNUM = 0x6ffffffd # Number of version definitions -DT_VERNEED = 0x6ffffffe # Address of table with needed versions -DT_VERNEEDNUM = 0x6fffffff # Number of needed versions -DT_VERSIONTAGNUM = 16 - -# Sun added these machine-independent extensions in the "processor-specific" -# range. Be compatible. -DT_AUXILIARY = 0x7ffffffd # Shared object to load before self -DT_FILTER = 0x7fffffff # Shared object to get values from -DT_EXTRANUM = 3 - -# Values of `d_un.d_val' in the DT_FLAGS entry. -DF_ORIGIN = 0x00000001 # Object may use DF_ORIGIN -DF_SYMBOLIC = 0x00000002 # Symbol resolutions starts here -DF_TEXTREL = 0x00000004 # Object contains text relocations -DF_BIND_NOW = 0x00000008 # No lazy binding for this object -DF_STATIC_TLS = 0x00000010 # Module uses the static TLS model - -# State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 -# entry in the dynamic section. -DF_1_NOW = 0x00000001 # Set RTLD_NOW for this object. -DF_1_GLOBAL = 0x00000002 # Set RTLD_GLOBAL for this object. -DF_1_GROUP = 0x00000004 # Set RTLD_GROUP for this object. -DF_1_NODELETE = 0x00000008 # Set RTLD_NODELETE for this object. -DF_1_LOADFLTR = 0x00000010 # Trigger filtee loading at runtime. -DF_1_INITFIRST = 0x00000020 # Set RTLD_INITFIRST for this object -DF_1_NOOPEN = 0x00000040 # Set RTLD_NOOPEN for this object. -DF_1_ORIGIN = 0x00000080 # $ORIGIN must be handled. -DF_1_DIRECT = 0x00000100 # Direct binding enabled. -DF_1_TRANS = 0x00000200 -DF_1_INTERPOSE = 0x00000400 # Object is used to interpose. -DF_1_NODEFLIB = 0x00000800 # Ignore default lib search path. -DF_1_NODUMP = 0x00001000 # Object can't be dldump'ed. -DF_1_CONFALT = 0x00002000 # Configuration alternative created. -DF_1_ENDFILTEE = 0x00004000 # Filtee terminates filters search. -DF_1_DISPRELDNE = 0x00008000 # Disp reloc applied at build time. -DF_1_DISPRELPND = 0x00010000 # Disp reloc applied at run-time. 
- -# Flags for the feature selection in DT_FEATURE_1. -DTF_1_PARINIT = 0x00000001 -DTF_1_CONFEXP = 0x00000002 - -# Flags in the DT_POSFLAG_1 entry effecting only the next DT_* entry. -DF_P1_LAZYLOAD = 0x00000001 # Lazyload following object. -DF_P1_GROUPPERM = 0x00000002 # Symbols from next object are not generally available. - -# GNU Versioning -VER_FLG_BASE = 1 # Version of the file itself, must not be used to match symbols -VER_FLG_WEAK = 2 # Reference to this version is weak -VER_NEED_CURRENT = 1 # Versioning implementation number - -# Relocs - -# Motorola 68k relocations - -R_68K_NONE = 0 # No reloc -R_68K_32 = 1 # Direct 32 bit -R_68K_16 = 2 # Direct 16 bit -R_68K_8 = 3 # Direct 8 bit -R_68K_PC32 = 4 # PC relative 32 bit -R_68K_PC16 = 5 # PC relative 16 bit -R_68K_PC8 = 6 # PC relative 8 bit -R_68K_GOT32 = 7 # 32 bit PC relative GOT entry -R_68K_GOT16 = 8 # 16 bit PC relative GOT entry -R_68K_GOT8 = 9 # 8 bit PC relative GOT entry -R_68K_GOT32O = 10 # 32 bit GOT offset -R_68K_GOT16O = 11 # 16 bit GOT offset -R_68K_GOT8O = 12 # 8 bit GOT offset -R_68K_PLT32 = 13 # 32 bit PC relative PLT address -R_68K_PLT16 = 14 # 16 bit PC relative PLT address -R_68K_PLT8 = 15 # 8 bit PC relative PLT address -R_68K_PLT32O = 16 # 32 bit PLT offset -R_68K_PLT16O = 17 # 16 bit PLT offset -R_68K_PLT8O = 18 # 8 bit PLT offset -R_68K_COPY = 19 # Copy symbol at runtime -R_68K_GLOB_DAT = 20 # Create GOT entry -R_68K_JMP_SLOT = 21 # Create PLT entry -R_68K_RELATIVE = 22 # Adjust by program base -R_68K_TLS_GD32 = 25 # 32 bit GOT offset for GD -R_68K_TLS_GD16 = 26 # 16 bit GOT offset for GD -R_68K_TLS_GD8 = 27 # 8 bit GOT offset for GD -R_68K_TLS_LDM32 = 28 # 32 bit GOT offset for LDM -R_68K_TLS_LDM16 = 29 # 16 bit GOT offset for LDM -R_68K_TLS_LDM8 = 30 # 8 bit GOT offset for LDM -R_68K_TLS_LDO32 = 31 # 32 bit module-relative offset -R_68K_TLS_LDO16 = 32 # 16 bit module-relative offset -R_68K_TLS_LDO8 = 33 # 8 bit module-relative offset -R_68K_TLS_IE32 = 34 # 32 bit GOT offset for IE 
-R_68K_TLS_IE16 = 35 # 16 bit GOT offset for IE -R_68K_TLS_IE8 = 36 # 8 bit GOT offset for IE -R_68K_TLS_LE32 = 37 # 32 bit offset relative to static TLS block -R_68K_TLS_LE16 = 38 # 16 bit offset relative to static TLS block -R_68K_TLS_LE8 = 39 # 8 bit offset relative to static TLS block -R_68K_TLS_DTPMOD32 = 40 # 32 bit module number -R_68K_TLS_DTPREL32 = 41 # 32 bit module-relative offset -R_68K_TLS_TPREL32 = 42 # 32 bit TP-relative offset -# Keep this the last entry. -R_68K_NUM = 43 - -# Intel 80386 relocations - -R_386_NONE = 0 # No reloc -R_386_32 = 1 # Direct 32 bit -R_386_PC32 = 2 # PC relative 32 bit -R_386_GOT32 = 3 # 32 bit GOT entry -R_386_PLT32 = 4 # 32 bit PLT address -R_386_COPY = 5 # Copy symbol at runtime -R_386_GLOB_DAT = 6 # Create GOT entry -R_386_JMP_SLOT = 7 # Create PLT entry -R_386_RELATIVE = 8 # Adjust by program base -R_386_GOTOFF = 9 # 32 bit offset to GOT -R_386_GOTPC = 10 # 32 bit PC relative offset to GOT -R_386_32PLT = 11 -R_386_TLS_TPOFF = 14 # Offset in static TLS block -R_386_TLS_IE = 15 # Address of GOT entry for static TLS block offset -R_386_TLS_GOTIE = 16 # GOT entry for static TLS block offset -R_386_TLS_LE = 17 # Offset relative to static TLS block -R_386_TLS_GD = 18 # Direct 32 bit for GNU version of general dynamic thread local data -R_386_TLS_LDM = 19 # Direct 32 bit for GNU version of local dynamic thread local data in LE code -R_386_16 = 20 -R_386_PC16 = 21 -R_386_8 = 22 -R_386_PC8 = 23 -R_386_TLS_GD_32 = 24 # Direct 32 bit for general dynamic thread local data -R_386_TLS_GD_PUSH = 25 # Tag for pushl in GD TLS code -R_386_TLS_GD_CALL = 26 # Relocation for call to __tls_get_addr() -R_386_TLS_GD_POP = 27 # Tag for popl in GD TLS code -R_386_TLS_LDM_32 = 28 # Direct 32 bit for local dynamic thread local data in LE code -R_386_TLS_LDM_PUSH = 29 # Tag for pushl in LDM TLS code -R_386_TLS_LDM_CALL = 30 # Relocation for call to __tls_get_addr() in LDM code -R_386_TLS_LDM_POP = 31 # Tag for popl in LDM TLS code -R_386_TLS_LDO_32 
= 32 # Offset relative to TLS block -R_386_TLS_IE_32 = 33 # GOT entry for negated static TLS block offset -R_386_TLS_LE_32 = 34 # Negated offset relative to static TLS block -R_386_TLS_DTPMOD32 = 35 # ID of module containing symbol -R_386_TLS_DTPOFF32 = 36 # Offset in TLS block -R_386_TLS_TPOFF32 = 37 # Negated offset in static TLS block -# 38? -R_386_TLS_GOTDESC = 39 # GOT offset for TLS descriptor. -R_386_TLS_DESC_CALL = 40 # Marker of call through TLS descriptor for relaxation. -R_386_TLS_DESC = 41 # TLS descriptor containing pointer to code and to argument, returning the TLS offset for the symbol. -R_386_IRELATIVE = 42 # Adjust indirectly by program base -# Keep this the last entry. -R_386_NUM = 43 - -# SUN SPARC relocations - -R_SPARC_NONE = 0 # No reloc -R_SPARC_8 = 1 # Direct 8 bit -R_SPARC_16 = 2 # Direct 16 bit -R_SPARC_32 = 3 # Direct 32 bit -R_SPARC_DISP8 = 4 # PC relative 8 bit -R_SPARC_DISP16 = 5 # PC relative 16 bit -R_SPARC_DISP32 = 6 # PC relative 32 bit -R_SPARC_WDISP30 = 7 # PC relative 30 bit shifted -R_SPARC_WDISP22 = 8 # PC relative 22 bit shifted -R_SPARC_HI22 = 9 # High 22 bit -R_SPARC_22 = 10 # Direct 22 bit -R_SPARC_13 = 11 # Direct 13 bit -R_SPARC_LO10 = 12 # Truncated 10 bit -R_SPARC_GOT10 = 13 # Truncated 10 bit GOT entry -R_SPARC_GOT13 = 14 # 13 bit GOT entry -R_SPARC_GOT22 = 15 # 22 bit GOT entry shifted -R_SPARC_PC10 = 16 # PC relative 10 bit truncated -R_SPARC_PC22 = 17 # PC relative 22 bit shifted -R_SPARC_WPLT30 = 18 # 30 bit PC relative PLT address -R_SPARC_COPY = 19 # Copy symbol at runtime -R_SPARC_GLOB_DAT = 20 # Create GOT entry -R_SPARC_JMP_SLOT = 21 # Create PLT entry -R_SPARC_RELATIVE = 22 # Adjust by program base -R_SPARC_UA32 = 23 # Direct 32 bit unaligned - -# Additional Sparc64 relocs. 
- -R_SPARC_PLT32 = 24 # Direct 32 bit ref to PLT entry -R_SPARC_HIPLT22 = 25 # High 22 bit PLT entry -R_SPARC_LOPLT10 = 26 # Truncated 10 bit PLT entry -R_SPARC_PCPLT32 = 27 # PC rel 32 bit ref to PLT entry -R_SPARC_PCPLT22 = 28 # PC rel high 22 bit PLT entry -R_SPARC_PCPLT10 = 29 # PC rel trunc 10 bit PLT entry -R_SPARC_10 = 30 # Direct 10 bit -R_SPARC_11 = 31 # Direct 11 bit -R_SPARC_64 = 32 # Direct 64 bit -R_SPARC_OLO10 = 33 # 10bit with secondary 13bit addend -R_SPARC_HH22 = 34 # Top 22 bits of direct 64 bit -R_SPARC_HM10 = 35 # High middle 10 bits of ... -R_SPARC_LM22 = 36 # Low middle 22 bits of ... -R_SPARC_PC_HH22 = 37 # Top 22 bits of pc rel 64 bit -R_SPARC_PC_HM10 = 38 # High middle 10 bit of ... -R_SPARC_PC_LM22 = 39 # Low miggle 22 bits of ... -R_SPARC_WDISP16 = 40 # PC relative 16 bit shifted -R_SPARC_WDISP19 = 41 # PC relative 19 bit shifted -R_SPARC_GLOB_JMP = 42 # was part of v9 ABI but was removed -R_SPARC_7 = 43 # Direct 7 bit -R_SPARC_5 = 44 # Direct 5 bit -R_SPARC_6 = 45 # Direct 6 bit -R_SPARC_DISP64 = 46 # PC relative 64 bit -R_SPARC_PLT64 = 47 # Direct 64 bit ref to PLT entry -R_SPARC_HIX22 = 48 # High 22 bit complemented -R_SPARC_LOX10 = 49 # Truncated 11 bit complemented -R_SPARC_H44 = 50 # Direct high 12 of 44 bit -R_SPARC_M44 = 51 # Direct mid 22 of 44 bit -R_SPARC_L44 = 52 # Direct low 10 of 44 bit -R_SPARC_REGISTER = 53 # Global register usage -R_SPARC_UA64 = 54 # Direct 64 bit unaligned -R_SPARC_UA16 = 55 # Direct 16 bit unaligned -R_SPARC_TLS_GD_HI22 = 56 -R_SPARC_TLS_GD_LO10 = 57 -R_SPARC_TLS_GD_ADD = 58 -R_SPARC_TLS_GD_CALL = 59 -R_SPARC_TLS_LDM_HI22 = 60 -R_SPARC_TLS_LDM_LO10 = 61 -R_SPARC_TLS_LDM_ADD = 62 -R_SPARC_TLS_LDM_CALL = 63 -R_SPARC_TLS_LDO_HIX22 = 64 -R_SPARC_TLS_LDO_LOX10 = 65 -R_SPARC_TLS_LDO_ADD = 66 -R_SPARC_TLS_IE_HI22 = 67 -R_SPARC_TLS_IE_LO10 = 68 -R_SPARC_TLS_IE_LD = 69 -R_SPARC_TLS_IE_LDX = 70 -R_SPARC_TLS_IE_ADD = 71 -R_SPARC_TLS_LE_HIX22 = 72 -R_SPARC_TLS_LE_LOX10 = 73 -R_SPARC_TLS_DTPMOD32 = 74 
-R_SPARC_TLS_DTPMOD64 = 75 -R_SPARC_TLS_DTPOFF32 = 76 -R_SPARC_TLS_DTPOFF64 = 77 -R_SPARC_TLS_TPOFF32 = 78 -R_SPARC_TLS_TPOFF64 = 79 -R_SPARC_GOTDATA_HIX22 = 80 -R_SPARC_GOTDATA_LOX10 = 81 -R_SPARC_GOTDATA_OP_HIX22 = 82 -R_SPARC_GOTDATA_OP_LOX10 = 83 -R_SPARC_GOTDATA_OP = 84 -R_SPARC_H34 = 85 -R_SPARC_SIZE32 = 86 -R_SPARC_SIZE64 = 87 -R_SPARC_JMP_IREL = 248 -R_SPARC_IRELATIVE = 249 -R_SPARC_GNU_VTINHERIT = 250 -R_SPARC_GNU_VTENTRY = 251 -R_SPARC_REV32 = 252 -# Keep this the last entry. -R_SPARC_NUM = 253 - -# MIPS R3000 relocations - -R_MIPS_NONE = 0 # No reloc -R_MIPS_16 = 1 # Direct 16 bit -R_MIPS_32 = 2 # Direct 32 bit -R_MIPS_REL32 = 3 # PC relative 32 bit -R_MIPS_26 = 4 # Direct 26 bit shifted -R_MIPS_HI16 = 5 # High 16 bit -R_MIPS_LO16 = 6 # Low 16 bit -R_MIPS_GPREL16 = 7 # GP relative 16 bit -R_MIPS_LITERAL = 8 # 16 bit literal entry -R_MIPS_GOT16 = 9 # 16 bit GOT entry -R_MIPS_PC16 = 10 # PC relative 16 bit -R_MIPS_CALL16 = 11 # 16 bit GOT entry for function -R_MIPS_GPREL32 = 12 # GP relative 32 bit - -R_MIPS_SHIFT5 = 16 -R_MIPS_SHIFT6 = 17 -R_MIPS_64 = 18 -R_MIPS_GOT_DISP = 19 -R_MIPS_GOT_PAGE = 20 -R_MIPS_GOT_OFST = 21 -R_MIPS_GOT_HI16 = 22 -R_MIPS_GOT_LO16 = 23 -R_MIPS_SUB = 24 -R_MIPS_INSERT_A = 25 -R_MIPS_INSERT_B = 26 -R_MIPS_DELETE = 27 -R_MIPS_HIGHER = 28 -R_MIPS_HIGHEST = 29 -R_MIPS_CALL_HI16 = 30 -R_MIPS_CALL_LO16 = 31 -R_MIPS_SCN_DISP = 32 -R_MIPS_REL16 = 33 -R_MIPS_ADD_IMMEDIATE = 34 -R_MIPS_PJUMP = 35 -R_MIPS_RELGOT = 36 -R_MIPS_JALR = 37 -R_MIPS_TLS_DTPMOD32 = 38 # Module number 32 bit -R_MIPS_TLS_DTPREL32 = 39 # Module-relative offset 32 bit -R_MIPS_TLS_DTPMOD64 = 40 # Module number 64 bit -R_MIPS_TLS_DTPREL64 = 41 # Module-relative offset 64 bit -R_MIPS_TLS_GD = 42 # 16 bit GOT offset for GD -R_MIPS_TLS_LDM = 43 # 16 bit GOT offset for LDM -R_MIPS_TLS_DTPREL_HI16 = 44 # Module-relative offset, high 16 bits -R_MIPS_TLS_DTPREL_LO16 = 45 # Module-relative offset, low 16 bits -R_MIPS_TLS_GOTTPREL = 46 # 16 bit GOT offset for IE 
-R_MIPS_TLS_TPREL32 = 47 # TP-relative offset, 32 bit -R_MIPS_TLS_TPREL64 = 48 # TP-relative offset, 64 bit -R_MIPS_TLS_TPREL_HI16 = 49 # TP-relative offset, high 16 bits -R_MIPS_TLS_TPREL_LO16 = 50 # TP-relative offset, low 16 bits -R_MIPS_GLOB_DAT = 51 -R_MIPS_COPY = 126 -R_MIPS_JUMP_SLOT = 127 -# Keep this the last entry. -R_MIPS_NUM = 128 - -# HPPA relocations - -R_PARISC_NONE = 0 # No reloc. -R_PARISC_DIR32 = 1 # Direct 32-bit reference. -R_PARISC_DIR21L = 2 # Left 21 bits of eff. address. -R_PARISC_DIR17R = 3 # Right 17 bits of eff. address. -R_PARISC_DIR17F = 4 # 17 bits of eff. address. -R_PARISC_DIR14R = 6 # Right 14 bits of eff. address. -R_PARISC_PCREL32 = 9 # 32-bit rel. address. -R_PARISC_PCREL21L = 10 # Left 21 bits of rel. address. -R_PARISC_PCREL17R = 11 # Right 17 bits of rel. address. -R_PARISC_PCREL17F = 12 # 17 bits of rel. address. -R_PARISC_PCREL14R = 14 # Right 14 bits of rel. address. -R_PARISC_DPREL21L = 18 # Left 21 bits of rel. address. -R_PARISC_DPREL14R = 22 # Right 14 bits of rel. address. -R_PARISC_GPREL21L = 26 # GP-relative, left 21 bits. -R_PARISC_GPREL14R = 30 # GP-relative, right 14 bits. -R_PARISC_LTOFF21L = 34 # LT-relative, left 21 bits. -R_PARISC_LTOFF14R = 38 # LT-relative, right 14 bits. -R_PARISC_SECREL32 = 41 # 32 bits section rel. address. -R_PARISC_SEGBASE = 48 # No relocation, set segment base. -R_PARISC_SEGREL32 = 49 # 32 bits segment rel. address. -R_PARISC_PLTOFF21L = 50 # PLT rel. address, left 21 bits. -R_PARISC_PLTOFF14R = 54 # PLT rel. address, right 14 bits. -R_PARISC_LTOFF_FPTR32 = 57 # 32 bits LT-rel. function pointer. -R_PARISC_LTOFF_FPTR21L = 58 # LT-rel. fct ptr, left 21 bits. -R_PARISC_LTOFF_FPTR14R = 62 # LT-rel. fct ptr, right 14 bits. -R_PARISC_FPTR64 = 64 # 64 bits function address. -R_PARISC_PLABEL32 = 65 # 32 bits function address. -R_PARISC_PLABEL21L = 66 # Left 21 bits of fdesc address. -R_PARISC_PLABEL14R = 70 # Right 14 bits of fdesc address. -R_PARISC_PCREL64 = 72 # 64 bits PC-rel. address. 
-R_PARISC_PCREL22F = 74 # 22 bits PC-rel. address. -R_PARISC_PCREL14WR = 75 # PC-rel. address, right 14 bits. -R_PARISC_PCREL14DR = 76 # PC rel. address, right 14 bits. -R_PARISC_PCREL16F = 77 # 16 bits PC-rel. address. -R_PARISC_PCREL16WF = 78 # 16 bits PC-rel. address. -R_PARISC_PCREL16DF = 79 # 16 bits PC-rel. address. -R_PARISC_DIR64 = 80 # 64 bits of eff. address. -R_PARISC_DIR14WR = 83 # 14 bits of eff. address. -R_PARISC_DIR14DR = 84 # 14 bits of eff. address. -R_PARISC_DIR16F = 85 # 16 bits of eff. address. -R_PARISC_DIR16WF = 86 # 16 bits of eff. address. -R_PARISC_DIR16DF = 87 # 16 bits of eff. address. -R_PARISC_GPREL64 = 88 # 64 bits of GP-rel. address. -R_PARISC_GPREL14WR = 91 # GP-rel. address, right 14 bits. -R_PARISC_GPREL14DR = 92 # GP-rel. address, right 14 bits. -R_PARISC_GPREL16F = 93 # 16 bits GP-rel. address. -R_PARISC_GPREL16WF = 94 # 16 bits GP-rel. address. -R_PARISC_GPREL16DF = 95 # 16 bits GP-rel. address. -R_PARISC_LTOFF64 = 96 # 64 bits LT-rel. address. -R_PARISC_LTOFF14WR = 99 # LT-rel. address, right 14 bits. -R_PARISC_LTOFF14DR = 100 # LT-rel. address, right 14 bits. -R_PARISC_LTOFF16F = 101 # 16 bits LT-rel. address. -R_PARISC_LTOFF16WF = 102 # 16 bits LT-rel. address. -R_PARISC_LTOFF16DF = 103 # 16 bits LT-rel. address. -R_PARISC_SECREL64 = 104 # 64 bits section rel. address. -R_PARISC_SEGREL64 = 112 # 64 bits segment rel. address. -R_PARISC_PLTOFF14WR = 115 # PLT-rel. address, right 14 bits. -R_PARISC_PLTOFF14DR = 116 # PLT-rel. address, right 14 bits. -R_PARISC_PLTOFF16F = 117 # 16 bits LT-rel. address. -R_PARISC_PLTOFF16WF = 118 # 16 bits PLT-rel. address. -R_PARISC_PLTOFF16DF = 119 # 16 bits PLT-rel. address. -R_PARISC_LTOFF_FPTR64 = 120 # 64 bits LT-rel. function ptr. -R_PARISC_LTOFF_FPTR14WR = 123 # LT-rel. fct. ptr., right 14 bits. -R_PARISC_LTOFF_FPTR14DR = 124 # LT-rel. fct. ptr., right 14 bits. -R_PARISC_LTOFF_FPTR16F = 125 # 16 bits LT-rel. function ptr. -R_PARISC_LTOFF_FPTR16WF = 126 # 16 bits LT-rel. function ptr. 
-R_PARISC_LTOFF_FPTR16DF = 127 # 16 bits LT-rel. function ptr. -R_PARISC_LORESERVE = 128 -R_PARISC_COPY = 128 # Copy relocation. -R_PARISC_IPLT = 129 # Dynamic reloc, imported PLT -R_PARISC_EPLT = 130 # Dynamic reloc, exported PLT -R_PARISC_TPREL32 = 153 # 32 bits TP-rel. address. -R_PARISC_TPREL21L = 154 # TP-rel. address, left 21 bits. -R_PARISC_TPREL14R = 158 # TP-rel. address, right 14 bits. -R_PARISC_LTOFF_TP21L = 162 # LT-TP-rel. address, left 21 bits. -R_PARISC_LTOFF_TP14R = 166 # LT-TP-rel. address, right 14 bits.*/ -R_PARISC_LTOFF_TP14F = 167 # 14 bits LT-TP-rel. address. -R_PARISC_TPREL64 = 216 # 64 bits TP-rel. address. -R_PARISC_TPREL14WR = 219 # TP-rel. address, right 14 bits. -R_PARISC_TPREL14DR = 220 # TP-rel. address, right 14 bits. -R_PARISC_TPREL16F = 221 # 16 bits TP-rel. address. -R_PARISC_TPREL16WF = 222 # 16 bits TP-rel. address. -R_PARISC_TPREL16DF = 223 # 16 bits TP-rel. address. -R_PARISC_LTOFF_TP64 = 224 # 64 bits LT-TP-rel. address. -R_PARISC_LTOFF_TP14WR = 227 # LT-TP-rel. address, right 14 bits.*/ -R_PARISC_LTOFF_TP14DR = 228 # LT-TP-rel. address, right 14 bits.*/ -R_PARISC_LTOFF_TP16F = 229 # 16 bits LT-TP-rel. address. -R_PARISC_LTOFF_TP16WF = 230 # 16 bits LT-TP-rel. address. -R_PARISC_LTOFF_TP16DF = 231 # 16 bits LT-TP-rel. address. -R_PARISC_GNU_VTENTRY = 232 -R_PARISC_GNU_VTINHERIT = 233 -R_PARISC_TLS_GD21L = 234 # GD 21-bit left. -R_PARISC_TLS_GD14R = 235 # GD 14-bit right. -R_PARISC_TLS_GDCALL = 236 # GD call to __t_g_a. -R_PARISC_TLS_LDM21L = 237 # LD module 21-bit left. -R_PARISC_TLS_LDM14R = 238 # LD module 14-bit right. -R_PARISC_TLS_LDMCALL = 239 # LD module call to __t_g_a. -R_PARISC_TLS_LDO21L = 240 # LD offset 21-bit left. -R_PARISC_TLS_LDO14R = 241 # LD offset 14-bit right. -R_PARISC_TLS_DTPMOD32 = 242 # DTP module 32-bit. -R_PARISC_TLS_DTPMOD64 = 243 # DTP module 64-bit. -R_PARISC_TLS_DTPOFF32 = 244 # DTP offset 32-bit. -R_PARISC_TLS_DTPOFF64 = 245 # DTP offset 32-bit. 
-R_PARISC_TLS_LE21L = R_PARISC_TPREL21L -R_PARISC_TLS_LE14R = R_PARISC_TPREL14R -R_PARISC_TLS_IE21L = R_PARISC_LTOFF_TP21L -R_PARISC_TLS_IE14R = R_PARISC_LTOFF_TP14R -R_PARISC_TLS_TPREL32 = R_PARISC_TPREL32 -R_PARISC_TLS_TPREL64 = R_PARISC_TPREL64 -R_PARISC_HIRESERVE = 255 - -# Alpha relocations - -R_ALPHA_NONE = 0 # No reloc -R_ALPHA_REFLONG = 1 # Direct 32 bit -R_ALPHA_REFQUAD = 2 # Direct 64 bit -R_ALPHA_GPREL32 = 3 # GP relative 32 bit -R_ALPHA_LITERAL = 4 # GP relative 16 bit w/optimization -R_ALPHA_LITUSE = 5 # Optimization hint for LITERAL -R_ALPHA_GPDISP = 6 # Add displacement to GP -R_ALPHA_BRADDR = 7 # PC+4 relative 23 bit shifted -R_ALPHA_HINT = 8 # PC+4 relative 16 bit shifted -R_ALPHA_SREL16 = 9 # PC relative 16 bit -R_ALPHA_SREL32 = 10 # PC relative 32 bit -R_ALPHA_SREL64 = 11 # PC relative 64 bit -R_ALPHA_GPRELHIGH = 17 # GP relative 32 bit, high 16 bits -R_ALPHA_GPRELLOW = 18 # GP relative 32 bit, low 16 bits -R_ALPHA_GPREL16 = 19 # GP relative 16 bit -R_ALPHA_COPY = 24 # Copy symbol at runtime -R_ALPHA_GLOB_DAT = 25 # Create GOT entry -R_ALPHA_JMP_SLOT = 26 # Create PLT entry -R_ALPHA_RELATIVE = 27 # Adjust by program base -R_ALPHA_TLS_GD_HI = 28 -R_ALPHA_TLSGD = 29 -R_ALPHA_TLS_LDM = 30 -R_ALPHA_DTPMOD64 = 31 -R_ALPHA_GOTDTPREL = 32 -R_ALPHA_DTPREL64 = 33 -R_ALPHA_DTPRELHI = 34 -R_ALPHA_DTPRELLO = 35 -R_ALPHA_DTPREL16 = 36 -R_ALPHA_GOTTPREL = 37 -R_ALPHA_TPREL64 = 38 -R_ALPHA_TPRELHI = 39 -R_ALPHA_TPRELLO = 40 -R_ALPHA_TPREL16 = 41 -# Keep this the last entry. -R_ALPHA_NUM = 46 - -# PowerPC relocations - -R_PPC_NONE = 0 -R_PPC_ADDR32 = 1 # 32bit absolute address -R_PPC_ADDR24 = 2 # 26bit address, 2 bits ignored. 
-R_PPC_ADDR16 = 3 # 16bit absolute address -R_PPC_ADDR16_LO = 4 # lower 16bit of absolute address -R_PPC_ADDR16_HI = 5 # high 16bit of absolute address -R_PPC_ADDR16_HA = 6 # adjusted high 16bit -R_PPC_ADDR14 = 7 # 16bit address, 2 bits ignored -R_PPC_ADDR14_BRTAKEN = 8 -R_PPC_ADDR14_BRNTAKEN = 9 -R_PPC_REL24 = 10 # PC relative 26 bit -R_PPC_REL14 = 11 # PC relative 16 bit -R_PPC_REL14_BRTAKEN = 12 -R_PPC_REL14_BRNTAKEN = 13 -R_PPC_GOT16 = 14 -R_PPC_GOT16_LO = 15 -R_PPC_GOT16_HI = 16 -R_PPC_GOT16_HA = 17 -R_PPC_PLTREL24 = 18 -R_PPC_COPY = 19 -R_PPC_GLOB_DAT = 20 -R_PPC_JMP_SLOT = 21 -R_PPC_RELATIVE = 22 -R_PPC_LOCAL24PC = 23 -R_PPC_UADDR32 = 24 -R_PPC_UADDR16 = 25 -R_PPC_REL32 = 26 -R_PPC_PLT32 = 27 -R_PPC_PLTREL32 = 28 -R_PPC_PLT16_LO = 29 -R_PPC_PLT16_HI = 30 -R_PPC_PLT16_HA = 31 -R_PPC_SDAREL16 = 32 -R_PPC_SECTOFF = 33 -R_PPC_SECTOFF_LO = 34 -R_PPC_SECTOFF_HI = 35 -R_PPC_SECTOFF_HA = 36 - -# PowerPC relocations defined for the TLS access ABI. -R_PPC_TLS = 67 # none (sym+add)@tls -R_PPC_DTPMOD32 = 68 # word32 (sym+add)@dtpmod -R_PPC_TPREL16 = 69 # half16* (sym+add)@tprel -R_PPC_TPREL16_LO = 70 # half16 (sym+add)@tprel@l -R_PPC_TPREL16_HI = 71 # half16 (sym+add)@tprel@h -R_PPC_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha -R_PPC_TPREL32 = 73 # word32 (sym+add)@tprel -R_PPC_DTPREL16 = 74 # half16* (sym+add)@dtprel -R_PPC_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l -R_PPC_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h -R_PPC_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha -R_PPC_DTPREL32 = 78 # word32 (sym+add)@dtprel -R_PPC_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd -R_PPC_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l -R_PPC_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h -R_PPC_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha -R_PPC_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld -R_PPC_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l -R_PPC_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h -R_PPC_GOT_TLSLD16_HA = 86 # half16 (sym+add)@got@tlsld@ha 
-R_PPC_GOT_TPREL16 = 87 # half16* (sym+add)@got@tprel -R_PPC_GOT_TPREL16_LO = 88 # half16 (sym+add)@got@tprel@l -R_PPC_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h -R_PPC_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha -R_PPC_GOT_DTPREL16 = 91 # half16* (sym+add)@got@dtprel -R_PPC_GOT_DTPREL16_LO = 92 # half16* (sym+add)@got@dtprel@l -R_PPC_GOT_DTPREL16_HI = 93 # half16* (sym+add)@got@dtprel@h -R_PPC_GOT_DTPREL16_HA = 94 # half16* (sym+add)@got@dtprel@ha - -# The remaining relocs are from the Embedded ELF ABI, and are not in the SVR4 ELF ABI. -R_PPC_EMB_NADDR32 = 101 -R_PPC_EMB_NADDR16 = 102 -R_PPC_EMB_NADDR16_LO = 103 -R_PPC_EMB_NADDR16_HI = 104 -R_PPC_EMB_NADDR16_HA = 105 -R_PPC_EMB_SDAI16 = 106 -R_PPC_EMB_SDA2I16 = 107 -R_PPC_EMB_SDA2REL = 108 -R_PPC_EMB_SDA21 = 109 # 16 bit offset in SDA -R_PPC_EMB_MRKREF = 110 -R_PPC_EMB_RELSEC16 = 111 -R_PPC_EMB_RELST_LO = 112 -R_PPC_EMB_RELST_HI = 113 -R_PPC_EMB_RELST_HA = 114 -R_PPC_EMB_BIT_FLD = 115 -R_PPC_EMB_RELSDA = 116 # 16 bit relative offset in SDA - -# Diab tool relocations. -R_PPC_DIAB_SDA21_LO = 180 # like EMB_SDA21, but lower 16 bit -R_PPC_DIAB_SDA21_HI = 181 # like EMB_SDA21, but high 16 bit -R_PPC_DIAB_SDA21_HA = 182 # like EMB_SDA21, adjusted high 16 -R_PPC_DIAB_RELSDA_LO = 183 # like EMB_RELSDA, but lower 16 bit -R_PPC_DIAB_RELSDA_HI = 184 # like EMB_RELSDA, but high 16 bit -R_PPC_DIAB_RELSDA_HA = 185 # like EMB_RELSDA, adjusted high 16 - -# GNU extension to support local ifunc. -R_PPC_IRELATIVE = 248 - -# GNU relocs used in PIC code sequences. -R_PPC_REL16 = 249 # half16 (sym+add-.) -R_PPC_REL16_LO = 250 # half16 (sym+add-.)@l -R_PPC_REL16_HI = 251 # half16 (sym+add-.)@h -R_PPC_REL16_HA = 252 # half16 (sym+add-.)@ha - -# This is a phony reloc to handle any old fashioned TOC16 references that may still be in object files. 
-R_PPC_TOC16 = 255 - -# PowerPC64 relocations defined by the ABIs -R_PPC64_NONE = R_PPC_NONE -R_PPC64_ADDR32 = R_PPC_ADDR32 # 32bit absolute address -R_PPC64_ADDR24 = R_PPC_ADDR24 # 26bit address, word aligned -R_PPC64_ADDR16 = R_PPC_ADDR16 # 16bit absolute address -R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO # lower 16bits of address -R_PPC64_ADDR16_HI = R_PPC_ADDR16_HI # high 16bits of address. -R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA # adjusted high 16bits. -R_PPC64_ADDR14 = R_PPC_ADDR14 # 16bit address, word aligned -R_PPC64_ADDR14_BRTAKEN = R_PPC_ADDR14_BRTAKEN -R_PPC64_ADDR14_BRNTAKEN = R_PPC_ADDR14_BRNTAKEN -R_PPC64_REL24 = R_PPC_REL24 # PC-rel. 26 bit, word aligned -R_PPC64_REL14 = R_PPC_REL14 # PC relative 16 bit -R_PPC64_REL14_BRTAKEN = R_PPC_REL14_BRTAKEN -R_PPC64_REL14_BRNTAKEN = R_PPC_REL14_BRNTAKEN -R_PPC64_GOT16 = R_PPC_GOT16 -R_PPC64_GOT16_LO = R_PPC_GOT16_LO -R_PPC64_GOT16_HI = R_PPC_GOT16_HI -R_PPC64_GOT16_HA = R_PPC_GOT16_HA - -R_PPC64_COPY = R_PPC_COPY -R_PPC64_GLOB_DAT = R_PPC_GLOB_DAT -R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT -R_PPC64_RELATIVE = R_PPC_RELATIVE - -R_PPC64_UADDR32 = R_PPC_UADDR32 -R_PPC64_UADDR16 = R_PPC_UADDR16 -R_PPC64_REL32 = R_PPC_REL32 -R_PPC64_PLT32 = R_PPC_PLT32 -R_PPC64_PLTREL32 = R_PPC_PLTREL32 -R_PPC64_PLT16_LO = R_PPC_PLT16_LO -R_PPC64_PLT16_HI = R_PPC_PLT16_HI -R_PPC64_PLT16_HA = R_PPC_PLT16_HA - -R_PPC64_SECTOFF = R_PPC_SECTOFF -R_PPC64_SECTOFF_LO = R_PPC_SECTOFF_LO -R_PPC64_SECTOFF_HI = R_PPC_SECTOFF_HI -R_PPC64_SECTOFF_HA = R_PPC_SECTOFF_HA -R_PPC64_ADDR30 = 37 # word30 (S + A - P) >> 2 -R_PPC64_ADDR64 = 38 # doubleword64 S + A -R_PPC64_ADDR16_HIGHER = 39 # half16 #higher(S + A) -R_PPC64_ADDR16_HIGHERA = 40 # half16 #highera(S + A) -R_PPC64_ADDR16_HIGHEST = 41 # half16 #highest(S + A) -R_PPC64_ADDR16_HIGHESTA = 42 # half16 #highesta(S + A) -R_PPC64_UADDR64 = 43 # doubleword64 S + A -R_PPC64_REL64 = 44 # doubleword64 S + A - P -R_PPC64_PLT64 = 45 # doubleword64 L + A -R_PPC64_PLTREL64 = 46 # doubleword64 L + A - P -R_PPC64_TOC16 = 47 
# half16* S + A - .TOC -R_PPC64_TOC16_LO = 48 # half16 #lo(S + A - .TOC.) -R_PPC64_TOC16_HI = 49 # half16 #hi(S + A - .TOC.) -R_PPC64_TOC16_HA = 50 # half16 #ha(S + A - .TOC.) -R_PPC64_TOC = 51 # doubleword64 .TOC -R_PPC64_PLTGOT16 = 52 # half16* M + A -R_PPC64_PLTGOT16_LO = 53 # half16 #lo(M + A) -R_PPC64_PLTGOT16_HI = 54 # half16 #hi(M + A) -R_PPC64_PLTGOT16_HA = 55 # half16 #ha(M + A) - -R_PPC64_ADDR16_DS = 56 # half16ds* (S + A) >> 2 -R_PPC64_ADDR16_LO_DS = 57 # half16ds #lo(S + A) >> 2 -R_PPC64_GOT16_DS = 58 # half16ds* (G + A) >> 2 -R_PPC64_GOT16_LO_DS = 59 # half16ds #lo(G + A) >> 2 -R_PPC64_PLT16_LO_DS = 60 # half16ds #lo(L + A) >> 2 -R_PPC64_SECTOFF_DS = 61 # half16ds* (R + A) >> 2 -R_PPC64_SECTOFF_LO_DS = 62 # half16ds #lo(R + A) >> 2 -R_PPC64_TOC16_DS = 63 # half16ds* (S + A - .TOC.) >> 2 -R_PPC64_TOC16_LO_DS = 64 # half16ds #lo(S + A - .TOC.) >> 2 -R_PPC64_PLTGOT16_DS = 65 # half16ds* (M + A) >> 2 -R_PPC64_PLTGOT16_LO_DS = 66 # half16ds #lo(M + A) >> 2 - -# PowerPC64 relocations defined for the TLS access ABI. 
-R_PPC64_TLS = 67 # none (sym+add)@tls -R_PPC64_DTPMOD64 = 68 # doubleword64 (sym+add)@dtpmod -R_PPC64_TPREL16 = 69 # half16* (sym+add)@tprel -R_PPC64_TPREL16_LO = 70 # half16 (sym+add)@tprel@l -R_PPC64_TPREL16_HI = 71 # half16 (sym+add)@tprel@h -R_PPC64_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha -R_PPC64_TPREL64 = 73 # doubleword64 (sym+add)@tprel -R_PPC64_DTPREL16 = 74 # half16* (sym+add)@dtprel -R_PPC64_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l -R_PPC64_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h -R_PPC64_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha -R_PPC64_DTPREL64 = 78 # doubleword64 (sym+add)@dtprel -R_PPC64_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd -R_PPC64_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l -R_PPC64_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h -R_PPC64_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha -R_PPC64_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld -R_PPC64_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l -R_PPC64_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h -R_PPC64_GOT_TLSLD16_HA = 86 # half16 (sym+add)@got@tlsld@ha -R_PPC64_GOT_TPREL16_DS = 87 # half16ds* (sym+add)@got@tprel -R_PPC64_GOT_TPREL16_LO_DS = 88 # half16ds (sym+add)@got@tprel@l -R_PPC64_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h -R_PPC64_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha -R_PPC64_GOT_DTPREL16_DS = 91 # half16ds* (sym+add)@got@dtprel -R_PPC64_GOT_DTPREL16_LO_DS = 92 # half16ds (sym+add)@got@dtprel@l -R_PPC64_GOT_DTPREL16_HI = 93 # half16 (sym+add)@got@dtprel@h -R_PPC64_GOT_DTPREL16_HA = 94 # half16 (sym+add)@got@dtprel@ha -R_PPC64_TPREL16_DS = 95 # half16ds* (sym+add)@tprel -R_PPC64_TPREL16_LO_DS = 96 # half16ds (sym+add)@tprel@l -R_PPC64_TPREL16_HIGHER = 97 # half16 (sym+add)@tprel@higher -R_PPC64_TPREL16_HIGHERA = 98 # half16 (sym+add)@tprel@highera -R_PPC64_TPREL16_HIGHEST = 99 # half16 (sym+add)@tprel@highest -R_PPC64_TPREL16_HIGHESTA = 100 # half16 (sym+add)@tprel@highesta -R_PPC64_DTPREL16_DS = 101 # half16ds* 
(sym+add)@dtprel -R_PPC64_DTPREL16_LO_DS = 102 # half16ds (sym+add)@dtprel@l -R_PPC64_DTPREL16_HIGHER = 103 # half16 (sym+add)@dtprel@higher -R_PPC64_DTPREL16_HIGHERA = 104 # half16 (sym+add)@dtprel@highera -R_PPC64_DTPREL16_HIGHEST = 105 # half16 (sym+add)@dtprel@highest -R_PPC64_DTPREL16_HIGHESTA = 106 # half16 (sym+add)@dtprel@highesta - -# GNU extension to support local ifunc. -R_PPC64_JMP_IREL = 247 -R_PPC64_IRELATIVE = 248 -R_PPC64_REL16 = 249 # half16 (sym+add-.) -R_PPC64_REL16_LO = 250 # half16 (sym+add-.)@l -R_PPC64_REL16_HI = 251 # half16 (sym+add-.)@h -R_PPC64_REL16_HA = 252 # half16 (sym+add-.)@ha - -# PowerPC64 specific values for the Dyn d_tag field. -DT_PPC64_GLINK = (DT_LOPROC + 0) -DT_PPC64_OPD = (DT_LOPROC + 1) -DT_PPC64_OPDSZ = (DT_LOPROC + 2) -DT_PPC64_NUM = 3 - -# ARM relocations - -R_ARM_NONE = 0 # No reloc -R_ARM_PC24 = 1 # PC relative 26 bit branch -R_ARM_ABS32 = 2 # Direct 32 bit -R_ARM_REL32 = 3 # PC relative 32 bit -R_ARM_PC13 = 4 -R_ARM_ABS16 = 5 # Direct 16 bit -R_ARM_ABS12 = 6 # Direct 12 bit -R_ARM_THM_ABS5 = 7 -R_ARM_ABS8 = 8 # Direct 8 bit -R_ARM_SBREL32 = 9 -R_ARM_THM_PC22 = 10 -R_ARM_THM_PC8 = 11 -R_ARM_AMP_VCALL9 = 12 -R_ARM_SWI24 = 13 # Obsolete static relocation. -R_ARM_TLS_DESC = 13 # Dynamic relocation. 
-R_ARM_THM_SWI8 = 14 -R_ARM_XPC25 = 15 -R_ARM_THM_XPC22 = 16 -R_ARM_TLS_DTPMOD32 = 17 # ID of module containing symbol -R_ARM_TLS_DTPOFF32 = 18 # Offset in TLS block -R_ARM_TLS_TPOFF32 = 19 # Offset in static TLS block -R_ARM_COPY = 20 # Copy symbol at runtime -R_ARM_GLOB_DAT = 21 # Create GOT entry -R_ARM_JUMP_SLOT = 22 # Create PLT entry -R_ARM_RELATIVE = 23 # Adjust by program base -R_ARM_GOTOFF = 24 # 32 bit offset to GOT -R_ARM_GOTPC = 25 # 32 bit PC relative offset to GOT -R_ARM_GOT32 = 26 # 32 bit GOT entry -R_ARM_PLT32 = 27 # 32 bit PLT address -R_ARM_ALU_PCREL_7_0 = 32 -R_ARM_ALU_PCREL_15_8 = 33 -R_ARM_ALU_PCREL_23_15 = 34 -R_ARM_LDR_SBREL_11_0 = 35 -R_ARM_ALU_SBREL_19_12 = 36 -R_ARM_ALU_SBREL_27_20 = 37 -R_ARM_TLS_GOTDESC = 90 -R_ARM_TLS_CALL = 91 -R_ARM_TLS_DESCSEQ = 92 -R_ARM_THM_TLS_CALL = 93 -R_ARM_GNU_VTENTRY = 100 -R_ARM_GNU_VTINHERIT = 101 -R_ARM_THM_PC11 = 102 # thumb unconditional branch -R_ARM_THM_PC9 = 103 # thumb conditional branch -R_ARM_TLS_GD32 = 104 # PC-rel 32 bit for global dynamic thread local data -R_ARM_TLS_LDM32 = 105 # PC-rel 32 bit for local dynamic thread local data -R_ARM_TLS_LDO32 = 106 # 32 bit offset relative to TLS block -R_ARM_TLS_IE32 = 107 # PC-rel 32 bit for GOT entry of static TLS block offset -R_ARM_TLS_LE32 = 108 # 32 bit offset relative to static TLS block -R_ARM_THM_TLS_DESCSEQ = 129 -R_ARM_IRELATIVE = 160 -R_ARM_RXPC25 = 249 -R_ARM_RSBREL32 = 250 -R_ARM_THM_RPC22 = 251 -R_ARM_RREL32 = 252 -R_ARM_RABS22 = 253 -R_ARM_RPC24 = 254 -R_ARM_RBASE = 255 -# Keep this the last entry. 
-R_ARM_NUM = 256 - -# IA-64 relocations - -R_IA64_NONE = 0x00 # none -R_IA64_IMM14 = 0x21 # symbol + addend, add imm14 -R_IA64_IMM22 = 0x22 # symbol + addend, add imm22 -R_IA64_IMM64 = 0x23 # symbol + addend, mov imm64 -R_IA64_DIR32MSB = 0x24 # symbol + addend, data4 MSB -R_IA64_DIR32LSB = 0x25 # symbol + addend, data4 LSB -R_IA64_DIR64MSB = 0x26 # symbol + addend, data8 MSB -R_IA64_DIR64LSB = 0x27 # symbol + addend, data8 LSB -R_IA64_GPREL22 = 0x2a # @gprel(sym + add), add imm22 -R_IA64_GPREL64I = 0x2b # @gprel(sym + add), mov imm64 -R_IA64_GPREL32MSB = 0x2c # @gprel(sym + add), data4 MSB -R_IA64_GPREL32LSB = 0x2d # @gprel(sym + add), data4 LSB -R_IA64_GPREL64MSB = 0x2e # @gprel(sym + add), data8 MSB -R_IA64_GPREL64LSB = 0x2f # @gprel(sym + add), data8 LSB -R_IA64_LTOFF22 = 0x32 # @ltoff(sym + add), add imm22 -R_IA64_LTOFF64I = 0x33 # @ltoff(sym + add), mov imm64 -R_IA64_PLTOFF22 = 0x3a # @pltoff(sym + add), add imm22 -R_IA64_PLTOFF64I = 0x3b # @pltoff(sym + add), mov imm64 -R_IA64_PLTOFF64MSB = 0x3e # @pltoff(sym + add), data8 MSB -R_IA64_PLTOFF64LSB = 0x3f # @pltoff(sym + add), data8 LSB -R_IA64_FPTR64I = 0x43 # @fptr(sym + add), mov imm64 -R_IA64_FPTR32MSB = 0x44 # @fptr(sym + add), data4 MSB -R_IA64_FPTR32LSB = 0x45 # @fptr(sym + add), data4 LSB -R_IA64_FPTR64MSB = 0x46 # @fptr(sym + add), data8 MSB -R_IA64_FPTR64LSB = 0x47 # @fptr(sym + add), data8 LSB -R_IA64_PCREL60B = 0x48 # @pcrel(sym + add), brl -R_IA64_PCREL21B = 0x49 # @pcrel(sym + add), ptb, call -R_IA64_PCREL21M = 0x4a # @pcrel(sym + add), chk.s -R_IA64_PCREL21F = 0x4b # @pcrel(sym + add), fchkf -R_IA64_PCREL32MSB = 0x4c # @pcrel(sym + add), data4 MSB -R_IA64_PCREL32LSB = 0x4d # @pcrel(sym + add), data4 LSB -R_IA64_PCREL64MSB = 0x4e # @pcrel(sym + add), data8 MSB -R_IA64_PCREL64LSB = 0x4f # @pcrel(sym + add), data8 LSB -R_IA64_LTOFF_FPTR22 = 0x52 # @ltoff(@fptr(s+a)), imm22 -R_IA64_LTOFF_FPTR64I = 0x53 # @ltoff(@fptr(s+a)), imm64 -R_IA64_LTOFF_FPTR32MSB = 0x54 # @ltoff(@fptr(s+a)), data4 MSB 
-R_IA64_LTOFF_FPTR32LSB = 0x55 # @ltoff(@fptr(s+a)), data4 LSB -R_IA64_LTOFF_FPTR64MSB = 0x56 # @ltoff(@fptr(s+a)), data8 MSB -R_IA64_LTOFF_FPTR64LSB = 0x57 # @ltoff(@fptr(s+a)), data8 LSB -R_IA64_SEGREL32MSB = 0x5c # @segrel(sym + add), data4 MSB -R_IA64_SEGREL32LSB = 0x5d # @segrel(sym + add), data4 LSB -R_IA64_SEGREL64MSB = 0x5e # @segrel(sym + add), data8 MSB -R_IA64_SEGREL64LSB = 0x5f # @segrel(sym + add), data8 LSB -R_IA64_SECREL32MSB = 0x64 # @secrel(sym + add), data4 MSB -R_IA64_SECREL32LSB = 0x65 # @secrel(sym + add), data4 LSB -R_IA64_SECREL64MSB = 0x66 # @secrel(sym + add), data8 MSB -R_IA64_SECREL64LSB = 0x67 # @secrel(sym + add), data8 LSB -R_IA64_REL32MSB = 0x6c # data 4 + REL -R_IA64_REL32LSB = 0x6d # data 4 + REL -R_IA64_REL64MSB = 0x6e # data 8 + REL -R_IA64_REL64LSB = 0x6f # data 8 + REL -R_IA64_LTV32MSB = 0x74 # symbol + addend, data4 MSB -R_IA64_LTV32LSB = 0x75 # symbol + addend, data4 LSB -R_IA64_LTV64MSB = 0x76 # symbol + addend, data8 MSB -R_IA64_LTV64LSB = 0x77 # symbol + addend, data8 LSB -R_IA64_PCREL21BI = 0x79 # @pcrel(sym + add), 21bit inst -R_IA64_PCREL22 = 0x7a # @pcrel(sym + add), 22bit inst -R_IA64_PCREL64I = 0x7b # @pcrel(sym + add), 64bit inst -R_IA64_IPLTMSB = 0x80 # dynamic reloc, imported PLT, MSB -R_IA64_IPLTLSB = 0x81 # dynamic reloc, imported PLT, LSB -R_IA64_COPY = 0x84 # copy relocation -R_IA64_SUB = 0x85 # Addend and symbol difference -R_IA64_LTOFF22X = 0x86 # LTOFF22, relaxable. -R_IA64_LDXMOV = 0x87 # Use of LTOFF22X. 
-R_IA64_TPREL14 = 0x91 # @tprel(sym + add), imm14 -R_IA64_TPREL22 = 0x92 # @tprel(sym + add), imm22 -R_IA64_TPREL64I = 0x93 # @tprel(sym + add), imm64 -R_IA64_TPREL64MSB = 0x96 # @tprel(sym + add), data8 MSB -R_IA64_TPREL64LSB = 0x97 # @tprel(sym + add), data8 LSB -R_IA64_LTOFF_TPREL22 = 0x9a # @ltoff(@tprel(s+a)), imm2 -R_IA64_DTPMOD64MSB = 0xa6 # @dtpmod(sym + add), data8 MSB -R_IA64_DTPMOD64LSB = 0xa7 # @dtpmod(sym + add), data8 LSB -R_IA64_LTOFF_DTPMOD22 = 0xaa # @ltoff(@dtpmod(sym + add)), imm22 -R_IA64_DTPREL14 = 0xb1 # @dtprel(sym + add), imm14 -R_IA64_DTPREL22 = 0xb2 # @dtprel(sym + add), imm22 -R_IA64_DTPREL64I = 0xb3 # @dtprel(sym + add), imm64 -R_IA64_DTPREL32MSB = 0xb4 # @dtprel(sym + add), data4 MSB -R_IA64_DTPREL32LSB = 0xb5 # @dtprel(sym + add), data4 LSB -R_IA64_DTPREL64MSB = 0xb6 # @dtprel(sym + add), data8 MSB -R_IA64_DTPREL64LSB = 0xb7 # @dtprel(sym + add), data8 LSB -R_IA64_LTOFF_DTPREL22 = 0xba # @ltoff(@dtprel(s+a)), imm22 - -# SH relocations - -R_SH_NONE = 0 -R_SH_DIR32 = 1 -R_SH_REL32 = 2 -R_SH_DIR8WPN = 3 -R_SH_IND12W = 4 -R_SH_DIR8WPL = 5 -R_SH_DIR8WPZ = 6 -R_SH_DIR8BP = 7 -R_SH_DIR8W = 8 -R_SH_DIR8L = 9 -R_SH_SWITCH16 = 25 -R_SH_SWITCH32 = 26 -R_SH_USES = 27 -R_SH_COUNT = 28 -R_SH_ALIGN = 29 -R_SH_CODE = 30 -R_SH_DATA = 31 -R_SH_LABEL = 32 -R_SH_SWITCH8 = 33 -R_SH_GNU_VTINHERIT = 34 -R_SH_GNU_VTENTRY = 35 -R_SH_TLS_GD_32 = 144 -R_SH_TLS_LD_32 = 145 -R_SH_TLS_LDO_32 = 146 -R_SH_TLS_IE_32 = 147 -R_SH_TLS_LE_32 = 148 -R_SH_TLS_DTPMOD32 = 149 -R_SH_TLS_DTPOFF32 = 150 -R_SH_TLS_TPOFF32 = 151 -R_SH_GOT32 = 160 -R_SH_PLT32 = 161 -R_SH_COPY = 162 -R_SH_GLOB_DAT = 163 -R_SH_JMP_SLOT = 164 -R_SH_RELATIVE = 165 -R_SH_GOTOFF = 166 -R_SH_GOTPC = 167 -# Keep this the last entry. -R_SH_NUM = 256 - -# S/390 relocations - -R_390_NONE = 0 # No reloc. -R_390_8 = 1 # Direct 8 bit. -R_390_12 = 2 # Direct 12 bit. -R_390_16 = 3 # Direct 16 bit. -R_390_32 = 4 # Direct 32 bit. -R_390_PC32 = 5 # PC relative 32 bit. -R_390_GOT12 = 6 # 12 bit GOT offset. 
-R_390_GOT32 = 7 # 32 bit GOT offset. -R_390_PLT32 = 8 # 32 bit PC relative PLT address. -R_390_COPY = 9 # Copy symbol at runtime. -R_390_GLOB_DAT = 10 # Create GOT entry. -R_390_JMP_SLOT = 11 # Create PLT entry. -R_390_RELATIVE = 12 # Adjust by program base. -R_390_GOTOFF32 = 13 # 32 bit offset to GOT. -R_390_GOTPC = 14 # 32 bit PC relative offset to GOT. -R_390_GOT16 = 15 # 16 bit GOT offset. -R_390_PC16 = 16 # PC relative 16 bit. -R_390_PC16DBL = 17 # PC relative 16 bit shifted by 1. -R_390_PLT16DBL = 18 # 16 bit PC rel. PLT shifted by 1. -R_390_PC32DBL = 19 # PC relative 32 bit shifted by 1. -R_390_PLT32DBL = 20 # 32 bit PC rel. PLT shifted by 1. -R_390_GOTPCDBL = 21 # 32 bit PC rel. GOT shifted by 1. -R_390_64 = 22 # Direct 64 bit. -R_390_PC64 = 23 # PC relative 64 bit. -R_390_GOT64 = 24 # 64 bit GOT offset. -R_390_PLT64 = 25 # 64 bit PC relative PLT address. -R_390_GOTENT = 26 # 32 bit PC rel. to GOT entry >> 1. -R_390_GOTOFF16 = 27 # 16 bit offset to GOT. -R_390_GOTOFF64 = 28 # 64 bit offset to GOT. -R_390_GOTPLT12 = 29 # 12 bit offset to jump slot. -R_390_GOTPLT16 = 30 # 16 bit offset to jump slot. -R_390_GOTPLT32 = 31 # 32 bit offset to jump slot. -R_390_GOTPLT64 = 32 # 64 bit offset to jump slot. -R_390_GOTPLTENT = 33 # 32 bit rel. offset to jump slot. -R_390_PLTOFF16 = 34 # 16 bit offset from GOT to PLT. -R_390_PLTOFF32 = 35 # 32 bit offset from GOT to PLT. -R_390_PLTOFF64 = 36 # 16 bit offset from GOT to PLT. -R_390_TLS_LOAD = 37 # Tag for load insn in TLS code. -R_390_TLS_GDCALL = 38 # Tag for function call in general dynamic TLS code. -R_390_TLS_LDCALL = 39 # Tag for function call in local dynamic TLS code. -R_390_TLS_GD32 = 40 # Direct 32 bit for general dynamic thread local data. -R_390_TLS_GD64 = 41 # Direct 64 bit for general dynamic thread local data. -R_390_TLS_GOTIE12 = 42 # 12 bit GOT offset for static TLS block offset. -R_390_TLS_GOTIE32 = 43 # 32 bit GOT offset for static TLS block offset. 
-R_390_TLS_GOTIE64 = 44 # 64 bit GOT offset for static TLS block offset. -R_390_TLS_LDM32 = 45 # Direct 32 bit for local dynamic thread local data in LE code. -R_390_TLS_LDM64 = 46 # Direct 64 bit for local dynamic thread local data in LE code. -R_390_TLS_IE32 = 47 # 32 bit address of GOT entry for negated static TLS block offset. -R_390_TLS_IE64 = 48 # 64 bit address of GOT entry for negated static TLS block offset. -R_390_TLS_IEENT = 49 # 32 bit rel. offset to GOT entry for negated static TLS block offset. -R_390_TLS_LE32 = 50 # 32 bit negated offset relative to static TLS block. -R_390_TLS_LE64 = 51 # 64 bit negated offset relative to static TLS block. -R_390_TLS_LDO32 = 52 # 32 bit offset relative to TLS block. -R_390_TLS_LDO64 = 53 # 64 bit offset relative to TLS block. -R_390_TLS_DTPMOD = 54 # ID of module containing symbol. -R_390_TLS_DTPOFF = 55 # Offset in TLS block. -R_390_TLS_TPOFF = 56 # Negated offset in static TLS block. -R_390_20 = 57 # Direct 20 bit. -R_390_GOT20 = 58 # 20 bit GOT offset. -R_390_GOTPLT20 = 59 # 20 bit offset to jump slot. -R_390_TLS_GOTIE20 = 60 # 20 bit GOT offset for static TLS block offset. -# Keep this the last entry. -R_390_NUM = 61 - - -# CRIS relocations. -R_CRIS_NONE = 0 -R_CRIS_8 = 1 -R_CRIS_16 = 2 -R_CRIS_32 = 3 -R_CRIS_8_PCREL = 4 -R_CRIS_16_PCREL = 5 -R_CRIS_32_PCREL = 6 -R_CRIS_GNU_VTINHERIT = 7 -R_CRIS_GNU_VTENTRY = 8 -R_CRIS_COPY = 9 -R_CRIS_GLOB_DAT = 10 -R_CRIS_JUMP_SLOT = 11 -R_CRIS_RELATIVE = 12 -R_CRIS_16_GOT = 13 -R_CRIS_32_GOT = 14 -R_CRIS_16_GOTPLT = 15 -R_CRIS_32_GOTPLT = 16 -R_CRIS_32_GOTREL = 17 -R_CRIS_32_PLT_GOTREL = 18 -R_CRIS_32_PLT_PCREL = 19 - -R_CRIS_NUM = 20 - - -# AMD x86-64 relocations. 
-R_X86_64_NONE = 0 # No reloc -R_X86_64_64 = 1 # Direct 64 bit -R_X86_64_PC32 = 2 # PC relative 32 bit signed -R_X86_64_GOT32 = 3 # 32 bit GOT entry -R_X86_64_PLT32 = 4 # 32 bit PLT address -R_X86_64_COPY = 5 # Copy symbol at runtime -R_X86_64_GLOB_DAT = 6 # Create GOT entry -R_X86_64_JUMP_SLOT = 7 # Create PLT entry -R_X86_64_RELATIVE = 8 # Adjust by program base -R_X86_64_GOTPCREL = 9 # 32 bit signed PC relative offset to GOT -R_X86_64_32 = 10 # Direct 32 bit zero extended -R_X86_64_32S = 11 # Direct 32 bit sign extended -R_X86_64_16 = 12 # Direct 16 bit zero extended -R_X86_64_PC16 = 13 # 16 bit sign extended pc relative -R_X86_64_8 = 14 # Direct 8 bit sign extended -R_X86_64_PC8 = 15 # 8 bit sign extended pc relative -R_X86_64_DTPMOD64 = 16 # ID of module containing symbol -R_X86_64_DTPOFF64 = 17 # Offset in module's TLS block -R_X86_64_TPOFF64 = 18 # Offset in initial TLS block -R_X86_64_TLSGD = 19 # 32 bit signed PC relative offset to two GOT entries for GD symbol -R_X86_64_TLSLD = 20 # 32 bit signed PC relative offset to two GOT entries for LD symbol -R_X86_64_DTPOFF32 = 21 # Offset in TLS block -R_X86_64_GOTTPOFF = 22 # 32 bit signed PC relative offset to GOT entry for IE symbol -R_X86_64_TPOFF32 = 23 # Offset in initial TLS block -R_X86_64_PC64 = 24 # PC relative 64 bit -R_X86_64_GOTOFF64 = 25 # 64 bit offset to GOT -R_X86_64_GOTPC32 = 26 # 32 bit signed pc relative offset to GOT -R_X86_64_GOT64 = 27 # 64-bit GOT entry offset -R_X86_64_GOTPCREL64 = 28 # 64-bit PC relative offset to GOT entry -R_X86_64_GOTPC64 = 29 # 64-bit PC relative offset to GOT -R_X86_64_GOTPLT64 = 30 # like GOT64, says PLT entry needed -R_X86_64_PLTOFF64 = 31 # 64-bit GOT relative offset to PLT entry -R_X86_64_SIZE32 = 32 # Size of symbol plus 32-bit addend -R_X86_64_SIZE64 = 33 # Size of symbol plus 64-bit addend -R_X86_64_GOTPC32_TLSDESC = 34 # GOT offset for TLS descriptor. -R_X86_64_TLSDESC_CALL = 35 # Marker for call through TLS descriptor. 
-R_X86_64_TLSDESC = 36 # TLS descriptor. -R_X86_64_IRELATIVE = 37 # Adjust indirectly by program base - -R_X86_64_NUM = 38 - - -# AM33 relocations. -R_MN10300_NONE = 0 # No reloc. -R_MN10300_32 = 1 # Direct 32 bit. -R_MN10300_16 = 2 # Direct 16 bit. -R_MN10300_8 = 3 # Direct 8 bit. -R_MN10300_PCREL32 = 4 # PC-relative 32-bit. -R_MN10300_PCREL16 = 5 # PC-relative 16-bit signed. -R_MN10300_PCREL8 = 6 # PC-relative 8-bit signed. -R_MN10300_GNU_VTINHERIT = 7 # Ancient C++ vtable garbage... -R_MN10300_GNU_VTENTRY = 8 # ... collection annotation. -R_MN10300_24 = 9 # Direct 24 bit. -R_MN10300_GOTPC32 = 10 # 32-bit PCrel offset to GOT. -R_MN10300_GOTPC16 = 11 # 16-bit PCrel offset to GOT. -R_MN10300_GOTOFF32 = 12 # 32-bit offset from GOT. -R_MN10300_GOTOFF24 = 13 # 24-bit offset from GOT. -R_MN10300_GOTOFF16 = 14 # 16-bit offset from GOT. -R_MN10300_PLT32 = 15 # 32-bit PCrel to PLT entry. -R_MN10300_PLT16 = 16 # 16-bit PCrel to PLT entry. -R_MN10300_GOT32 = 17 # 32-bit offset to GOT entry. -R_MN10300_GOT24 = 18 # 24-bit offset to GOT entry. -R_MN10300_GOT16 = 19 # 16-bit offset to GOT entry. -R_MN10300_COPY = 20 # Copy symbol at runtime. -R_MN10300_GLOB_DAT = 21 # Create GOT entry. -R_MN10300_JMP_SLOT = 22 # Create PLT entry. -R_MN10300_RELATIVE = 23 # Adjust by program base. - -R_MN10300_NUM = 24 - - -# M32R relocs. -R_M32R_NONE = 0 # No reloc. -R_M32R_16 = 1 # Direct 16 bit. -R_M32R_32 = 2 # Direct 32 bit. -R_M32R_24 = 3 # Direct 24 bit. -R_M32R_10_PCREL = 4 # PC relative 10 bit shifted. -R_M32R_18_PCREL = 5 # PC relative 18 bit shifted. -R_M32R_26_PCREL = 6 # PC relative 26 bit shifted. -R_M32R_HI16_ULO = 7 # High 16 bit with unsigned low. -R_M32R_HI16_SLO = 8 # High 16 bit with signed low. -R_M32R_LO16 = 9 # Low 16 bit. -R_M32R_SDA16 = 10 # 16 bit offset in SDA. -R_M32R_GNU_VTINHERIT = 11 -R_M32R_GNU_VTENTRY = 12 -# M32R relocs use SHT_RELA. -R_M32R_16_RELA = 33 # Direct 16 bit. -R_M32R_32_RELA = 34 # Direct 32 bit. -R_M32R_24_RELA = 35 # Direct 24 bit. 
-R_M32R_10_PCREL_RELA = 36 # PC relative 10 bit shifted. -R_M32R_18_PCREL_RELA = 37 # PC relative 18 bit shifted. -R_M32R_26_PCREL_RELA = 38 # PC relative 26 bit shifted. -R_M32R_HI16_ULO_RELA = 39 # High 16 bit with unsigned low -R_M32R_HI16_SLO_RELA = 40 # High 16 bit with signed low -R_M32R_LO16_RELA = 41 # Low 16 bit -R_M32R_SDA16_RELA = 42 # 16 bit offset in SDA -R_M32R_RELA_GNU_VTINHERIT = 43 -R_M32R_RELA_GNU_VTENTRY = 44 -R_M32R_REL32 = 45 # PC relative 32 bit. - -R_M32R_GOT24 = 48 # 24 bit GOT entry -R_M32R_26_PLTREL = 49 # 26 bit PC relative to PLT shifted -R_M32R_COPY = 50 # Copy symbol at runtime -R_M32R_GLOB_DAT = 51 # Create GOT entry -R_M32R_JMP_SLOT = 52 # Create PLT entry -R_M32R_RELATIVE = 53 # Adjust by program base -R_M32R_GOTOFF = 54 # 24 bit offset to GOT -R_M32R_GOTPC24 = 55 # 24 bit PC relative offset to GOT -R_M32R_GOT16_HI_ULO = 56 # High 16 bit GOT entry with unsigned low -R_M32R_GOT16_HI_SLO = 57 # High 16 bit GOT entry with signed low -R_M32R_GOT16_LO = 58 # Low 16 bit GOT entry -R_M32R_GOTPC_HI_ULO = 59 # High 16 bit PC relative offset to GOT with unsigned low -R_M32R_GOTPC_HI_SLO = 60 # High 16 bit PC relative offset to GOT with signed low -R_M32R_GOTPC_LO = 61 # Low 16 bit PC relative offset to GOT -R_M32R_GOTOFF_HI_ULO = 62 # High 16 bit offset to GOT with unsigned low -R_M32R_GOTOFF_HI_SLO = 63 # High 16 bit offset to GOT with signed low -R_M32R_GOTOFF_LO = 64 # Low 16 bit offset to GOT -R_M32R_NUM = 256 # Keep this the last entry. diff --git a/miasm/elfesteem/elf_init.py b/miasm/elfesteem/elf_init.py deleted file mode 100644 index 14a37eb5..00000000 --- a/miasm/elfesteem/elf_init.py +++ /dev/null @@ -1,878 +0,0 @@ -#! 
/usr/bin/env python - -from __future__ import print_function -from builtins import range -import logging -import struct - -from future.utils import PY3, with_metaclass - -from miasm.core.utils import force_bytes -from miasm.elfesteem import cstruct -from miasm.elfesteem import elf -from miasm.elfesteem.strpatchwork import StrPatchwork - -log = logging.getLogger("elfparse") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - - -def printable(string): - if isinstance(string, bytes): - return "".join( - c.decode() if b" " <= c < b"~" else "." - for c in (string[i:i+1] for i in range(len(string))) - ) - return string - - -class StructWrapper_metaclass(type): - - def __new__(cls, name, bases, dct): - wrapped = dct["wrapped"] - if wrapped is not None: # XXX: make dct lookup look into base classes - for fname, v in wrapped._fields: - dct[fname] = property(dct.pop("get_" + fname, - lambda self, fname=fname: getattr( - self.cstr, fname)), - dct.pop("set_" + fname, - lambda self, v, fname=fname: setattr( - self.cstr, fname, v)), - dct.pop("del_" + fname, None)) - return type.__new__(cls, name, bases, dct) - - -class StructWrapper(with_metaclass(StructWrapper_metaclass, object)): - - wrapped = None - - def __init__(self, parent, sex, size, *args, **kargs): - self.cstr = self.wrapped(sex, size, *args, **kargs) - self.parent = parent - - def __getitem__(self, item): - return getattr(self, item) - - def __repr__(self): - return "> 8].name - - def get_type(self): - return self.cstr.info & 0xff - - -class WRel64(StructWrapper): - wrapped = elf.Rel64 - wrapped._fields.append(("sym", "u32")) - wrapped._fields.append(("type", "u32")) - - def get_sym(self): - return self.parent.linksection.symtab[self.cstr.info >> 32].name - - def get_type(self): - return self.cstr.info & 0xffffffff - - -class WRela32(WRel32): - wrapped = elf.Rela32 - 
wrapped._fields.append(("sym", "u32")) - wrapped._fields.append(("type", "u08")) - - def get_sym(self): - return self.parent.linksection.symtab[self.cstr.info >> 8].name - - def get_type(self): - return self.cstr.info & 0xff - - -class WRela64(WRel64): - wrapped = elf.Rela64 - wrapped._fields.append(("sym", "u32")) - wrapped._fields.append(("type", "u32")) - - def get_sym(self): - return self.parent.linksection.symtab[self.cstr.info >> 32].name - - def get_type(self): - return self.cstr.info & 0xffffffff - - -class WShdr(StructWrapper): - wrapped = elf.Shdr - - def get_name(self): - return self.parent.parent._shstr.get_name(self.cstr.name) - - -class WDynamic(StructWrapper): - wrapped = elf.Dynamic - - def get_name(self): - if self.type == elf.DT_NEEDED: - return self.parent.linksection.get_name(self.cstr.name) - return self.cstr.name - - -class WPhdr(StructWrapper): - wrapped = elf.Phdr - - -class WPhdr64(StructWrapper): - wrapped = elf.Phdr64 - - -class WNhdr(StructWrapper): - wrapped = elf.Nhdr - - -class ContentManager(object): - - def __get__(self, owner, x): - if hasattr(owner, '_content'): - return owner._content - - def __set__(self, owner, new_content): - owner.resize(len(owner._content), len(new_content)) - owner._content = StrPatchwork(new_content) - owner.parse_content(owner.sex, owner.size) - - def __delete__(self, owner): - self.__set__(owner, None) - - -# Sections - -class Section_metaclass(type): - - def __new__(cls, name, bases, dct): - o = type.__new__(cls, name, bases, dct) - if name != "Section": - Section.register(o) - return o - - def register(cls, o): - if o.sht is not None: - cls.sectypes[o.sht] = o - - def __call__(cls, parent, sex, size, shstr=None): - sh = None - if shstr is not None: - sh = WShdr(None, sex, size, shstr) - if sh.type in Section.sectypes: - cls = Section.sectypes[sh.type] - i = cls.__new__(cls, cls.__name__, cls.__bases__, cls.__dict__) - if sh is not None: - sh.parent = i - i.__init__(parent, sh) - return i - - -class 
Section(with_metaclass(Section_metaclass, object)): - - sectypes = {} - content = ContentManager() - - def resize(self, old, new): - self.sh.size += new - old - self.parent.resize(self, new - old) - if self.phparent: - self.phparent.resize(self, new - old) - - def parse_content(self, sex, size): - self.sex, self.size = sex, size - pass - - def get_linksection(self): - return self.parent[self.sh.link] - - def set_linksection(self, val): - if isinstance(val, Section): - val = self.parent.shlist.find(val) - if type(val) is int: - self.sh.link = val - linksection = property(get_linksection, set_linksection) - - def get_infosection(self): - # XXX info may not be in sh list ?!? - if not self.sh.info in self.parent: - return None - return self.parent[self.sh.info] - - def set_infosection(self, val): - if isinstance(val, Section): - val = self.parent.shlist.find(val) - if type(val) is int: - self.sh.info = val - infosection = property(get_infosection, set_infosection) - - def __init__(self, parent, sh=None): - self.parent = parent - self.phparent = None - self.sh = sh - self._content = b"" - - def __repr__(self): - r = "{%(name)s ofs=%(offset)#x sz=%(size)#x addr=%(addr)#010x}" % self.sh - return r - - -class NullSection(Section): - sht = elf.SHT_NULL - - def get_name(self, ofs): - # XXX check this - return b"" - - -class ProgBits(Section): - sht = elf.SHT_PROGBITS - - -class HashSection(Section): - sht = elf.SHT_HASH - - -class NoBitsSection(Section): - sht = elf.SHT_NOBITS - - -class ShLibSection(Section): - sht = elf.SHT_SHLIB - - -class InitArray(Section): - sht = elf.SHT_INIT_ARRAY - - -class FiniArray(Section): - sht = elf.SHT_FINI_ARRAY - - -class GroupSection(Section): - sht = elf.SHT_GROUP - - -class SymTabSHIndeces(Section): - sht = elf.SHT_SYMTAB_SHNDX - - -class GNUVerSym(Section): - sht = elf.SHT_GNU_versym - - -class GNUVerNeed(Section): - sht = elf.SHT_GNU_verneed - - -class GNUVerDef(Section): - sht = elf.SHT_GNU_verdef - - -class GNULibLIst(Section): - sht 
= elf.SHT_GNU_LIBLIST - - -class CheckSumSection(Section): - sht = elf.SHT_CHECKSUM - - -class NoteSection(Section): - sht = elf.SHT_NOTE - - def parse_content(self, sex, size): - self.sex, self.size = sex, size - c = self.content - hsz = 12 - self.notes = [] - # XXX: c may not be aligned? - while len(c) > hsz: - note = WNhdr(self, sex, size, c) - namesz, descsz = note.namesz, note.descsz - name = c[hsz:hsz + namesz] - desc = c[hsz + namesz:hsz + namesz + descsz] - c = c[hsz + namesz + descsz:] - self.notes.append((note.type, name, desc)) - - -class Dynamic(Section): - sht = elf.SHT_DYNAMIC - - def parse_content(self, sex, size): - self.sex, self.size = sex, size - c = self.content - self.dyntab = [] - self.dynamic = {} - sz = self.sh.entsize - idx = 0 - while len(c) > sz*idx: - s = c[sz*idx:sz*(idx+1)] - idx += 1 - dyn = WDynamic(self, sex, size, s) - self.dyntab.append(dyn) - if isinstance(dyn.name, str): - self[dyn.name] = dyn - - def __setitem__(self, item, value): - if isinstance(item, bytes): - self.dynamic[item] = value - return - if isinstance(item, str): - self.symbols[item.encode()] = value - return - self.dyntab[item] = value - - def __getitem__(self, item): - if isinstance(item, bytes): - return self.dynamic[item] - if isinstance(item, str): - return self.dynamic[item.encode()] - return self.dyntab[item] - - -class StrTable(Section): - sht = elf.SHT_STRTAB - - def parse_content(self, sex, size): - self.sex, self.size = sex, size - self.res = {} - c = self.content - q = 0 - index = 0 - l = len(c) - while index < l: - p = c.find(b"\x00", index) - if p < 0: - log.warning("Missing trailing 0 for string [%s]" % c) # XXX - p = len(c) - index - self.res[index] = c[index:p] - # print q, c[:p] - index = p + 1 - # q += p+1 - # c = c[p+1:] - - def get_name(self, ofs): - return self.content[ofs:self.content.find(b'\x00', start=ofs)] - - def add_name(self, name): - name = force_bytes(name) - name = name + b"\x00" - if name in self.content: - return 
self.content.find(name) - n = len(self.content) - self.content = bytes(self.content) + name - return n - - def mod_name(self, name, new_name): - s = bytes(self.content) - name_b = b'\x00%s\x00' % name.encode() - if not name_b in s: - raise ValueError('Unknown name %r' % name) - self.content = s.replace( - name_b, - b'\x00%s\x00' % new_name.encode() - ) - return len(self.content) - - -class SymTable(Section): - sht = elf.SHT_SYMTAB - - def parse_content(self, sex, size): - self.sex, self.size = sex, size - c = self.content - self.symtab = [] - self.symbols = {} - sz = self.sh.entsize - index = 0 - l = len(c) - if size == 32: - WSym = WSym32 - elif size == 64: - WSym = WSym64 - else: - ValueError('unknown size') - while index < l: - s = c[index:index + sz] - index += sz - sym = WSym(self, sex, size, s) - self.symtab.append(sym) - self[sym.name] = sym - - def __getitem__(self, item): - if isinstance(item, bytes): - return self.symbols[item] - if isinstance(item, str): - return self.symbols[item.encode()] - return self.symtab[item] - - def __setitem__(self, item, value): - if isinstance(item, bytes): - self.symbols[item] = value - return - if isinstance(item, str): - self.symbols[item.encode()] = value - return - self.symtab[item] = value - - -class DynSymTable(SymTable): - sht = elf.SHT_DYNSYM - - -class RelTable(Section): - sht = elf.SHT_REL - - def parse_content(self, sex, size): - self.sex, self.size = sex, size - if size == 32: - WRel = WRel32 - elif size == 64: - WRel = WRel64 - else: - ValueError('unknown size') - c = self.content - self.reltab = [] - self.rel = {} - sz = self.sh.entsize - - idx = 0 - while len(c) > sz*idx: - s = c[sz*idx:sz*(idx+1)] - idx += 1 - rel = WRel(self, sex, size, s) - self.reltab.append(rel) - if rel.parent.linksection != self.parent.shlist[0]: - self.rel[rel.sym] = rel - - -class RelATable(RelTable): - sht = elf.SHT_RELA - -# Section List - - -class SHList(object): - - def __init__(self, parent, sex, size): - self.parent = parent - 
self.shlist = [] - ehdr = self.parent.Ehdr - of1 = ehdr.shoff - if not of1: # No SH table - return - for i in range(ehdr.shnum): - of2 = of1 + ehdr.shentsize - shstr = parent[of1:of2] - self.shlist.append(Section(self, sex, size, shstr=shstr)) - of1 = of2 - self._shstr = self.shlist[ehdr.shstrndx] - - for s in self.shlist: - if not isinstance(s, NoBitsSection): - s._content = StrPatchwork( - parent[s.sh.offset: s.sh.offset + s.sh.size] - ) - # Follow dependencies when initializing sections - zero = self.shlist[0] - todo = self.shlist[1:] - done = [] - while todo: - s = todo.pop(0) - if ((s.linksection == zero or s.linksection in done) and - (s.infosection in [zero, None] or s.infosection in done)): - done.append(s) - s.parse_content(sex, size) - else: - todo.append(s) - for s in self.shlist: - self.do_add_section(s) - - def do_add_section(self, section): - n = section.sh.name - if n.startswith(b"."): - n = n[1:] - n = printable(n).replace(".", "_").replace("-", "_") - setattr(self, n, section) # xxx - - def append(self, item): - self.do_add_section(item) - self.shlist.append(item) - - def __getitem__(self, item): - return self.shlist[item] - - def __repr__(self): - rep = ["# section offset size addr flags"] - for i, s in enumerate(self.shlist): - l = "%(name)-15s %(offset)08x %(size)06x %(addr)08x %(flags)x " % s.sh - l = ("%2i " % i) + l + s.__class__.__name__ - rep.append(l) - return "\n".join(rep) - - def __bytes__(self): - return b"".join( - bytes(s.sh) for s in self.shlist - ) - - def __str__(self): - if PY3: - return repr(self) - return bytes(self) - - def resize(self, sec, diff): - for s in self.shlist: - if s.sh.offset > sec.sh.offset: - s.sh.offset += diff - if self.parent.Ehdr.shoff > sec.sh.offset: - self.parent.Ehdr.shoff += diff - if self.parent.Ehdr.phoff > sec.sh.offset: - self.parent.Ehdr.phoff += diff - -# Program Header List - - -class ProgramHeader(object): - - def __init__(self, parent, sex, size, phstr): - self.parent = parent - self.ph = 
WPhdr(self, sex, size, phstr) - self.shlist = [] - for s in self.parent.parent.sh: - if isinstance(s, NullSection): - continue - if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) - or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): - s.phparent = self - self.shlist.append(s) - - def resize(self, sec, diff): - self.ph.filesz += diff - self.ph.memsz += diff - self.parent.resize(sec, diff) - - -class ProgramHeader64(object): - - def __init__(self, parent, sex, size, phstr): - self.parent = parent - self.ph = WPhdr64(self, sex, size, phstr) - self.shlist = [] - for s in self.parent.parent.sh: - if isinstance(s, NullSection): - continue - if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) - or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): - s.phparent = self - self.shlist.append(s) - - def resize(self, sec, diff): - self.ph.filesz += diff - self.ph.memsz += diff - self.parent.resize(sec, diff) - - -class PHList(object): - - def __init__(self, parent, sex, size): - self.parent = parent - self.phlist = [] - ehdr = self.parent.Ehdr - of1 = ehdr.phoff - for i in range(ehdr.phnum): - of2 = of1 + ehdr.phentsize - phstr = parent[of1:of2] - if size == 32: - self.phlist.append(ProgramHeader(self, sex, size, phstr)) - else: - self.phlist.append(ProgramHeader64(self, sex, size, phstr)) - of1 = of2 - - def __getitem__(self, item): - return self.phlist[item] - - def __repr__(self): - r = [" offset filesz vaddr memsz"] - for i, p in enumerate(self.phlist): - l = "%(offset)07x %(filesz)06x %(vaddr)08x %(memsz)07x %(type)02x %(flags)01x" % p.ph - l = ("%2i " % i) + l - r.append(l) - r.append(" " + " ".join(printable(s.sh.name) for s in p.shlist)) - return "\n".join(r) - - def __bytes__(self): - return b"".join( - bytes(p.ph) for p in self.phlist - ) - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__(self) - - def resize(self, sec, diff): - for p in 
self.phlist: - if p.ph.offset > sec.sh.offset: - p.ph.offset += diff - if p.ph.vaddr > sec.phparent.ph.vaddr + sec.sh.offset: - p.ph.vaddr += diff - if p.ph.paddr > sec.phparent.ph.paddr + sec.sh.offset: - p.ph.paddr += diff - - -class virt(object): - - def __init__(self, x): - self.parent = x - - def get_rvaitem(self, start, stop=None): - if stop == None: - s = self.parent.getsectionbyvad(start) - if s: - start = start - s.sh.addr - else: - s = self.parent.getphbyvad(start) - if s: - start = start - s.ph.vaddr - if not s: - return [(None, start)] - return [(s, start)] - total_len = stop - start - - virt_item = [] - while total_len: - s = self.parent.getsectionbyvad(start) - if not s: - s = self.parent.getphbyvad(start) - if not s: - raise ValueError('unknown rva address! %x' % start) - if isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64): - s_max = s.ph.filesz - s_start = start - s.ph.vaddr - s_stop = stop - s.ph.vaddr - else: - s_max = s.sh.size - s_start = start - s.sh.addr - s_stop = stop - s.sh.addr - if s_stop > s_max: - s_stop = s_max - - s_len = s_stop - s_start - if s_len == 0: - raise ValueError('empty section! 
%x' % start) - total_len -= s_len - start += s_len - n_item = slice(s_start, s_stop) - virt_item.append((s, n_item)) - return virt_item - - def item2virtitem(self, item): - if not type(item) is slice: # integer - return self.get_rvaitem(item) - start = item.start - stop = item.stop - assert(item.step is None) - return self.get_rvaitem(start, stop) - - def get(self, ad_start, ad_stop=None): - rva_items = self.get_rvaitem(ad_start, ad_stop) - data_out = b"" - for s, n_item in rva_items: - if not (isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64)): - data_out += s.content.__getitem__(n_item) - continue - if not type(n_item) is slice: - n_item = slice(n_item, n_item + 1, 1) - start = n_item.start + s.ph.offset - stop = n_item.stop + s.ph.offset - if n_item.step != None: - step = n_item.step + s.ph.offset - else: - step = None - n_item = slice(start, stop, step) - # data_out += self.parent.content.__s.content.__getitem__(n_item) - data_out += self.parent.content.__getitem__(n_item) - - return data_out - - def set(self, item, data): - if not type(item) is slice: - item = slice(item, item + len(data), None) - virt_item = self.item2virtitem(item) - if not virt_item: - return - off = 0 - for s, n_item in virt_item: - if isinstance(s, ProgBits): - i = slice(off, n_item.stop + off - n_item.start, n_item.step) - - data_slice = data.__getitem__(i) - s.content.__setitem__(n_item, data_slice) - off = i.stop - else: - raise ValueError('TODO XXX') - - return - - def __getitem__(self, item): - if isinstance(item, slice): - assert(item.step is None) - return self.get(item.start, item.stop) - else: - return self.get(item) - - def __setitem__(self, item, data): - if isinstance(item, slice): - rva = item.start - else: - rva = item - self.set(rva, data) - - def max_addr(self): - # the maximum virtual address is found by retrieving the maximum - # possible virtual address, either from the program entries, and - # section entries. if there is no such object, raise an error. 
- l = 0 - if self.parent.ph.phlist: - for phdr in self.parent.ph.phlist: - l = max(l, phdr.ph.vaddr + phdr.ph.memsz) - if self.parent.sh.shlist: - for shdr in self.parent.sh.shlist: - l = max(l, shdr.sh.addr + shdr.sh.size) - if not l: - raise ValueError('maximum virtual address not found !') - return l - - def is_addr_in(self, ad): - return self.parent.is_in_virt_address(ad) - - def find(self, pattern, start=0): - sections = [] - offset = start - for s in self.parent.ph: - s_max = s.ph.memsz # max(s.ph.filesz, s.ph.memsz) - if offset < s.ph.vaddr + s_max: - sections.append(s) - - if not sections: - return -1 - offset -= sections[0].ph.vaddr - if offset < 0: - offset = 0 - for s in sections: - data = self.parent.content[s.ph.offset:s.ph.offset + s.ph.filesz] - ret = data.find(pattern, offset) - if ret != -1: - return ret + s.ph.vaddr # self.parent.rva2virt(s.addr + ret) - offset = 0 - return -1 - -# ELF object - - -class ELF(object): - - def __init__(self, elfstr): - self._content = elfstr - self.parse_content() - - self._virt = virt(self) - - def get_virt(self): - return self._virt - virt = property(get_virt) - - content = ContentManager() - - def parse_content(self): - h = self.content[:8] - self.size = struct.unpack('B', h[4:5])[0] * 32 - self.sex = struct.unpack('B', h[5:6])[0] - self.Ehdr = WEhdr(self, self.sex, self.size, self.content) - self.sh = SHList(self, self.sex, self.size) - self.ph = PHList(self, self.sex, self.size) - - def resize(self, old, new): - pass - - def __getitem__(self, item): - return self.content[item] - - def build_content(self): - c = StrPatchwork() - c[0] = bytes(self.Ehdr) - c[self.Ehdr.phoff] = bytes(self.ph) - for s in self.sh: - c[s.sh.offset] = bytes(s.content) - c[self.Ehdr.shoff] = bytes(self.sh) - return bytes(c) - - def __bytes__(self): - return self.build_content() - - def __str__(self): - if PY3: - return repr(self) - return bytes(self) - - def getphbyvad(self, ad): - for s in self.ph: - if s.ph.vaddr <= ad < s.ph.vaddr + 
s.ph.memsz: - return s - - def getsectionbyvad(self, ad): - for s in self.sh: - if s.sh.addr <= ad < s.sh.addr + s.sh.size: - return s - - def getsectionbyname(self, name): - name = force_bytes(name) - for s in self.sh: - try: - if s.sh.name.strip(b'\x00') == name: - return s - except UnicodeDecodeError: - pass - return None - - def is_in_virt_address(self, ad): - for s in self.sh: - if s.sh.addr <= ad < s.sh.addr + s.sh.size: - return True - return False diff --git a/miasm/elfesteem/minidump.py b/miasm/elfesteem/minidump.py deleted file mode 100644 index ee2be8a0..00000000 --- a/miasm/elfesteem/minidump.py +++ /dev/null @@ -1,545 +0,0 @@ -"""Constants and structures associated to Minidump format -Based on: http://amnesia.gtisc.gatech.edu/~moyix/minidump.py -""" -from future.utils import viewitems - -from future.builtins import int as int_types -from miasm.elfesteem.new_cstruct import CStruct - -class Enumeration(object): - """Stand for an enumeration type""" - - def __init__(self, enum_info): - """enum_info: {name: value}""" - self._enum_info = enum_info - self._inv_info = dict((v, k) for k, v in viewitems(enum_info)) - - def __getitem__(self, key): - """Helper: assume that string is for key, integer is for value""" - if isinstance(key, int_types): - return self._inv_info[key] - return self._enum_info[key] - - def __getattr__(self, key): - if key in self._enum_info: - return self._enum_info[key] - raise AttributeError - - def from_value(self, value): - return self._inv_info[value] - - -class Rva(CStruct): - """Relative Virtual Address - Note: RVA in Minidump means "file offset" - """ - _fields = [("rva", "u32"), - ] - - -minidumpType = Enumeration({ - # MINIDUMP_TYPE - # https://msdn.microsoft.com/en-us/library/ms680519(v=vs.85).aspx - "MiniDumpNormal" : 0x00000000, - "MiniDumpWithDataSegs" : 0x00000001, - "MiniDumpWithFullMemory" : 0x00000002, - "MiniDumpWithHandleData" : 0x00000004, - "MiniDumpFilterMemory" : 0x00000008, - "MiniDumpScanMemory" : 0x00000010, - 
"MiniDumpWithUnloadedModules" : 0x00000020, - "MiniDumpWithIndirectlyReferencedMemory" : 0x00000040, - "MiniDumpFilterModulePaths" : 0x00000080, - "MiniDumpWithProcessThreadData" : 0x00000100, - "MiniDumpWithPrivateReadWriteMemory" : 0x00000200, - "MiniDumpWithoutOptionalData" : 0x00000400, - "MiniDumpWithFullMemoryInfo" : 0x00000800, - "MiniDumpWithThreadInfo" : 0x00001000, - "MiniDumpWithCodeSegs" : 0x00002000, - "MiniDumpWithoutAuxiliaryState" : 0x00004000, - "MiniDumpWithFullAuxiliaryState" : 0x00008000, - "MiniDumpWithPrivateWriteCopyMemory" : 0x00010000, - "MiniDumpIgnoreInaccessibleMemory" : 0x00020000, - "MiniDumpWithTokenInformation" : 0x00040000, - "MiniDumpWithModuleHeaders" : 0x00080000, - "MiniDumpFilterTriage" : 0x00100000, - "MiniDumpValidTypeFlags" : 0x001fffff, -}) - -class MinidumpHDR(CStruct): - """MINIDUMP_HEADER - https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx - """ - _fields = [("Magic", "u32"), # MDMP - ("Version", "u16"), - ("ImplementationVersion", "u16"), - ("NumberOfStreams", "u32"), - ("StreamDirectoryRva", "Rva"), - ("Checksum", "u32"), - ("TimeDateStamp", "u32"), - ("Flags", "u32") - ] - -class LocationDescriptor(CStruct): - """MINIDUMP_LOCATION_DESCRIPTOR - https://msdn.microsoft.com/en-us/library/ms680383(v=vs.85).aspx - """ - _fields = [("DataSize", "u32"), - ("Rva", "Rva"), - ] - - -streamType = Enumeration({ - # MINIDUMP_STREAM_TYPE - # https://msdn.microsoft.com/en-us/library/ms680394(v=vs.85).aspx - "UnusedStream" : 0, - "ReservedStream0" : 1, - "ReservedStream1" : 2, - "ThreadListStream" : 3, - "ModuleListStream" : 4, - "MemoryListStream" : 5, - "ExceptionStream" : 6, - "SystemInfoStream" : 7, - "ThreadExListStream" : 8, - "Memory64ListStream" : 9, - "CommentStreamA" : 10, - "CommentStreamW" : 11, - "HandleDataStream" : 12, - "FunctionTableStream" : 13, - "UnloadedModuleListStream" : 14, - "MiscInfoStream" : 15, - "MemoryInfoListStream" : 16, - "ThreadInfoListStream" : 17, - "HandleOperationListStream" : 18, - 
"LastReservedStream" : 0xffff, -}) - -class StreamDirectory(CStruct): - """MINIDUMP_DIRECTORY - https://msdn.microsoft.com/en-us/library/ms680365(VS.85).aspx - """ - _fields = [("StreamType", "u32"), - ("Location", "LocationDescriptor"), - ] - - @property - def pretty_name(self): - return streamType[self.StreamType] - - -class FixedFileInfo(CStruct): - """VS_FIXEDFILEINFO - https://msdn.microsoft.com/en-us/library/ms646997(v=vs.85).aspx - """ - _fields = [("dwSignature", "u32"), - ("dwStrucVersion", "u32"), - ("dwFileVersionMS", "u32"), - ("dwFileVersionLS", "u32"), - ("dwProductVersionMS", "u32"), - ("dwProductVersionLS", "u32"), - ("dwFileFlagsMask", "u32"), - ("dwFileFlags", "u32"), - ("dwFileOS", "u32"), - ("dwFileType", "u32"), - ("dwFileSubtype", "u32"), - ("dwFileDateMS", "u32"), - ("dwFileDateLS", "u32"), - ] - -class MinidumpString(CStruct): - """MINIDUMP_STRING - https://msdn.microsoft.com/en-us/library/ms680395(v=vs.85).aspx - """ - _fields = [("Length", "u32"), - ("Buffer", "u08", lambda string:string.Length), - ] - -class Module(CStruct): - """MINIDUMP_MODULE - https://msdn.microsoft.com/en-us/library/ms680392(v=vs.85).aspx - """ - _fields = [("BaseOfImage", "u64"), - ("SizeOfImage", "u32"), - ("CheckSum", "u32"), - ("TimeDateStamp", "u32"), - ("ModuleNameRva", "Rva"), - ("VersionInfo", "FixedFileInfo"), - ("CvRecord", "LocationDescriptor"), - ("MiscRecord", "LocationDescriptor"), - ("Reserved0", "u64"), - ("Reserved1", "u64"), - ] - - -class ModuleList(CStruct): - """MINIDUMP_MODULE_LIST - https://msdn.microsoft.com/en-us/library/ms680391(v=vs.85).aspx - """ - _fields = [("NumberOfModules", "u32"), - ("Modules", "Module", lambda mlist:mlist.NumberOfModules), - ] - - -class MemoryDescriptor64(CStruct): - """MINIDUMP_MEMORY_DESCRIPTOR64 - https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx - """ - _fields = [("StartOfMemoryRange", "u64"), - ("DataSize", "u64") - ] - - -class Memory64List(CStruct): - """MINIDUMP_MEMORY64_LIST - 
https://msdn.microsoft.com/en-us/library/ms680387(v=vs.85).aspx - """ - _fields = [("NumberOfMemoryRanges", "u64"), - ("BaseRva", "u64"), - ("MemoryRanges", "MemoryDescriptor64", - lambda mlist:mlist.NumberOfMemoryRanges), - ] - -class MemoryDescriptor(CStruct): - """MINIDUMP_MEMORY_DESCRIPTOR - https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx - """ - _fields = [("StartOfMemoryRange", "u64"), - ("Memory", "LocationDescriptor"), - ] - -class MemoryList(CStruct): - """MINIDUMP_MEMORY_LIST - https://msdn.microsoft.com/en-us/library/ms680387(v=vs.85).aspx - """ - _fields = [("NumberOfMemoryRanges", "u32"), - ("MemoryRanges", "MemoryDescriptor", - lambda mlist:mlist.NumberOfMemoryRanges), - ] - -memProtect = Enumeration({ - # MEM PROTECT - # https://msdn.microsoft.com/en-us/library/aa366786(v=vs.85).aspx - "PAGE_NOACCESS" : 0x0001, - "PAGE_READONLY" : 0x0002, - "PAGE_READWRITE" : 0x0004, - "PAGE_WRITECOPY" : 0x0008, - "PAGE_EXECUTE" : 0x0010, - "PAGE_EXECUTE_READ" : 0x0020, - "PAGE_EXECUTE_READWRITE" : 0x0040, - "PAGE_EXECUTE_WRITECOPY" : 0x0080, - "PAGE_GUARD" : 0x0100, - "PAGE_NOCACHE" : 0x0200, - "PAGE_WRITECOMBINE" : 0x0400, -}) - -class MemoryInfo(CStruct): - """MINIDUMP_MEMORY_INFO - https://msdn.microsoft.com/en-us/library/ms680386(v=vs.85).aspx - """ - _fields = [("BaseAddress", "u64"), - ("AllocationBase", "u64"), - ("AllocationProtect", "u32"), - ("__alignment1", "u32"), - ("RegionSize", "u64"), - ("State", "u32"), - ("Protect", "u32"), - ("Type", "u32"), - ("__alignment2", "u32"), - ] - -class MemoryInfoList(CStruct): - """MINIDUMP_MEMORY_INFO_LIST - https://msdn.microsoft.com/en-us/library/ms680385(v=vs.85).aspx - """ - _fields = [("SizeOfHeader", "u32"), - ("SizeOfEntry", "u32"), - ("NumberOfEntries", "u64"), - # Fake field, for easy access to MemoryInfo elements - ("MemoryInfos", "MemoryInfo", - lambda mlist: mlist.NumberOfEntries), - ] - - -contextFlags_x86 = Enumeration({ - "CONTEXT_i386" : 0x00010000, - "CONTEXT_CONTROL" : 0x00010001, - 
"CONTEXT_INTEGER" : 0x00010002, - "CONTEXT_SEGMENTS" : 0x00010004, - "CONTEXT_FLOATING_POINT" : 0x00010008, - "CONTEXT_DEBUG_REGISTERS" : 0x00010010, - "CONTEXT_EXTENDED_REGISTERS" : 0x00010020, -}) - -class FloatingSaveArea(CStruct): - """FLOATING_SAVE_AREA - http://terminus.rewolf.pl/terminus/structures/ntdll/_FLOATING_SAVE_AREA_x86.html - """ - _fields = [("ControlWord", "u32"), - ("StatusWord", "u32"), - ("TagWord", "u32"), - ("ErrorOffset", "u32"), - ("ErrorSelector", "u32"), - ("DataOffset", "u32"), - ("DataSelector", "u32"), - ("RegisterArea", "80s"), - ("Cr0NpxState", "u32"), - ] - -class Context_x86(CStruct): - """CONTEXT x86 - https://msdn.microsoft.com/en-us/en-en/library/ms679284(v=vs.85).aspx - http://terminus.rewolf.pl/terminus/structures/ntdll/_CONTEXT_x86.html - """ - - MAXIMUM_SUPPORTED_EXTENSION = 512 - - def is_activated(flag): - mask = contextFlags_x86[flag] - def check_context(ctx): - if (ctx.ContextFlags & mask == mask): - return 1 - return 0 - return check_context - - _fields = [("ContextFlags", "u32"), - # DebugRegisters - ("Dr0", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr1", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr2", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr3", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr6", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr7", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), - - ("FloatSave", "FloatingSaveArea", - is_activated("CONTEXT_FLOATING_POINT")), - - # SegmentRegisters - ("SegGs", "u32", is_activated("CONTEXT_SEGMENTS")), - ("SegFs", "u32", is_activated("CONTEXT_SEGMENTS")), - ("SegEs", "u32", is_activated("CONTEXT_SEGMENTS")), - ("SegDs", "u32", is_activated("CONTEXT_SEGMENTS")), - # IntegerRegisters - ("Edi", "u32", is_activated("CONTEXT_INTEGER")), - ("Esi", "u32", is_activated("CONTEXT_INTEGER")), - ("Ebx", "u32", is_activated("CONTEXT_INTEGER")), - ("Edx", "u32", is_activated("CONTEXT_INTEGER")), - ("Ecx", "u32", 
is_activated("CONTEXT_INTEGER")), - ("Eax", "u32", is_activated("CONTEXT_INTEGER")), - # ControlRegisters - ("Ebp", "u32", is_activated("CONTEXT_CONTROL")), - ("Eip", "u32", is_activated("CONTEXT_CONTROL")), - ("SegCs", "u32", is_activated("CONTEXT_CONTROL")), - ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), - ("Esp", "u32", is_activated("CONTEXT_CONTROL")), - ("SegSs", "u32", is_activated("CONTEXT_CONTROL")), - - ("ExtendedRegisters", "%ds" % MAXIMUM_SUPPORTED_EXTENSION, - is_activated("CONTEXT_EXTENDED_REGISTERS")), - ] - - -contextFlags_AMD64 = Enumeration({ - "CONTEXT_AMD64" : 0x00100000, - "CONTEXT_CONTROL" : 0x00100001, - "CONTEXT_INTEGER" : 0x00100002, - "CONTEXT_SEGMENTS" : 0x00100004, - "CONTEXT_FLOATING_POINT" : 0x00100008, - "CONTEXT_DEBUG_REGISTERS" : 0x00100010, - "CONTEXT_XSTATE" : 0x00100020, - "CONTEXT_EXCEPTION_ACTIVE" : 0x08000000, - "CONTEXT_SERVICE_ACTIVE" : 0x10000000, - "CONTEXT_EXCEPTION_REQUEST" : 0x40000000, - "CONTEXT_EXCEPTION_REPORTING" : 0x80000000, -}) - - -class M128A(CStruct): - """M128A - http://terminus.rewolf.pl/terminus/structures/ntdll/_M128A_x64.html - """ - _fields = [("Low", "u64"), - ("High", "u64"), - ] - -class Context_AMD64(CStruct): - """CONTEXT AMD64 - https://github.com/duarten/Threadjack/blob/master/WinNT.h - """ - - def is_activated(flag): - mask = contextFlags_AMD64[flag] - def check_context(ctx): - if (ctx.ContextFlags & mask == mask): - return 1 - return 0 - return check_context - - _fields = [ - - # Only used for Convenience - ("P1Home", "u64"), - ("P2Home", "u64"), - ("P3Home", "u64"), - ("P4Home", "u64"), - ("P5Home", "u64"), - ("P6Home", "u64"), - - # Control - ("ContextFlags", "u32"), - ("MxCsr", "u32"), - - # Segment & processor - # /!\ activation depends on multiple flags - ("SegCs", "u16", is_activated("CONTEXT_CONTROL")), - ("SegDs", "u16", is_activated("CONTEXT_SEGMENTS")), - ("SegEs", "u16", is_activated("CONTEXT_SEGMENTS")), - ("SegFs", "u16", is_activated("CONTEXT_SEGMENTS")), - ("SegGs", "u16", 
is_activated("CONTEXT_SEGMENTS")), - ("SegSs", "u16", is_activated("CONTEXT_CONTROL")), - ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), - - # Debug registers - ("Dr0", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr1", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr2", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr3", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr6", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), - ("Dr7", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), - - # Integer registers - # /!\ activation depends on multiple flags - ("Rax", "u64", is_activated("CONTEXT_INTEGER")), - ("Rcx", "u64", is_activated("CONTEXT_INTEGER")), - ("Rdx", "u64", is_activated("CONTEXT_INTEGER")), - ("Rbx", "u64", is_activated("CONTEXT_INTEGER")), - ("Rsp", "u64", is_activated("CONTEXT_CONTROL")), - ("Rbp", "u64", is_activated("CONTEXT_INTEGER")), - ("Rsi", "u64", is_activated("CONTEXT_INTEGER")), - ("Rdi", "u64", is_activated("CONTEXT_INTEGER")), - ("R8", "u64", is_activated("CONTEXT_INTEGER")), - ("R9", "u64", is_activated("CONTEXT_INTEGER")), - ("R10", "u64", is_activated("CONTEXT_INTEGER")), - ("R11", "u64", is_activated("CONTEXT_INTEGER")), - ("R12", "u64", is_activated("CONTEXT_INTEGER")), - ("R13", "u64", is_activated("CONTEXT_INTEGER")), - ("R14", "u64", is_activated("CONTEXT_INTEGER")), - ("R15", "u64", is_activated("CONTEXT_INTEGER")), - ("Rip", "u64", is_activated("CONTEXT_CONTROL")), - - # Floating point - ("Header", "M128A", lambda ctx: 2), - ("Legacy", "M128A", lambda ctx: 8), - ("Xmm0", "M128A"), - ("Xmm1", "M128A"), - ("Xmm2", "M128A"), - ("Xmm3", "M128A"), - ("Xmm4", "M128A"), - ("Xmm5", "M128A"), - ("Xmm6", "M128A"), - ("Xmm7", "M128A"), - ("Xmm8", "M128A"), - ("Xmm9", "M128A"), - ("Xmm10", "M128A"), - ("Xmm11", "M128A"), - ("Xmm12", "M128A"), - ("Xmm13", "M128A"), - ("Xmm14", "M128A"), - ("Xmm15", "M128A"), - - - # Vector registers - ("VectorRegister", "M128A", lambda ctx: 16), - ("VectorControl", "u64"), - - # 
Special debug control regs - ("DebugControl", "u64"), - ("LastBranchToRip", "u64"), - ("LastBranchFromRip", "u64"), - ("LastExceptionToRip", "u64"), - ("LastExceptionFromRip", "u64"), - ] - -processorArchitecture = Enumeration({ - "PROCESSOR_ARCHITECTURE_X86" : 0, - "PROCESSOR_ARCHITECTURE_MIPS" : 1, - "PROCESSOR_ARCHITECTURE_ALPHA" : 2, - "PROCESSOR_ARCHITECTURE_PPC" : 3, - "PROCESSOR_ARCHITECTURE_SHX" : 4, - "PROCESSOR_ARCHITECTURE_ARM" : 5, - "PROCESSOR_ARCHITECTURE_IA64" : 6, - "PROCESSOR_ARCHITECTURE_ALPHA64" : 7, - "PROCESSOR_ARCHITECTURE_MSIL" : 8, - "PROCESSOR_ARCHITECTURE_AMD64" : 9, - "PROCESSOR_ARCHITECTURE_X86_WIN64" : 10, - "PROCESSOR_ARCHITECTURE_UNKNOWN" : 0xffff, -}) - -class Thread(CStruct): - """MINIDUMP_THREAD - https://msdn.microsoft.com/en-us/library/ms680517(v=vs.85).aspx - """ - - arch2context_cls = { - processorArchitecture.PROCESSOR_ARCHITECTURE_X86: Context_x86, - processorArchitecture.PROCESSOR_ARCHITECTURE_AMD64: Context_AMD64, - } - - def parse_context(self, content, offset): - loc_desc = LocationDescriptor.unpack(content, offset, self.parent_head) - - # Use the correct context depending on architecture - systeminfo = self.parent_head.systeminfo - context_cls = self.arch2context_cls.get(systeminfo.ProcessorArchitecture, - None) - if context_cls is None: - raise ValueError("Unsupported architecture: %s" % systeminfo.pretty_processor_architecture) - - ctxt = context_cls.unpack(content, loc_desc.Rva.rva, self.parent_head) - fake_loc_descriptor = LocationDescriptor(DataSize=0, Rva=Rva(rva=0)) - return ctxt, offset + len(fake_loc_descriptor) - - _fields = [("ThreadId", "u32"), - ("SuspendCount", "u32"), - ("PriorityClass", "u32"), - ("Priority", "u32"), - ("Teb", "u64"), - ("Stack", "MemoryDescriptor"), - ("ThreadContext", (parse_context, - lambda thread, value: NotImplemented)), - ] - -class ThreadList(CStruct): - """MINIDUMP_THREAD_LIST - https://msdn.microsoft.com/en-us/library/ms680515(v=vs.85).aspx - """ - _fields = [("NumberOfThreads", 
"u32"), - ("Threads", "Thread", - lambda mlist: mlist.NumberOfThreads), - ] - - -class SystemInfo(CStruct): - """MINIDUMP_SYSTEM_INFO - https://msdn.microsoft.com/en-us/library/ms680396(v=vs.85).aspx - """ - _fields = [("ProcessorArchitecture", "u16"), - ("ProcessorLevel", "u16"), - ("ProcessorRevision", "u16"), - ("NumberOfProcessors", "u08"), - ("ProductType", "u08"), - ("MajorVersion", "u32"), - ("MinorVersion", "u32"), - ("BuildNumber", "u32"), - ("PlatformId", "u32"), - ("CSDVersionRva", "Rva"), - ("SuiteMask", "u16"), - ("Reserved2", "u16"), - ("VendorId", "u32", lambda sinfo: 3), - ("VersionInformation", "u32"), - ("FeatureInformation", "u32"), - ("AMDExtendedCpuFeatures", "u32"), - ] - - @property - def pretty_processor_architecture(self): - return processorArchitecture[self.ProcessorArchitecture] - diff --git a/miasm/elfesteem/minidump_init.py b/miasm/elfesteem/minidump_init.py deleted file mode 100644 index 0a9022b9..00000000 --- a/miasm/elfesteem/minidump_init.py +++ /dev/null @@ -1,194 +0,0 @@ -""" -High-level abstraction of Minidump file -""" -from builtins import range -import struct - -from miasm.elfesteem.strpatchwork import StrPatchwork -from miasm.elfesteem import minidump as mp - - -class MemorySegment(object): - """Stand for a segment in memory with additional information""" - - def __init__(self, offset, memory_desc, module=None, memory_info=None): - self.offset = offset - self.memory_desc = memory_desc - self.module = module - self.memory_info = memory_info - self.minidump = self.memory_desc.parent_head - - @property - def address(self): - return self.memory_desc.StartOfMemoryRange - - @property - def size(self): - if isinstance(self.memory_desc, mp.MemoryDescriptor64): - return self.memory_desc.DataSize - elif isinstance(self.memory_desc, mp.MemoryDescriptor): - return self.memory_desc.Memory.DataSize - raise TypeError - - @property - def name(self): - if not self.module: - return "" - name = mp.MinidumpString.unpack(self.minidump._content, - 
self.module.ModuleNameRva.rva, - self.minidump) - return b"".join( - struct.pack("B", x) for x in name.Buffer - ).decode("utf-16") - - @property - def content(self): - return self.minidump._content[self.offset:self.offset + self.size] - - @property - def protect(self): - if self.memory_info: - return self.memory_info.Protect - return None - - @property - def pretty_protect(self): - if self.protect is None: - return "UNKNOWN" - return mp.memProtect[self.protect] - - -class Minidump(object): - """Stand for a Minidump file - - Here is a few limitation: - - only < 4GB Minidump are supported (LocationDescriptor handling) - - only Stream relative to memory mapping are implemented - - Official description is available on MSDN: - https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx - """ - - _sex = 0 - _wsize = 32 - - def __init__(self, minidump_str): - self._content = StrPatchwork(minidump_str) - - # Specific streams - self.modulelist = None - self.memory64list = None - self.memorylist = None - self.memoryinfolist = None - self.systeminfo = None - - # Get information - self.streams = [] - self.threads = None - self.parse_content() - - # Memory information - self.memory = {} # base address (virtual) -> Memory information - self.build_memory() - - def parse_content(self): - """Build structures corresponding to current content""" - - # Header - offset = 0 - self.minidumpHDR = mp.MinidumpHDR.unpack(self._content, offset, self) - assert self.minidumpHDR.Magic == 0x504d444d - - # Streams - base_offset = self.minidumpHDR.StreamDirectoryRva.rva - empty_stream = mp.StreamDirectory( - StreamType=0, - Location=mp.LocationDescriptor( - DataSize=0, - Rva=mp.Rva(rva=0) - ) - ) - streamdir_size = len(empty_stream) - for i in range(self.minidumpHDR.NumberOfStreams): - stream_offset = base_offset + i * streamdir_size - stream = mp.StreamDirectory.unpack(self._content, stream_offset, self) - self.streams.append(stream) - - # Launch specific action depending on the stream - 
datasize = stream.Location.DataSize - offset = stream.Location.Rva.rva - if stream.StreamType == mp.streamType.ModuleListStream: - self.modulelist = mp.ModuleList.unpack(self._content, offset, self) - elif stream.StreamType == mp.streamType.MemoryListStream: - self.memorylist = mp.MemoryList.unpack(self._content, offset, self) - elif stream.StreamType == mp.streamType.Memory64ListStream: - self.memory64list = mp.Memory64List.unpack(self._content, offset, self) - elif stream.StreamType == mp.streamType.MemoryInfoListStream: - self.memoryinfolist = mp.MemoryInfoList.unpack(self._content, offset, self) - elif stream.StreamType == mp.streamType.SystemInfoStream: - self.systeminfo = mp.SystemInfo.unpack(self._content, offset, self) - - # Some streams need the SystemInfo stream to work - for stream in self.streams: - datasize = stream.Location.DataSize - offset = stream.Location.Rva.rva - if (self.systeminfo is not None and - stream.StreamType == mp.streamType.ThreadListStream): - self.threads = mp.ThreadList.unpack(self._content, offset, self) - - - def build_memory(self): - """Build an easier to use memory view based on ModuleList and - Memory64List streams""" - - addr2module = dict((module.BaseOfImage, module) - for module in (self.modulelist.Modules if - self.modulelist else [])) - addr2meminfo = dict((memory.BaseAddress, memory) - for memory in (self.memoryinfolist.MemoryInfos if - self.memoryinfolist else [])) - - mode64 = self.minidumpHDR.Flags & mp.minidumpType.MiniDumpWithFullMemory - - if mode64: - offset = self.memory64list.BaseRva - memranges = self.memory64list.MemoryRanges - else: - memranges = self.memorylist.MemoryRanges - - for memory in memranges: - if not mode64: - offset = memory.Memory.Rva.rva - - # Create a MemorySegment with augmented information - base_address = memory.StartOfMemoryRange - module = addr2module.get(base_address, None) - meminfo = addr2meminfo.get(base_address, None) - self.memory[base_address] = MemorySegment(offset, memory, - 
module, meminfo) - - if mode64: - offset += memory.DataSize - - # Sanity check - if mode64: - assert all(addr in self.memory for addr in addr2module) - - def get(self, virt_start, virt_stop): - """Return the content at the (virtual addresses) - [virt_start:virt_stop]""" - - # Find the corresponding memory segment - for addr in self.memory: - if virt_start <= addr <= virt_stop: - break - else: - return b"" - - memory = self.memory[addr] - shift = addr - virt_start - last = virt_stop - addr - if last > memory.size: - raise RuntimeError("Multi-page not implemented") - - return self._content[memory.offset + shift:memory.offset + last] diff --git a/miasm/elfesteem/new_cstruct.py b/miasm/elfesteem/new_cstruct.py deleted file mode 100644 index ec591aa8..00000000 --- a/miasm/elfesteem/new_cstruct.py +++ /dev/null @@ -1,265 +0,0 @@ -#! /usr/bin/env python - -from __future__ import print_function -import re -import struct - -from future.utils import PY3, viewitems, with_metaclass - -type2realtype = {} -size2type = {} -size2type_s = {} - -for t in 'B', 'H', 'I', 'Q': - s = struct.calcsize(t) - type2realtype[t] = s * 8 - size2type[s * 8] = t - -for t in 'b', 'h', 'i', 'q': - s = struct.calcsize(t) - type2realtype[t] = s * 8 - size2type_s[s * 8] = t - -type2realtype['u08'] = size2type[8] -type2realtype['u16'] = size2type[16] -type2realtype['u32'] = size2type[32] -type2realtype['u64'] = size2type[64] - -type2realtype['s08'] = size2type_s[8] -type2realtype['s16'] = size2type_s[16] -type2realtype['s32'] = size2type_s[32] -type2realtype['s64'] = size2type_s[64] - -type2realtype['d'] = 'd' -type2realtype['f'] = 'f' -type2realtype['q'] = 'q' -type2realtype['ptr'] = 'ptr' - -sex_types = {0: '<', 1: '>'} - - -def fix_size(fields, wsize): - out = [] - for name, v in fields: - if v.endswith("s"): - pass - elif v == "ptr": - v = size2type[wsize] - elif not v in type2realtype: - raise ValueError("unknown Cstruct type", v) - else: - v = type2realtype[v] - out.append((name, v)) - fields = 
out - return fields - - -def real_fmt(fmt, wsize): - if fmt == "ptr": - v = size2type[wsize] - elif fmt in type2realtype: - v = type2realtype[fmt] - else: - v = fmt - return v - -all_cstructs = {} - - -class Cstruct_Metaclass(type): - field_suffix = "_value" - - def __new__(cls, name, bases, dct): - for fields in dct['_fields']: - fname = fields[0] - if fname in ['parent', 'parent_head']: - raise ValueError('field name will confuse internal structs', - repr(fname)) - dct[fname] = property(dct.pop("get_" + fname, - lambda self, fname=fname: getattr( - self, fname + self.__class__.field_suffix)), - dct.pop("set_" + fname, - lambda self, v, fname=fname: setattr( - self, fname + self.__class__.field_suffix, v)), - dct.pop("del_" + fname, None)) - - o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct) - if name != "CStruct": - all_cstructs[name] = o - return o - - def unpack_l(cls, s, off=0, parent_head=None, _sex=None, _wsize=None): - if _sex is None and _wsize is None: - # get sex and size from parent - if parent_head is not None: - _sex = parent_head._sex - _wsize = parent_head._wsize - else: - _sex = 0 - _wsize = 32 - c = cls(_sex=_sex, _wsize=_wsize) - if parent_head is None: - parent_head = c - c.parent_head = parent_head - - of1 = off - for field in c._fields: - cpt = None - if len(field) == 2: - fname, ffmt = field - elif len(field) == 3: - fname, ffmt, cpt = field - if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)): - # basic types - if cpt: - value = [] - i = 0 - while i < cpt(c): - fmt = real_fmt(ffmt, _wsize) - of2 = of1 + struct.calcsize(fmt) - value.append(struct.unpack(c.sex + fmt, s[of1:of2])[0]) - of1 = of2 - i += 1 - else: - fmt = real_fmt(ffmt, _wsize) - of2 = of1 + struct.calcsize(fmt) - if not (0 <= of1 < len(s) and 0 <= of2 < len(s)): - raise RuntimeError("not enough data") - value = struct.unpack(c.sex + fmt, s[of1:of2])[0] - elif ffmt == "sz": # null terminated special case - of2 = s.find(b'\x00', of1) - 
if of2 == -1: - raise ValueError('no null char in string!') - of2 += 1 - value = s[of1:of2 - 1] - elif ffmt in all_cstructs: - of2 = of1 - # sub structures - if cpt: - value = [] - i = 0 - while i < cpt(c): - v, l = all_cstructs[ffmt].unpack_l( - s, of1, parent_head, _sex, _wsize) - v.parent = c - value.append(v) - of2 = of1 + l - of1 = of2 - i += 1 - else: - value, l = all_cstructs[ffmt].unpack_l( - s, of1, parent_head, _sex, _wsize) - value.parent = c - of2 = of1 + l - elif isinstance(ffmt, tuple): - f_get, f_set = ffmt - value, of2 = f_get(c, s, of1) - else: - raise ValueError('unknown class', ffmt) - of1 = of2 - setattr(c, fname + c.__class__.field_suffix, value) - - return c, of2 - off - - def unpack(cls, s, off=0, parent_head=None, _sex=None, _wsize=None): - c, l = cls.unpack_l(s, off=off, - parent_head=parent_head, _sex=_sex, _wsize=_wsize) - return c - - -class CStruct(with_metaclass(Cstruct_Metaclass, object)): - _packformat = "" - _fields = [] - - def __init__(self, parent_head=None, _sex=None, _wsize=None, **kargs): - self.parent_head = parent_head - self._size = None - kargs = dict(kargs) - # if not sex or size: get the one of the parent - if _sex == None and _wsize == None: - if parent_head: - _sex = parent_head._sex - _wsize = parent_head._wsize - else: - # else default sex & size - _sex = 0 - _wsize = 32 - # _sex is 0 or 1, sex is '<' or '>' - self._sex = _sex - self._wsize = _wsize - if self._packformat: - self.sex = self._packformat - else: - self.sex = sex_types[_sex] - for f in self._fields: - setattr(self, f[0] + self.__class__.field_suffix, None) - if kargs: - for k, v in viewitems(kargs): - self.__dict__[k + self.__class__.field_suffix] = v - - def pack(self): - out = b'' - for field in self._fields: - cpt = None - if len(field) == 2: - fname, ffmt = field - elif len(field) == 3: - fname, ffmt, cpt = field - - value = getattr(self, fname + self.__class__.field_suffix) - if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', 
ffmt)): - # basic types - fmt = real_fmt(ffmt, self._wsize) - if cpt == None: - if value == None: - o = struct.calcsize(fmt) * b"\x00" - else: - if isinstance(value, str): - value = value.encode() - o = struct.pack(self.sex + fmt, value) - else: - o = b"" - for v in value: - if value == None: - o += struct.calcsize(fmt) * b"\x00" - else: - o += struct.pack(self.sex + fmt, v) - - elif ffmt == "sz": # null terminated special case - o = value + b'\x00' - elif ffmt in all_cstructs: - # sub structures - if cpt == None: - o = bytes(value) - else: - o = b"" - for v in value: - o += bytes(v) - elif isinstance(ffmt, tuple): - f_get, f_set = ffmt - o = f_set(self, value) - - else: - raise ValueError('unknown class', ffmt) - out += o - - return out - - def __bytes__(self): - return self.pack() - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __len__(self): - return len(self.pack()) - - def __repr__(self): - return "<%s=%s>" % (self.__class__.__name__, "/".join( - repr(getattr(self, x[0])) for x in self._fields) - ) - - def __getitem__(self, item): # to work with format strings - return getattr(self, item) diff --git a/miasm/elfesteem/pe.py b/miasm/elfesteem/pe.py deleted file mode 100644 index 56bffbaa..00000000 --- a/miasm/elfesteem/pe.py +++ /dev/null @@ -1,1668 +0,0 @@ -#! 
/usr/bin/env python - -from __future__ import print_function -from builtins import range, str -from collections import defaultdict -import logging -import struct - -from future.builtins import int as int_types -from future.utils import PY3 - -from miasm.core.utils import force_bytes -from miasm.elfesteem.new_cstruct import CStruct -from miasm.elfesteem.strpatchwork import StrPatchwork - -log = logging.getLogger("pepy") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - - -class InvalidOffset(Exception): - pass - - -class Doshdr(CStruct): - _fields = [("magic", "u16"), - ("cblp", "u16"), - ("cp", "u16"), - ("crlc", "u16"), - ("cparhdr", "u16"), - ("minalloc", "u16"), - ("maxalloc", "u16"), - ("ss", "u16"), - ("sp", "u16"), - ("csum", "u16"), - ("ip", "u16"), - ("cs", "u16"), - ("lfarlc", "u16"), - ("ovno", "u16"), - ("res", "8s"), - ("oemid", "u16"), - ("oeminfo", "u16"), - ("res2", "20s"), - ("lfanew", "u32")] - - -class NTsig(CStruct): - _fields = [("signature", "u32"), - ] - - -class Coffhdr(CStruct): - _fields = [("machine", "u16"), - ("numberofsections", "u16"), - ("timedatestamp", "u32"), - ("pointertosymboltable", "u32"), - ("numberofsymbols", "u32"), - ("sizeofoptionalheader", "u16"), - ("characteristics", "u16")] - - -class Optehdr(CStruct): - _fields = [("rva", "u32"), - ("size", "u32")] - - -def get_optehdr_num(nthdr): - numberofrva = nthdr.numberofrvaandsizes - parent = nthdr.parent_head - entry_size = 8 - if parent.Coffhdr.sizeofoptionalheader < numberofrva * entry_size + len(parent.Opthdr): - numberofrva = (parent.Coffhdr.sizeofoptionalheader - len(parent.Opthdr)) // entry_size - log.warn('Bad number of rva.. 
using default %d' % numberofrva) - numberofrva = 0x10 - return numberofrva - - -class Opthdr32(CStruct): - _fields = [("magic", "u16"), - ("majorlinkerversion", "u08"), - ("minorlinkerversion", "u08"), - ("SizeOfCode", "u32"), - ("sizeofinitializeddata", "u32"), - ("sizeofuninitializeddata", "u32"), - ("AddressOfEntryPoint", "u32"), - ("BaseOfCode", "u32"), - ("BaseOfData", "u32"), - ] - - -class Opthdr64(CStruct): - _fields = [("magic", "u16"), - ("majorlinkerversion", "u08"), - ("minorlinkerversion", "u08"), - ("SizeOfCode", "u32"), - ("sizeofinitializeddata", "u32"), - ("sizeofuninitializeddata", "u32"), - ("AddressOfEntryPoint", "u32"), - ("BaseOfCode", "u32"), - ] - - -class NThdr(CStruct): - _fields = [("ImageBase", "ptr"), - ("sectionalignment", "u32"), - ("filealignment", "u32"), - ("majoroperatingsystemversion", "u16"), - ("minoroperatingsystemversion", "u16"), - ("MajorImageVersion", "u16"), - ("MinorImageVersion", "u16"), - ("majorsubsystemversion", "u16"), - ("minorsubsystemversion", "u16"), - ("Reserved1", "u32"), - ("sizeofimage", "u32"), - ("sizeofheaders", "u32"), - ("CheckSum", "u32"), - ("subsystem", "u16"), - ("dllcharacteristics", "u16"), - ("sizeofstackreserve", "ptr"), - ("sizeofstackcommit", "ptr"), - ("sizeofheapreserve", "ptr"), - ("sizeofheapcommit", "ptr"), - ("loaderflags", "u32"), - ("numberofrvaandsizes", "u32"), - ("optentries", "Optehdr", lambda c:get_optehdr_num(c)) - ] - - -class Shdr(CStruct): - _fields = [("name", "8s"), - ("size", "u32"), - ("addr", "u32"), - ("rawsize", "u32"), - ("offset", "u32"), - ("pointertorelocations", "u32"), - ("pointertolinenumbers", "u32"), - ("numberofrelocations", "u16"), - ("numberoflinenumbers", "u16"), - ("flags", "u32")] - - - def get_data(self): - parent = self.parent_head - data = parent.img_rva[self.addr:self.addr + self.size] - return data - - def set_data(self, data): - parent = self.parent_head - parent.img_rva[self.addr] = data - - - data = property(get_data, set_data) - -class 
SHList(CStruct): - _fields = [ - ("shlist", "Shdr", lambda c:c.parent_head.Coffhdr.numberofsections)] - - def add_section(self, name="default", data=b"", **args): - s_align = self.parent_head.NThdr.sectionalignment - s_align = max(0x1000, s_align) - - f_align = self.parent_head.NThdr.filealignment - f_align = max(0x200, f_align) - size = len(data) - rawsize = len(data) - if len(self): - addr = self[-1].addr + self[-1].size - s_last = self[0] - for section in self: - if s_last.offset + s_last.rawsize < section.offset + section.rawsize: - s_last = section - offset = s_last.offset + s_last.rawsize - else: - s_null = bytes(Shdr.unpack(b"\x00" * 0x100)) - offset = self.parent_head.Doshdr.lfanew + len(self.parent_head.NTsig) + len( - self.parent_head.Coffhdr) + self.parent_head.Coffhdr.sizeofoptionalheader + len(bytes(self.parent_head.SHList) + s_null) - addr = 0x2000 - # round addr - addr = (addr + (s_align - 1)) & ~(s_align - 1) - offset = (offset + (f_align - 1)) & ~(f_align - 1) - - attrs = {"name": name, "size": size, - "addr": addr, "rawsize": rawsize, - "offset": offset, - "pointertorelocations": 0, - "pointertolinenumbers": 0, - "numberofrelocations": 0, - "numberoflinenumbers": 0, - "flags": 0xE0000020, - "data": data - } - attrs.update(args) - section = Shdr(self.parent_head, _sex=self.parent_head._sex, - _wsize=self.parent_head._wsize, **attrs) - section.data = data - - if section.rawsize > len(data): - section.data = section.data + b'\x00' * (section.rawsize - len(data)) - section.size = section.rawsize - section.data = bytes(StrPatchwork(section.data)) - section.size = max(s_align, section.size) - - self.append(section) - self.parent_head.Coffhdr.numberofsections = len(self) - - length = (section.addr + section.size + (s_align - 1)) & ~(s_align - 1) - self.parent_head.NThdr.sizeofimage = length - return section - - def align_sections(self, f_align=None, s_align=None): - if f_align == None: - f_align = self.parent_head.NThdr.filealignment - f_align = 
max(0x200, f_align) - if s_align == None: - s_align = self.parent_head.NThdr.sectionalignment - s_align = max(0x1000, s_align) - - if self is None: - return - - addr = self[0].offset - for section in self: - raw_off = f_align * ((addr + f_align - 1) // f_align) - section.offset = raw_off - section.rawsize = len(section.data) - addr = raw_off + section.rawsize - - def __repr__(self): - rep = ["# section offset size addr flags rawsize "] - for i, section in enumerate(self): - name = force_bytes(section.name) - out = "%-15s" % name.strip(b'\x00').decode() - out += "%(offset)08x %(size)06x %(addr)08x %(flags)08x %(rawsize)08x" % section - out = ("%2i " % i) + out - rep.append(out) - return "\n".join(rep) - - def __getitem__(self, item): - return self.shlist[item] - - def __len__(self): - return len(self.shlist) - - def append(self, section): - self.shlist.append(section) - - -class Rva(CStruct): - _fields = [("rva", "ptr"), - ] - - -class Rva32(CStruct): - _fields = [("rva", "u32"), - ] - - -class DescName(CStruct): - _fields = [("name", (lambda c, raw, off: c.gets(raw, off), - lambda c, value: c.sets(value))) - ] - - def gets(self, raw, off): - name = raw[off:raw.find(b'\x00', off)] - return name, off + len(name) + 1 - - def sets(self, value): - return bytes(value) + b"\x00" - - -class ImportByName(CStruct): - _fields = [("hint", "u16"), - ("name", "sz") - ] - - -class ImpDesc_e(CStruct): - _fields = [("originalfirstthunk", "u32"), - ("timestamp", "u32"), - ("forwarderchain", "u32"), - ("name", "u32"), - ("firstthunk", "u32") - ] - - -class struct_array(object): - - def __init__(self, target_class, raw, off, cstr, num=None): - self.l = [] - self.cls = target_class - self.end = None - i = 0 - if not raw: - return - - while (num == None) or (num and i < num): - entry, length = cstr.unpack_l(raw, off, - target_class.parent_head, - target_class.parent_head._sex, - target_class.parent_head._wsize) - if num == None: - if raw[off:off + length] == b'\x00' * length: - self.end 
= b'\x00' * length - break - self.l.append(entry) - off += length - i += 1 - - def __bytes__(self): - out = b"".join(bytes(x) for x in self.l) - if self.end is not None: - out += self.end - return out - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __getitem__(self, item): - return self.l.__getitem__(item) - - def __len__(self): - return len(self.l) - - def append(self, entry): - self.l.append(entry) - - def insert(self, index, entry): - self.l.insert(index, entry) - - -class DirImport(CStruct): - _fields = [("impdesc", (lambda c, raw, off:c.gete(raw, off), - lambda c, value:c.sete(value)))] - - def gete(self, raw, off): - if not off: - return None, off - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000 - - ofend = off + \ - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT].size - out = [] - while off < ofend: - if not 0 <= off < len(self.parent_head.img_rva): - break - imp, length = ImpDesc_e.unpack_l(raw, off) - if (raw[off:off+length] == b'\x00' * length or - imp.name == 0): - # Special case - break - if not (imp.originalfirstthunk or imp.firstthunk): - log.warning("no thunk!!") - break - - out.append(imp) - off += length - imp.dlldescname = DescName.unpack(raw, imp.name, self.parent_head) - if imp.originalfirstthunk and imp.originalfirstthunk < len(self.parent_head.img_rva): - imp.originalfirstthunks = struct_array(self, raw, - imp.originalfirstthunk, - Rva) - else: - imp.originalfirstthunks = None - - if imp.firstthunk and imp.firstthunk < len(self.parent_head.img_rva): - imp.firstthunks = struct_array(self, raw, - imp.firstthunk, - Rva) - else: - imp.firstthunks = None - imp.impbynames = [] - if imp.originalfirstthunk and imp.originalfirstthunk < len(self.parent_head.img_rva): - tmp_thunk = imp.originalfirstthunks - elif imp.firstthunk: - tmp_thunk = imp.firstthunks - for i in range(len(tmp_thunk)): - if tmp_thunk[i].rva & mask_ptr == 
0: - try: - entry = ImportByName.unpack(raw, - tmp_thunk[i].rva, - self.parent_head) - except: - log.warning( - 'cannot import from add %s' % tmp_thunk[i].rva - ) - entry = 0 - imp.impbynames.append(entry) - else: - imp.impbynames.append(tmp_thunk[i].rva & (mask_ptr - 1)) - return out, off - - def sete(self, entries): - return b"".join(bytes(entry) for entry in entries) + b"\x00" * (4 * 5) - - def __len__(self): - length = (len(self.impdesc) + 1) * (5 * 4) # ImpDesc_e size - rva_size = self.parent_head._wsize // 8 - for entry in self.impdesc: - length += len(entry.dlldescname) - if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): - length += (len(entry.originalfirstthunks) + 1) * rva_size - if entry.firstthunk: - length += (len(entry.firstthunks) + 1) * rva_size - for imp in entry.impbynames: - if isinstance(imp, ImportByName): - length += len(imp) - return length - - def set_rva(self, rva, size=None): - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT].rva = rva - rva_size = self.parent_head._wsize // 8 - if not size: - self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_IMPORT].size = len(self) - else: - self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_IMPORT].size = size - rva += (len(self.impdesc) + 1) * 5 * 4 # ImpDesc size - for entry in self.impdesc: - entry.name = rva - rva += len(entry.dlldescname) - if entry.originalfirstthunk: # and self.parent_head.rva2off(entry.originalfirstthunk): - entry.originalfirstthunk = rva - rva += (len(entry.originalfirstthunks) + 1) * rva_size - # XXX rva fthunk not patched => keep original func addr - # if entry.firstthunk: - # entry.firstthunk = rva - # rva+=(len(entry.firstthunks)+1)*self.parent_head._wsize//8 # Rva size - if entry.originalfirstthunk and entry.firstthunk: - if isinstance(entry.originalfirstthunk, struct_array): - tmp_thunk = entry.originalfirstthunks - elif isinstance(entry.firstthunks, struct_array): - tmp_thunk = entry.firstthunks - else: - raise 
RuntimeError("No thunk!") - elif entry.originalfirstthunk: # and self.parent_head.rva2off(entry.originalfirstthunk): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - raise RuntimeError("No thunk!") - - if tmp_thunk == entry.originalfirstthunks: - entry.firstthunks = tmp_thunk - else: - entry.originalfirstthunks = tmp_thunk - for i, imp in enumerate(entry.impbynames): - if isinstance(imp, ImportByName): - tmp_thunk[i].rva = rva - rva += len(imp) - - def build_content(self, raw): - dirimp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT] - of1 = dirimp.rva - if not of1: # No Import - return - raw[self.parent_head.rva2off(of1)] = bytes(self) - for entry in self.impdesc: - raw[self.parent_head.rva2off(entry.name)] = bytes(entry.dlldescname) - if (entry.originalfirstthunk and - self.parent_head.rva2off(entry.originalfirstthunk)): - # Add thunks list and terminating null entry - off = self.parent_head.rva2off(entry.originalfirstthunk) - raw[off] = bytes(entry.originalfirstthunks) - if entry.firstthunk: - # Add thunks list and terminating null entry - off = self.parent_head.rva2off(entry.firstthunk) - raw[off] = bytes(entry.firstthunks) - if (entry.originalfirstthunk and - self.parent_head.rva2off(entry.originalfirstthunk)): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - raise RuntimeError("No thunk!") - for j, imp in enumerate(entry.impbynames): - if isinstance(imp, ImportByName): - raw[self.parent_head.rva2off(tmp_thunk[j].rva)] = bytes(imp) - - def get_dlldesc(self): - out = [] - for impdesc in self.impdesc: - dllname = impdesc.dlldescname.name - funcs = [] - for imp in impdesc.impbynames: - if isinstance(imp, ImportByName): - funcs.append(imp.name) - else: - funcs.append(imp) - entry = ({"name": dllname, "firstthunk": impdesc.firstthunk}, funcs) - out.append(entry) - return out - - def __repr__(self): - rep = ["<%s>" % 
self.__class__.__name__] - for i, entry in enumerate(self.impdesc): - out = "%2d %-25s %s" % (i, repr(entry.dlldescname), repr(entry)) - rep.append(out) - for index, imp in enumerate(entry.impbynames): - out = " %2d %-16s" % (index, repr(imp)) - rep.append(out) - return "\n".join(rep) - - def add_dlldesc(self, new_dll): - rva_size = self.parent_head._wsize // 8 - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000 - new_impdesc = [] - of1 = None - for import_descriptor, new_functions in new_dll: - if isinstance(import_descriptor.get("name"), str): - import_descriptor["name"] = import_descriptor["name"].encode() - new_functions = [ - funcname.encode() if isinstance(funcname, str) else funcname - for funcname in new_functions - ] - for attr in ["timestamp", "forwarderchain", "originalfirstthunk"]: - if attr not in import_descriptor: - import_descriptor[attr] = 0 - entry = ImpDesc_e(self.parent_head, **import_descriptor) - if entry.firstthunk != None: - of1 = entry.firstthunk - elif of1 == None: - raise RuntimeError("set fthunk") - else: - entry.firstthunk = of1 - entry.dlldescname = DescName(self.parent_head, name=entry.name) - entry.originalfirstthunk = 0 - entry.originalfirstthunks = struct_array(self, None, - None, - Rva) - entry.firstthunks = struct_array(self, None, - None, - Rva) - - impbynames = [] - for new_function in new_functions: - rva_ofirstt = Rva(self.parent_head) - if isinstance(new_function, int_types): - rva_ofirstt.rva = mask_ptr + new_function - ibn = new_function - elif isinstance(new_function, bytes): - rva_ofirstt.rva = True - ibn = ImportByName(self.parent_head) - ibn.name = new_function - ibn.hint = 0 - else: - raise RuntimeError('unknown func type %s' % new_function) - impbynames.append(ibn) - entry.originalfirstthunks.append(rva_ofirstt) - rva_func = Rva(self.parent_head) - if isinstance(ibn, ImportByName): - rva_func.rva = 0xDEADBEEF # default func addr - else: - # 
ord ?XXX? - rva_func.rva = rva_ofirstt.rva - entry.firstthunks.append(rva_func) - of1 += rva_size - # for null thunk - of1 += rva_size - entry.impbynames = impbynames - new_impdesc.append(entry) - if self.impdesc is None: - self.impdesc = struct_array(self, None, - None, - ImpDesc_e) - self.impdesc.l = new_impdesc - else: - for entry in new_impdesc: - self.impdesc.append(entry) - - def get_funcrva(self, dllname, funcname): - dllname = force_bytes(dllname) - funcname = force_bytes(funcname) - - rva_size = self.parent_head._wsize // 8 - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - 1 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000 - 1 - - for entry in self.impdesc: - if entry.dlldescname.name.lower() != dllname.lower(): - continue - if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - raise RuntimeError("No thunk!") - if isinstance(funcname, bytes): - for j, imp in enumerate(entry.impbynames): - if isinstance(imp, ImportByName): - if funcname == imp.name: - return entry.firstthunk + j * rva_size - elif isinstance(funcname, int_types): - for j, imp in enumerate(entry.impbynames): - if not isinstance(imp, ImportByName): - if tmp_thunk[j].rva & mask_ptr == funcname: - return entry.firstthunk + j * rva_size - else: - raise ValueError('Unknown: %s %s' % (dllname, funcname)) - - def get_funcvirt(self, dllname, funcname): - rva = self.get_funcrva(dllname, funcname) - if rva == None: - return - return self.parent_head.rva2virt(rva) - - -class ExpDesc_e(CStruct): - _fields = [("characteristics", "u32"), - ("timestamp", "u32"), - ("majorv", "u16"), - ("minorv", "u16"), - ("name", "u32"), - ("base", "u32"), - ("numberoffunctions", "u32"), - ("numberofnames", "u32"), - ("addressoffunctions", "u32"), - ("addressofnames", "u32"), - ("addressofordinals", "u32"), - ] - - -class DirExport(CStruct): - _fields = 
[("expdesc", (lambda c, raw, off:c.gete(raw, off), - lambda c, value:c.sete(value)))] - - def gete(self, raw, off): - off_o = off - if not off: - return None, off - off_sav = off - if off >= len(raw): - log.warn("export dir malformed!") - return None, off_o - expdesc = ExpDesc_e.unpack(raw, - off, - self.parent_head) - if self.parent_head.rva2off(expdesc.addressoffunctions) == None or \ - self.parent_head.rva2off(expdesc.addressofnames) == None or \ - self.parent_head.rva2off(expdesc.addressofordinals) == None: - log.warn("export dir malformed!") - return None, off_o - self.dlldescname = DescName.unpack(raw, expdesc.name, self.parent_head) - try: - self.f_address = struct_array(self, raw, - expdesc.addressoffunctions, - Rva32, expdesc.numberoffunctions) - self.f_names = struct_array(self, raw, - expdesc.addressofnames, - Rva32, expdesc.numberofnames) - self.f_nameordinals = struct_array(self, raw, - expdesc.addressofordinals, - Ordinal, expdesc.numberofnames) - except RuntimeError: - log.warn("export dir malformed!") - return None, off_o - for func in self.f_names: - func.name = DescName.unpack(raw, func.rva, self.parent_head) - return expdesc, off_sav - - def sete(self, _): - return bytes(self.expdesc) - - def build_content(self, raw): - direxp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT] - of1 = direxp.rva - if self.expdesc is None: # No Export - return - raw[self.parent_head.rva2off(of1)] = bytes(self.expdesc) - raw[self.parent_head.rva2off(self.expdesc.name)] = bytes(self.dlldescname) - raw[self.parent_head.rva2off(self.expdesc.addressoffunctions)] = bytes(self.f_address) - if self.expdesc.addressofnames != 0: - raw[self.parent_head.rva2off(self.expdesc.addressofnames)] = bytes(self.f_names) - if self.expdesc.addressofordinals != 0: - raw[self.parent_head.rva2off(self.expdesc.addressofordinals)] = bytes(self.f_nameordinals) - for func in self.f_names: - raw[self.parent_head.rva2off(func.rva)] = bytes(func.name) - - # XXX BUG names must be 
alphanumeric ordered - names = [func.name for func in self.f_names] - names_ = names[:] - if names != names_: - log.warn("unsorted export names, may bug") - - def set_rva(self, rva, size=None): - rva_size = self.parent_head._wsize // 8 - if self.expdesc is None: - return - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT].rva = rva - if not size: - self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_EXPORT].size = len(self) - else: - self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_EXPORT].size = size - rva += len(self.expdesc) - self.expdesc.name = rva - rva += len(self.dlldescname) - self.expdesc.addressoffunctions = rva - rva += len(self.f_address) * rva_size - self.expdesc.addressofnames = rva - rva += len(self.f_names) * rva_size - self.expdesc.addressofordinals = rva - rva += len(self.f_nameordinals) * 2 # Ordinal size - for func in self.f_names: - func.rva = rva - rva += len(func.name) - - def __len__(self): - rva_size = self.parent_head._wsize // 8 - length = 0 - if self.expdesc is None: - return length - length += len(self.expdesc) - length += len(self.dlldescname) - length += len(self.f_address) * rva_size - length += len(self.f_names) * rva_size - length += len(self.f_nameordinals) * 2 # Ordinal size - for entry in self.f_names: - length += len(entry.name) - return length - - def __repr__(self): - rep = ["<%s>" % self.__class__.__name__] - if self.expdesc is None: - return "\n".join(rep) - - rep = ["<%s %d (%s) %s>" % (self.__class__.__name__, - self.expdesc.numberoffunctions, self.dlldescname, repr(self.expdesc))] - tmp_names = [[] for _ in range(self.expdesc.numberoffunctions)] - for i, entry in enumerate(self.f_names): - tmp_names[self.f_nameordinals[i].ordinal].append(entry.name) - for i, entry in enumerate(self.f_address): - tmpn = [] - if not entry.rva: - continue - out = "%2d %.8X %s" % (i + self.expdesc.base, entry.rva, repr(tmp_names[i])) - rep.append(out) - return "\n".join(rep) - - def create(self, name='default.dll'): - 
self.expdesc = ExpDesc_e(self.parent_head) - for attr in ["characteristics", - "timestamp", - "majorv", - "minorv", - "name", - "base", - "numberoffunctions", - "numberofnames", - "addressoffunctions", - "addressofnames", - "addressofordinals", - ]: - setattr(self.expdesc, attr, 0) - - self.dlldescname = DescName(self.parent_head) - self.dlldescname.name = name - self.f_address = struct_array(self, None, - None, - Rva) - self.f_names = struct_array(self, None, - None, - Rva) - self.f_nameordinals = struct_array(self, None, - None, - Ordinal) - self.expdesc.base = 1 - - def add_name(self, name, rva=0xdeadc0fe): - if self.expdesc is None: - return - names = [func.name.name for func in self.f_names] - names_s = names[:] - names_s.sort() - if names_s != names: - log.warn('tab names was not sorted may bug') - names.append(name) - names.sort() - index = names.index(name) - descname = DescName(self.parent_head) - - descname.name = name - wname = Rva(self.parent_head) - - wname.name = descname - woffset = Rva(self.parent_head) - woffset.rva = rva - wordinal = Ordinal(self.parent_head) - # func is append to list - wordinal.ordinal = len(self.f_address) - self.f_address.append(woffset) - # self.f_names.insert(index, wname) - # self.f_nameordinals.insert(index, wordinal) - self.f_names.insert(index, wname) - self.f_nameordinals.insert(index, wordinal) - self.expdesc.numberofnames += 1 - self.expdesc.numberoffunctions += 1 - - def get_funcrva(self, f_str): - if self.expdesc is None: - return None - for i, entry in enumerate(self.f_names): - if f_str != entry.name.name: - continue - ordinal = self.f_nameordinals[i].ordinal - rva = self.f_address[ordinal].rva - return rva - return None - - def get_funcvirt(self, addr): - rva = self.get_funcrva(addr) - if rva == None: - return - return self.parent_head.rva2virt(rva) - - -class Delaydesc_e(CStruct): - _fields = [("attrs", "u32"), - ("name", "u32"), - ("hmod", "u32"), - ("firstthunk", "u32"), - ("originalfirstthunk", "u32"), - 
("boundiat", "u32"), - ("unloadiat", "u32"), - ("timestamp", "u32"), - ] - - -class DirDelay(CStruct): - _fields = [("delaydesc", (lambda c, raw, off:c.gete(raw, off), - lambda c, value:c.sete(value)))] - - def gete(self, raw, off): - if not off: - return None, off - - ofend = off + \ - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size - out = [] - while off < ofend: - if off >= len(raw): - log.warn('warning bad reloc offset') - break - - delaydesc, length = Delaydesc_e.unpack_l(raw, - off, - self.parent_head) - if raw[off:off+length] == b'\x00' * length: - # Special case - break - off += length - out.append(delaydesc) - - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000 - - parent = self.parent_head - for entry in out: - isfromva = (entry.attrs & 1) == 0 - if isfromva: - isfromva = lambda x: parent.virt2rva(x) - else: - isfromva = lambda x: x - entry.dlldescname = DescName.unpack(raw, isfromva(entry.name), - self.parent_head) - if entry.originalfirstthunk: - addr = isfromva(entry.originalfirstthunk) - if not 0 <= addr < len(raw): - log.warning("Bad delay") - break - entry.originalfirstthunks = struct_array(self, raw, - addr, - Rva) - else: - entry.originalfirstthunks - - if entry.firstthunk: - entry.firstthunks = struct_array(self, raw, - isfromva(entry.firstthunk), - Rva) - else: - entry.firstthunk = None - - entry.impbynames = [] - if entry.originalfirstthunk and self.parent_head.rva2off(isfromva(entry.originalfirstthunk)): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - print(ValueError("no thunk in delay dir!! 
")) - return - for i in range(len(tmp_thunk)): - if tmp_thunk[i].rva & mask_ptr == 0: - imp = ImportByName.unpack(raw, - isfromva(tmp_thunk[i].rva), - self.parent_head) - entry.impbynames.append(imp) - else: - entry.impbynames.append( - isfromva(tmp_thunk[i].rva & (mask_ptr - 1))) - # print(repr(entry[-1])) - # raise ValueError('XXX to check') - return out, off - - def sete(self, entries): - return "".join(bytes(entry) for entry in entries) + b"\x00" * (4 * 8) # DelayDesc_e - - def __len__(self): - rva_size = self.parent_head._wsize // 8 - length = (len(self.delaydesc) + 1) * (4 * 8) # DelayDesc_e - for entry in self.delaydesc: - length += len(entry.dlldescname) - if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): - length += (len(entry.originalfirstthunks) + 1) * rva_size - if entry.firstthunk: - length += (len(entry.firstthunks) + 1) * rva_size - for imp in entry.impbynames: - if isinstance(imp, ImportByName): - length += len(imp) - return length - - def set_rva(self, rva, size=None): - rva_size = self.parent_head._wsize // 8 - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].rva = rva - if not size: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size = len(self) - else: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size = size - rva += (len(self.delaydesc) + 1) * (4 * 8) # DelayDesc_e - parent = self.parent_head - for entry in self.delaydesc: - isfromva = (entry.attrs & 1) == 0 - if isfromva: - isfromva = lambda x: self.parent_head.rva2virt(x) - else: - isfromva = lambda x: x - - entry.name = isfromva(rva) - rva += len(entry.dlldescname) - if entry.originalfirstthunk: # and self.parent_head.rva2off(entry.originalfirstthunk): - entry.originalfirstthunk = isfromva(rva) - rva += (len(entry.originalfirstthunks) + 1) * rva_size - # XXX rva fthunk not patched => fun addr - # if entry.firstthunk: - # entry.firstthunk = rva - # rva+=(len(entry.firstthunks)+1)*pe.Rva._size - if 
entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - raise RuntimeError("No thunk!") - for i, imp in enumerate(entry.impbynames): - if isinstance(imp, ImportByName): - tmp_thunk[i].rva = isfromva(rva) - rva += len(imp) - - def build_content(self, raw): - if len(self.parent_head.NThdr.optentries) < DIRECTORY_ENTRY_DELAY_IMPORT: - return - dirdelay = self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_DELAY_IMPORT] - of1 = dirdelay.rva - if not of1: # No Delay Import - return - raw[self.parent_head.rva2off(of1)] = bytes(self) - for entry in self.delaydesc: - raw[self.parent_head.rva2off(entry.name)] = bytes(entry.dlldescname) - if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): - raw[self.parent_head.rva2off(entry.originalfirstthunk)] = bytes(entry.originalfirstthunks) - if entry.firstthunk: - raw[self.parent_head.rva2off(entry.firstthunk)] = bytes(entry.firstthunks) - if entry.originalfirstthunk and self.parent_head.rva2off(entry.originalfirstthunk): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - raise RuntimeError("No thunk!") - for j, imp in enumerate(entry.impbynames): - if isinstance(imp, ImportByName): - raw[self.parent_head.rva2off(tmp_thunk[j].rva)] = bytes(imp) - - def __repr__(self): - rep = ["<%s>" % self.__class__.__name__] - for i, entry in enumerate(self.delaydesc): - out = "%2d %-25s %s" % (i, repr(entry.dlldescname), repr(entry)) - rep.append(out) - for index, func in enumerate(entry.impbynames): - out = " %2d %-16s" % (index, repr(func)) - rep.append(out) - return "\n".join(rep) - - def add_dlldesc(self, new_dll): - if self.parent_head._wsize == 32: - mask_ptr = 0x80000000 - elif self.parent_head._wsize == 64: - mask_ptr = 0x8000000000000000 - new_impdesc = [] - of1 = None - new_delaydesc = [] - for import_descriptor, 
new_functions in new_dll: - if isinstance(import_descriptor.get("name"), str): - import_descriptor["name"] = import_descriptor["name"].encode() - new_functions = [ - funcname.encode() if isinstance(funcname, str) else funcname - for funcname in new_functions - ] - for attr in ["attrs", "name", "hmod", "firstthunk", "originalfirstthunk", "boundiat", "unloadiat", "timestamp"]: - if not attr in import_descriptor: - import_descriptor[attr] = 0 - entry = Delaydesc_e(self.parent_head, **import_descriptor) - # entry.cstr.__dict__.update(import_descriptor) - if entry.firstthunk != None: - of1 = entry.firstthunk - elif of1 == None: - raise RuntimeError("set fthunk") - else: - entry.firstthunk = of1 - entry.dlldescname = DescName(self.parent_head, name=entry.name) - entry.originalfirstthunk = 0 - entry.originalfirstthunks = struct_array(self, None, - None, - Rva) - entry.firstthunks = struct_array(self, None, - None, - Rva) - - impbynames = [] - for new_function in new_functions: - rva_ofirstt = Rva(self.parent_head) - if isinstance(new_function, int_types): - rva_ofirstt.rva = mask_ptr + new_function - ibn = None - elif isinstance(new_function, bytes): - rva_ofirstt.rva = True - ibn = ImportByName(self.parent_head) - ibn.name = new_function - ibn.hint = 0 - else: - raise RuntimeError('unknown func type %s' % new_function) - impbynames.append(ibn) - entry.originalfirstthunks.append(rva_ofirstt) - - rva_func = Rva(self.parent_head) - if ibn != None: - rva_func.rva = 0xDEADBEEF # default func addr - else: - # ord ?XXX? 
- rva_func.rva = rva_ofirstt.rva - entry.firstthunks.append(rva_func) - of1 += 4 - # for null thunk - of1 += 4 - entry.impbynames = impbynames - new_delaydesc.append(entry) - if self.delaydesc is None: - self.delaydesc = struct_array(self, None, - None, - Delaydesc_e) - self.delaydesc.l = new_delaydesc - else: - for entry in new_delaydesc: - self.delaydesc.append(entry) - - def get_funcrva(self, func): - for entry in self.delaydesc: - isfromva = (entry.attrs & 1) == 0 - if isfromva: - isfromva = lambda x: self.parent_head.virt2rva(x) - else: - isfromva = lambda x: x - if entry.originalfirstthunk and self.parent_head.rva2off(isfromva(entry.originalfirstthunk)): - tmp_thunk = entry.originalfirstthunks - elif entry.firstthunk: - tmp_thunk = entry.firstthunks - else: - raise RuntimeError("No thunk!") - if isinstance(func, bytes): - for j, imp in enumerate(entry.impbynames): - if isinstance(imp, ImportByName): - if func == imp.name: - return isfromva(entry.firstthunk) + j * 4 - elif isinstance(func, int_types): - for j, imp in enumerate(entry.impbynames): - if not isinstance(imp, ImportByName): - if isfromva(tmp_thunk[j].rva & 0x7FFFFFFF) == func: - return isfromva(entry.firstthunk) + j * 4 - else: - raise ValueError('unknown func tpye %r' % func) - - def get_funcvirt(self, addr): - rva = self.get_funcrva(addr) - if rva == None: - return - return self.parent_head.rva2virt(rva) - - -class Rel(CStruct): - _fields = [("rva", "u32"), - ("size", "u32") - ] - - -class Reloc(CStruct): - _fields = [("rel", (lambda c, raw, off:c.gete(raw, off), - lambda c, value:c.sete(value)))] - - def gete(self, raw, off): - rel = struct.unpack('H', raw[off:off + 2])[0] - return (rel >> 12, rel & 0xfff), off + 2 - - def sete(self, value): - return struct.pack('H', (value[0] << 12) | value[1]) - - def __repr__(self): - return '<%d %d>' % (self.rel[0], self.rel[1]) - - -class DirReloc(CStruct): - _fields = [("reldesc", (lambda c, raw, off:c.gete(raw, off), - lambda c, value:c.sete(value)))] - - 
def gete(self, raw, off): - if not off: - return None, off - - ofend = off + \ - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].size - out = [] - while off < ofend: - if off >= len(raw): - log.warn('warning bad reloc offset') - break - reldesc, length = Rel.unpack_l(raw, - off, - self.parent_head) - if reldesc.size == 0: - log.warn('warning null reldesc') - reldesc.size = length - break - of2 = off + length - if of2 + reldesc.size > len(self.parent_head.img_rva): - log.warn('relocation too big, skipping') - break - reldesc.rels = struct_array(self, raw, - of2, - Reloc, - (reldesc.size - length) // 2) # / Reloc size - reldesc.patchrel = False - out.append(reldesc) - off += reldesc.size - return out, off - - def sete(self, entries): - return b"".join( - bytes(entry) + bytes(entry.rels) - for entry in entries - ) - - def set_rva(self, rva, size=None): - if self.reldesc is None: - return - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC].rva = rva - if not size: - self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_BASERELOC].size = len(self) - else: - self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_BASERELOC].size = size - - def build_content(self, raw): - dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC] - dirrel.size = len(self) - of1 = dirrel.rva - if self.reldesc is None: # No Reloc - return - raw[self.parent_head.rva2off(of1)] = bytes(self) - - def __len__(self): - if self.reldesc is None: - return 0 - length = 0 - for entry in self.reldesc: - length += entry.size - return length - - def __bytes__(self): - return b"".join( - bytes(entry) + bytes(entry.rels) - for entry in self.reldesc - ) - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __repr__(self): - rep = ["<%s>" % self.__class__.__name__] - if self.reldesc is None: - return "\n".join(rep) - for i, entry in enumerate(self.reldesc): - out = "%2d %s" % (i, repr(entry)) - rep.append(out) - """ - #display too many lines... 
- for ii, m in enumerate(entry.rels): - l = "\t%2d %s"%(ii, repr(m) ) - rep.append(l) - """ - out = "\t%2d rels..." % (len(entry.rels)) - rep.append(out) - return "\n".join(rep) - - def add_reloc(self, rels, rtype=3, patchrel=True): - dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC] - if not rels: - return - rels.sort() - all_base_ad = set([x & 0xFFFFF000 for x in rels]) - all_base_ad = list(all_base_ad) - all_base_ad.sort() - rels_by_base = defaultdict(list) - while rels: - reloc = rels.pop() - if reloc >= all_base_ad[-1]: - rels_by_base[all_base_ad[-1]].append(reloc) - else: - all_base_ad.pop() - rels_by_base[all_base_ad[-1]].append(reloc) - rels_by_base = [x for x in list(rels_by_base.items())] - rels_by_base.sort() - for o_init, rels in rels_by_base: - # o_init = rels[0]&0xFFFFF000 - offsets = struct_array(self, None, None, Reloc, 0) - for reloc_value in rels: - if (reloc_value & 0xFFFFF000) != o_init: - raise RuntimeError("relocs must be in same range") - reloc = Reloc(self.parent_head) - reloc.rel = (rtype, reloc_value - o_init) - offsets.append(reloc) - while len(offsets) & 3: - reloc = Reloc(self.parent_head) - reloc.rel = (0, 0) - offsets.append(reloc) - reldesc = Rel(self.parent_head) # Reloc(self.parent_head) - reldesc.rva = o_init - reldesc.size = (len(offsets) * 2 + 8) - reldesc.rels = offsets - reldesc.patchrel = patchrel - # if self.reldesc is None: - # self.reldesc = [] - self.reldesc.append(reldesc) - dirrel.size += reldesc.size - - def del_reloc(self, taboffset): - if self.reldesc is None: - return - for rel in self.reldesc: - of1 = rel.rva - i = 0 - while i < len(rel.rels): - reloc = rel.rels[i] - if reloc.rel[0] != 0 and reloc.rel[1] + of1 in taboffset: - print('del reloc', hex(reloc.rel[1] + of1)) - del rel.rels[i] - rel.size -= Reloc._size - else: - i += 1 - - -class DirRes(CStruct): - _fields = [("resdesc", (lambda c, raw, off:c.gete(raw, off), - lambda c, value:c.sete(value)))] - - def gete(self, raw, off): - if not off: 
- return None, off - if off >= len(self.parent_head.img_rva): - log.warning('cannot parse resources, %X' % off) - return None, off - - off_orig = off - ofend = off + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size - - resdesc, length = ResDesc_e.unpack_l(raw, - off, - self.parent_head) - off += length - nbr = resdesc.numberofnamedentries + resdesc.numberofidentries - - out = [] - tmp_off = off - for _ in range(nbr): - if tmp_off >= ofend: - break - if tmp_off + length >= len(raw): - log.warn('warning bad resource offset') - break - try: - entry, length = ResEntry.unpack_l(raw, tmp_off, self.parent_head) - except RuntimeError: - log.warn('bad resentry') - return None, tmp_off - out.append(entry) - tmp_off += length - resdesc.resentries = struct_array(self, raw, - off, - ResEntry, - nbr) - dir_todo = {off_orig: resdesc} - dir_done = {} - while dir_todo: - off, my_dir = dir_todo.popitem() - dir_done[off] = my_dir - for entry in my_dir.resentries: - off = entry.offsettosubdir - if not off: - # data dir - off = entry.offsettodata - if not 0 <= off < len(raw): - log.warn('bad resrouce entry') - continue - data = ResDataEntry.unpack(raw, - off, - self.parent_head) - off = data.offsettodata - data.s = StrPatchwork(raw[off:off + data.size]) - entry.data = data - continue - # subdir - if off in dir_done: - log.warn('warning recusif subdir') - continue - if not 0 <= off < len(self.parent_head.img_rva): - log.warn('bad resrouce entry') - continue - subdir, length = ResDesc_e.unpack_l(raw, - off, - self.parent_head) - nbr = subdir.numberofnamedentries + subdir.numberofidentries - try: - subdir.resentries = struct_array(self, raw, - off + length, - ResEntry, - nbr) - except RuntimeError: - log.warn('bad resrouce entry') - continue - - entry.subdir = subdir - dir_todo[off] = entry.subdir - return resdesc, off - - def build_content(self, raw): - if self.resdesc is None: - return - of1 = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva - 
raw[self.parent_head.rva2off(of1)] = bytes(self.resdesc) - dir_todo = {self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_RESOURCE].rva: self.resdesc} - dir_done = {} - while dir_todo: - of1, my_dir = dir_todo.popitem() - dir_done[of1] = my_dir - raw[self.parent_head.rva2off(of1)] = bytes(my_dir) - of1 += len(my_dir) - of_base = of1 - for entry in my_dir.resentries: - raw[of_base] = bytes(entry) - of_base += len(entry) - if entry.name_s: - raw[self.parent_head.rva2off(entry.name)] = bytes(entry.name_s) - of1 = entry.offsettosubdir - if not of1: - raw[self.parent_head.rva2off(entry.offsettodata)] = bytes(entry.data) - raw[self.parent_head.rva2off(entry.data.offsettodata)] = bytes(entry.data.s) - continue - dir_todo[of1] = entry.subdir - - def __len__(self): - length = 0 - if self.resdesc is None: - return length - dir_todo = [self.resdesc] - dir_done = [] - while dir_todo: - my_dir = dir_todo.pop() - if my_dir in dir_done: - raise ValueError('Recursive directory') - dir_done.append(my_dir) - length += len(my_dir) - length += len(my_dir.resentries) * 8 # ResEntry size - for entry in my_dir.resentries: - if not entry.offsettosubdir: - continue - if not entry.subdir in dir_todo: - dir_todo.append(entry.subdir) - else: - raise RuntimeError("recursive dir") - - dir_todo = dir_done - while dir_todo: - my_dir = dir_todo.pop() - for entry in my_dir.resentries: - if entry.name_s: - length += len(entry.name_s) - of1 = entry.offsettosubdir - if not of1: - length += 4 * 4 # WResDataEntry size - # XXX because rva may be even rounded - length += 1 - length += entry.data.size - continue - return length - - def set_rva(self, rva, size=None): - if self.resdesc is None: - return - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva = rva - if not size: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = len(self) - else: - self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = size - dir_todo = [self.resdesc] - dir_done = {} - while 
dir_todo: - my_dir = dir_todo.pop() - dir_done[rva] = my_dir - rva += len(my_dir) - rva += len(my_dir.resentries) * 8 # ResEntry size - for entry in my_dir.resentries: - if not entry.offsettosubdir: - continue - if not entry.subdir in dir_todo: - dir_todo.append(entry.subdir) - else: - raise RuntimeError("recursive dir") - dir_todo = dir_done - dir_inv = dict([(x[1], x[0]) for x in list(dir_todo.items())]) - while dir_todo: - rva_tmp, my_dir = dir_todo.popitem() - for entry in my_dir.resentries: - if entry.name_s: - entry.name = rva - rva += len(entry.name_s) - of1 = entry.offsettosubdir - if not of1: - entry.offsettodata = rva - rva += 4 * 4 # ResDataEntry size - # XXX menu rsrc must be even aligned? - if rva % 2: - rva += 1 - entry.data.offsettodata = rva - rva += entry.data.size - continue - entry.offsettosubdir = dir_inv[entry.subdir] - - def __repr__(self): - rep = ["<%s>" % (self.__class__.__name__)] - if self.resdesc is None: - return "\n".join(rep) - dir_todo = [self.resdesc] - resources = [] - index = -1 - while dir_todo: - entry = dir_todo.pop(0) - if isinstance(entry, int): - index += entry - elif isinstance(entry, ResDesc_e): - # resources.append((index, repr(entry))) - dir_todo = [1] + entry.resentries.l + [-1] + dir_todo - elif isinstance(entry, ResEntry): - if entry.offsettosubdir: - resources.append((index, repr(entry))) - dir_todo = [entry.subdir] + dir_todo - else: - resources.append((index, repr(entry))) - else: - raise RuntimeError("zarb") - for i, resource in resources: - rep.append(' ' * 4 * i + resource) - return "\n".join(rep) - - -class Ordinal(CStruct): - _fields = [("ordinal", "u16"), - ] - - -class ResDesc_e(CStruct): - _fields = [("characteristics", "u32"), - ("timestamp", "u32"), - ("majorv", "u16"), - ("minorv", "u16"), - ("numberofnamedentries", "u16"), - ("numberofidentries", "u16") - ] - - -class SUnicode(CStruct): - _fields = [("length", "u16"), - ("value", (lambda c, raw, off:c.gets(raw, off), - lambda c, value:c.sets(value))) - 
] - - def gets(self, raw, off): - value = raw[off:off + self.length * 2] - return value, off + self.length - - def sets(self, value): - return self.value - - -class ResEntry(CStruct): - _fields = [("name", (lambda c, raw, off:c._get_name(raw, off), - lambda c, value:c._set_name(value))), - ("offsettodata", (lambda c, raw, off:c._get_offset(raw, off), - lambda c, value:c._set_offset(value))) - ] - - def _get_name(self, raw, off): - self.data = None - # off = self.parent_head.rva2off(off) - name = struct.unpack('I', raw[off:off + 4])[0] - self.name_s = None - if name & 0x80000000: - name = (name & 0x7FFFFFFF) + self.parent_head.NThdr.optentries[ - DIRECTORY_ENTRY_RESOURCE].rva # XXX res rva?? - name &= 0x7FFFFFFF - if name >= len(raw): - raise RuntimeError("Bad resentry") - self.name_s = SUnicode.unpack(raw, - name, - self.parent_head) - return name, off + 4 - - def _set_name(self, name): - if self.name_s: - rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva - name = (self.name - rva) + 0x80000000 - return struct.pack('I', name) - - def _get_offset(self, raw, off): - self.offsettosubdir = None - rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva - offsettodata_o = struct.unpack('I', raw[off:off + 4])[0] - offsettodata = (offsettodata_o & 0x7FFFFFFF) + rva # XXX res rva?? 
- if offsettodata_o & 0x80000000: - self.offsettosubdir = offsettodata - return offsettodata, off + 4 - - def _set_offset(self, offset): - rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva - offsettodata = offset - rva - if self.offsettosubdir: - offsettodata = (self.offsettosubdir - rva) + 0x80000000 - return struct.pack('I', offsettodata) - - def __repr__(self): - if self.name_s: - nameid = "%s" % repr(self.name_s) - else: - if self.name in RT: # and not self.offsettosubdir: - nameid = "ID %s" % RT[self.name] - else: - nameid = "ID %d" % self.name - if self.offsettosubdir: - offsettodata = "subdir: %x" % self.offsettosubdir - else: - offsettodata = "data: %x" % self.offsettodata - return "<%s %s>" % (nameid, offsettodata) - - -class ResDataEntry(CStruct): - _fields = [("offsettodata", "u32"), - ("size", "u32"), - ("codepage", "u32"), - ("reserved", "u32"), - ] - - -class Symb(CStruct): - _fields = [("name", "8s"), - ("res1", "u32"), - ("res2", "u32"), - ("res3", "u16")] - - -DIRECTORY_ENTRY_EXPORT = 0 -DIRECTORY_ENTRY_IMPORT = 1 -DIRECTORY_ENTRY_RESOURCE = 2 -DIRECTORY_ENTRY_EXCEPTION = 3 -DIRECTORY_ENTRY_SECURITY = 4 -DIRECTORY_ENTRY_BASERELOC = 5 -DIRECTORY_ENTRY_DEBUG = 6 -DIRECTORY_ENTRY_COPYRIGHT = 7 -DIRECTORY_ENTRY_GLOBALPTR = 8 -DIRECTORY_ENTRY_TLS = 9 -DIRECTORY_ENTRY_LOAD_CONFIG = 10 -DIRECTORY_ENTRY_BOUND_IMPORT = 11 -DIRECTORY_ENTRY_IAT = 12 -DIRECTORY_ENTRY_DELAY_IMPORT = 13 -DIRECTORY_ENTRY_COM_DESCRIPTOR = 14 -DIRECTORY_ENTRY_RESERVED = 15 - - -RT_CURSOR = 1 -RT_BITMAP = 2 -RT_ICON = 3 -RT_MENU = 4 -RT_DIALOG = 5 -RT_STRING = 6 -RT_FONTDIR = 7 -RT_FONT = 8 -RT_ACCELERATOR = 9 -RT_RCDATA = 10 -RT_MESSAGETABLE = 11 -RT_GROUP_CURSOR = 12 -RT_GROUP_ICON = 14 -RT_VERSION = 16 -RT_DLGINCLUDE = 17 -RT_PLUGPLAY = 19 -RT_VXD = 20 -RT_ANICURSOR = 21 -RT_ANIICON = 22 -RT_HTML = 23 -RT_MANIFEST = 24 - - -RT = { - RT_CURSOR: "RT_CURSOR", - RT_BITMAP: "RT_BITMAP", - RT_ICON: "RT_ICON", - RT_MENU: "RT_MENU", - RT_DIALOG: "RT_DIALOG", - 
RT_STRING: "RT_STRING", - RT_FONTDIR: "RT_FONTDIR", - RT_FONT: "RT_FONT", - RT_ACCELERATOR: "RT_ACCELERATOR", - RT_RCDATA: "RT_RCDATA", - RT_MESSAGETABLE: "RT_MESSAGETABLE", - RT_GROUP_CURSOR: "RT_GROUP_CURSOR", - RT_GROUP_ICON: "RT_GROUP_ICON", - RT_VERSION: "RT_VERSION", - RT_DLGINCLUDE: "RT_DLGINCLUDE", - RT_PLUGPLAY: "RT_PLUGPLAY", - RT_VXD: "RT_VXD", - RT_ANICURSOR: "RT_ANICURSOR", - RT_ANIICON: "RT_ANIICON", - RT_HTML: "RT_HTML", - RT_MANIFEST: "RT_MANIFEST", -} diff --git a/miasm/elfesteem/pe_init.py b/miasm/elfesteem/pe_init.py deleted file mode 100644 index e243cecb..00000000 --- a/miasm/elfesteem/pe_init.py +++ /dev/null @@ -1,603 +0,0 @@ -#! /usr/bin/env python - -from __future__ import print_function - -from builtins import range -import array -from functools import reduce -import logging -import struct - -from future.builtins import int as int_types -from future.utils import PY3 - -from miasm.elfesteem import pe -from miasm.elfesteem.strpatchwork import StrPatchwork - -log = logging.getLogger("peparse") -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - - -class ContentManager(object): - - def __get__(self, owner, _): - if hasattr(owner, '_content'): - return owner._content - - def __set__(self, owner, new_content): - owner.resize(len(owner._content), len(new_content)) - owner._content = new_content - - def __delete__(self, owner): - self.__set__(owner, None) - - -class ContectRva(object): - - def __init__(self, parent): - self.parent = parent - - def get(self, rva_start, rva_stop=None): - """ - Get data in RVA view starting at @rva_start, stopping at @rva_stop - @rva_start: rva start address - @rva_stop: rva stop address - """ - if rva_start < 0: - raise ValueError("Out of range") - if rva_stop is not None: - if rva_stop > len(self.parent.img_rva): - rva_stop = len(self.parent.img_rva) - if rva_start > 
len(self.parent.img_rva): - raise ValueError("Out of range") - return self.parent.img_rva[rva_start:rva_stop] - if rva_start > len(self.parent.img_rva): - raise ValueError("Out of range") - return self.parent.img_rva[rva_start] - - def set(self, rva, data): - """ - Set @data in RVA view starting at @start - @rva: rva start address - @data: data to set - """ - if not isinstance(rva, int_types): - raise ValueError('addr must be int/long') - - if rva < 0: - raise ValueError("Out of range") - - if rva + len(data) > len(self.parent.img_rva): - raise ValueError("Out of range") - self.parent.img_rva[rva] = data - - def __getitem__(self, item): - if isinstance(item, slice): - assert(item.step is None) - return self.get(item.start, item.stop) - return self.get(item) - - def __setitem__(self, item, data): - if isinstance(item, slice): - rva = item.start - else: - rva = item - self.set(rva, data) - - -class ContentVirtual(object): - - def __init__(self, parent): - self.parent = parent - - def __getitem__(self, item): - raise DeprecationWarning("Replace code by virt.get(start, [stop])") - - def __setitem__(self, item, data): - raise DeprecationWarning("Replace code by virt.set(start, data)") - - def __call__(self, ad_start, ad_stop=None, ad_step=None): - raise DeprecationWarning("Replace code by virt.get(start, stop)") - - def get(self, virt_start, virt_stop=None): - """ - Get data in VIRTUAL view starting at @virt_start, stopping at @virt_stop - @virt_start: virt start address - @virt_stop: virt stop address - """ - rva_start = self.parent.virt2rva(virt_start) - if virt_stop != None: - rva_stop = self.parent.virt2rva(virt_stop) - else: - rva_stop = None - return self.parent.rva.get(rva_start, rva_stop) - - def set(self, addr, data): - """ - Set @data in VIRTUAL view starting at @start - @addr: virtual start address - @data: data to set - """ - if not isinstance(addr, int_types): - raise ValueError('addr must be int/long') - self.parent.rva.set(self.parent.virt2rva(addr), 
data) - - def max_addr(self): - section = self.parent.SHList[-1] - length = section.addr + section.size + self.parent.NThdr.ImageBase - return int(length) - - def find(self, pattern, start=0, end=None): - if start != 0: - start = self.parent.virt2rva(start) - if end != None: - end = self.parent.virt2rva(end) - - ret = self.parent.img_rva.find(pattern, start, end) - if ret == -1: - return -1 - return self.parent.rva2virt(ret) - - def rfind(self, pattern, start=0, end=None): - if start != 0: - start = self.parent.virt2rva(start) - if end != None: - end = self.parent.virt2rva(end) - - ret = self.parent.img_rva.rfind(pattern, start, end) - if ret == -1: - return -1 - return self.parent.rva2virt(ret) - - def is_addr_in(self, addr): - return self.parent.is_in_virt_address(addr) - - - -def compute_crc(raw, olds): - out = 0 - data = raw[:] - if len(raw) % 2: - end = struct.unpack('B', data[-1])[0] - data = data[:-1] - if (len(raw) & ~0x1) % 4: - out += struct.unpack('H', data[:2])[0] - data = data[2:] - data = array.array('I', data) - out = reduce(lambda x, y: x + y, data, out) - out -= olds - while out > 0xFFFFFFFF: - out = (out >> 32) + (out & 0xFFFFFFFF) - while out > 0xFFFF: - out = (out & 0xFFFF) + ((out >> 16) & 0xFFFF) - if len(raw) % 2: - out += end - out += len(data) - return out - - - -# PE object -class PE(object): - content = ContentManager() - - def __init__(self, pestr=None, - loadfrommem=False, - parse_resources=True, - parse_delay=True, - parse_reloc=True, - wsize=32): - self._rva = ContectRva(self) - self._virt = ContentVirtual(self) - self.img_rva = StrPatchwork() - if pestr is None: - self._content = StrPatchwork() - self._sex = 0 - self._wsize = wsize - self.Doshdr = pe.Doshdr(self) - self.NTsig = pe.NTsig(self) - self.Coffhdr = pe.Coffhdr(self) - - if self._wsize == 32: - Opthdr = pe.Opthdr32 - else: - Opthdr = pe.Opthdr64 - - self.Opthdr = Opthdr(self) - self.NThdr = pe.NThdr(self) - self.NThdr.optentries = [pe.Optehdr(self) for _ in range(0x10)] - 
self.NThdr.CheckSum = 0 - self.SHList = pe.SHList(self) - self.SHList.shlist = [] - - self.NThdr.sizeofheaders = 0x1000 - - self.DirImport = pe.DirImport(self) - self.DirExport = pe.DirExport(self) - self.DirDelay = pe.DirDelay(self) - self.DirReloc = pe.DirReloc(self) - self.DirRes = pe.DirRes(self) - - self.Doshdr.magic = 0x5a4d - self.Doshdr.lfanew = 0xe0 - - self.NTsig.signature = 0x4550 - if wsize == 32: - self.Opthdr.magic = 0x10b - elif wsize == 64: - self.Opthdr.magic = 0x20b - else: - raise ValueError('unknown pe size %r' % wsize) - self.Opthdr.majorlinkerversion = 0x7 - self.Opthdr.minorlinkerversion = 0x0 - self.NThdr.filealignment = 0x1000 - self.NThdr.sectionalignment = 0x1000 - self.NThdr.majoroperatingsystemversion = 0x5 - self.NThdr.minoroperatingsystemversion = 0x1 - self.NThdr.MajorImageVersion = 0x5 - self.NThdr.MinorImageVersion = 0x1 - self.NThdr.majorsubsystemversion = 0x4 - self.NThdr.minorsubsystemversion = 0x0 - self.NThdr.subsystem = 0x3 - if wsize == 32: - self.NThdr.dllcharacteristics = 0x8000 - else: - self.NThdr.dllcharacteristics = 0x8000 - - # for createthread - self.NThdr.sizeofstackreserve = 0x200000 - self.NThdr.sizeofstackcommit = 0x1000 - self.NThdr.sizeofheapreserve = 0x100000 - self.NThdr.sizeofheapcommit = 0x1000 - - self.NThdr.ImageBase = 0x400000 - self.NThdr.sizeofheaders = 0x1000 - self.NThdr.numberofrvaandsizes = 0x10 - - self.NTsig.signature = 0x4550 - if wsize == 32: - self.Coffhdr.machine = 0x14c - elif wsize == 64: - self.Coffhdr.machine = 0x8664 - else: - raise ValueError('unknown pe size %r' % wsize) - if wsize == 32: - self.Coffhdr.characteristics = 0x10f - self.Coffhdr.sizeofoptionalheader = 0xe0 - else: - self.Coffhdr.characteristics = 0x22 # 0x2f - self.Coffhdr.sizeofoptionalheader = 0xf0 - - else: - self._content = StrPatchwork(pestr) - self.loadfrommem = loadfrommem - self.parse_content(parse_resources=parse_resources, - parse_delay=parse_delay, - parse_reloc=parse_reloc) - - def isPE(self): - if self.NTsig 
is None: - return False - return self.NTsig.signature == 0x4550 - - def parse_content(self, - parse_resources=True, - parse_delay=True, - parse_reloc=True): - off = 0 - self._sex = 0 - self._wsize = 32 - self.Doshdr = pe.Doshdr.unpack(self.content, off, self) - off = self.Doshdr.lfanew - if off > len(self.content): - log.warn('ntsig after eof!') - self.NTsig = None - return - self.NTsig = pe.NTsig.unpack(self.content, - off, self) - self.DirImport = None - self.DirExport = None - self.DirDelay = None - self.DirReloc = None - self.DirRes = None - - if self.NTsig.signature != 0x4550: - log.warn('not a valid pe!') - return - off += len(self.NTsig) - self.Coffhdr, length = pe.Coffhdr.unpack_l(self.content, - off, - self) - - off += length - self._wsize = ord(self.content[off+1]) * 32 - - if self._wsize == 32: - Opthdr = pe.Opthdr32 - else: - Opthdr = pe.Opthdr64 - - if len(self.content) < 0x200: - # Fix for very little PE - self.content += (0x200 - len(self.content)) * b'\x00' - - self.Opthdr, length = Opthdr.unpack_l(self.content, off, self) - self.NThdr = pe.NThdr.unpack(self.content, off + length, self) - self.img_rva[0] = self.content[:self.NThdr.sizeofheaders] - off += self.Coffhdr.sizeofoptionalheader - self.SHList = pe.SHList.unpack(self.content, off, self) - - # load section data - filealignment = self.NThdr.filealignment - sectionalignment = self.NThdr.sectionalignment - for section in self.SHList.shlist: - virt_size = (section.size // sectionalignment + 1) * sectionalignment - if self.loadfrommem: - section.offset = section.addr - if self.NThdr.sectionalignment > 0x1000: - raw_off = 0x200 * (section.offset // 0x200) - else: - raw_off = section.offset - if raw_off != section.offset: - log.warn('unaligned raw section (%x %x)!', raw_off, section.offset) - section.data = StrPatchwork() - - if section.rawsize == 0: - rounded_size = 0 - else: - if section.rawsize % filealignment: - rs = (section.rawsize // filealignment + 1) * filealignment - else: - rs = 
section.rawsize - rounded_size = rs - if rounded_size > virt_size: - rounded_size = min(rounded_size, section.size) - data = self.content[raw_off:raw_off + rounded_size] - section.data = data - # Pad data to page size 0x1000 - length = len(data) - data += b"\x00" * ((((length + 0xfff)) & 0xFFFFF000) - length) - self.img_rva[section.addr] = data - # Fix img_rva - self.img_rva = self.img_rva - - try: - self.DirImport = pe.DirImport.unpack(self.img_rva, - self.NThdr.optentries[ - pe.DIRECTORY_ENTRY_IMPORT].rva, - self) - except pe.InvalidOffset: - log.warning('cannot parse DirImport, skipping') - self.DirImport = pe.DirImport(self) - - try: - self.DirExport = pe.DirExport.unpack(self.img_rva, - self.NThdr.optentries[ - pe.DIRECTORY_ENTRY_EXPORT].rva, - self) - except pe.InvalidOffset: - log.warning('cannot parse DirExport, skipping') - self.DirExport = pe.DirExport(self) - - if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_DELAY_IMPORT: - self.DirDelay = pe.DirDelay(self) - if parse_delay: - try: - self.DirDelay = pe.DirDelay.unpack(self.img_rva, - self.NThdr.optentries[ - pe.DIRECTORY_ENTRY_DELAY_IMPORT].rva, - self) - except pe.InvalidOffset: - log.warning('cannot parse DirDelay, skipping') - if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_BASERELOC: - self.DirReloc = pe.DirReloc(self) - if parse_reloc: - try: - self.DirReloc = pe.DirReloc.unpack(self.img_rva, - self.NThdr.optentries[ - pe.DIRECTORY_ENTRY_BASERELOC].rva, - self) - except pe.InvalidOffset: - log.warning('cannot parse DirReloc, skipping') - if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_RESOURCE: - self.DirRes = pe.DirRes(self) - if parse_resources: - self.DirRes = pe.DirRes(self) - try: - self.DirRes = pe.DirRes.unpack(self.img_rva, - self.NThdr.optentries[ - pe.DIRECTORY_ENTRY_RESOURCE].rva, - self) - except pe.InvalidOffset: - log.warning('cannot parse DirRes, skipping') - - def resize(self, old, new): - pass - - def __getitem__(self, item): - return self.content[item] - - def 
__setitem__(self, item, data): - self.content.__setitem__(item, data) - return - - def getsectionbyrva(self, rva): - if self.SHList is None: - return None - for section in self.SHList.shlist: - """ - TODO CHECK: - some binaries have import rva outside section, but addresses - seems to be rounded - """ - mask = self.NThdr.sectionalignment - 1 - if section.addr <= rva < (section.addr + section.size + mask) & ~(mask): - return section - return None - - def getsectionbyvad(self, vad): - return self.getsectionbyrva(self.virt2rva(vad)) - - def getsectionbyoff(self, off): - if self.SHList is None: - return None - for section in self.SHList.shlist: - if section.offset <= off < section.offset + section.rawsize: - return section - return None - - def getsectionbyname(self, name): - if self.SHList is None: - return None - for section in self.SHList: - if section.name.strip(b'\x00').decode() == name: - return section - return None - - def is_rva_ok(self, rva): - return self.getsectionbyrva(rva) is not None - - def rva2off(self, rva): - # Special case rva in header - if rva < self.NThdr.sizeofheaders: - return rva - section = self.getsectionbyrva(rva) - if section is None: - raise pe.InvalidOffset('cannot get offset for 0x%X' % rva) - soff = (section.offset // self.NThdr.filealignment) * self.NThdr.filealignment - return rva - section.addr + soff - - def off2rva(self, off): - section = self.getsectionbyoff(off) - if section is None: - return - return off - section.offset + section.addr - - def virt2rva(self, virt): - if virt is None: - return - return virt - self.NThdr.ImageBase - - def rva2virt(self, rva): - if rva is None: - return - return rva + self.NThdr.ImageBase - - def virt2off(self, virt): - return self.rva2off(self.virt2rva(virt)) - - def off2virt(self, off): - return self.rva2virt(self.off2rva(off)) - - def is_in_virt_address(self, addr): - if addr < self.NThdr.ImageBase: - return False - addr = self.virt2rva(addr) - for section in self.SHList.shlist: - if 
section.addr <= addr < section.addr + section.size: - return True - return False - - def get_drva(self): - print('Deprecated: Use PE.rva instead of PE.drva') - return self._rva - - def get_rva(self): - return self._rva - - # TODO XXX remove drva api - drva = property(get_drva) - rva = property(get_rva) - - def get_virt(self): - return self._virt - - virt = property(get_virt) - - def build_content(self): - - content = StrPatchwork() - content[0] = bytes(self.Doshdr) - - for section in self.SHList.shlist: - content[section.offset:section.offset + section.rawsize] = bytes(section.data) - - # fix image size - section_last = self.SHList.shlist[-1] - size = section_last.addr + section_last.size + (self.NThdr.sectionalignment - 1) - size &= ~(self.NThdr.sectionalignment - 1) - self.NThdr.sizeofimage = size - - off = self.Doshdr.lfanew - content[off] = bytes(self.NTsig) - off += len(self.NTsig) - content[off] = bytes(self.Coffhdr) - off += len(self.Coffhdr) - off_shlist = off + self.Coffhdr.sizeofoptionalheader - content[off] = bytes(self.Opthdr) - off += len(self.Opthdr) - content[off] = bytes(self.NThdr) - off += len(self.NThdr) - # content[off] = bytes(self.Optehdr) - - off = off_shlist - content[off] = bytes(self.SHList) - - for section in self.SHList: - if off + len(bytes(self.SHList)) > section.offset: - log.warn("section offset overlap pe hdr 0x%x 0x%x" % - (off + len(bytes(self.SHList)), section.offset)) - self.DirImport.build_content(content) - self.DirExport.build_content(content) - self.DirDelay.build_content(content) - self.DirReloc.build_content(content) - self.DirRes.build_content(content) - - if (self.Doshdr.lfanew + len(self.NTsig) + len(self.Coffhdr)) % 4: - log.warn("non aligned coffhdr, bad crc calculation") - crcs = compute_crc(bytes(content), self.NThdr.CheckSum) - content[self.Doshdr.lfanew + len(self.NTsig) + len(self.Coffhdr) + 64] = struct.pack('I', crcs) - return bytes(content) - - def __bytes__(self): - return self.build_content() - - def 
__str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def export_funcs(self): - if self.DirExport is None: - print('no export dir found') - return None, None - - all_func = {} - for i, export in enumerate(self.DirExport.f_names): - all_func[export.name.name] = self.rva2virt( - self.DirExport.f_address[self.DirExport.f_nameordinals[i].ordinal].rva) - all_func[self.DirExport.f_nameordinals[i].ordinal + self.DirExport.expdesc.base] = self.rva2virt( - self.DirExport.f_address[self.DirExport.f_nameordinals[i].ordinal].rva) - # XXX todo: test if redirected export - return all_func - - def reloc_to(self, imgbase): - offset = imgbase - self.NThdr.ImageBase - if self.DirReloc is None: - log.warn('no relocation found!') - for rel in self.DirReloc.reldesc: - rva = rel.rva - for reloc in rel.rels: - reloc_type, off = reloc.rel - if reloc_type == 0 and off == 0: - continue - if reloc_type != 3: - raise NotImplementedError('Reloc type not supported') - off += rva - value = struct.unpack('I', self.rva.get(off, off + 4))[0] - value += offset - self.rva.set(off, struct.pack('I', value & 0xFFFFFFFF)) - self.NThdr.ImageBase = imgbase diff --git a/miasm/elfesteem/strpatchwork.py b/miasm/elfesteem/strpatchwork.py deleted file mode 100644 index e1a5de91..00000000 --- a/miasm/elfesteem/strpatchwork.py +++ /dev/null @@ -1,106 +0,0 @@ -from array import array -import struct -from sys import maxsize - -from future.utils import PY3 - -if PY3: - - def array_frombytes(arr, value): - return arr.frombytes(value) - - def array_tobytes(arr): - return arr.tobytes() - - -else: - - def array_frombytes(arr, value): - return arr.fromstring(value) - - def array_tobytes(arr): - return arr.tostring() - - -class StrPatchwork(object): - - def __init__(self, s=b"", paddingbyte=b"\x00"): - s_raw = bytes(s) - val = array("B") - array_frombytes(val, s_raw) - self.s = val - # cache s to avoid rebuilding str after each find - self.s_cache = s_raw - self.paddingbyte = paddingbyte - - def 
__bytes__(self): - return array_tobytes(self.s) - - def __str__(self): - if PY3: - return repr(self) - return self.__bytes__() - - def __getitem__(self, item): - s = self.s - if isinstance(item, slice): - end = item.stop - l = len(s) - if (end is not None and l < end) and end != maxsize: - # XXX hack [x:] give 2GB limit - # This is inefficient but avoids complicated maths if step is - # not 1 - s = s[:] - - tmp = array("B") - array_frombytes(tmp, self.paddingbyte * (end - l)) - s.extend(tmp) - r = s[item] - return array_tobytes(r) - - else: - if item > len(s): - return self.paddingbyte - else: - return struct.pack("B", s[item]) - - def __setitem__(self, item, val): - if val is None: - return - val_array = array("B") - array_frombytes(val_array, bytes(val)) - if type(item) is not slice: - item = slice(item, item + len(val_array)) - end = item.stop - l = len(self.s) - if l < end: - tmp = array("B") - array_frombytes(tmp, self.paddingbyte * (end - l)) - self.s.extend(tmp) - self.s[item] = val_array - self.s_cache = None - - def __repr__(self): - return "" % array_tobytes(self.s) - - def __len__(self): - return len(self.s) - - def __contains__(self, val): - return val in bytes(self) - - def __iadd__(self, other): - tmp = array("B") - array_frombytes(tmp, bytes(other)) - self.s.extend(tmp) - return self - - def find(self, pattern, start=0, end=None): - if not self.s_cache: - self.s_cache = array_tobytes(self.s) - return self.s_cache.find(pattern, start, end) - - def rfind(self, pattern, start=0, end=None): - if not self.s_cache: - self.s_cache = array_tobytes(self.s) - return self.s_cache.rfind(pattern, start, end) diff --git a/miasm/jitter/loader/elf.py b/miasm/jitter/loader/elf.py index 4c68fc91..b70872df 100644 --- a/miasm/jitter/loader/elf.py +++ b/miasm/jitter/loader/elf.py @@ -3,9 +3,9 @@ from collections import defaultdict from future.utils import viewitems -from miasm.elfesteem import cstruct -from miasm.elfesteem import * -import miasm.elfesteem.elf as elf_csts 
+from miasm.loader import cstruct +from miasm.loader import * +import miasm.loader.elf as elf_csts from miasm.jitter.csts import * from miasm.jitter.loader.utils import canon_libname_libfunc, libimp @@ -56,11 +56,11 @@ def preload_elf(vm, e, runtime_lib, patch_vm_imp=True, loc_db=None): return runtime_lib, dyn_funcs def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): - """Parse the miasm.elfesteem's ELF @elf to extract symbols, and fill the LocationDB + """Parse the miasm.loader's ELF @elf to extract symbols, and fill the LocationDB instance @loc_db with parsed symbols. The ELF is considered mapped at @base_addr - @elf: miasm.elfesteem's ELF instance + @elf: miasm.loader's ELF instance @loc_db: LocationDB used to retrieve symbols'offset @base_addr: addr to reloc to (if any) """ @@ -163,7 +163,7 @@ def fill_loc_db_with_symbols(elf, loc_db, base_addr=0): def apply_reloc_x86(elf, vm, section, base_addr, loc_db): """Apply relocation for x86 ELF contained in the section @section - @elf: miasm.elfesteem's ELF instance + @elf: miasm.loader's ELF instance @vm: VmMngr instance @section: elf's section containing relocation to perform @base_addr: addr to reloc to diff --git a/miasm/jitter/loader/pe.py b/miasm/jitter/loader/pe.py index a2bdd3ac..63a56d38 100644 --- a/miasm/jitter/loader/pe.py +++ b/miasm/jitter/loader/pe.py @@ -6,9 +6,9 @@ from collections import defaultdict from future.utils import viewitems, viewvalues -from miasm.elfesteem import pe -from miasm.elfesteem import cstruct -from miasm.elfesteem import * +from miasm.loader import pe +from miasm.loader import cstruct +from miasm.loader import * from miasm.jitter.csts import * from miasm.jitter.loader.utils import canon_libname_libfunc, libimp diff --git a/miasm/loader/__init__.py b/miasm/loader/__init__.py new file mode 100644 index 00000000..1a602f38 --- /dev/null +++ b/miasm/loader/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python + +__all__ = ['pe_init', 'elf_init', 'strpatchwork'] diff --git 
a/miasm/loader/cstruct.py b/miasm/loader/cstruct.py new file mode 100644 index 00000000..06d2e002 --- /dev/null +++ b/miasm/loader/cstruct.py @@ -0,0 +1,154 @@ +#! /usr/bin/env python + +from __future__ import print_function +from builtins import zip +from functools import reduce +import struct + +from future.utils import PY3 + +type_size = {} +size2type = {} +for t in 'B', 'H', 'I', 'Q': + s = struct.calcsize(t) + type_size[t] = s * 8 + size2type[s * 8] = t + +type_size['u08'] = size2type[8] +type_size['u16'] = size2type[16] +type_size['u32'] = size2type[32] +type_size['u64'] = size2type[64] + + +def fix_size(fields, wsize): + out = [] + for name, v in fields: + if v.endswith("s"): + pass + elif v == "ptr": + v = size2type[wsize] + elif not v in type_size: + raise ValueError("unknown Cstruct type", v) + else: + v = type_size[v] + out.append((name, v)) + fields = out + return fields + + +class Cstruct_Metaclass(type): + + def __new__(cls, name, bases, dct): + o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct) + o._packstring = o._packformat + \ + "".join(x[1] for x in o._fields) + o._size = struct.calcsize(o._packstring) + return o + + +class CStruct(object): + #__metaclass__ = Cstruct_Metaclass + _packformat = "" + _fields = [] + + @classmethod + def _from_file(cls, f): + return cls(f.read(cls._size)) + + def __init__(self, sex, wsize, *args, **kargs): + if sex == 1: + sex = '<' + else: + sex = '>' + # packformat enforce sex + if self._packformat: + sex = "" + pstr = fix_size(self._fields, wsize) + self._packstring = sex + self._packformat + \ + "".join(x[1] for x in pstr) + self._size = struct.calcsize(self._packstring) + + self._names = [x[0] for x in self._fields] + if kargs: + self.__dict__.update(kargs) + else: + if args: + s = args[0] + else: + s = b"" + s += b"\x00" * self._size + s = s[:self._size] + self._unpack(s) + + def _unpack(self, s): + disas = struct.unpack(self._packstring, s) + for n, v in zip(self._names, disas): + setattr(self, n, 
v) + + def _pack(self): + return struct.pack(self._packstring, + *(getattr(self, x) for x in self._names)) + + def _spack(self, superstruct, shift=0): + attr = [] + for name in self._names: + s = getattr(self, name) + if isinstance(s, CStruct): + if s in superstruct: + s = reduce(lambda x, y: x + len(y), + superstruct[:superstruct.index(s)], + 0) + s += shift + else: + raise Exception("%r is not a superstructure" % s) + attr.append(s) + return struct.pack(self._packstring, *attr) + + def _copy(self): + return self.__class__(**self.__dict__) + + def __len__(self): + return self._size + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __bytes__(self): + return self._pack() + + def __repr__(self): + return "<%s=%s>" % (self.__class__.__name__, "/".join(repr( + getattr(self, x[0])) for x in self._fields + )) + + def __getitem__(self, item): # to work with format strings + return getattr(self, item) + + def _show(self): + print("##%s:" % self.__class__.__name__) + fmt = "%%-%is = %%r" % max(len(x[0]) for x in self._fields) + for fn, ft in self._fields: + print(fmt % (fn, getattr(self, fn))) + + +class CStructStruct(object): + + def __init__(self, lst, shift=0): + self._lst = lst + self._shift = shift + + def __getattr__(self, attr): + return getattr(self._lst, attr) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __bytes__(self): + return b"".join( + a if isinstance(a, bytes) else a._spack(self._lst, self._shift) + for a in self._lst + ) diff --git a/miasm/loader/elf.py b/miasm/loader/elf.py new file mode 100644 index 00000000..74258782 --- /dev/null +++ b/miasm/loader/elf.py @@ -0,0 +1,1538 @@ +#! 
/usr/bin/env python + +from miasm.loader.cstruct import CStruct + +class Ehdr(CStruct): + _fields = [ ("ident","16s"), + ("type","u16"), + ("machine","u16"), + ("version","u32"), + ("entry","ptr"), + ("phoff","ptr"), + ("shoff","ptr"), + ("flags","u32"), + ("ehsize","u16"), + ("phentsize","u16"), + ("phnum","u16"), + ("shentsize","u16"), + ("shnum","u16"), + ("shstrndx","u16") ] + + +class Shdr(CStruct): + _fields = [ ("name","u32"), + ("type","u32"), + ("flags","ptr"), + ("addr","ptr"), + ("offset","ptr"), + ("size","ptr"), + ("link","u32"), + ("info","u32"), + ("addralign","ptr"), + ("entsize","ptr") ] + +class Phdr(CStruct): + _fields = [ ("type","u32"), + ("offset","u32"), + ("vaddr","u32"), + ("paddr","u32"), + ("filesz","u32"), + ("memsz","u32"), + ("flags","u32"), + ("align","u32") ] + +class Phdr64(CStruct): + _fields = [ ("type","u32"), + ("flags","u32"), + ("offset","ptr"), + ("vaddr","ptr"), + ("paddr","ptr"), + ("filesz","ptr"), + ("memsz","ptr"), + ("align","ptr") ] + +class Nhdr(CStruct): + _fields = [ ("namesz","u32"), + ("descsz","u32"), + ("type", "u32") ] + + +class Sym32(CStruct): + _fields = [ ("name","u32"), + ("value","u32"), + ("size","u32"), + ("info","u08"), + ("other","u08"), + ("shndx","u16") ] + +class Sym64(CStruct): + _fields = [ ("name","u32"), + ("info","u08"), + ("other","u08"), + ("shndx","u16"), + ("value","u64"), + ("size","u64") ] + +class Dym(CStruct): + _fields = [ ("tag","u32"), + ("val","u32") ] + +class Rel32(CStruct): + _fields = [ ("offset","ptr"), + ("info","u32") ] + +class Rel64(CStruct): + _fields = [ ("offset","ptr"), + ("info","u64") ] + +class Rela32(CStruct): + _fields = [ ("offset","ptr"), + ("info","u32"), + ("addend","ptr") ] + +class Rela64(CStruct): + _fields = [ ("offset","ptr"), + ("info","u64"), + ("addend","ptr") ] + +class Dynamic(CStruct): + _fields = [ ("type","ptr"), + ("name","ptr") ] + + +# Legal values for e_ident (identification indexes) + +EI_MAG0 = 0 # File identification +EI_MAG1 = 1 # File 
identification +EI_MAG2 = 2 # File identification +EI_MAG3 = 3 # File identification +EI_CLASS = 4 # File class +EI_DATA = 5 # Data encoding +EI_VERSION = 6 # File version +EI_OSABI = 7 # Operating system/ABI identification +EI_ABIVERSION = 8 # ABI version +EI_PAD = 9 # Start of padding bytes +EI_NIDENT = 16 # Size of e_ident[] + +# Legal values for e_ident[EI_CLASS] + +ELFCLASSNONE = 0 # Invalid class +ELFCLASS32 = 1 # 32-bit objects +ELFCLASS64 = 2 # 64-bit objects + +# Legal values for e_ident[EI_DATA] + +ELFDATANONE = 0 # Invalid data encoding +ELFDATA2LSB = 1 # Least significant byte at lowest address +ELFDATA2MSB = 2 # Most significant byte at lowest address + +# Legal values for e_type (object file type). + +ET_NONE = 0 # No file type +ET_REL = 1 # Relocatable file +ET_EXEC = 2 # Executable file +ET_DYN = 3 # Shared object file +ET_CORE = 4 # Core file +ET_NUM = 5 # Number of defined types +ET_LOOS = 0xfe00 # OS-specific range start +ET_HIOS = 0xfeff # OS-specific range end +ET_LOPROC = 0xff00 # Processor-specific range start +ET_HIPROC = 0xffff # Processor-specific range end + +# Legal values for e_machine (architecture). 

EM_NONE = 0  # No machine
EM_M32 = 1  # AT&T WE 32100
EM_SPARC = 2  # SUN SPARC
EM_386 = 3  # Intel 80386
EM_68K = 4  # Motorola m68k family
EM_88K = 5  # Motorola m88k family
EM_486 = 6  # Intel 80486
EM_860 = 7  # Intel 80860
EM_MIPS = 8  # MIPS R3000 big-endian
EM_S370 = 9  # IBM System/370
EM_MIPS_RS3_LE = 10  # MIPS R3000 little-endian

EM_PARISC = 15  # HPPA
EM_VPP500 = 17  # Fujitsu VPP500
EM_SPARC32PLUS = 18  # Sun's "v8plus"
EM_960 = 19  # Intel 80960
EM_PPC = 20  # PowerPC
EM_PPC64 = 21  # PowerPC 64-bit
EM_S390 = 22  # IBM S390

EM_V800 = 36  # NEC V800 series
EM_FR20 = 37  # Fujitsu FR20
EM_RH32 = 38  # TRW RH-32
EM_RCE = 39  # Motorola RCE
EM_ARM = 40  # ARM
EM_FAKE_ALPHA = 41  # Digital Alpha
EM_SH = 42  # Hitachi SH
EM_SPARCV9 = 43  # SPARC v9 64-bit
EM_TRICORE = 44  # Siemens Tricore
EM_ARC = 45  # Argonaut RISC Core
EM_H8_300 = 46  # Hitachi H8/300
EM_H8_300H = 47  # Hitachi H8/300H
EM_H8S = 48  # Hitachi H8S
EM_H8_500 = 49  # Hitachi H8/500
EM_IA_64 = 50  # Intel Merced
EM_MIPS_X = 51  # Stanford MIPS-X
EM_COLDFIRE = 52  # Motorola Coldfire
EM_68HC12 = 53  # Motorola M68HC12
EM_MMA = 54  # Fujitsu MMA Multimedia Accelerator
EM_PCP = 55  # Siemens PCP
EM_NCPU = 56  # Sony nCPU embedded RISC
EM_NDR1 = 57  # Denso NDR1 microprocessor
EM_STARCORE = 58  # Motorola Start*Core processor
EM_ME16 = 59  # Toyota ME16 processor
EM_ST100 = 60  # STMicroelectronics ST100 processor
EM_TINYJ = 61  # Advanced Logic Corp. Tinyj emb.fam
EM_X86_64 = 62  # AMD x86-64 architecture
EM_AARCH64 = 183  # Aarch64 architecture
EM_PDSP = 63  # Sony DSP Processor

EM_FX66 = 66  # Siemens FX66 microcontroller
EM_ST9PLUS = 67  # STMicroelectronics ST9+ 8/16 mc
EM_ST7 = 68  # STmicroelectronics ST7 8 bit mc
EM_68HC16 = 69  # Motorola MC68HC16 microcontroller
EM_68HC11 = 70  # Motorola MC68HC11 microcontroller
EM_68HC08 = 71  # Motorola MC68HC08 microcontroller
EM_68HC05 = 72  # Motorola MC68HC05 microcontroller
EM_SVX = 73  # Silicon Graphics SVx
EM_ST19 = 74  # STMicroelectronics ST19 8 bit mc
EM_VAX = 75  # Digital VAX
EM_CRIS = 76  # Axis Communications 32-bit embedded processor
EM_JAVELIN = 77  # Infineon Technologies 32-bit embedded processor
EM_FIREPATH = 78  # Element 14 64-bit DSP Processor
EM_ZSP = 79  # LSI Logic 16-bit DSP Processor
EM_MMIX = 80  # Donald Knuth's educational 64-bit processor
EM_HUANY = 81  # Harvard University machine-independent object files
EM_PRISM = 82  # SiTera Prism
EM_AVR = 83  # Atmel AVR 8-bit microcontroller
EM_FR30 = 84  # Fujitsu FR30
EM_D10V = 85  # Mitsubishi D10V
EM_D30V = 86  # Mitsubishi D30V
EM_V850 = 87  # NEC v850
EM_M32R = 88  # Mitsubishi M32R
EM_MN10300 = 89  # Matsushita MN10300
EM_MN10200 = 90  # Matsushita MN10200
EM_PJ = 91  # picoJava
EM_OPENRISC = 92  # OpenRISC 32-bit embedded processor
EM_ARC_A5 = 93  # ARC Cores Tangent-A5
EM_XTENSA = 94  # Tensilica Xtensa Architecture

EM_ALPHA = 0x9026

# Legal values for sh_type (section type).

SHT_NULL = 0  # Section header table entry unused
SHT_PROGBITS = 1  # Program data
SHT_SYMTAB = 2  # Symbol table
SHT_STRTAB = 3  # String table
SHT_RELA = 4  # Relocation entries with addends
SHT_HASH = 5  # Symbol hash table
SHT_DYNAMIC = 6  # Dynamic linking information
SHT_NOTE = 7  # Notes
SHT_NOBITS = 8  # Program space with no data (bss)
SHT_REL = 9  # Relocation entries, no addends
SHT_SHLIB = 10  # Reserved
SHT_DYNSYM = 11  # Dynamic linker symbol table
SHT_INIT_ARRAY = 14  # Array of constructors
SHT_FINI_ARRAY = 15  # Array of destructors
SHT_PREINIT_ARRAY = 16  # Array of pre-constructors
SHT_GROUP = 17  # Section group
SHT_SYMTAB_SHNDX = 18  # Extended section indices
SHT_NUM = 19  # Number of defined types.
SHT_LOOS = 0x60000000  # Start OS-specific
SHT_GNU_LIBLIST = 0x6ffffff7  # Prelink library list
SHT_CHECKSUM = 0x6ffffff8  # Checksum for DSO content.
SHT_LOSUNW = 0x6ffffffa  # Sun-specific low bound.
SHT_SUNW_move = 0x6ffffffa
SHT_SUNW_COMDAT = 0x6ffffffb
SHT_SUNW_syminfo = 0x6ffffffc
SHT_GNU_verdef = 0x6ffffffd  # Version definition section.
SHT_GNU_verneed = 0x6ffffffe  # Version needs section.
SHT_GNU_versym = 0x6fffffff  # Version symbol table.
SHT_HISUNW = 0x6fffffff  # Sun-specific high bound.
SHT_HIOS = 0x6fffffff  # End OS-specific type
SHT_LOPROC = 0x70000000  # Start of processor-specific
SHT_HIPROC = 0x7fffffff  # End of processor-specific
SHT_LOUSER = 0x80000000  # Start of application-specific
SHT_HIUSER = 0x8fffffff  # End of application-specific

# Legal values for sh_flags (section flags).

SHF_WRITE = (1 << 0)  # Writable
SHF_ALLOC = (1 << 1)  # Occupies memory during execution
SHF_EXECINSTR = (1 << 2)  # Executable
SHF_MERGE = (1 << 4)  # Might be merged
SHF_STRINGS = (1 << 5)  # Contains nul-terminated strings
SHF_INFO_LINK = (1 << 6)  # `sh_info' contains SHT index
SHF_LINK_ORDER = (1 << 7)  # Preserve order after combining
SHF_OS_NONCONFORMING = (1 << 8)  # Non-standard OS specific handling required
SHF_GROUP = (1 << 9)  # Section is member of a group.
SHF_TLS = (1 << 10)  # Section hold thread-local data.
SHF_MASKOS = 0x0ff00000  # OS-specific.
SHF_MASKPROC = 0xf0000000  # Processor-specific

# Section group handling.

GRP_COMDAT = 0x1  # Mark group as COMDAT.

# Legal values for p_type (segment type).

PT_NULL = 0  # Program header table entry unused
PT_LOAD = 1  # Loadable program segment
PT_DYNAMIC = 2  # Dynamic linking information
PT_INTERP = 3  # Program interpreter
PT_NOTE = 4  # Auxiliary information
PT_SHLIB = 5  # Reserved
PT_PHDR = 6  # Entry for header table itself
PT_TLS = 7  # Thread-local storage segment
PT_NUM = 8  # Number of defined types
PT_LOOS = 0x60000000  # Start of OS-specific
PT_GNU_EH_FRAME = 0x6474e550  # GCC .eh_frame_hdr segment
PT_GNU_STACK = 0x6474e551  # Indicates stack executability
PT_LOSUNW = 0x6ffffffa
PT_SUNWBSS = 0x6ffffffa  # Sun Specific segment
PT_SUNWSTACK = 0x6ffffffb  # Stack segment
PT_HISUNW = 0x6fffffff
PT_HIOS = 0x6fffffff  # End of OS-specific
PT_LOPROC = 0x70000000  # Start of processor-specific
PT_HIPROC = 0x7fffffff  # End of processor-specific

# Legal values for p_flags (segment flags).

PF_X = (1 << 0)  # Segment is executable
PF_W = (1 << 1)  # Segment is writable
PF_R = (1 << 2)  # Segment is readable
PF_MASKOS = 0x0ff00000  # OS-specific
PF_MASKPROC = 0xf0000000  # Processor-specific

# Legal values for note segment descriptor types for core files.

NT_PRSTATUS = 1  # Contains copy of prstatus struct
NT_FPREGSET = 2  # Contains copy of fpregset struct
NT_PRPSINFO = 3  # Contains copy of prpsinfo struct
NT_PRXREG = 4  # Contains copy of prxregset struct
NT_TASKSTRUCT = 4  # Contains copy of task structure
NT_PLATFORM = 5  # String from sysinfo(SI_PLATFORM)
NT_AUXV = 6  # Contains copy of auxv array
NT_GWINDOWS = 7  # Contains copy of gwindows struct
NT_ASRS = 8  # Contains copy of asrset struct
NT_PSTATUS = 10  # Contains copy of pstatus struct
NT_PSINFO = 13  # Contains copy of psinfo struct
NT_PRCRED = 14  # Contains copy of prcred struct
NT_UTSNAME = 15  # Contains copy of utsname struct
NT_LWPSTATUS = 16  # Contains copy of lwpstatus struct
NT_LWPSINFO = 17  # Contains copy of lwpinfo struct
NT_PRFPXREG = 20  # Contains copy of fprxregset struct

# Legal values for the note segment descriptor types for object files.

NT_VERSION = 1  # Contains a version string.

# Legal values for ST_BIND subfield of st_info (symbol binding).
# bind = Sym.info >> 4
# val = Sym.info & 0xf

STB_LOCAL = 0  # Local symbol
STB_GLOBAL = 1  # Global symbol
STB_WEAK = 2  # Weak symbol
STB_NUM = 3  # Number of defined types.
STB_LOOS = 10  # Start of OS-specific
STB_HIOS = 12  # End of OS-specific
STB_LOPROC = 13  # Start of processor-specific
STB_HIPROC = 15  # End of processor-specific

# Legal values for ST_TYPE subfield of st_info (symbol type).

STT_NOTYPE = 0  # Symbol type is unspecified
STT_OBJECT = 1  # Symbol is a data object
STT_FUNC = 2  # Symbol is a code object
STT_SECTION = 3  # Symbol associated with a section
STT_FILE = 4  # Symbol's name is file name
STT_COMMON = 5  # Symbol is a common data object
STT_TLS = 6  # Symbol is thread-local data object
STT_NUM = 7  # Number of defined types.
STT_LOOS = 10  # Start of OS-specific
STT_GNU_IFUNC = 10  # Symbol is indirect code object
STT_HIOS = 12  # End of OS-specific
STT_LOPROC = 13  # Start of processor-specific
STT_HIPROC = 15  # End of processor-specific

# Legal values for d_tag (dynamic entry type).

DT_NULL = 0  # Marks end of dynamic section
DT_NEEDED = 1  # Name of needed library
DT_PLTRELSZ = 2  # Size in bytes of PLT relocs
DT_PLTGOT = 3  # Processor defined value
DT_HASH = 4  # Address of symbol hash table
DT_STRTAB = 5  # Address of string table
DT_SYMTAB = 6  # Address of symbol table
DT_RELA = 7  # Address of Rela relocs
DT_RELASZ = 8  # Total size of Rela relocs
DT_RELAENT = 9  # Size of one Rela reloc
DT_STRSZ = 10  # Size of string table
DT_SYMENT = 11  # Size of one symbol table entry
DT_INIT = 12  # Address of init function
DT_FINI = 13  # Address of termination function
DT_SONAME = 14  # Name of shared object
DT_RPATH = 15  # Library search path (deprecated)
DT_SYMBOLIC = 16  # Start symbol search here
DT_REL = 17  # Address of Rel relocs
DT_RELSZ = 18  # Total size of Rel relocs
DT_RELENT = 19  # Size of one Rel reloc
DT_PLTREL = 20  # Type of reloc in PLT
DT_DEBUG = 21  # For debugging; unspecified
DT_TEXTREL = 22  # Reloc might modify .text
DT_JMPREL = 23  # Address of PLT relocs
DT_BIND_NOW = 24  # Process relocations of object
DT_INIT_ARRAY = 25  # Array with addresses of init fct
DT_FINI_ARRAY = 26  # Array with addresses of fini fct
DT_INIT_ARRAYSZ = 27  # Size in bytes of DT_INIT_ARRAY
DT_FINI_ARRAYSZ = 28  # Size in bytes of DT_FINI_ARRAY
DT_RUNPATH = 29  # Library search path
DT_FLAGS = 30  # Flags for the object being loaded
DT_ENCODING = 32  # Start of encoded range
DT_PREINIT_ARRAY = 32  # Array with addresses of preinit fct
DT_PREINIT_ARRAYSZ = 33  # size in bytes of DT_PREINIT_ARRAY
DT_NUM = 34  # Number used
DT_LOOS = 0x6000000d  # Start of OS-specific
DT_HIOS = 0x6ffff000  # End of OS-specific
DT_LOPROC = 0x70000000  # Start of processor-specific
DT_HIPROC = 0x7fffffff  # End of processor-specific
#DT_PROCNUM = DT_MIPS_NUM # Most used by any processor

# DT_* entries which fall between DT_VALRNGHI & DT_VALRNGLO use the
# Dyn.d_un.d_val field of the Elf*_Dyn structure. This follows Sun's
# approach.
DT_VALRNGLO = 0x6ffffd00
DT_GNU_PRELINKED = 0x6ffffdf5  # Prelinking timestamp
DT_GNU_CONFLICTSZ = 0x6ffffdf6  # Size of conflict section
DT_GNU_LIBLISTSZ = 0x6ffffdf7  # Size of library list
DT_CHECKSUM = 0x6ffffdf8
DT_PLTPADSZ = 0x6ffffdf9
DT_MOVEENT = 0x6ffffdfa
DT_MOVESZ = 0x6ffffdfb
DT_FEATURE_1 = 0x6ffffdfc  # Feature selection (DTF_*).
DT_POSFLAG_1 = 0x6ffffdfd  # Flags for DT_* entries, affecting the following DT_* entry.
DT_SYMINSZ = 0x6ffffdfe  # Size of syminfo table (in bytes)
DT_SYMINENT = 0x6ffffdff  # Entry size of syminfo
DT_VALRNGHI = 0x6ffffdff
DT_VALNUM = 12

# DT_* entries which fall between DT_ADDRRNGHI & DT_ADDRRNGLO use the
# Dyn.d_un.d_ptr field of the Elf*_Dyn structure.
#
# If any adjustment is made to the ELF object after it has been
# built these entries will need to be adjusted.
DT_ADDRRNGLO = 0x6ffffe00
DT_GNU_CONFLICT = 0x6ffffef8  # Start of conflict section
DT_GNU_LIBLIST = 0x6ffffef9  # Library list
DT_CONFIG = 0x6ffffefa  # Configuration information.
DT_DEPAUDIT = 0x6ffffefb  # Dependency auditing.
DT_AUDIT = 0x6ffffefc  # Object auditing.
DT_PLTPAD = 0x6ffffefd  # PLT padding.
DT_MOVETAB = 0x6ffffefe  # Move table.
DT_SYMINFO = 0x6ffffeff  # Syminfo table.
DT_ADDRRNGHI = 0x6ffffeff
DT_ADDRNUM = 10

# The versioning entry types. The next are defined as part of the
# GNU extension.
DT_VERSYM = 0x6ffffff0

DT_RELACOUNT = 0x6ffffff9
DT_RELCOUNT = 0x6ffffffa

# These were chosen by Sun.
DT_FLAGS_1 = 0x6ffffffb  # State flags, see DF_1_* below.
DT_VERDEF = 0x6ffffffc  # Address of version definition table
DT_VERDEFNUM = 0x6ffffffd  # Number of version definitions
DT_VERNEED = 0x6ffffffe  # Address of table with needed versions
DT_VERNEEDNUM = 0x6fffffff  # Number of needed versions
DT_VERSIONTAGNUM = 16

# Sun added these machine-independent extensions in the "processor-specific"
# range. Be compatible.
DT_AUXILIARY = 0x7ffffffd  # Shared object to load before self
DT_FILTER = 0x7fffffff  # Shared object to get values from
DT_EXTRANUM = 3

# Values of `d_un.d_val' in the DT_FLAGS entry.
DF_ORIGIN = 0x00000001  # Object may use DF_ORIGIN
DF_SYMBOLIC = 0x00000002  # Symbol resolutions starts here
DF_TEXTREL = 0x00000004  # Object contains text relocations
DF_BIND_NOW = 0x00000008  # No lazy binding for this object
DF_STATIC_TLS = 0x00000010  # Module uses the static TLS model

# State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1
# entry in the dynamic section.
DF_1_NOW = 0x00000001  # Set RTLD_NOW for this object.
DF_1_GLOBAL = 0x00000002  # Set RTLD_GLOBAL for this object.
DF_1_GROUP = 0x00000004  # Set RTLD_GROUP for this object.
DF_1_NODELETE = 0x00000008  # Set RTLD_NODELETE for this object.
DF_1_LOADFLTR = 0x00000010  # Trigger filtee loading at runtime.
DF_1_INITFIRST = 0x00000020  # Set RTLD_INITFIRST for this object
DF_1_NOOPEN = 0x00000040  # Set RTLD_NOOPEN for this object.
DF_1_ORIGIN = 0x00000080  # $ORIGIN must be handled.
DF_1_DIRECT = 0x00000100  # Direct binding enabled.
DF_1_TRANS = 0x00000200
DF_1_INTERPOSE = 0x00000400  # Object is used to interpose.
DF_1_NODEFLIB = 0x00000800  # Ignore default lib search path.
DF_1_NODUMP = 0x00001000  # Object can't be dldump'ed.
DF_1_CONFALT = 0x00002000  # Configuration alternative created.
DF_1_ENDFILTEE = 0x00004000  # Filtee terminates filters search.
DF_1_DISPRELDNE = 0x00008000  # Disp reloc applied at build time.
DF_1_DISPRELPND = 0x00010000  # Disp reloc applied at run-time.

# Flags for the feature selection in DT_FEATURE_1.
DTF_1_PARINIT = 0x00000001
DTF_1_CONFEXP = 0x00000002

# Flags in the DT_POSFLAG_1 entry affecting only the next DT_* entry.
DF_P1_LAZYLOAD = 0x00000001  # Lazyload following object.
DF_P1_GROUPPERM = 0x00000002  # Symbols from next object are not generally available.

# GNU Versioning
VER_FLG_BASE = 1  # Version of the file itself, must not be used to match symbols
VER_FLG_WEAK = 2  # Reference to this version is weak
VER_NEED_CURRENT = 1  # Versioning implementation number

# Relocs

# Motorola 68k relocations

R_68K_NONE = 0  # No reloc
R_68K_32 = 1  # Direct 32 bit
R_68K_16 = 2  # Direct 16 bit
R_68K_8 = 3  # Direct 8 bit
R_68K_PC32 = 4  # PC relative 32 bit
R_68K_PC16 = 5  # PC relative 16 bit
R_68K_PC8 = 6  # PC relative 8 bit
R_68K_GOT32 = 7  # 32 bit PC relative GOT entry
R_68K_GOT16 = 8  # 16 bit PC relative GOT entry
R_68K_GOT8 = 9  # 8 bit PC relative GOT entry
R_68K_GOT32O = 10  # 32 bit GOT offset
R_68K_GOT16O = 11  # 16 bit GOT offset
R_68K_GOT8O = 12  # 8 bit GOT offset
R_68K_PLT32 = 13  # 32 bit PC relative PLT address
R_68K_PLT16 = 14  # 16 bit PC relative PLT address
R_68K_PLT8 = 15  # 8 bit PC relative PLT address
R_68K_PLT32O = 16  # 32 bit PLT offset
R_68K_PLT16O = 17  # 16 bit PLT offset
R_68K_PLT8O = 18  # 8 bit PLT offset
R_68K_COPY = 19  # Copy symbol at runtime
R_68K_GLOB_DAT = 20  # Create GOT entry
R_68K_JMP_SLOT = 21  # Create PLT entry
R_68K_RELATIVE = 22  # Adjust by program base
R_68K_TLS_GD32 = 25  # 32 bit GOT offset for GD
R_68K_TLS_GD16 = 26  # 16 bit GOT offset for GD
R_68K_TLS_GD8 = 27  # 8 bit GOT offset for GD
R_68K_TLS_LDM32 = 28  # 32 bit GOT offset for LDM
R_68K_TLS_LDM16 = 29  # 16 bit GOT offset for LDM
R_68K_TLS_LDM8 = 30  # 8 bit GOT offset for LDM
R_68K_TLS_LDO32 = 31  # 32 bit module-relative offset
R_68K_TLS_LDO16 = 32  # 16 bit module-relative offset
R_68K_TLS_LDO8 = 33  # 8 bit module-relative offset
R_68K_TLS_IE32 = 34  # 32 bit GOT offset for IE
R_68K_TLS_IE16 = 35  # 16 bit GOT offset for IE
R_68K_TLS_IE8 = 36  # 8 bit GOT offset for IE
R_68K_TLS_LE32 = 37  # 32 bit offset relative to static TLS block
R_68K_TLS_LE16 = 38  # 16 bit offset relative to static TLS block
R_68K_TLS_LE8 = 39  # 8 bit offset relative to static TLS block
R_68K_TLS_DTPMOD32 = 40  # 32 bit module number
R_68K_TLS_DTPREL32 = 41  # 32 bit module-relative offset
R_68K_TLS_TPREL32 = 42  # 32 bit TP-relative offset
# Keep this the last entry.
R_68K_NUM = 43

# Intel 80386 relocations

R_386_NONE = 0  # No reloc
R_386_32 = 1  # Direct 32 bit
R_386_PC32 = 2  # PC relative 32 bit
R_386_GOT32 = 3  # 32 bit GOT entry
R_386_PLT32 = 4  # 32 bit PLT address
R_386_COPY = 5  # Copy symbol at runtime
R_386_GLOB_DAT = 6  # Create GOT entry
R_386_JMP_SLOT = 7  # Create PLT entry
R_386_RELATIVE = 8  # Adjust by program base
R_386_GOTOFF = 9  # 32 bit offset to GOT
R_386_GOTPC = 10  # 32 bit PC relative offset to GOT
R_386_32PLT = 11
R_386_TLS_TPOFF = 14  # Offset in static TLS block
R_386_TLS_IE = 15  # Address of GOT entry for static TLS block offset
R_386_TLS_GOTIE = 16  # GOT entry for static TLS block offset
R_386_TLS_LE = 17  # Offset relative to static TLS block
R_386_TLS_GD = 18  # Direct 32 bit for GNU version of general dynamic thread local data
R_386_TLS_LDM = 19  # Direct 32 bit for GNU version of local dynamic thread local data in LE code
R_386_16 = 20
R_386_PC16 = 21
R_386_8 = 22
R_386_PC8 = 23
R_386_TLS_GD_32 = 24  # Direct 32 bit for general dynamic thread local data
R_386_TLS_GD_PUSH = 25  # Tag for pushl in GD TLS code
R_386_TLS_GD_CALL = 26  # Relocation for call to __tls_get_addr()
R_386_TLS_GD_POP = 27  # Tag for popl in GD TLS code
R_386_TLS_LDM_32 = 28  # Direct 32 bit for local dynamic thread local data in LE code
R_386_TLS_LDM_PUSH = 29  # Tag for pushl in LDM TLS code
R_386_TLS_LDM_CALL = 30  # Relocation for call to __tls_get_addr() in LDM code
R_386_TLS_LDM_POP = 31  # Tag for popl in LDM TLS code
R_386_TLS_LDO_32 = 32  # Offset relative to TLS block
R_386_TLS_IE_32 = 33  # GOT entry for negated static TLS block offset
R_386_TLS_LE_32 = 34  # Negated offset relative to static TLS block
R_386_TLS_DTPMOD32 = 35  # ID of module containing symbol
R_386_TLS_DTPOFF32 = 36  # Offset in TLS block
R_386_TLS_TPOFF32 = 37  # Negated offset in static TLS block
# 38?
R_386_TLS_GOTDESC = 39  # GOT offset for TLS descriptor.
R_386_TLS_DESC_CALL = 40  # Marker of call through TLS descriptor for relaxation.
R_386_TLS_DESC = 41  # TLS descriptor containing pointer to code and to argument, returning the TLS offset for the symbol.
R_386_IRELATIVE = 42  # Adjust indirectly by program base
# Keep this the last entry.
R_386_NUM = 43

# SUN SPARC relocations

R_SPARC_NONE = 0  # No reloc
R_SPARC_8 = 1  # Direct 8 bit
R_SPARC_16 = 2  # Direct 16 bit
R_SPARC_32 = 3  # Direct 32 bit
R_SPARC_DISP8 = 4  # PC relative 8 bit
R_SPARC_DISP16 = 5  # PC relative 16 bit
R_SPARC_DISP32 = 6  # PC relative 32 bit
R_SPARC_WDISP30 = 7  # PC relative 30 bit shifted
R_SPARC_WDISP22 = 8  # PC relative 22 bit shifted
R_SPARC_HI22 = 9  # High 22 bit
R_SPARC_22 = 10  # Direct 22 bit
R_SPARC_13 = 11  # Direct 13 bit
R_SPARC_LO10 = 12  # Truncated 10 bit
R_SPARC_GOT10 = 13  # Truncated 10 bit GOT entry
R_SPARC_GOT13 = 14  # 13 bit GOT entry
R_SPARC_GOT22 = 15  # 22 bit GOT entry shifted
R_SPARC_PC10 = 16  # PC relative 10 bit truncated
R_SPARC_PC22 = 17  # PC relative 22 bit shifted
R_SPARC_WPLT30 = 18  # 30 bit PC relative PLT address
R_SPARC_COPY = 19  # Copy symbol at runtime
R_SPARC_GLOB_DAT = 20  # Create GOT entry
R_SPARC_JMP_SLOT = 21  # Create PLT entry
R_SPARC_RELATIVE = 22  # Adjust by program base
R_SPARC_UA32 = 23  # Direct 32 bit unaligned

# Additional Sparc64 relocs.

R_SPARC_PLT32 = 24  # Direct 32 bit ref to PLT entry
R_SPARC_HIPLT22 = 25  # High 22 bit PLT entry
R_SPARC_LOPLT10 = 26  # Truncated 10 bit PLT entry
R_SPARC_PCPLT32 = 27  # PC rel 32 bit ref to PLT entry
R_SPARC_PCPLT22 = 28  # PC rel high 22 bit PLT entry
R_SPARC_PCPLT10 = 29  # PC rel trunc 10 bit PLT entry
R_SPARC_10 = 30  # Direct 10 bit
R_SPARC_11 = 31  # Direct 11 bit
R_SPARC_64 = 32  # Direct 64 bit
R_SPARC_OLO10 = 33  # 10bit with secondary 13bit addend
R_SPARC_HH22 = 34  # Top 22 bits of direct 64 bit
R_SPARC_HM10 = 35  # High middle 10 bits of ...
R_SPARC_LM22 = 36  # Low middle 22 bits of ...
R_SPARC_PC_HH22 = 37  # Top 22 bits of pc rel 64 bit
R_SPARC_PC_HM10 = 38  # High middle 10 bit of ...
R_SPARC_PC_LM22 = 39  # Low middle 22 bits of ...
R_SPARC_WDISP16 = 40  # PC relative 16 bit shifted
R_SPARC_WDISP19 = 41  # PC relative 19 bit shifted
R_SPARC_GLOB_JMP = 42  # was part of v9 ABI but was removed
R_SPARC_7 = 43  # Direct 7 bit
R_SPARC_5 = 44  # Direct 5 bit
R_SPARC_6 = 45  # Direct 6 bit
R_SPARC_DISP64 = 46  # PC relative 64 bit
R_SPARC_PLT64 = 47  # Direct 64 bit ref to PLT entry
R_SPARC_HIX22 = 48  # High 22 bit complemented
R_SPARC_LOX10 = 49  # Truncated 11 bit complemented
R_SPARC_H44 = 50  # Direct high 12 of 44 bit
R_SPARC_M44 = 51  # Direct mid 22 of 44 bit
R_SPARC_L44 = 52  # Direct low 10 of 44 bit
R_SPARC_REGISTER = 53  # Global register usage
R_SPARC_UA64 = 54  # Direct 64 bit unaligned
R_SPARC_UA16 = 55  # Direct 16 bit unaligned
R_SPARC_TLS_GD_HI22 = 56
R_SPARC_TLS_GD_LO10 = 57
R_SPARC_TLS_GD_ADD = 58
R_SPARC_TLS_GD_CALL = 59
R_SPARC_TLS_LDM_HI22 = 60
R_SPARC_TLS_LDM_LO10 = 61
R_SPARC_TLS_LDM_ADD = 62
R_SPARC_TLS_LDM_CALL = 63
R_SPARC_TLS_LDO_HIX22 = 64
R_SPARC_TLS_LDO_LOX10 = 65
R_SPARC_TLS_LDO_ADD = 66
R_SPARC_TLS_IE_HI22 = 67
R_SPARC_TLS_IE_LO10 = 68
R_SPARC_TLS_IE_LD = 69
R_SPARC_TLS_IE_LDX = 70
R_SPARC_TLS_IE_ADD = 71
R_SPARC_TLS_LE_HIX22 = 72
R_SPARC_TLS_LE_LOX10 = 73
R_SPARC_TLS_DTPMOD32 = 74
R_SPARC_TLS_DTPMOD64 = 75
R_SPARC_TLS_DTPOFF32 = 76
R_SPARC_TLS_DTPOFF64 = 77
R_SPARC_TLS_TPOFF32 = 78
R_SPARC_TLS_TPOFF64 = 79
R_SPARC_GOTDATA_HIX22 = 80
R_SPARC_GOTDATA_LOX10 = 81
R_SPARC_GOTDATA_OP_HIX22 = 82
R_SPARC_GOTDATA_OP_LOX10 = 83
R_SPARC_GOTDATA_OP = 84
R_SPARC_H34 = 85
R_SPARC_SIZE32 = 86
R_SPARC_SIZE64 = 87
R_SPARC_JMP_IREL = 248
R_SPARC_IRELATIVE = 249
R_SPARC_GNU_VTINHERIT = 250
R_SPARC_GNU_VTENTRY = 251
R_SPARC_REV32 = 252
# Keep this the last entry.
R_SPARC_NUM = 253

# MIPS R3000 relocations

R_MIPS_NONE = 0  # No reloc
R_MIPS_16 = 1  # Direct 16 bit
R_MIPS_32 = 2  # Direct 32 bit
R_MIPS_REL32 = 3  # PC relative 32 bit
R_MIPS_26 = 4  # Direct 26 bit shifted
R_MIPS_HI16 = 5  # High 16 bit
R_MIPS_LO16 = 6  # Low 16 bit
R_MIPS_GPREL16 = 7  # GP relative 16 bit
R_MIPS_LITERAL = 8  # 16 bit literal entry
R_MIPS_GOT16 = 9  # 16 bit GOT entry
R_MIPS_PC16 = 10  # PC relative 16 bit
R_MIPS_CALL16 = 11  # 16 bit GOT entry for function
R_MIPS_GPREL32 = 12  # GP relative 32 bit

R_MIPS_SHIFT5 = 16
R_MIPS_SHIFT6 = 17
R_MIPS_64 = 18
R_MIPS_GOT_DISP = 19
R_MIPS_GOT_PAGE = 20
R_MIPS_GOT_OFST = 21
R_MIPS_GOT_HI16 = 22
R_MIPS_GOT_LO16 = 23
R_MIPS_SUB = 24
R_MIPS_INSERT_A = 25
R_MIPS_INSERT_B = 26
R_MIPS_DELETE = 27
R_MIPS_HIGHER = 28
R_MIPS_HIGHEST = 29
R_MIPS_CALL_HI16 = 30
R_MIPS_CALL_LO16 = 31
R_MIPS_SCN_DISP = 32
R_MIPS_REL16 = 33
R_MIPS_ADD_IMMEDIATE = 34
R_MIPS_PJUMP = 35
R_MIPS_RELGOT = 36
R_MIPS_JALR = 37
R_MIPS_TLS_DTPMOD32 = 38  # Module number 32 bit
R_MIPS_TLS_DTPREL32 = 39  # Module-relative offset 32 bit
R_MIPS_TLS_DTPMOD64 = 40  # Module number 64 bit
R_MIPS_TLS_DTPREL64 = 41  # Module-relative offset 64 bit
R_MIPS_TLS_GD = 42  # 16 bit GOT offset for GD
R_MIPS_TLS_LDM = 43  # 16 bit GOT offset for LDM
R_MIPS_TLS_DTPREL_HI16 = 44  # Module-relative offset, high 16 bits
R_MIPS_TLS_DTPREL_LO16 = 45  # Module-relative offset, low 16 bits
R_MIPS_TLS_GOTTPREL = 46  # 16 bit GOT offset for IE
R_MIPS_TLS_TPREL32 = 47  # TP-relative offset, 32 bit
R_MIPS_TLS_TPREL64 = 48  # TP-relative offset, 64 bit
R_MIPS_TLS_TPREL_HI16 = 49  # TP-relative offset, high 16 bits
R_MIPS_TLS_TPREL_LO16 = 50  # TP-relative offset, low 16 bits
R_MIPS_GLOB_DAT = 51
R_MIPS_COPY = 126
R_MIPS_JUMP_SLOT = 127
# Keep this the last entry.
R_MIPS_NUM = 128

# HPPA relocations

R_PARISC_NONE = 0  # No reloc.
R_PARISC_DIR32 = 1  # Direct 32-bit reference.
R_PARISC_DIR21L = 2  # Left 21 bits of eff. address.
R_PARISC_DIR17R = 3  # Right 17 bits of eff. address.
R_PARISC_DIR17F = 4  # 17 bits of eff. address.
R_PARISC_DIR14R = 6  # Right 14 bits of eff. address.
R_PARISC_PCREL32 = 9  # 32-bit rel. address.
R_PARISC_PCREL21L = 10  # Left 21 bits of rel. address.
R_PARISC_PCREL17R = 11  # Right 17 bits of rel. address.
R_PARISC_PCREL17F = 12  # 17 bits of rel. address.
R_PARISC_PCREL14R = 14  # Right 14 bits of rel. address.
R_PARISC_DPREL21L = 18  # Left 21 bits of rel. address.
R_PARISC_DPREL14R = 22  # Right 14 bits of rel. address.
R_PARISC_GPREL21L = 26  # GP-relative, left 21 bits.
R_PARISC_GPREL14R = 30  # GP-relative, right 14 bits.
R_PARISC_LTOFF21L = 34  # LT-relative, left 21 bits.
R_PARISC_LTOFF14R = 38  # LT-relative, right 14 bits.
R_PARISC_SECREL32 = 41  # 32 bits section rel. address.
R_PARISC_SEGBASE = 48  # No relocation, set segment base.
R_PARISC_SEGREL32 = 49  # 32 bits segment rel. address.
R_PARISC_PLTOFF21L = 50  # PLT rel. address, left 21 bits.
R_PARISC_PLTOFF14R = 54  # PLT rel. address, right 14 bits.
R_PARISC_LTOFF_FPTR32 = 57  # 32 bits LT-rel. function pointer.
R_PARISC_LTOFF_FPTR21L = 58  # LT-rel. fct ptr, left 21 bits.
R_PARISC_LTOFF_FPTR14R = 62  # LT-rel. fct ptr, right 14 bits.
R_PARISC_FPTR64 = 64  # 64 bits function address.
R_PARISC_PLABEL32 = 65  # 32 bits function address.
R_PARISC_PLABEL21L = 66  # Left 21 bits of fdesc address.
R_PARISC_PLABEL14R = 70  # Right 14 bits of fdesc address.
R_PARISC_PCREL64 = 72  # 64 bits PC-rel. address.
R_PARISC_PCREL22F = 74  # 22 bits PC-rel. address.
R_PARISC_PCREL14WR = 75  # PC-rel. address, right 14 bits.
R_PARISC_PCREL14DR = 76  # PC rel. address, right 14 bits.
R_PARISC_PCREL16F = 77  # 16 bits PC-rel. address.
R_PARISC_PCREL16WF = 78  # 16 bits PC-rel. address.
R_PARISC_PCREL16DF = 79  # 16 bits PC-rel. address.
R_PARISC_DIR64 = 80  # 64 bits of eff. address.
R_PARISC_DIR14WR = 83  # 14 bits of eff. address.
R_PARISC_DIR14DR = 84  # 14 bits of eff. address.
R_PARISC_DIR16F = 85  # 16 bits of eff. address.
R_PARISC_DIR16WF = 86  # 16 bits of eff. address.
R_PARISC_DIR16DF = 87  # 16 bits of eff. address.
R_PARISC_GPREL64 = 88  # 64 bits of GP-rel. address.
R_PARISC_GPREL14WR = 91  # GP-rel. address, right 14 bits.
R_PARISC_GPREL14DR = 92  # GP-rel. address, right 14 bits.
R_PARISC_GPREL16F = 93  # 16 bits GP-rel. address.
R_PARISC_GPREL16WF = 94  # 16 bits GP-rel. address.
R_PARISC_GPREL16DF = 95  # 16 bits GP-rel. address.
R_PARISC_LTOFF64 = 96  # 64 bits LT-rel. address.
R_PARISC_LTOFF14WR = 99  # LT-rel. address, right 14 bits.
R_PARISC_LTOFF14DR = 100  # LT-rel. address, right 14 bits.
R_PARISC_LTOFF16F = 101  # 16 bits LT-rel. address.
R_PARISC_LTOFF16WF = 102  # 16 bits LT-rel. address.
R_PARISC_LTOFF16DF = 103  # 16 bits LT-rel. address.
R_PARISC_SECREL64 = 104  # 64 bits section rel. address.
R_PARISC_SEGREL64 = 112  # 64 bits segment rel. address.
R_PARISC_PLTOFF14WR = 115  # PLT-rel. address, right 14 bits.
R_PARISC_PLTOFF14DR = 116  # PLT-rel. address, right 14 bits.
R_PARISC_PLTOFF16F = 117  # 16 bits LT-rel. address.
R_PARISC_PLTOFF16WF = 118  # 16 bits PLT-rel. address.
R_PARISC_PLTOFF16DF = 119  # 16 bits PLT-rel. address.
R_PARISC_LTOFF_FPTR64 = 120  # 64 bits LT-rel. function ptr.
R_PARISC_LTOFF_FPTR14WR = 123  # LT-rel. fct. ptr., right 14 bits.
R_PARISC_LTOFF_FPTR14DR = 124  # LT-rel. fct. ptr., right 14 bits.
R_PARISC_LTOFF_FPTR16F = 125  # 16 bits LT-rel. function ptr.
R_PARISC_LTOFF_FPTR16WF = 126  # 16 bits LT-rel. function ptr.
R_PARISC_LTOFF_FPTR16DF = 127  # 16 bits LT-rel. function ptr.
R_PARISC_LORESERVE = 128
R_PARISC_COPY = 128  # Copy relocation.
R_PARISC_IPLT = 129  # Dynamic reloc, imported PLT
R_PARISC_EPLT = 130  # Dynamic reloc, exported PLT
R_PARISC_TPREL32 = 153  # 32 bits TP-rel. address.
R_PARISC_TPREL21L = 154  # TP-rel. address, left 21 bits.
R_PARISC_TPREL14R = 158  # TP-rel. address, right 14 bits.
R_PARISC_LTOFF_TP21L = 162  # LT-TP-rel. address, left 21 bits.
R_PARISC_LTOFF_TP14R = 166  # LT-TP-rel. address, right 14 bits.
R_PARISC_LTOFF_TP14F = 167  # 14 bits LT-TP-rel. address.
R_PARISC_TPREL64 = 216  # 64 bits TP-rel. address.
R_PARISC_TPREL14WR = 219  # TP-rel. address, right 14 bits.
R_PARISC_TPREL14DR = 220  # TP-rel. address, right 14 bits.
R_PARISC_TPREL16F = 221  # 16 bits TP-rel. address.
R_PARISC_TPREL16WF = 222  # 16 bits TP-rel. address.
R_PARISC_TPREL16DF = 223  # 16 bits TP-rel. address.
R_PARISC_LTOFF_TP64 = 224  # 64 bits LT-TP-rel. address.
R_PARISC_LTOFF_TP14WR = 227  # LT-TP-rel. address, right 14 bits.
R_PARISC_LTOFF_TP14DR = 228  # LT-TP-rel. address, right 14 bits.
R_PARISC_LTOFF_TP16F = 229  # 16 bits LT-TP-rel. address.
R_PARISC_LTOFF_TP16WF = 230  # 16 bits LT-TP-rel. address.
R_PARISC_LTOFF_TP16DF = 231  # 16 bits LT-TP-rel. address.
R_PARISC_GNU_VTENTRY = 232
R_PARISC_GNU_VTINHERIT = 233
R_PARISC_TLS_GD21L = 234  # GD 21-bit left.
R_PARISC_TLS_GD14R = 235  # GD 14-bit right.
R_PARISC_TLS_GDCALL = 236  # GD call to __t_g_a.
R_PARISC_TLS_LDM21L = 237  # LD module 21-bit left.
R_PARISC_TLS_LDM14R = 238  # LD module 14-bit right.
R_PARISC_TLS_LDMCALL = 239  # LD module call to __t_g_a.
R_PARISC_TLS_LDO21L = 240  # LD offset 21-bit left.
R_PARISC_TLS_LDO14R = 241  # LD offset 14-bit right.
R_PARISC_TLS_DTPMOD32 = 242  # DTP module 32-bit.
R_PARISC_TLS_DTPMOD64 = 243  # DTP module 64-bit.
R_PARISC_TLS_DTPOFF32 = 244  # DTP offset 32-bit.
R_PARISC_TLS_DTPOFF64 = 245  # DTP offset 64-bit.
# The TLS names below are aliases of the TP-relative relocations above.
R_PARISC_TLS_LE21L = R_PARISC_TPREL21L
R_PARISC_TLS_LE14R = R_PARISC_TPREL14R
R_PARISC_TLS_IE21L = R_PARISC_LTOFF_TP21L
R_PARISC_TLS_IE14R = R_PARISC_LTOFF_TP14R
R_PARISC_TLS_TPREL32 = R_PARISC_TPREL32
R_PARISC_TLS_TPREL64 = R_PARISC_TPREL64
R_PARISC_HIRESERVE = 255

# Alpha relocations

R_ALPHA_NONE = 0  # No reloc
R_ALPHA_REFLONG = 1  # Direct 32 bit
R_ALPHA_REFQUAD = 2  # Direct 64 bit
R_ALPHA_GPREL32 = 3  # GP relative 32 bit
R_ALPHA_LITERAL = 4  # GP relative 16 bit w/optimization
R_ALPHA_LITUSE = 5  # Optimization hint for LITERAL
R_ALPHA_GPDISP = 6  # Add displacement to GP
R_ALPHA_BRADDR = 7  # PC+4 relative 23 bit shifted
R_ALPHA_HINT = 8  # PC+4 relative 16 bit shifted
R_ALPHA_SREL16 = 9  # PC relative 16 bit
R_ALPHA_SREL32 = 10  # PC relative 32 bit
R_ALPHA_SREL64 = 11  # PC relative 64 bit
R_ALPHA_GPRELHIGH = 17  # GP relative 32 bit, high 16 bits
R_ALPHA_GPRELLOW = 18  # GP relative 32 bit, low 16 bits
R_ALPHA_GPREL16 = 19  # GP relative 16 bit
R_ALPHA_COPY = 24  # Copy symbol at runtime
R_ALPHA_GLOB_DAT = 25  # Create GOT entry
R_ALPHA_JMP_SLOT = 26  # Create PLT entry
R_ALPHA_RELATIVE = 27  # Adjust by program base
R_ALPHA_TLS_GD_HI = 28
R_ALPHA_TLSGD = 29
R_ALPHA_TLS_LDM = 30
R_ALPHA_DTPMOD64 = 31
R_ALPHA_GOTDTPREL = 32
R_ALPHA_DTPREL64 = 33
R_ALPHA_DTPRELHI = 34
R_ALPHA_DTPRELLO = 35
R_ALPHA_DTPREL16 = 36
R_ALPHA_GOTTPREL = 37
R_ALPHA_TPREL64 = 38
R_ALPHA_TPRELHI = 39
R_ALPHA_TPRELLO = 40
R_ALPHA_TPREL16 = 41
# Keep this the last entry.
R_ALPHA_NUM = 46

# PowerPC relocations

R_PPC_NONE = 0
R_PPC_ADDR32 = 1  # 32bit absolute address
R_PPC_ADDR24 = 2  # 26bit address, 2 bits ignored.
R_PPC_ADDR16 = 3  # 16bit absolute address
R_PPC_ADDR16_LO = 4  # lower 16bit of absolute address
R_PPC_ADDR16_HI = 5  # high 16bit of absolute address
R_PPC_ADDR16_HA = 6  # adjusted high 16bit
R_PPC_ADDR14 = 7  # 16bit address, 2 bits ignored
R_PPC_ADDR14_BRTAKEN = 8
R_PPC_ADDR14_BRNTAKEN = 9
R_PPC_REL24 = 10  # PC relative 26 bit
R_PPC_REL14 = 11  # PC relative 16 bit
R_PPC_REL14_BRTAKEN = 12
R_PPC_REL14_BRNTAKEN = 13
R_PPC_GOT16 = 14
R_PPC_GOT16_LO = 15
R_PPC_GOT16_HI = 16
R_PPC_GOT16_HA = 17
R_PPC_PLTREL24 = 18
R_PPC_COPY = 19
R_PPC_GLOB_DAT = 20
R_PPC_JMP_SLOT = 21
R_PPC_RELATIVE = 22
R_PPC_LOCAL24PC = 23
R_PPC_UADDR32 = 24
R_PPC_UADDR16 = 25
R_PPC_REL32 = 26
R_PPC_PLT32 = 27
R_PPC_PLTREL32 = 28
R_PPC_PLT16_LO = 29
R_PPC_PLT16_HI = 30
R_PPC_PLT16_HA = 31
R_PPC_SDAREL16 = 32
R_PPC_SECTOFF = 33
R_PPC_SECTOFF_LO = 34
R_PPC_SECTOFF_HI = 35
R_PPC_SECTOFF_HA = 36

# PowerPC relocations defined for the TLS access ABI.
R_PPC_TLS = 67  # none (sym+add)@tls
R_PPC_DTPMOD32 = 68  # word32 (sym+add)@dtpmod
R_PPC_TPREL16 = 69  # half16* (sym+add)@tprel
R_PPC_TPREL16_LO = 70  # half16 (sym+add)@tprel@l
R_PPC_TPREL16_HI = 71  # half16 (sym+add)@tprel@h
R_PPC_TPREL16_HA = 72  # half16 (sym+add)@tprel@ha
R_PPC_TPREL32 = 73  # word32 (sym+add)@tprel
R_PPC_DTPREL16 = 74  # half16* (sym+add)@dtprel
R_PPC_DTPREL16_LO = 75  # half16 (sym+add)@dtprel@l
R_PPC_DTPREL16_HI = 76  # half16 (sym+add)@dtprel@h
R_PPC_DTPREL16_HA = 77  # half16 (sym+add)@dtprel@ha
R_PPC_DTPREL32 = 78  # word32 (sym+add)@dtprel
R_PPC_GOT_TLSGD16 = 79  # half16* (sym+add)@got@tlsgd
R_PPC_GOT_TLSGD16_LO = 80  # half16 (sym+add)@got@tlsgd@l
R_PPC_GOT_TLSGD16_HI = 81  # half16 (sym+add)@got@tlsgd@h
R_PPC_GOT_TLSGD16_HA = 82  # half16 (sym+add)@got@tlsgd@ha
R_PPC_GOT_TLSLD16 = 83  # half16* (sym+add)@got@tlsld
R_PPC_GOT_TLSLD16_LO = 84  # half16 (sym+add)@got@tlsld@l
R_PPC_GOT_TLSLD16_HI = 85  # half16 (sym+add)@got@tlsld@h
R_PPC_GOT_TLSLD16_HA = 86  # half16 (sym+add)@got@tlsld@ha
+R_PPC_GOT_TPREL16 = 87 # half16* (sym+add)@got@tprel +R_PPC_GOT_TPREL16_LO = 88 # half16 (sym+add)@got@tprel@l +R_PPC_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h +R_PPC_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha +R_PPC_GOT_DTPREL16 = 91 # half16* (sym+add)@got@dtprel +R_PPC_GOT_DTPREL16_LO = 92 # half16* (sym+add)@got@dtprel@l +R_PPC_GOT_DTPREL16_HI = 93 # half16* (sym+add)@got@dtprel@h +R_PPC_GOT_DTPREL16_HA = 94 # half16* (sym+add)@got@dtprel@ha + +# The remaining relocs are from the Embedded ELF ABI, and are not in the SVR4 ELF ABI. +R_PPC_EMB_NADDR32 = 101 +R_PPC_EMB_NADDR16 = 102 +R_PPC_EMB_NADDR16_LO = 103 +R_PPC_EMB_NADDR16_HI = 104 +R_PPC_EMB_NADDR16_HA = 105 +R_PPC_EMB_SDAI16 = 106 +R_PPC_EMB_SDA2I16 = 107 +R_PPC_EMB_SDA2REL = 108 +R_PPC_EMB_SDA21 = 109 # 16 bit offset in SDA +R_PPC_EMB_MRKREF = 110 +R_PPC_EMB_RELSEC16 = 111 +R_PPC_EMB_RELST_LO = 112 +R_PPC_EMB_RELST_HI = 113 +R_PPC_EMB_RELST_HA = 114 +R_PPC_EMB_BIT_FLD = 115 +R_PPC_EMB_RELSDA = 116 # 16 bit relative offset in SDA + +# Diab tool relocations. +R_PPC_DIAB_SDA21_LO = 180 # like EMB_SDA21, but lower 16 bit +R_PPC_DIAB_SDA21_HI = 181 # like EMB_SDA21, but high 16 bit +R_PPC_DIAB_SDA21_HA = 182 # like EMB_SDA21, adjusted high 16 +R_PPC_DIAB_RELSDA_LO = 183 # like EMB_RELSDA, but lower 16 bit +R_PPC_DIAB_RELSDA_HI = 184 # like EMB_RELSDA, but high 16 bit +R_PPC_DIAB_RELSDA_HA = 185 # like EMB_RELSDA, adjusted high 16 + +# GNU extension to support local ifunc. +R_PPC_IRELATIVE = 248 + +# GNU relocs used in PIC code sequences. +R_PPC_REL16 = 249 # half16 (sym+add-.) +R_PPC_REL16_LO = 250 # half16 (sym+add-.)@l +R_PPC_REL16_HI = 251 # half16 (sym+add-.)@h +R_PPC_REL16_HA = 252 # half16 (sym+add-.)@ha + +# This is a phony reloc to handle any old fashioned TOC16 references that may still be in object files. 
+R_PPC_TOC16 = 255 + +# PowerPC64 relocations defined by the ABIs +R_PPC64_NONE = R_PPC_NONE +R_PPC64_ADDR32 = R_PPC_ADDR32 # 32bit absolute address +R_PPC64_ADDR24 = R_PPC_ADDR24 # 26bit address, word aligned +R_PPC64_ADDR16 = R_PPC_ADDR16 # 16bit absolute address +R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO # lower 16bits of address +R_PPC64_ADDR16_HI = R_PPC_ADDR16_HI # high 16bits of address. +R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA # adjusted high 16bits. +R_PPC64_ADDR14 = R_PPC_ADDR14 # 16bit address, word aligned +R_PPC64_ADDR14_BRTAKEN = R_PPC_ADDR14_BRTAKEN +R_PPC64_ADDR14_BRNTAKEN = R_PPC_ADDR14_BRNTAKEN +R_PPC64_REL24 = R_PPC_REL24 # PC-rel. 26 bit, word aligned +R_PPC64_REL14 = R_PPC_REL14 # PC relative 16 bit +R_PPC64_REL14_BRTAKEN = R_PPC_REL14_BRTAKEN +R_PPC64_REL14_BRNTAKEN = R_PPC_REL14_BRNTAKEN +R_PPC64_GOT16 = R_PPC_GOT16 +R_PPC64_GOT16_LO = R_PPC_GOT16_LO +R_PPC64_GOT16_HI = R_PPC_GOT16_HI +R_PPC64_GOT16_HA = R_PPC_GOT16_HA + +R_PPC64_COPY = R_PPC_COPY +R_PPC64_GLOB_DAT = R_PPC_GLOB_DAT +R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT +R_PPC64_RELATIVE = R_PPC_RELATIVE + +R_PPC64_UADDR32 = R_PPC_UADDR32 +R_PPC64_UADDR16 = R_PPC_UADDR16 +R_PPC64_REL32 = R_PPC_REL32 +R_PPC64_PLT32 = R_PPC_PLT32 +R_PPC64_PLTREL32 = R_PPC_PLTREL32 +R_PPC64_PLT16_LO = R_PPC_PLT16_LO +R_PPC64_PLT16_HI = R_PPC_PLT16_HI +R_PPC64_PLT16_HA = R_PPC_PLT16_HA + +R_PPC64_SECTOFF = R_PPC_SECTOFF +R_PPC64_SECTOFF_LO = R_PPC_SECTOFF_LO +R_PPC64_SECTOFF_HI = R_PPC_SECTOFF_HI +R_PPC64_SECTOFF_HA = R_PPC_SECTOFF_HA +R_PPC64_ADDR30 = 37 # word30 (S + A - P) >> 2 +R_PPC64_ADDR64 = 38 # doubleword64 S + A +R_PPC64_ADDR16_HIGHER = 39 # half16 #higher(S + A) +R_PPC64_ADDR16_HIGHERA = 40 # half16 #highera(S + A) +R_PPC64_ADDR16_HIGHEST = 41 # half16 #highest(S + A) +R_PPC64_ADDR16_HIGHESTA = 42 # half16 #highesta(S + A) +R_PPC64_UADDR64 = 43 # doubleword64 S + A +R_PPC64_REL64 = 44 # doubleword64 S + A - P +R_PPC64_PLT64 = 45 # doubleword64 L + A +R_PPC64_PLTREL64 = 46 # doubleword64 L + A - P +R_PPC64_TOC16 = 47 
# half16* S + A - .TOC +R_PPC64_TOC16_LO = 48 # half16 #lo(S + A - .TOC.) +R_PPC64_TOC16_HI = 49 # half16 #hi(S + A - .TOC.) +R_PPC64_TOC16_HA = 50 # half16 #ha(S + A - .TOC.) +R_PPC64_TOC = 51 # doubleword64 .TOC +R_PPC64_PLTGOT16 = 52 # half16* M + A +R_PPC64_PLTGOT16_LO = 53 # half16 #lo(M + A) +R_PPC64_PLTGOT16_HI = 54 # half16 #hi(M + A) +R_PPC64_PLTGOT16_HA = 55 # half16 #ha(M + A) + +R_PPC64_ADDR16_DS = 56 # half16ds* (S + A) >> 2 +R_PPC64_ADDR16_LO_DS = 57 # half16ds #lo(S + A) >> 2 +R_PPC64_GOT16_DS = 58 # half16ds* (G + A) >> 2 +R_PPC64_GOT16_LO_DS = 59 # half16ds #lo(G + A) >> 2 +R_PPC64_PLT16_LO_DS = 60 # half16ds #lo(L + A) >> 2 +R_PPC64_SECTOFF_DS = 61 # half16ds* (R + A) >> 2 +R_PPC64_SECTOFF_LO_DS = 62 # half16ds #lo(R + A) >> 2 +R_PPC64_TOC16_DS = 63 # half16ds* (S + A - .TOC.) >> 2 +R_PPC64_TOC16_LO_DS = 64 # half16ds #lo(S + A - .TOC.) >> 2 +R_PPC64_PLTGOT16_DS = 65 # half16ds* (M + A) >> 2 +R_PPC64_PLTGOT16_LO_DS = 66 # half16ds #lo(M + A) >> 2 + +# PowerPC64 relocations defined for the TLS access ABI. 
+R_PPC64_TLS = 67 # none (sym+add)@tls +R_PPC64_DTPMOD64 = 68 # doubleword64 (sym+add)@dtpmod +R_PPC64_TPREL16 = 69 # half16* (sym+add)@tprel +R_PPC64_TPREL16_LO = 70 # half16 (sym+add)@tprel@l +R_PPC64_TPREL16_HI = 71 # half16 (sym+add)@tprel@h +R_PPC64_TPREL16_HA = 72 # half16 (sym+add)@tprel@ha +R_PPC64_TPREL64 = 73 # doubleword64 (sym+add)@tprel +R_PPC64_DTPREL16 = 74 # half16* (sym+add)@dtprel +R_PPC64_DTPREL16_LO = 75 # half16 (sym+add)@dtprel@l +R_PPC64_DTPREL16_HI = 76 # half16 (sym+add)@dtprel@h +R_PPC64_DTPREL16_HA = 77 # half16 (sym+add)@dtprel@ha +R_PPC64_DTPREL64 = 78 # doubleword64 (sym+add)@dtprel +R_PPC64_GOT_TLSGD16 = 79 # half16* (sym+add)@got@tlsgd +R_PPC64_GOT_TLSGD16_LO = 80 # half16 (sym+add)@got@tlsgd@l +R_PPC64_GOT_TLSGD16_HI = 81 # half16 (sym+add)@got@tlsgd@h +R_PPC64_GOT_TLSGD16_HA = 82 # half16 (sym+add)@got@tlsgd@ha +R_PPC64_GOT_TLSLD16 = 83 # half16* (sym+add)@got@tlsld +R_PPC64_GOT_TLSLD16_LO = 84 # half16 (sym+add)@got@tlsld@l +R_PPC64_GOT_TLSLD16_HI = 85 # half16 (sym+add)@got@tlsld@h +R_PPC64_GOT_TLSLD16_HA = 86 # half16 (sym+add)@got@tlsld@ha +R_PPC64_GOT_TPREL16_DS = 87 # half16ds* (sym+add)@got@tprel +R_PPC64_GOT_TPREL16_LO_DS = 88 # half16ds (sym+add)@got@tprel@l +R_PPC64_GOT_TPREL16_HI = 89 # half16 (sym+add)@got@tprel@h +R_PPC64_GOT_TPREL16_HA = 90 # half16 (sym+add)@got@tprel@ha +R_PPC64_GOT_DTPREL16_DS = 91 # half16ds* (sym+add)@got@dtprel +R_PPC64_GOT_DTPREL16_LO_DS = 92 # half16ds (sym+add)@got@dtprel@l +R_PPC64_GOT_DTPREL16_HI = 93 # half16 (sym+add)@got@dtprel@h +R_PPC64_GOT_DTPREL16_HA = 94 # half16 (sym+add)@got@dtprel@ha +R_PPC64_TPREL16_DS = 95 # half16ds* (sym+add)@tprel +R_PPC64_TPREL16_LO_DS = 96 # half16ds (sym+add)@tprel@l +R_PPC64_TPREL16_HIGHER = 97 # half16 (sym+add)@tprel@higher +R_PPC64_TPREL16_HIGHERA = 98 # half16 (sym+add)@tprel@highera +R_PPC64_TPREL16_HIGHEST = 99 # half16 (sym+add)@tprel@highest +R_PPC64_TPREL16_HIGHESTA = 100 # half16 (sym+add)@tprel@highesta +R_PPC64_DTPREL16_DS = 101 # half16ds* 
(sym+add)@dtprel +R_PPC64_DTPREL16_LO_DS = 102 # half16ds (sym+add)@dtprel@l +R_PPC64_DTPREL16_HIGHER = 103 # half16 (sym+add)@dtprel@higher +R_PPC64_DTPREL16_HIGHERA = 104 # half16 (sym+add)@dtprel@highera +R_PPC64_DTPREL16_HIGHEST = 105 # half16 (sym+add)@dtprel@highest +R_PPC64_DTPREL16_HIGHESTA = 106 # half16 (sym+add)@dtprel@highesta + +# GNU extension to support local ifunc. +R_PPC64_JMP_IREL = 247 +R_PPC64_IRELATIVE = 248 +R_PPC64_REL16 = 249 # half16 (sym+add-.) +R_PPC64_REL16_LO = 250 # half16 (sym+add-.)@l +R_PPC64_REL16_HI = 251 # half16 (sym+add-.)@h +R_PPC64_REL16_HA = 252 # half16 (sym+add-.)@ha + +# PowerPC64 specific values for the Dyn d_tag field. +DT_PPC64_GLINK = (DT_LOPROC + 0) +DT_PPC64_OPD = (DT_LOPROC + 1) +DT_PPC64_OPDSZ = (DT_LOPROC + 2) +DT_PPC64_NUM = 3 + +# ARM relocations + +R_ARM_NONE = 0 # No reloc +R_ARM_PC24 = 1 # PC relative 26 bit branch +R_ARM_ABS32 = 2 # Direct 32 bit +R_ARM_REL32 = 3 # PC relative 32 bit +R_ARM_PC13 = 4 +R_ARM_ABS16 = 5 # Direct 16 bit +R_ARM_ABS12 = 6 # Direct 12 bit +R_ARM_THM_ABS5 = 7 +R_ARM_ABS8 = 8 # Direct 8 bit +R_ARM_SBREL32 = 9 +R_ARM_THM_PC22 = 10 +R_ARM_THM_PC8 = 11 +R_ARM_AMP_VCALL9 = 12 +R_ARM_SWI24 = 13 # Obsolete static relocation. +R_ARM_TLS_DESC = 13 # Dynamic relocation. 
+R_ARM_THM_SWI8 = 14 +R_ARM_XPC25 = 15 +R_ARM_THM_XPC22 = 16 +R_ARM_TLS_DTPMOD32 = 17 # ID of module containing symbol +R_ARM_TLS_DTPOFF32 = 18 # Offset in TLS block +R_ARM_TLS_TPOFF32 = 19 # Offset in static TLS block +R_ARM_COPY = 20 # Copy symbol at runtime +R_ARM_GLOB_DAT = 21 # Create GOT entry +R_ARM_JUMP_SLOT = 22 # Create PLT entry +R_ARM_RELATIVE = 23 # Adjust by program base +R_ARM_GOTOFF = 24 # 32 bit offset to GOT +R_ARM_GOTPC = 25 # 32 bit PC relative offset to GOT +R_ARM_GOT32 = 26 # 32 bit GOT entry +R_ARM_PLT32 = 27 # 32 bit PLT address +R_ARM_ALU_PCREL_7_0 = 32 +R_ARM_ALU_PCREL_15_8 = 33 +R_ARM_ALU_PCREL_23_15 = 34 +R_ARM_LDR_SBREL_11_0 = 35 +R_ARM_ALU_SBREL_19_12 = 36 +R_ARM_ALU_SBREL_27_20 = 37 +R_ARM_TLS_GOTDESC = 90 +R_ARM_TLS_CALL = 91 +R_ARM_TLS_DESCSEQ = 92 +R_ARM_THM_TLS_CALL = 93 +R_ARM_GNU_VTENTRY = 100 +R_ARM_GNU_VTINHERIT = 101 +R_ARM_THM_PC11 = 102 # thumb unconditional branch +R_ARM_THM_PC9 = 103 # thumb conditional branch +R_ARM_TLS_GD32 = 104 # PC-rel 32 bit for global dynamic thread local data +R_ARM_TLS_LDM32 = 105 # PC-rel 32 bit for local dynamic thread local data +R_ARM_TLS_LDO32 = 106 # 32 bit offset relative to TLS block +R_ARM_TLS_IE32 = 107 # PC-rel 32 bit for GOT entry of static TLS block offset +R_ARM_TLS_LE32 = 108 # 32 bit offset relative to static TLS block +R_ARM_THM_TLS_DESCSEQ = 129 +R_ARM_IRELATIVE = 160 +R_ARM_RXPC25 = 249 +R_ARM_RSBREL32 = 250 +R_ARM_THM_RPC22 = 251 +R_ARM_RREL32 = 252 +R_ARM_RABS22 = 253 +R_ARM_RPC24 = 254 +R_ARM_RBASE = 255 +# Keep this the last entry. 
+R_ARM_NUM = 256 + +# IA-64 relocations + +R_IA64_NONE = 0x00 # none +R_IA64_IMM14 = 0x21 # symbol + addend, add imm14 +R_IA64_IMM22 = 0x22 # symbol + addend, add imm22 +R_IA64_IMM64 = 0x23 # symbol + addend, mov imm64 +R_IA64_DIR32MSB = 0x24 # symbol + addend, data4 MSB +R_IA64_DIR32LSB = 0x25 # symbol + addend, data4 LSB +R_IA64_DIR64MSB = 0x26 # symbol + addend, data8 MSB +R_IA64_DIR64LSB = 0x27 # symbol + addend, data8 LSB +R_IA64_GPREL22 = 0x2a # @gprel(sym + add), add imm22 +R_IA64_GPREL64I = 0x2b # @gprel(sym + add), mov imm64 +R_IA64_GPREL32MSB = 0x2c # @gprel(sym + add), data4 MSB +R_IA64_GPREL32LSB = 0x2d # @gprel(sym + add), data4 LSB +R_IA64_GPREL64MSB = 0x2e # @gprel(sym + add), data8 MSB +R_IA64_GPREL64LSB = 0x2f # @gprel(sym + add), data8 LSB +R_IA64_LTOFF22 = 0x32 # @ltoff(sym + add), add imm22 +R_IA64_LTOFF64I = 0x33 # @ltoff(sym + add), mov imm64 +R_IA64_PLTOFF22 = 0x3a # @pltoff(sym + add), add imm22 +R_IA64_PLTOFF64I = 0x3b # @pltoff(sym + add), mov imm64 +R_IA64_PLTOFF64MSB = 0x3e # @pltoff(sym + add), data8 MSB +R_IA64_PLTOFF64LSB = 0x3f # @pltoff(sym + add), data8 LSB +R_IA64_FPTR64I = 0x43 # @fptr(sym + add), mov imm64 +R_IA64_FPTR32MSB = 0x44 # @fptr(sym + add), data4 MSB +R_IA64_FPTR32LSB = 0x45 # @fptr(sym + add), data4 LSB +R_IA64_FPTR64MSB = 0x46 # @fptr(sym + add), data8 MSB +R_IA64_FPTR64LSB = 0x47 # @fptr(sym + add), data8 LSB +R_IA64_PCREL60B = 0x48 # @pcrel(sym + add), brl +R_IA64_PCREL21B = 0x49 # @pcrel(sym + add), ptb, call +R_IA64_PCREL21M = 0x4a # @pcrel(sym + add), chk.s +R_IA64_PCREL21F = 0x4b # @pcrel(sym + add), fchkf +R_IA64_PCREL32MSB = 0x4c # @pcrel(sym + add), data4 MSB +R_IA64_PCREL32LSB = 0x4d # @pcrel(sym + add), data4 LSB +R_IA64_PCREL64MSB = 0x4e # @pcrel(sym + add), data8 MSB +R_IA64_PCREL64LSB = 0x4f # @pcrel(sym + add), data8 LSB +R_IA64_LTOFF_FPTR22 = 0x52 # @ltoff(@fptr(s+a)), imm22 +R_IA64_LTOFF_FPTR64I = 0x53 # @ltoff(@fptr(s+a)), imm64 +R_IA64_LTOFF_FPTR32MSB = 0x54 # @ltoff(@fptr(s+a)), data4 MSB 
+R_IA64_LTOFF_FPTR32LSB = 0x55 # @ltoff(@fptr(s+a)), data4 LSB +R_IA64_LTOFF_FPTR64MSB = 0x56 # @ltoff(@fptr(s+a)), data8 MSB +R_IA64_LTOFF_FPTR64LSB = 0x57 # @ltoff(@fptr(s+a)), data8 LSB +R_IA64_SEGREL32MSB = 0x5c # @segrel(sym + add), data4 MSB +R_IA64_SEGREL32LSB = 0x5d # @segrel(sym + add), data4 LSB +R_IA64_SEGREL64MSB = 0x5e # @segrel(sym + add), data8 MSB +R_IA64_SEGREL64LSB = 0x5f # @segrel(sym + add), data8 LSB +R_IA64_SECREL32MSB = 0x64 # @secrel(sym + add), data4 MSB +R_IA64_SECREL32LSB = 0x65 # @secrel(sym + add), data4 LSB +R_IA64_SECREL64MSB = 0x66 # @secrel(sym + add), data8 MSB +R_IA64_SECREL64LSB = 0x67 # @secrel(sym + add), data8 LSB +R_IA64_REL32MSB = 0x6c # data 4 + REL +R_IA64_REL32LSB = 0x6d # data 4 + REL +R_IA64_REL64MSB = 0x6e # data 8 + REL +R_IA64_REL64LSB = 0x6f # data 8 + REL +R_IA64_LTV32MSB = 0x74 # symbol + addend, data4 MSB +R_IA64_LTV32LSB = 0x75 # symbol + addend, data4 LSB +R_IA64_LTV64MSB = 0x76 # symbol + addend, data8 MSB +R_IA64_LTV64LSB = 0x77 # symbol + addend, data8 LSB +R_IA64_PCREL21BI = 0x79 # @pcrel(sym + add), 21bit inst +R_IA64_PCREL22 = 0x7a # @pcrel(sym + add), 22bit inst +R_IA64_PCREL64I = 0x7b # @pcrel(sym + add), 64bit inst +R_IA64_IPLTMSB = 0x80 # dynamic reloc, imported PLT, MSB +R_IA64_IPLTLSB = 0x81 # dynamic reloc, imported PLT, LSB +R_IA64_COPY = 0x84 # copy relocation +R_IA64_SUB = 0x85 # Addend and symbol difference +R_IA64_LTOFF22X = 0x86 # LTOFF22, relaxable. +R_IA64_LDXMOV = 0x87 # Use of LTOFF22X. 
+R_IA64_TPREL14 = 0x91 # @tprel(sym + add), imm14 +R_IA64_TPREL22 = 0x92 # @tprel(sym + add), imm22 +R_IA64_TPREL64I = 0x93 # @tprel(sym + add), imm64 +R_IA64_TPREL64MSB = 0x96 # @tprel(sym + add), data8 MSB +R_IA64_TPREL64LSB = 0x97 # @tprel(sym + add), data8 LSB +R_IA64_LTOFF_TPREL22 = 0x9a # @ltoff(@tprel(s+a)), imm2 +R_IA64_DTPMOD64MSB = 0xa6 # @dtpmod(sym + add), data8 MSB +R_IA64_DTPMOD64LSB = 0xa7 # @dtpmod(sym + add), data8 LSB +R_IA64_LTOFF_DTPMOD22 = 0xaa # @ltoff(@dtpmod(sym + add)), imm22 +R_IA64_DTPREL14 = 0xb1 # @dtprel(sym + add), imm14 +R_IA64_DTPREL22 = 0xb2 # @dtprel(sym + add), imm22 +R_IA64_DTPREL64I = 0xb3 # @dtprel(sym + add), imm64 +R_IA64_DTPREL32MSB = 0xb4 # @dtprel(sym + add), data4 MSB +R_IA64_DTPREL32LSB = 0xb5 # @dtprel(sym + add), data4 LSB +R_IA64_DTPREL64MSB = 0xb6 # @dtprel(sym + add), data8 MSB +R_IA64_DTPREL64LSB = 0xb7 # @dtprel(sym + add), data8 LSB +R_IA64_LTOFF_DTPREL22 = 0xba # @ltoff(@dtprel(s+a)), imm22 + +# SH relocations + +R_SH_NONE = 0 +R_SH_DIR32 = 1 +R_SH_REL32 = 2 +R_SH_DIR8WPN = 3 +R_SH_IND12W = 4 +R_SH_DIR8WPL = 5 +R_SH_DIR8WPZ = 6 +R_SH_DIR8BP = 7 +R_SH_DIR8W = 8 +R_SH_DIR8L = 9 +R_SH_SWITCH16 = 25 +R_SH_SWITCH32 = 26 +R_SH_USES = 27 +R_SH_COUNT = 28 +R_SH_ALIGN = 29 +R_SH_CODE = 30 +R_SH_DATA = 31 +R_SH_LABEL = 32 +R_SH_SWITCH8 = 33 +R_SH_GNU_VTINHERIT = 34 +R_SH_GNU_VTENTRY = 35 +R_SH_TLS_GD_32 = 144 +R_SH_TLS_LD_32 = 145 +R_SH_TLS_LDO_32 = 146 +R_SH_TLS_IE_32 = 147 +R_SH_TLS_LE_32 = 148 +R_SH_TLS_DTPMOD32 = 149 +R_SH_TLS_DTPOFF32 = 150 +R_SH_TLS_TPOFF32 = 151 +R_SH_GOT32 = 160 +R_SH_PLT32 = 161 +R_SH_COPY = 162 +R_SH_GLOB_DAT = 163 +R_SH_JMP_SLOT = 164 +R_SH_RELATIVE = 165 +R_SH_GOTOFF = 166 +R_SH_GOTPC = 167 +# Keep this the last entry. +R_SH_NUM = 256 + +# S/390 relocations + +R_390_NONE = 0 # No reloc. +R_390_8 = 1 # Direct 8 bit. +R_390_12 = 2 # Direct 12 bit. +R_390_16 = 3 # Direct 16 bit. +R_390_32 = 4 # Direct 32 bit. +R_390_PC32 = 5 # PC relative 32 bit. +R_390_GOT12 = 6 # 12 bit GOT offset. 
+R_390_GOT32 = 7 # 32 bit GOT offset. +R_390_PLT32 = 8 # 32 bit PC relative PLT address. +R_390_COPY = 9 # Copy symbol at runtime. +R_390_GLOB_DAT = 10 # Create GOT entry. +R_390_JMP_SLOT = 11 # Create PLT entry. +R_390_RELATIVE = 12 # Adjust by program base. +R_390_GOTOFF32 = 13 # 32 bit offset to GOT. +R_390_GOTPC = 14 # 32 bit PC relative offset to GOT. +R_390_GOT16 = 15 # 16 bit GOT offset. +R_390_PC16 = 16 # PC relative 16 bit. +R_390_PC16DBL = 17 # PC relative 16 bit shifted by 1. +R_390_PLT16DBL = 18 # 16 bit PC rel. PLT shifted by 1. +R_390_PC32DBL = 19 # PC relative 32 bit shifted by 1. +R_390_PLT32DBL = 20 # 32 bit PC rel. PLT shifted by 1. +R_390_GOTPCDBL = 21 # 32 bit PC rel. GOT shifted by 1. +R_390_64 = 22 # Direct 64 bit. +R_390_PC64 = 23 # PC relative 64 bit. +R_390_GOT64 = 24 # 64 bit GOT offset. +R_390_PLT64 = 25 # 64 bit PC relative PLT address. +R_390_GOTENT = 26 # 32 bit PC rel. to GOT entry >> 1. +R_390_GOTOFF16 = 27 # 16 bit offset to GOT. +R_390_GOTOFF64 = 28 # 64 bit offset to GOT. +R_390_GOTPLT12 = 29 # 12 bit offset to jump slot. +R_390_GOTPLT16 = 30 # 16 bit offset to jump slot. +R_390_GOTPLT32 = 31 # 32 bit offset to jump slot. +R_390_GOTPLT64 = 32 # 64 bit offset to jump slot. +R_390_GOTPLTENT = 33 # 32 bit rel. offset to jump slot. +R_390_PLTOFF16 = 34 # 16 bit offset from GOT to PLT. +R_390_PLTOFF32 = 35 # 32 bit offset from GOT to PLT. +R_390_PLTOFF64 = 36 # 16 bit offset from GOT to PLT. +R_390_TLS_LOAD = 37 # Tag for load insn in TLS code. +R_390_TLS_GDCALL = 38 # Tag for function call in general dynamic TLS code. +R_390_TLS_LDCALL = 39 # Tag for function call in local dynamic TLS code. +R_390_TLS_GD32 = 40 # Direct 32 bit for general dynamic thread local data. +R_390_TLS_GD64 = 41 # Direct 64 bit for general dynamic thread local data. +R_390_TLS_GOTIE12 = 42 # 12 bit GOT offset for static TLS block offset. +R_390_TLS_GOTIE32 = 43 # 32 bit GOT offset for static TLS block offset. 
+R_390_TLS_GOTIE64 = 44 # 64 bit GOT offset for static TLS block offset. +R_390_TLS_LDM32 = 45 # Direct 32 bit for local dynamic thread local data in LE code. +R_390_TLS_LDM64 = 46 # Direct 64 bit for local dynamic thread local data in LE code. +R_390_TLS_IE32 = 47 # 32 bit address of GOT entry for negated static TLS block offset. +R_390_TLS_IE64 = 48 # 64 bit address of GOT entry for negated static TLS block offset. +R_390_TLS_IEENT = 49 # 32 bit rel. offset to GOT entry for negated static TLS block offset. +R_390_TLS_LE32 = 50 # 32 bit negated offset relative to static TLS block. +R_390_TLS_LE64 = 51 # 64 bit negated offset relative to static TLS block. +R_390_TLS_LDO32 = 52 # 32 bit offset relative to TLS block. +R_390_TLS_LDO64 = 53 # 64 bit offset relative to TLS block. +R_390_TLS_DTPMOD = 54 # ID of module containing symbol. +R_390_TLS_DTPOFF = 55 # Offset in TLS block. +R_390_TLS_TPOFF = 56 # Negated offset in static TLS block. +R_390_20 = 57 # Direct 20 bit. +R_390_GOT20 = 58 # 20 bit GOT offset. +R_390_GOTPLT20 = 59 # 20 bit offset to jump slot. +R_390_TLS_GOTIE20 = 60 # 20 bit GOT offset for static TLS block offset. +# Keep this the last entry. +R_390_NUM = 61 + + +# CRIS relocations. +R_CRIS_NONE = 0 +R_CRIS_8 = 1 +R_CRIS_16 = 2 +R_CRIS_32 = 3 +R_CRIS_8_PCREL = 4 +R_CRIS_16_PCREL = 5 +R_CRIS_32_PCREL = 6 +R_CRIS_GNU_VTINHERIT = 7 +R_CRIS_GNU_VTENTRY = 8 +R_CRIS_COPY = 9 +R_CRIS_GLOB_DAT = 10 +R_CRIS_JUMP_SLOT = 11 +R_CRIS_RELATIVE = 12 +R_CRIS_16_GOT = 13 +R_CRIS_32_GOT = 14 +R_CRIS_16_GOTPLT = 15 +R_CRIS_32_GOTPLT = 16 +R_CRIS_32_GOTREL = 17 +R_CRIS_32_PLT_GOTREL = 18 +R_CRIS_32_PLT_PCREL = 19 + +R_CRIS_NUM = 20 + + +# AMD x86-64 relocations. 
+R_X86_64_NONE = 0 # No reloc +R_X86_64_64 = 1 # Direct 64 bit +R_X86_64_PC32 = 2 # PC relative 32 bit signed +R_X86_64_GOT32 = 3 # 32 bit GOT entry +R_X86_64_PLT32 = 4 # 32 bit PLT address +R_X86_64_COPY = 5 # Copy symbol at runtime +R_X86_64_GLOB_DAT = 6 # Create GOT entry +R_X86_64_JUMP_SLOT = 7 # Create PLT entry +R_X86_64_RELATIVE = 8 # Adjust by program base +R_X86_64_GOTPCREL = 9 # 32 bit signed PC relative offset to GOT +R_X86_64_32 = 10 # Direct 32 bit zero extended +R_X86_64_32S = 11 # Direct 32 bit sign extended +R_X86_64_16 = 12 # Direct 16 bit zero extended +R_X86_64_PC16 = 13 # 16 bit sign extended pc relative +R_X86_64_8 = 14 # Direct 8 bit sign extended +R_X86_64_PC8 = 15 # 8 bit sign extended pc relative +R_X86_64_DTPMOD64 = 16 # ID of module containing symbol +R_X86_64_DTPOFF64 = 17 # Offset in module's TLS block +R_X86_64_TPOFF64 = 18 # Offset in initial TLS block +R_X86_64_TLSGD = 19 # 32 bit signed PC relative offset to two GOT entries for GD symbol +R_X86_64_TLSLD = 20 # 32 bit signed PC relative offset to two GOT entries for LD symbol +R_X86_64_DTPOFF32 = 21 # Offset in TLS block +R_X86_64_GOTTPOFF = 22 # 32 bit signed PC relative offset to GOT entry for IE symbol +R_X86_64_TPOFF32 = 23 # Offset in initial TLS block +R_X86_64_PC64 = 24 # PC relative 64 bit +R_X86_64_GOTOFF64 = 25 # 64 bit offset to GOT +R_X86_64_GOTPC32 = 26 # 32 bit signed pc relative offset to GOT +R_X86_64_GOT64 = 27 # 64-bit GOT entry offset +R_X86_64_GOTPCREL64 = 28 # 64-bit PC relative offset to GOT entry +R_X86_64_GOTPC64 = 29 # 64-bit PC relative offset to GOT +R_X86_64_GOTPLT64 = 30 # like GOT64, says PLT entry needed +R_X86_64_PLTOFF64 = 31 # 64-bit GOT relative offset to PLT entry +R_X86_64_SIZE32 = 32 # Size of symbol plus 32-bit addend +R_X86_64_SIZE64 = 33 # Size of symbol plus 64-bit addend +R_X86_64_GOTPC32_TLSDESC = 34 # GOT offset for TLS descriptor. +R_X86_64_TLSDESC_CALL = 35 # Marker for call through TLS descriptor. 
+R_X86_64_TLSDESC = 36 # TLS descriptor. +R_X86_64_IRELATIVE = 37 # Adjust indirectly by program base + +R_X86_64_NUM = 38 + + +# AM33 relocations. +R_MN10300_NONE = 0 # No reloc. +R_MN10300_32 = 1 # Direct 32 bit. +R_MN10300_16 = 2 # Direct 16 bit. +R_MN10300_8 = 3 # Direct 8 bit. +R_MN10300_PCREL32 = 4 # PC-relative 32-bit. +R_MN10300_PCREL16 = 5 # PC-relative 16-bit signed. +R_MN10300_PCREL8 = 6 # PC-relative 8-bit signed. +R_MN10300_GNU_VTINHERIT = 7 # Ancient C++ vtable garbage... +R_MN10300_GNU_VTENTRY = 8 # ... collection annotation. +R_MN10300_24 = 9 # Direct 24 bit. +R_MN10300_GOTPC32 = 10 # 32-bit PCrel offset to GOT. +R_MN10300_GOTPC16 = 11 # 16-bit PCrel offset to GOT. +R_MN10300_GOTOFF32 = 12 # 32-bit offset from GOT. +R_MN10300_GOTOFF24 = 13 # 24-bit offset from GOT. +R_MN10300_GOTOFF16 = 14 # 16-bit offset from GOT. +R_MN10300_PLT32 = 15 # 32-bit PCrel to PLT entry. +R_MN10300_PLT16 = 16 # 16-bit PCrel to PLT entry. +R_MN10300_GOT32 = 17 # 32-bit offset to GOT entry. +R_MN10300_GOT24 = 18 # 24-bit offset to GOT entry. +R_MN10300_GOT16 = 19 # 16-bit offset to GOT entry. +R_MN10300_COPY = 20 # Copy symbol at runtime. +R_MN10300_GLOB_DAT = 21 # Create GOT entry. +R_MN10300_JMP_SLOT = 22 # Create PLT entry. +R_MN10300_RELATIVE = 23 # Adjust by program base. + +R_MN10300_NUM = 24 + + +# M32R relocs. +R_M32R_NONE = 0 # No reloc. +R_M32R_16 = 1 # Direct 16 bit. +R_M32R_32 = 2 # Direct 32 bit. +R_M32R_24 = 3 # Direct 24 bit. +R_M32R_10_PCREL = 4 # PC relative 10 bit shifted. +R_M32R_18_PCREL = 5 # PC relative 18 bit shifted. +R_M32R_26_PCREL = 6 # PC relative 26 bit shifted. +R_M32R_HI16_ULO = 7 # High 16 bit with unsigned low. +R_M32R_HI16_SLO = 8 # High 16 bit with signed low. +R_M32R_LO16 = 9 # Low 16 bit. +R_M32R_SDA16 = 10 # 16 bit offset in SDA. +R_M32R_GNU_VTINHERIT = 11 +R_M32R_GNU_VTENTRY = 12 +# M32R relocs use SHT_RELA. +R_M32R_16_RELA = 33 # Direct 16 bit. +R_M32R_32_RELA = 34 # Direct 32 bit. +R_M32R_24_RELA = 35 # Direct 24 bit. 
+R_M32R_10_PCREL_RELA = 36 # PC relative 10 bit shifted. +R_M32R_18_PCREL_RELA = 37 # PC relative 18 bit shifted. +R_M32R_26_PCREL_RELA = 38 # PC relative 26 bit shifted. +R_M32R_HI16_ULO_RELA = 39 # High 16 bit with unsigned low +R_M32R_HI16_SLO_RELA = 40 # High 16 bit with signed low +R_M32R_LO16_RELA = 41 # Low 16 bit +R_M32R_SDA16_RELA = 42 # 16 bit offset in SDA +R_M32R_RELA_GNU_VTINHERIT = 43 +R_M32R_RELA_GNU_VTENTRY = 44 +R_M32R_REL32 = 45 # PC relative 32 bit. + +R_M32R_GOT24 = 48 # 24 bit GOT entry +R_M32R_26_PLTREL = 49 # 26 bit PC relative to PLT shifted +R_M32R_COPY = 50 # Copy symbol at runtime +R_M32R_GLOB_DAT = 51 # Create GOT entry +R_M32R_JMP_SLOT = 52 # Create PLT entry +R_M32R_RELATIVE = 53 # Adjust by program base +R_M32R_GOTOFF = 54 # 24 bit offset to GOT +R_M32R_GOTPC24 = 55 # 24 bit PC relative offset to GOT +R_M32R_GOT16_HI_ULO = 56 # High 16 bit GOT entry with unsigned low +R_M32R_GOT16_HI_SLO = 57 # High 16 bit GOT entry with signed low +R_M32R_GOT16_LO = 58 # Low 16 bit GOT entry +R_M32R_GOTPC_HI_ULO = 59 # High 16 bit PC relative offset to GOT with unsigned low +R_M32R_GOTPC_HI_SLO = 60 # High 16 bit PC relative offset to GOT with signed low +R_M32R_GOTPC_LO = 61 # Low 16 bit PC relative offset to GOT +R_M32R_GOTOFF_HI_ULO = 62 # High 16 bit offset to GOT with unsigned low +R_M32R_GOTOFF_HI_SLO = 63 # High 16 bit offset to GOT with signed low +R_M32R_GOTOFF_LO = 64 # Low 16 bit offset to GOT +R_M32R_NUM = 256 # Keep this the last entry. diff --git a/miasm/loader/elf_init.py b/miasm/loader/elf_init.py new file mode 100644 index 00000000..36c4cfaf --- /dev/null +++ b/miasm/loader/elf_init.py @@ -0,0 +1,878 @@ +#! 
/usr/bin/env python + +from __future__ import print_function +from builtins import range +import logging +import struct + +from future.utils import PY3, with_metaclass + +from miasm.core.utils import force_bytes +from miasm.loader import cstruct +from miasm.loader import elf +from miasm.loader.strpatchwork import StrPatchwork + +log = logging.getLogger("elfparse") +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + + +def printable(string): + if isinstance(string, bytes): + return "".join( + c.decode() if b" " <= c < b"~" else "." + for c in (string[i:i+1] for i in range(len(string))) + ) + return string + + +class StructWrapper_metaclass(type): + + def __new__(cls, name, bases, dct): + wrapped = dct["wrapped"] + if wrapped is not None: # XXX: make dct lookup look into base classes + for fname, v in wrapped._fields: + dct[fname] = property(dct.pop("get_" + fname, + lambda self, fname=fname: getattr( + self.cstr, fname)), + dct.pop("set_" + fname, + lambda self, v, fname=fname: setattr( + self.cstr, fname, v)), + dct.pop("del_" + fname, None)) + return type.__new__(cls, name, bases, dct) + + +class StructWrapper(with_metaclass(StructWrapper_metaclass, object)): + + wrapped = None + + def __init__(self, parent, sex, size, *args, **kargs): + self.cstr = self.wrapped(sex, size, *args, **kargs) + self.parent = parent + + def __getitem__(self, item): + return getattr(self, item) + + def __repr__(self): + return "> 8].name + + def get_type(self): + return self.cstr.info & 0xff + + +class WRel64(StructWrapper): + wrapped = elf.Rel64 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u32")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 32].name + + def get_type(self): + return self.cstr.info & 0xffffffff + + +class WRela32(WRel32): + wrapped = elf.Rela32 + 
wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u08")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 8].name + + def get_type(self): + return self.cstr.info & 0xff + + +class WRela64(WRel64): + wrapped = elf.Rela64 + wrapped._fields.append(("sym", "u32")) + wrapped._fields.append(("type", "u32")) + + def get_sym(self): + return self.parent.linksection.symtab[self.cstr.info >> 32].name + + def get_type(self): + return self.cstr.info & 0xffffffff + + +class WShdr(StructWrapper): + wrapped = elf.Shdr + + def get_name(self): + return self.parent.parent._shstr.get_name(self.cstr.name) + + +class WDynamic(StructWrapper): + wrapped = elf.Dynamic + + def get_name(self): + if self.type == elf.DT_NEEDED: + return self.parent.linksection.get_name(self.cstr.name) + return self.cstr.name + + +class WPhdr(StructWrapper): + wrapped = elf.Phdr + + +class WPhdr64(StructWrapper): + wrapped = elf.Phdr64 + + +class WNhdr(StructWrapper): + wrapped = elf.Nhdr + + +class ContentManager(object): + + def __get__(self, owner, x): + if hasattr(owner, '_content'): + return owner._content + + def __set__(self, owner, new_content): + owner.resize(len(owner._content), len(new_content)) + owner._content = StrPatchwork(new_content) + owner.parse_content(owner.sex, owner.size) + + def __delete__(self, owner): + self.__set__(owner, None) + + +# Sections + +class Section_metaclass(type): + + def __new__(cls, name, bases, dct): + o = type.__new__(cls, name, bases, dct) + if name != "Section": + Section.register(o) + return o + + def register(cls, o): + if o.sht is not None: + cls.sectypes[o.sht] = o + + def __call__(cls, parent, sex, size, shstr=None): + sh = None + if shstr is not None: + sh = WShdr(None, sex, size, shstr) + if sh.type in Section.sectypes: + cls = Section.sectypes[sh.type] + i = cls.__new__(cls, cls.__name__, cls.__bases__, cls.__dict__) + if sh is not None: + sh.parent = i + i.__init__(parent, sh) + return i + + +class 
Section(with_metaclass(Section_metaclass, object)): + + sectypes = {} + content = ContentManager() + + def resize(self, old, new): + self.sh.size += new - old + self.parent.resize(self, new - old) + if self.phparent: + self.phparent.resize(self, new - old) + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + pass + + def get_linksection(self): + return self.parent[self.sh.link] + + def set_linksection(self, val): + if isinstance(val, Section): + val = self.parent.shlist.find(val) + if type(val) is int: + self.sh.link = val + linksection = property(get_linksection, set_linksection) + + def get_infosection(self): + # XXX info may not be in sh list ?!? + if not self.sh.info in self.parent: + return None + return self.parent[self.sh.info] + + def set_infosection(self, val): + if isinstance(val, Section): + val = self.parent.shlist.find(val) + if type(val) is int: + self.sh.info = val + infosection = property(get_infosection, set_infosection) + + def __init__(self, parent, sh=None): + self.parent = parent + self.phparent = None + self.sh = sh + self._content = b"" + + def __repr__(self): + r = "{%(name)s ofs=%(offset)#x sz=%(size)#x addr=%(addr)#010x}" % self.sh + return r + + +class NullSection(Section): + sht = elf.SHT_NULL + + def get_name(self, ofs): + # XXX check this + return b"" + + +class ProgBits(Section): + sht = elf.SHT_PROGBITS + + +class HashSection(Section): + sht = elf.SHT_HASH + + +class NoBitsSection(Section): + sht = elf.SHT_NOBITS + + +class ShLibSection(Section): + sht = elf.SHT_SHLIB + + +class InitArray(Section): + sht = elf.SHT_INIT_ARRAY + + +class FiniArray(Section): + sht = elf.SHT_FINI_ARRAY + + +class GroupSection(Section): + sht = elf.SHT_GROUP + + +class SymTabSHIndeces(Section): + sht = elf.SHT_SYMTAB_SHNDX + + +class GNUVerSym(Section): + sht = elf.SHT_GNU_versym + + +class GNUVerNeed(Section): + sht = elf.SHT_GNU_verneed + + +class GNUVerDef(Section): + sht = elf.SHT_GNU_verdef + + +class GNULibLIst(Section): + sht 
= elf.SHT_GNU_LIBLIST + + +class CheckSumSection(Section): + sht = elf.SHT_CHECKSUM + + +class NoteSection(Section): + sht = elf.SHT_NOTE + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + hsz = 12 + self.notes = [] + # XXX: c may not be aligned? + while len(c) > hsz: + note = WNhdr(self, sex, size, c) + namesz, descsz = note.namesz, note.descsz + name = c[hsz:hsz + namesz] + desc = c[hsz + namesz:hsz + namesz + descsz] + c = c[hsz + namesz + descsz:] + self.notes.append((note.type, name, desc)) + + +class Dynamic(Section): + sht = elf.SHT_DYNAMIC + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + self.dyntab = [] + self.dynamic = {} + sz = self.sh.entsize + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + dyn = WDynamic(self, sex, size, s) + self.dyntab.append(dyn) + if isinstance(dyn.name, str): + self[dyn.name] = dyn + + def __setitem__(self, item, value): + if isinstance(item, bytes): + self.dynamic[item] = value + return + if isinstance(item, str): + self.symbols[item.encode()] = value + return + self.dyntab[item] = value + + def __getitem__(self, item): + if isinstance(item, bytes): + return self.dynamic[item] + if isinstance(item, str): + return self.dynamic[item.encode()] + return self.dyntab[item] + + +class StrTable(Section): + sht = elf.SHT_STRTAB + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + self.res = {} + c = self.content + q = 0 + index = 0 + l = len(c) + while index < l: + p = c.find(b"\x00", index) + if p < 0: + log.warning("Missing trailing 0 for string [%s]" % c) # XXX + p = len(c) - index + self.res[index] = c[index:p] + # print q, c[:p] + index = p + 1 + # q += p+1 + # c = c[p+1:] + + def get_name(self, ofs): + return self.content[ofs:self.content.find(b'\x00', start=ofs)] + + def add_name(self, name): + name = force_bytes(name) + name = name + b"\x00" + if name in self.content: + return 
self.content.find(name) + n = len(self.content) + self.content = bytes(self.content) + name + return n + + def mod_name(self, name, new_name): + s = bytes(self.content) + name_b = b'\x00%s\x00' % name.encode() + if not name_b in s: + raise ValueError('Unknown name %r' % name) + self.content = s.replace( + name_b, + b'\x00%s\x00' % new_name.encode() + ) + return len(self.content) + + +class SymTable(Section): + sht = elf.SHT_SYMTAB + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + c = self.content + self.symtab = [] + self.symbols = {} + sz = self.sh.entsize + index = 0 + l = len(c) + if size == 32: + WSym = WSym32 + elif size == 64: + WSym = WSym64 + else: + ValueError('unknown size') + while index < l: + s = c[index:index + sz] + index += sz + sym = WSym(self, sex, size, s) + self.symtab.append(sym) + self[sym.name] = sym + + def __getitem__(self, item): + if isinstance(item, bytes): + return self.symbols[item] + if isinstance(item, str): + return self.symbols[item.encode()] + return self.symtab[item] + + def __setitem__(self, item, value): + if isinstance(item, bytes): + self.symbols[item] = value + return + if isinstance(item, str): + self.symbols[item.encode()] = value + return + self.symtab[item] = value + + +class DynSymTable(SymTable): + sht = elf.SHT_DYNSYM + + +class RelTable(Section): + sht = elf.SHT_REL + + def parse_content(self, sex, size): + self.sex, self.size = sex, size + if size == 32: + WRel = WRel32 + elif size == 64: + WRel = WRel64 + else: + ValueError('unknown size') + c = self.content + self.reltab = [] + self.rel = {} + sz = self.sh.entsize + + idx = 0 + while len(c) > sz*idx: + s = c[sz*idx:sz*(idx+1)] + idx += 1 + rel = WRel(self, sex, size, s) + self.reltab.append(rel) + if rel.parent.linksection != self.parent.shlist[0]: + self.rel[rel.sym] = rel + + +class RelATable(RelTable): + sht = elf.SHT_RELA + +# Section List + + +class SHList(object): + + def __init__(self, parent, sex, size): + self.parent = parent + 
self.shlist = [] + ehdr = self.parent.Ehdr + of1 = ehdr.shoff + if not of1: # No SH table + return + for i in range(ehdr.shnum): + of2 = of1 + ehdr.shentsize + shstr = parent[of1:of2] + self.shlist.append(Section(self, sex, size, shstr=shstr)) + of1 = of2 + self._shstr = self.shlist[ehdr.shstrndx] + + for s in self.shlist: + if not isinstance(s, NoBitsSection): + s._content = StrPatchwork( + parent[s.sh.offset: s.sh.offset + s.sh.size] + ) + # Follow dependencies when initializing sections + zero = self.shlist[0] + todo = self.shlist[1:] + done = [] + while todo: + s = todo.pop(0) + if ((s.linksection == zero or s.linksection in done) and + (s.infosection in [zero, None] or s.infosection in done)): + done.append(s) + s.parse_content(sex, size) + else: + todo.append(s) + for s in self.shlist: + self.do_add_section(s) + + def do_add_section(self, section): + n = section.sh.name + if n.startswith(b"."): + n = n[1:] + n = printable(n).replace(".", "_").replace("-", "_") + setattr(self, n, section) # xxx + + def append(self, item): + self.do_add_section(item) + self.shlist.append(item) + + def __getitem__(self, item): + return self.shlist[item] + + def __repr__(self): + rep = ["# section offset size addr flags"] + for i, s in enumerate(self.shlist): + l = "%(name)-15s %(offset)08x %(size)06x %(addr)08x %(flags)x " % s.sh + l = ("%2i " % i) + l + s.__class__.__name__ + rep.append(l) + return "\n".join(rep) + + def __bytes__(self): + return b"".join( + bytes(s.sh) for s in self.shlist + ) + + def __str__(self): + if PY3: + return repr(self) + return bytes(self) + + def resize(self, sec, diff): + for s in self.shlist: + if s.sh.offset > sec.sh.offset: + s.sh.offset += diff + if self.parent.Ehdr.shoff > sec.sh.offset: + self.parent.Ehdr.shoff += diff + if self.parent.Ehdr.phoff > sec.sh.offset: + self.parent.Ehdr.phoff += diff + +# Program Header List + + +class ProgramHeader(object): + + def __init__(self, parent, sex, size, phstr): + self.parent = parent + self.ph = 
WPhdr(self, sex, size, phstr) + self.shlist = [] + for s in self.parent.parent.sh: + if isinstance(s, NullSection): + continue + if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) + or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): + s.phparent = self + self.shlist.append(s) + + def resize(self, sec, diff): + self.ph.filesz += diff + self.ph.memsz += diff + self.parent.resize(sec, diff) + + +class ProgramHeader64(object): + + def __init__(self, parent, sex, size, phstr): + self.parent = parent + self.ph = WPhdr64(self, sex, size, phstr) + self.shlist = [] + for s in self.parent.parent.sh: + if isinstance(s, NullSection): + continue + if ((isinstance(s, NoBitsSection) and s.sh.offset == self.ph.offset + self.ph.filesz) + or self.ph.offset <= s.sh.offset < self.ph.offset + self.ph.filesz): + s.phparent = self + self.shlist.append(s) + + def resize(self, sec, diff): + self.ph.filesz += diff + self.ph.memsz += diff + self.parent.resize(sec, diff) + + +class PHList(object): + + def __init__(self, parent, sex, size): + self.parent = parent + self.phlist = [] + ehdr = self.parent.Ehdr + of1 = ehdr.phoff + for i in range(ehdr.phnum): + of2 = of1 + ehdr.phentsize + phstr = parent[of1:of2] + if size == 32: + self.phlist.append(ProgramHeader(self, sex, size, phstr)) + else: + self.phlist.append(ProgramHeader64(self, sex, size, phstr)) + of1 = of2 + + def __getitem__(self, item): + return self.phlist[item] + + def __repr__(self): + r = [" offset filesz vaddr memsz"] + for i, p in enumerate(self.phlist): + l = "%(offset)07x %(filesz)06x %(vaddr)08x %(memsz)07x %(type)02x %(flags)01x" % p.ph + l = ("%2i " % i) + l + r.append(l) + r.append(" " + " ".join(printable(s.sh.name) for s in p.shlist)) + return "\n".join(r) + + def __bytes__(self): + return b"".join( + bytes(p.ph) for p in self.phlist + ) + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__(self) + + def resize(self, sec, diff): + for p in 
self.phlist: + if p.ph.offset > sec.sh.offset: + p.ph.offset += diff + if p.ph.vaddr > sec.phparent.ph.vaddr + sec.sh.offset: + p.ph.vaddr += diff + if p.ph.paddr > sec.phparent.ph.paddr + sec.sh.offset: + p.ph.paddr += diff + + +class virt(object): + + def __init__(self, x): + self.parent = x + + def get_rvaitem(self, start, stop=None): + if stop == None: + s = self.parent.getsectionbyvad(start) + if s: + start = start - s.sh.addr + else: + s = self.parent.getphbyvad(start) + if s: + start = start - s.ph.vaddr + if not s: + return [(None, start)] + return [(s, start)] + total_len = stop - start + + virt_item = [] + while total_len: + s = self.parent.getsectionbyvad(start) + if not s: + s = self.parent.getphbyvad(start) + if not s: + raise ValueError('unknown rva address! %x' % start) + if isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64): + s_max = s.ph.filesz + s_start = start - s.ph.vaddr + s_stop = stop - s.ph.vaddr + else: + s_max = s.sh.size + s_start = start - s.sh.addr + s_stop = stop - s.sh.addr + if s_stop > s_max: + s_stop = s_max + + s_len = s_stop - s_start + if s_len == 0: + raise ValueError('empty section! 
%x' % start) + total_len -= s_len + start += s_len + n_item = slice(s_start, s_stop) + virt_item.append((s, n_item)) + return virt_item + + def item2virtitem(self, item): + if not type(item) is slice: # integer + return self.get_rvaitem(item) + start = item.start + stop = item.stop + assert(item.step is None) + return self.get_rvaitem(start, stop) + + def get(self, ad_start, ad_stop=None): + rva_items = self.get_rvaitem(ad_start, ad_stop) + data_out = b"" + for s, n_item in rva_items: + if not (isinstance(s, ProgramHeader) or isinstance(s, ProgramHeader64)): + data_out += s.content.__getitem__(n_item) + continue + if not type(n_item) is slice: + n_item = slice(n_item, n_item + 1, 1) + start = n_item.start + s.ph.offset + stop = n_item.stop + s.ph.offset + if n_item.step != None: + step = n_item.step + s.ph.offset + else: + step = None + n_item = slice(start, stop, step) + # data_out += self.parent.content.__s.content.__getitem__(n_item) + data_out += self.parent.content.__getitem__(n_item) + + return data_out + + def set(self, item, data): + if not type(item) is slice: + item = slice(item, item + len(data), None) + virt_item = self.item2virtitem(item) + if not virt_item: + return + off = 0 + for s, n_item in virt_item: + if isinstance(s, ProgBits): + i = slice(off, n_item.stop + off - n_item.start, n_item.step) + + data_slice = data.__getitem__(i) + s.content.__setitem__(n_item, data_slice) + off = i.stop + else: + raise ValueError('TODO XXX') + + return + + def __getitem__(self, item): + if isinstance(item, slice): + assert(item.step is None) + return self.get(item.start, item.stop) + else: + return self.get(item) + + def __setitem__(self, item, data): + if isinstance(item, slice): + rva = item.start + else: + rva = item + self.set(rva, data) + + def max_addr(self): + # the maximum virtual address is found by retrieving the maximum + # possible virtual address, either from the program entries, and + # section entries. if there is no such object, raise an error. 
+ l = 0 + if self.parent.ph.phlist: + for phdr in self.parent.ph.phlist: + l = max(l, phdr.ph.vaddr + phdr.ph.memsz) + if self.parent.sh.shlist: + for shdr in self.parent.sh.shlist: + l = max(l, shdr.sh.addr + shdr.sh.size) + if not l: + raise ValueError('maximum virtual address not found !') + return l + + def is_addr_in(self, ad): + return self.parent.is_in_virt_address(ad) + + def find(self, pattern, start=0): + sections = [] + offset = start + for s in self.parent.ph: + s_max = s.ph.memsz # max(s.ph.filesz, s.ph.memsz) + if offset < s.ph.vaddr + s_max: + sections.append(s) + + if not sections: + return -1 + offset -= sections[0].ph.vaddr + if offset < 0: + offset = 0 + for s in sections: + data = self.parent.content[s.ph.offset:s.ph.offset + s.ph.filesz] + ret = data.find(pattern, offset) + if ret != -1: + return ret + s.ph.vaddr # self.parent.rva2virt(s.addr + ret) + offset = 0 + return -1 + +# ELF object + + +class ELF(object): + + def __init__(self, elfstr): + self._content = elfstr + self.parse_content() + + self._virt = virt(self) + + def get_virt(self): + return self._virt + virt = property(get_virt) + + content = ContentManager() + + def parse_content(self): + h = self.content[:8] + self.size = struct.unpack('B', h[4:5])[0] * 32 + self.sex = struct.unpack('B', h[5:6])[0] + self.Ehdr = WEhdr(self, self.sex, self.size, self.content) + self.sh = SHList(self, self.sex, self.size) + self.ph = PHList(self, self.sex, self.size) + + def resize(self, old, new): + pass + + def __getitem__(self, item): + return self.content[item] + + def build_content(self): + c = StrPatchwork() + c[0] = bytes(self.Ehdr) + c[self.Ehdr.phoff] = bytes(self.ph) + for s in self.sh: + c[s.sh.offset] = bytes(s.content) + c[self.Ehdr.shoff] = bytes(self.sh) + return bytes(c) + + def __bytes__(self): + return self.build_content() + + def __str__(self): + if PY3: + return repr(self) + return bytes(self) + + def getphbyvad(self, ad): + for s in self.ph: + if s.ph.vaddr <= ad < s.ph.vaddr + 
s.ph.memsz: + return s + + def getsectionbyvad(self, ad): + for s in self.sh: + if s.sh.addr <= ad < s.sh.addr + s.sh.size: + return s + + def getsectionbyname(self, name): + name = force_bytes(name) + for s in self.sh: + try: + if s.sh.name.strip(b'\x00') == name: + return s + except UnicodeDecodeError: + pass + return None + + def is_in_virt_address(self, ad): + for s in self.sh: + if s.sh.addr <= ad < s.sh.addr + s.sh.size: + return True + return False diff --git a/miasm/loader/minidump.py b/miasm/loader/minidump.py new file mode 100644 index 00000000..fbb7bde5 --- /dev/null +++ b/miasm/loader/minidump.py @@ -0,0 +1,545 @@ +"""Constants and structures associated to Minidump format +Based on: http://amnesia.gtisc.gatech.edu/~moyix/minidump.py +""" +from future.utils import viewitems + +from future.builtins import int as int_types +from miasm.loader.new_cstruct import CStruct + +class Enumeration(object): + """Stand for an enumeration type""" + + def __init__(self, enum_info): + """enum_info: {name: value}""" + self._enum_info = enum_info + self._inv_info = dict((v, k) for k, v in viewitems(enum_info)) + + def __getitem__(self, key): + """Helper: assume that string is for key, integer is for value""" + if isinstance(key, int_types): + return self._inv_info[key] + return self._enum_info[key] + + def __getattr__(self, key): + if key in self._enum_info: + return self._enum_info[key] + raise AttributeError + + def from_value(self, value): + return self._inv_info[value] + + +class Rva(CStruct): + """Relative Virtual Address + Note: RVA in Minidump means "file offset" + """ + _fields = [("rva", "u32"), + ] + + +minidumpType = Enumeration({ + # MINIDUMP_TYPE + # https://msdn.microsoft.com/en-us/library/ms680519(v=vs.85).aspx + "MiniDumpNormal" : 0x00000000, + "MiniDumpWithDataSegs" : 0x00000001, + "MiniDumpWithFullMemory" : 0x00000002, + "MiniDumpWithHandleData" : 0x00000004, + "MiniDumpFilterMemory" : 0x00000008, + "MiniDumpScanMemory" : 0x00000010, + 
"MiniDumpWithUnloadedModules" : 0x00000020, + "MiniDumpWithIndirectlyReferencedMemory" : 0x00000040, + "MiniDumpFilterModulePaths" : 0x00000080, + "MiniDumpWithProcessThreadData" : 0x00000100, + "MiniDumpWithPrivateReadWriteMemory" : 0x00000200, + "MiniDumpWithoutOptionalData" : 0x00000400, + "MiniDumpWithFullMemoryInfo" : 0x00000800, + "MiniDumpWithThreadInfo" : 0x00001000, + "MiniDumpWithCodeSegs" : 0x00002000, + "MiniDumpWithoutAuxiliaryState" : 0x00004000, + "MiniDumpWithFullAuxiliaryState" : 0x00008000, + "MiniDumpWithPrivateWriteCopyMemory" : 0x00010000, + "MiniDumpIgnoreInaccessibleMemory" : 0x00020000, + "MiniDumpWithTokenInformation" : 0x00040000, + "MiniDumpWithModuleHeaders" : 0x00080000, + "MiniDumpFilterTriage" : 0x00100000, + "MiniDumpValidTypeFlags" : 0x001fffff, +}) + +class MinidumpHDR(CStruct): + """MINIDUMP_HEADER + https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx + """ + _fields = [("Magic", "u32"), # MDMP + ("Version", "u16"), + ("ImplementationVersion", "u16"), + ("NumberOfStreams", "u32"), + ("StreamDirectoryRva", "Rva"), + ("Checksum", "u32"), + ("TimeDateStamp", "u32"), + ("Flags", "u32") + ] + +class LocationDescriptor(CStruct): + """MINIDUMP_LOCATION_DESCRIPTOR + https://msdn.microsoft.com/en-us/library/ms680383(v=vs.85).aspx + """ + _fields = [("DataSize", "u32"), + ("Rva", "Rva"), + ] + + +streamType = Enumeration({ + # MINIDUMP_STREAM_TYPE + # https://msdn.microsoft.com/en-us/library/ms680394(v=vs.85).aspx + "UnusedStream" : 0, + "ReservedStream0" : 1, + "ReservedStream1" : 2, + "ThreadListStream" : 3, + "ModuleListStream" : 4, + "MemoryListStream" : 5, + "ExceptionStream" : 6, + "SystemInfoStream" : 7, + "ThreadExListStream" : 8, + "Memory64ListStream" : 9, + "CommentStreamA" : 10, + "CommentStreamW" : 11, + "HandleDataStream" : 12, + "FunctionTableStream" : 13, + "UnloadedModuleListStream" : 14, + "MiscInfoStream" : 15, + "MemoryInfoListStream" : 16, + "ThreadInfoListStream" : 17, + "HandleOperationListStream" : 18, + 
"LastReservedStream" : 0xffff, +}) + +class StreamDirectory(CStruct): + """MINIDUMP_DIRECTORY + https://msdn.microsoft.com/en-us/library/ms680365(VS.85).aspx + """ + _fields = [("StreamType", "u32"), + ("Location", "LocationDescriptor"), + ] + + @property + def pretty_name(self): + return streamType[self.StreamType] + + +class FixedFileInfo(CStruct): + """VS_FIXEDFILEINFO + https://msdn.microsoft.com/en-us/library/ms646997(v=vs.85).aspx + """ + _fields = [("dwSignature", "u32"), + ("dwStrucVersion", "u32"), + ("dwFileVersionMS", "u32"), + ("dwFileVersionLS", "u32"), + ("dwProductVersionMS", "u32"), + ("dwProductVersionLS", "u32"), + ("dwFileFlagsMask", "u32"), + ("dwFileFlags", "u32"), + ("dwFileOS", "u32"), + ("dwFileType", "u32"), + ("dwFileSubtype", "u32"), + ("dwFileDateMS", "u32"), + ("dwFileDateLS", "u32"), + ] + +class MinidumpString(CStruct): + """MINIDUMP_STRING + https://msdn.microsoft.com/en-us/library/ms680395(v=vs.85).aspx + """ + _fields = [("Length", "u32"), + ("Buffer", "u08", lambda string:string.Length), + ] + +class Module(CStruct): + """MINIDUMP_MODULE + https://msdn.microsoft.com/en-us/library/ms680392(v=vs.85).aspx + """ + _fields = [("BaseOfImage", "u64"), + ("SizeOfImage", "u32"), + ("CheckSum", "u32"), + ("TimeDateStamp", "u32"), + ("ModuleNameRva", "Rva"), + ("VersionInfo", "FixedFileInfo"), + ("CvRecord", "LocationDescriptor"), + ("MiscRecord", "LocationDescriptor"), + ("Reserved0", "u64"), + ("Reserved1", "u64"), + ] + + +class ModuleList(CStruct): + """MINIDUMP_MODULE_LIST + https://msdn.microsoft.com/en-us/library/ms680391(v=vs.85).aspx + """ + _fields = [("NumberOfModules", "u32"), + ("Modules", "Module", lambda mlist:mlist.NumberOfModules), + ] + + +class MemoryDescriptor64(CStruct): + """MINIDUMP_MEMORY_DESCRIPTOR64 + https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx + """ + _fields = [("StartOfMemoryRange", "u64"), + ("DataSize", "u64") + ] + + +class Memory64List(CStruct): + """MINIDUMP_MEMORY64_LIST + 
https://msdn.microsoft.com/en-us/library/ms680387(v=vs.85).aspx + """ + _fields = [("NumberOfMemoryRanges", "u64"), + ("BaseRva", "u64"), + ("MemoryRanges", "MemoryDescriptor64", + lambda mlist:mlist.NumberOfMemoryRanges), + ] + +class MemoryDescriptor(CStruct): + """MINIDUMP_MEMORY_DESCRIPTOR + https://msdn.microsoft.com/en-us/library/ms680384(v=vs.85).aspx + """ + _fields = [("StartOfMemoryRange", "u64"), + ("Memory", "LocationDescriptor"), + ] + +class MemoryList(CStruct): + """MINIDUMP_MEMORY_LIST + https://msdn.microsoft.com/en-us/library/ms680387(v=vs.85).aspx + """ + _fields = [("NumberOfMemoryRanges", "u32"), + ("MemoryRanges", "MemoryDescriptor", + lambda mlist:mlist.NumberOfMemoryRanges), + ] + +memProtect = Enumeration({ + # MEM PROTECT + # https://msdn.microsoft.com/en-us/library/aa366786(v=vs.85).aspx + "PAGE_NOACCESS" : 0x0001, + "PAGE_READONLY" : 0x0002, + "PAGE_READWRITE" : 0x0004, + "PAGE_WRITECOPY" : 0x0008, + "PAGE_EXECUTE" : 0x0010, + "PAGE_EXECUTE_READ" : 0x0020, + "PAGE_EXECUTE_READWRITE" : 0x0040, + "PAGE_EXECUTE_WRITECOPY" : 0x0080, + "PAGE_GUARD" : 0x0100, + "PAGE_NOCACHE" : 0x0200, + "PAGE_WRITECOMBINE" : 0x0400, +}) + +class MemoryInfo(CStruct): + """MINIDUMP_MEMORY_INFO + https://msdn.microsoft.com/en-us/library/ms680386(v=vs.85).aspx + """ + _fields = [("BaseAddress", "u64"), + ("AllocationBase", "u64"), + ("AllocationProtect", "u32"), + ("__alignment1", "u32"), + ("RegionSize", "u64"), + ("State", "u32"), + ("Protect", "u32"), + ("Type", "u32"), + ("__alignment2", "u32"), + ] + +class MemoryInfoList(CStruct): + """MINIDUMP_MEMORY_INFO_LIST + https://msdn.microsoft.com/en-us/library/ms680385(v=vs.85).aspx + """ + _fields = [("SizeOfHeader", "u32"), + ("SizeOfEntry", "u32"), + ("NumberOfEntries", "u64"), + # Fake field, for easy access to MemoryInfo elements + ("MemoryInfos", "MemoryInfo", + lambda mlist: mlist.NumberOfEntries), + ] + + +contextFlags_x86 = Enumeration({ + "CONTEXT_i386" : 0x00010000, + "CONTEXT_CONTROL" : 0x00010001, + 
"CONTEXT_INTEGER" : 0x00010002, + "CONTEXT_SEGMENTS" : 0x00010004, + "CONTEXT_FLOATING_POINT" : 0x00010008, + "CONTEXT_DEBUG_REGISTERS" : 0x00010010, + "CONTEXT_EXTENDED_REGISTERS" : 0x00010020, +}) + +class FloatingSaveArea(CStruct): + """FLOATING_SAVE_AREA + http://terminus.rewolf.pl/terminus/structures/ntdll/_FLOATING_SAVE_AREA_x86.html + """ + _fields = [("ControlWord", "u32"), + ("StatusWord", "u32"), + ("TagWord", "u32"), + ("ErrorOffset", "u32"), + ("ErrorSelector", "u32"), + ("DataOffset", "u32"), + ("DataSelector", "u32"), + ("RegisterArea", "80s"), + ("Cr0NpxState", "u32"), + ] + +class Context_x86(CStruct): + """CONTEXT x86 + https://msdn.microsoft.com/en-us/en-en/library/ms679284(v=vs.85).aspx + http://terminus.rewolf.pl/terminus/structures/ntdll/_CONTEXT_x86.html + """ + + MAXIMUM_SUPPORTED_EXTENSION = 512 + + def is_activated(flag): + mask = contextFlags_x86[flag] + def check_context(ctx): + if (ctx.ContextFlags & mask == mask): + return 1 + return 0 + return check_context + + _fields = [("ContextFlags", "u32"), + # DebugRegisters + ("Dr0", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr1", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr2", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr3", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr6", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr7", "u32", is_activated("CONTEXT_DEBUG_REGISTERS")), + + ("FloatSave", "FloatingSaveArea", + is_activated("CONTEXT_FLOATING_POINT")), + + # SegmentRegisters + ("SegGs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegFs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegEs", "u32", is_activated("CONTEXT_SEGMENTS")), + ("SegDs", "u32", is_activated("CONTEXT_SEGMENTS")), + # IntegerRegisters + ("Edi", "u32", is_activated("CONTEXT_INTEGER")), + ("Esi", "u32", is_activated("CONTEXT_INTEGER")), + ("Ebx", "u32", is_activated("CONTEXT_INTEGER")), + ("Edx", "u32", is_activated("CONTEXT_INTEGER")), + ("Ecx", "u32", 
is_activated("CONTEXT_INTEGER")), + ("Eax", "u32", is_activated("CONTEXT_INTEGER")), + # ControlRegisters + ("Ebp", "u32", is_activated("CONTEXT_CONTROL")), + ("Eip", "u32", is_activated("CONTEXT_CONTROL")), + ("SegCs", "u32", is_activated("CONTEXT_CONTROL")), + ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), + ("Esp", "u32", is_activated("CONTEXT_CONTROL")), + ("SegSs", "u32", is_activated("CONTEXT_CONTROL")), + + ("ExtendedRegisters", "%ds" % MAXIMUM_SUPPORTED_EXTENSION, + is_activated("CONTEXT_EXTENDED_REGISTERS")), + ] + + +contextFlags_AMD64 = Enumeration({ + "CONTEXT_AMD64" : 0x00100000, + "CONTEXT_CONTROL" : 0x00100001, + "CONTEXT_INTEGER" : 0x00100002, + "CONTEXT_SEGMENTS" : 0x00100004, + "CONTEXT_FLOATING_POINT" : 0x00100008, + "CONTEXT_DEBUG_REGISTERS" : 0x00100010, + "CONTEXT_XSTATE" : 0x00100020, + "CONTEXT_EXCEPTION_ACTIVE" : 0x08000000, + "CONTEXT_SERVICE_ACTIVE" : 0x10000000, + "CONTEXT_EXCEPTION_REQUEST" : 0x40000000, + "CONTEXT_EXCEPTION_REPORTING" : 0x80000000, +}) + + +class M128A(CStruct): + """M128A + http://terminus.rewolf.pl/terminus/structures/ntdll/_M128A_x64.html + """ + _fields = [("Low", "u64"), + ("High", "u64"), + ] + +class Context_AMD64(CStruct): + """CONTEXT AMD64 + https://github.com/duarten/Threadjack/blob/master/WinNT.h + """ + + def is_activated(flag): + mask = contextFlags_AMD64[flag] + def check_context(ctx): + if (ctx.ContextFlags & mask == mask): + return 1 + return 0 + return check_context + + _fields = [ + + # Only used for Convenience + ("P1Home", "u64"), + ("P2Home", "u64"), + ("P3Home", "u64"), + ("P4Home", "u64"), + ("P5Home", "u64"), + ("P6Home", "u64"), + + # Control + ("ContextFlags", "u32"), + ("MxCsr", "u32"), + + # Segment & processor + # /!\ activation depends on multiple flags + ("SegCs", "u16", is_activated("CONTEXT_CONTROL")), + ("SegDs", "u16", is_activated("CONTEXT_SEGMENTS")), + ("SegEs", "u16", is_activated("CONTEXT_SEGMENTS")), + ("SegFs", "u16", is_activated("CONTEXT_SEGMENTS")), + ("SegGs", "u16", 
is_activated("CONTEXT_SEGMENTS")), + ("SegSs", "u16", is_activated("CONTEXT_CONTROL")), + ("EFlags", "u32", is_activated("CONTEXT_CONTROL")), + + # Debug registers + ("Dr0", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr1", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr2", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr3", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr6", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + ("Dr7", "u64", is_activated("CONTEXT_DEBUG_REGISTERS")), + + # Integer registers + # /!\ activation depends on multiple flags + ("Rax", "u64", is_activated("CONTEXT_INTEGER")), + ("Rcx", "u64", is_activated("CONTEXT_INTEGER")), + ("Rdx", "u64", is_activated("CONTEXT_INTEGER")), + ("Rbx", "u64", is_activated("CONTEXT_INTEGER")), + ("Rsp", "u64", is_activated("CONTEXT_CONTROL")), + ("Rbp", "u64", is_activated("CONTEXT_INTEGER")), + ("Rsi", "u64", is_activated("CONTEXT_INTEGER")), + ("Rdi", "u64", is_activated("CONTEXT_INTEGER")), + ("R8", "u64", is_activated("CONTEXT_INTEGER")), + ("R9", "u64", is_activated("CONTEXT_INTEGER")), + ("R10", "u64", is_activated("CONTEXT_INTEGER")), + ("R11", "u64", is_activated("CONTEXT_INTEGER")), + ("R12", "u64", is_activated("CONTEXT_INTEGER")), + ("R13", "u64", is_activated("CONTEXT_INTEGER")), + ("R14", "u64", is_activated("CONTEXT_INTEGER")), + ("R15", "u64", is_activated("CONTEXT_INTEGER")), + ("Rip", "u64", is_activated("CONTEXT_CONTROL")), + + # Floating point + ("Header", "M128A", lambda ctx: 2), + ("Legacy", "M128A", lambda ctx: 8), + ("Xmm0", "M128A"), + ("Xmm1", "M128A"), + ("Xmm2", "M128A"), + ("Xmm3", "M128A"), + ("Xmm4", "M128A"), + ("Xmm5", "M128A"), + ("Xmm6", "M128A"), + ("Xmm7", "M128A"), + ("Xmm8", "M128A"), + ("Xmm9", "M128A"), + ("Xmm10", "M128A"), + ("Xmm11", "M128A"), + ("Xmm12", "M128A"), + ("Xmm13", "M128A"), + ("Xmm14", "M128A"), + ("Xmm15", "M128A"), + + + # Vector registers + ("VectorRegister", "M128A", lambda ctx: 16), + ("VectorControl", "u64"), + + # 
Special debug control regs + ("DebugControl", "u64"), + ("LastBranchToRip", "u64"), + ("LastBranchFromRip", "u64"), + ("LastExceptionToRip", "u64"), + ("LastExceptionFromRip", "u64"), + ] + +processorArchitecture = Enumeration({ + "PROCESSOR_ARCHITECTURE_X86" : 0, + "PROCESSOR_ARCHITECTURE_MIPS" : 1, + "PROCESSOR_ARCHITECTURE_ALPHA" : 2, + "PROCESSOR_ARCHITECTURE_PPC" : 3, + "PROCESSOR_ARCHITECTURE_SHX" : 4, + "PROCESSOR_ARCHITECTURE_ARM" : 5, + "PROCESSOR_ARCHITECTURE_IA64" : 6, + "PROCESSOR_ARCHITECTURE_ALPHA64" : 7, + "PROCESSOR_ARCHITECTURE_MSIL" : 8, + "PROCESSOR_ARCHITECTURE_AMD64" : 9, + "PROCESSOR_ARCHITECTURE_X86_WIN64" : 10, + "PROCESSOR_ARCHITECTURE_UNKNOWN" : 0xffff, +}) + +class Thread(CStruct): + """MINIDUMP_THREAD + https://msdn.microsoft.com/en-us/library/ms680517(v=vs.85).aspx + """ + + arch2context_cls = { + processorArchitecture.PROCESSOR_ARCHITECTURE_X86: Context_x86, + processorArchitecture.PROCESSOR_ARCHITECTURE_AMD64: Context_AMD64, + } + + def parse_context(self, content, offset): + loc_desc = LocationDescriptor.unpack(content, offset, self.parent_head) + + # Use the correct context depending on architecture + systeminfo = self.parent_head.systeminfo + context_cls = self.arch2context_cls.get(systeminfo.ProcessorArchitecture, + None) + if context_cls is None: + raise ValueError("Unsupported architecture: %s" % systeminfo.pretty_processor_architecture) + + ctxt = context_cls.unpack(content, loc_desc.Rva.rva, self.parent_head) + fake_loc_descriptor = LocationDescriptor(DataSize=0, Rva=Rva(rva=0)) + return ctxt, offset + len(fake_loc_descriptor) + + _fields = [("ThreadId", "u32"), + ("SuspendCount", "u32"), + ("PriorityClass", "u32"), + ("Priority", "u32"), + ("Teb", "u64"), + ("Stack", "MemoryDescriptor"), + ("ThreadContext", (parse_context, + lambda thread, value: NotImplemented)), + ] + +class ThreadList(CStruct): + """MINIDUMP_THREAD_LIST + https://msdn.microsoft.com/en-us/library/ms680515(v=vs.85).aspx + """ + _fields = [("NumberOfThreads", 
"u32"), + ("Threads", "Thread", + lambda mlist: mlist.NumberOfThreads), + ] + + +class SystemInfo(CStruct): + """MINIDUMP_SYSTEM_INFO + https://msdn.microsoft.com/en-us/library/ms680396(v=vs.85).aspx + """ + _fields = [("ProcessorArchitecture", "u16"), + ("ProcessorLevel", "u16"), + ("ProcessorRevision", "u16"), + ("NumberOfProcessors", "u08"), + ("ProductType", "u08"), + ("MajorVersion", "u32"), + ("MinorVersion", "u32"), + ("BuildNumber", "u32"), + ("PlatformId", "u32"), + ("CSDVersionRva", "Rva"), + ("SuiteMask", "u16"), + ("Reserved2", "u16"), + ("VendorId", "u32", lambda sinfo: 3), + ("VersionInformation", "u32"), + ("FeatureInformation", "u32"), + ("AMDExtendedCpuFeatures", "u32"), + ] + + @property + def pretty_processor_architecture(self): + return processorArchitecture[self.ProcessorArchitecture] + diff --git a/miasm/loader/minidump_init.py b/miasm/loader/minidump_init.py new file mode 100644 index 00000000..dd37f730 --- /dev/null +++ b/miasm/loader/minidump_init.py @@ -0,0 +1,194 @@ +""" +High-level abstraction of Minidump file +""" +from builtins import range +import struct + +from miasm.loader.strpatchwork import StrPatchwork +from miasm.loader import minidump as mp + + +class MemorySegment(object): + """Stand for a segment in memory with additional information""" + + def __init__(self, offset, memory_desc, module=None, memory_info=None): + self.offset = offset + self.memory_desc = memory_desc + self.module = module + self.memory_info = memory_info + self.minidump = self.memory_desc.parent_head + + @property + def address(self): + return self.memory_desc.StartOfMemoryRange + + @property + def size(self): + if isinstance(self.memory_desc, mp.MemoryDescriptor64): + return self.memory_desc.DataSize + elif isinstance(self.memory_desc, mp.MemoryDescriptor): + return self.memory_desc.Memory.DataSize + raise TypeError + + @property + def name(self): + if not self.module: + return "" + name = mp.MinidumpString.unpack(self.minidump._content, + 
class Minidump(object):
    """Parsed view of a Minidump file.

    Known limitations:
     - only Minidump files smaller than 4GB are supported
       (LocationDescriptor handling)
     - only the streams related to memory mapping are implemented

    The official description is available on MSDN:
    https://msdn.microsoft.com/en-us/library/ms680378(VS.85).aspx
    """

    # This object is handed to the `mp` structure parsers as their third
    # argument; presumably they read these two attributes as the default
    # endianness / word size -- TODO confirm against miasm.loader.minidump.
    _sex = 0
    _wsize = 32

    def __init__(self, minidump_str):
        """Parse *minidump_str* (the raw file content) and index its memory."""
        self._content = StrPatchwork(minidump_str)

        # Specific streams, filled in by parse_content() when present
        self.modulelist = None
        self.memory64list = None
        self.memorylist = None
        self.memoryinfolist = None
        self.systeminfo = None

        # Get information
        self.streams = []
        self.threads = None
        self.parse_content()

        # Memory information
        self.memory = {}  # base address (virtual) -> Memory information
        self.build_memory()

    def parse_content(self):
        """Build structures corresponding to current content"""

        # Header
        self.minidumpHDR = mp.MinidumpHDR.unpack(self._content, 0, self)
        # 0x504d444d is b"MDMP" read as a little-endian 32-bit integer
        assert self.minidumpHDR.Magic == 0x504d444d

        # Streams: the directory is an array of fixed-size entries, so the
        # size of one (empty) entry gives the stride between them.
        base_offset = self.minidumpHDR.StreamDirectoryRva.rva
        empty_stream = mp.StreamDirectory(
            StreamType=0,
            Location=mp.LocationDescriptor(
                DataSize=0,
                Rva=mp.Rva(rva=0)
            )
        )
        streamdir_size = len(empty_stream)

        # stream type -> (attribute receiving the result, parser class)
        handlers = {
            mp.streamType.ModuleListStream: ("modulelist", mp.ModuleList),
            mp.streamType.MemoryListStream: ("memorylist", mp.MemoryList),
            mp.streamType.Memory64ListStream: ("memory64list",
                                               mp.Memory64List),
            mp.streamType.MemoryInfoListStream: ("memoryinfolist",
                                                 mp.MemoryInfoList),
            mp.streamType.SystemInfoStream: ("systeminfo", mp.SystemInfo),
        }

        for i in range(self.minidumpHDR.NumberOfStreams):
            stream_offset = base_offset + i * streamdir_size
            stream = mp.StreamDirectory.unpack(self._content, stream_offset,
                                               self)
            self.streams.append(stream)

            # Launch specific action depending on the stream
            handler = handlers.get(stream.StreamType)
            if handler is not None:
                attr_name, parser = handler
                setattr(self, attr_name,
                        parser.unpack(self._content,
                                      stream.Location.Rva.rva, self))

        # Some streams need the SystemInfo stream to work, hence this second
        # pass once every stream of the first pass has been seen.
        for stream in self.streams:
            if (self.systeminfo is not None and
                    stream.StreamType == mp.streamType.ThreadListStream):
                self.threads = mp.ThreadList.unpack(self._content,
                                                    stream.Location.Rva.rva,
                                                    self)

    def build_memory(self):
        """Build an easier to use memory view based on ModuleList and
        Memory64List streams"""

        modules = self.modulelist.Modules if self.modulelist else []
        meminfos = (self.memoryinfolist.MemoryInfos
                    if self.memoryinfolist else [])
        addr2module = {module.BaseOfImage: module for module in modules}
        addr2meminfo = {memory.BaseAddress: memory for memory in meminfos}

        mode64 = self.minidumpHDR.Flags & mp.minidumpType.MiniDumpWithFullMemory

        if mode64:
            # Memory64List: a single base offset, ranges are stored back to
            # back, so the running offset advances by each range's DataSize.
            offset = self.memory64list.BaseRva
            memranges = self.memory64list.MemoryRanges
        else:
            memranges = self.memorylist.MemoryRanges

        for memory in memranges:
            if not mode64:
                # MemoryList: every range carries its own file offset
                offset = memory.Memory.Rva.rva

            # Create a MemorySegment with augmented information
            base_address = memory.StartOfMemoryRange
            self.memory[base_address] = MemorySegment(
                offset, memory,
                addr2module.get(base_address, None),
                addr2meminfo.get(base_address, None)
            )

            if mode64:
                offset += memory.DataSize

        # Sanity check
        if mode64:
            assert all(addr in self.memory for addr in addr2module)

    def get(self, virt_start, virt_stop):
        """Return the content at the (virtual addresses)
        [virt_start:virt_stop]

        Returns b"" when no known memory segment contains @virt_start.
        Raises RuntimeError when the range runs past the end of the segment
        holding @virt_start (reads spanning several segments are not
        implemented).
        """

        # Find the corresponding memory segment: exact base-address hit
        # first, then any segment whose range contains virt_start.
        memory = self.memory.get(virt_start)
        if memory is not None:
            addr = virt_start
        else:
            for addr, memory in self.memory.items():
                if addr <= virt_start < addr + memory.size:
                    break
            else:
                return b""

        # Both bounds are expressed relatively to the segment base
        shift = virt_start - addr
        last = virt_stop - addr
        if last > memory.size:
            raise RuntimeError("Multi-page not implemented")

        return self._content[memory.offset + shift:memory.offset + last]
class Cstruct_Metaclass(type):
    """Metaclass of the CStruct classes.

    At class creation, every entry of `_fields` gets a property named after
    the field; the value itself lives in the attribute
    `<field name> + field_suffix`. Methods named `get_<field>`,
    `set_<field>` and `del_<field>` found in the class body are removed from
    it and used as the property accessors instead of the default ones.
    Every created class except the one named "CStruct" is registered in
    `all_cstructs` under its name, which is how sub-structure fields refer
    to it.
    """

    field_suffix = "_value"

    def __new__(cls, name, bases, dct):
        # A class body without `_fields` simply declares no new property
        for fields in dct.get('_fields', ()):
            fname = fields[0]
            if fname in ['parent', 'parent_head']:
                raise ValueError('field name will confuse internal structs',
                                 repr(fname))
            # `fname=fname` binds the current name: a plain closure would
            # make every property read the last field of the loop.
            default_get = lambda self, fname=fname: getattr(
                self, fname + self.__class__.field_suffix)
            default_set = lambda self, v, fname=fname: setattr(
                self, fname + self.__class__.field_suffix, v)
            dct[fname] = property(dct.pop("get_" + fname, default_get),
                                  dct.pop("set_" + fname, default_set),
                                  dct.pop("del_" + fname, None))

        o = super(Cstruct_Metaclass, cls).__new__(cls, name, bases, dct)
        if name != "CStruct":
            all_cstructs[name] = o
        return o

    def unpack_l(cls, s, off=0, parent_head=None, _sex=None, _wsize=None):
        """Parse an instance of `cls` from @s starting at offset @off.

        @parent_head: root object of the parsed hierarchy; when given and
            both @_sex and @_wsize are None, they are taken from it. When
            omitted, the new instance is its own parent head.
        Return a tuple (instance, number of bytes consumed).
        Raise RuntimeError("not enough data") when a single basic field does
        not fit in @s, and ValueError on an unknown field type or a "sz"
        field without terminating null byte.
        """
        if _sex is None and _wsize is None:
            # get sex and size from parent
            if parent_head is not None:
                _sex = parent_head._sex
                _wsize = parent_head._wsize
            else:
                _sex = 0
                _wsize = 32
        c = cls(_sex=_sex, _wsize=_wsize)
        if parent_head is None:
            parent_head = c
        c.parent_head = parent_head

        of1 = off
        # Defined up front so that a structure without fields consumes 0
        # bytes instead of failing on an unbound name.
        of2 = off
        for field in c._fields:
            cpt = None
            if len(field) == 2:
                fname, ffmt = field
            elif len(field) == 3:
                # third item: callable giving the element count from the
                # partially parsed instance
                fname, ffmt, cpt = field
            if ffmt in type2realtype or (isinstance(ffmt, str) and re.match(r'\d+s', ffmt)):
                # basic types
                fmt = real_fmt(ffmt, _wsize)
                size = struct.calcsize(fmt)
                if cpt:
                    value = []
                    # The count only depends on fields parsed before this
                    # one (the current value is set after the loop).
                    for _ in range(cpt(c)):
                        of2 = of1 + size
                        value.append(struct.unpack(c.sex + fmt, s[of1:of2])[0])
                        of1 = of2
                else:
                    of2 = of1 + size
                    # `of2 == len(s)` is valid: the field ends exactly at
                    # the end of the data.
                    if not 0 <= of1 <= of2 <= len(s):
                        raise RuntimeError("not enough data")
                    value = struct.unpack(c.sex + fmt, s[of1:of2])[0]
            elif ffmt == "sz":  # null terminated special case
                of2 = s.find(b'\x00', of1)
                if of2 == -1:
                    raise ValueError('no null char in string!')
                of2 += 1
                # the terminator is consumed but not part of the value
                value = s[of1:of2 - 1]
            elif ffmt in all_cstructs:
                of2 = of1
                # sub structures
                if cpt:
                    value = []
                    for _ in range(cpt(c)):
                        v, l = all_cstructs[ffmt].unpack_l(
                            s, of1, parent_head, _sex, _wsize)
                        v.parent = c
                        value.append(v)
                        of2 = of1 + l
                        of1 = of2
                else:
                    value, l = all_cstructs[ffmt].unpack_l(
                        s, of1, parent_head, _sex, _wsize)
                    value.parent = c
                    of2 = of1 + l
            elif isinstance(ffmt, tuple):
                # custom (parser, serializer) pair; the parser returns the
                # value and the offset following it
                f_get, f_set = ffmt
                value, of2 = f_get(c, s, of1)
            else:
                raise ValueError('unknown class', ffmt)
            of1 = of2
            setattr(c, fname + c.__class__.field_suffix, value)

        return c, of2 - off

    def unpack(cls, s, off=0, parent_head=None, _sex=None, _wsize=None):
        """Same as unpack_l(), but return only the parsed instance."""
        c, l = cls.unpack_l(s, off=off,
                            parent_head=parent_head, _sex=_sex, _wsize=_wsize)
        return c
basic types + fmt = real_fmt(ffmt, self._wsize) + if cpt == None: + if value == None: + o = struct.calcsize(fmt) * b"\x00" + else: + if isinstance(value, str): + value = value.encode() + o = struct.pack(self.sex + fmt, value) + else: + o = b"" + for v in value: + if value == None: + o += struct.calcsize(fmt) * b"\x00" + else: + o += struct.pack(self.sex + fmt, v) + + elif ffmt == "sz": # null terminated special case + o = value + b'\x00' + elif ffmt in all_cstructs: + # sub structures + if cpt == None: + o = bytes(value) + else: + o = b"" + for v in value: + o += bytes(v) + elif isinstance(ffmt, tuple): + f_get, f_set = ffmt + o = f_set(self, value) + + else: + raise ValueError('unknown class', ffmt) + out += o + + return out + + def __bytes__(self): + return self.pack() + + def __str__(self): + if PY3: + return repr(self) + return self.__bytes__() + + def __len__(self): + return len(self.pack()) + + def __repr__(self): + return "<%s=%s>" % (self.__class__.__name__, "/".join( + repr(getattr(self, x[0])) for x in self._fields) + ) + + def __getitem__(self, item): # to work with format strings + return getattr(self, item) diff --git a/miasm/loader/pe.py b/miasm/loader/pe.py new file mode 100644 index 00000000..96009ccf --- /dev/null +++ b/miasm/loader/pe.py @@ -0,0 +1,1668 @@ +#! 
def get_optehdr_num(nthdr):
    """Return the number of Optehdr entries to parse after @nthdr.

    The count announced by `numberofrvaandsizes` is used as long as that
    many entries fit in the optional header size declared by the COFF
    header (minus the already parsed `Opthdr` part). Otherwise a warning is
    logged and the default count 0x10 is returned.
    """
    default_count = 0x10
    entry_size = 8  # one Optehdr: two u32 fields (rva, size)
    parent = nthdr.parent_head
    numberofrva = nthdr.numberofrvaandsizes

    # Room left for the entries in the declared optional header
    available = parent.Coffhdr.sizeofoptionalheader - len(parent.Opthdr)
    if available < numberofrva * entry_size:
        # Report the value which is actually returned
        log.warning('Bad number of rva.. using default %d', default_count)
        return default_count
    return numberofrva
def align_sections(self, f_align=None, s_align=None):
    """Recompute the file offset and raw size of every section.

    Sections are laid out one after the other starting from the current
    offset of the first one; each offset is rounded up to a multiple of
    @f_align and each raw size is set to the length of the section data.

    @f_align: file alignment; defaults to the NThdr `filealignment`, with a
        minimum of 0x200
    @s_align: accepted for interface compatibility; not used by the layout
    Nothing is done when there is no section.
    """
    if f_align is None:
        f_align = max(0x200, self.parent_head.NThdr.filealignment)

    # No section: nothing to lay out (indexing self[0] would fail)
    if len(self) == 0:
        return

    addr = self[0].offset
    for section in self:
        # round up to the next multiple of f_align
        raw_off = f_align * ((addr + f_align - 1) // f_align)
        section.offset = raw_off
        section.rawsize = len(section.data)
        addr = raw_off + section.rawsize
def gete(self, raw, off):
    """Parse the import descriptors stored at offset @off of @raw.

    Return a tuple (list of ImpDesc_e, offset following the last parsed
    descriptor), or (None, @off) when @off is null. Each descriptor is
    augmented with:
     - dlldescname: the DescName found at its `name` address
     - originalfirstthunks / firstthunks: struct_array of Rva, or None when
       the corresponding address is null or outside of the image
     - impbynames: one item per thunk, either an ImportByName, an integer
       (thunk with its top bit set, bit removed) or 0 when the ImportByName
       could not be parsed
    Parsing stops at the first empty descriptor, at the end of the import
    directory or when leaving the image.
    """
    if not off:
        return None, off

    # Top bit of a pointer-sized thunk (0x80000000 / 0x8000000000000000):
    # thunks with this bit set are kept as integers instead of being
    # followed as ImportByName addresses.
    mask_ptr = 1 << (self.parent_head._wsize - 1)
    img_len = len(self.parent_head.img_rva)

    ofend = off + \
        self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_IMPORT].size
    out = []
    while off < ofend:
        if not 0 <= off < img_len:
            break
        imp, length = ImpDesc_e.unpack_l(raw, off)
        if (raw[off:off + length] == b'\x00' * length or
                imp.name == 0):
            # Special case: terminating descriptor
            break
        if not (imp.originalfirstthunk or imp.firstthunk):
            log.warning("no thunk!!")
            break

        out.append(imp)
        off += length
        imp.dlldescname = DescName.unpack(raw, imp.name, self.parent_head)

        imp.originalfirstthunks = None
        if imp.originalfirstthunk and imp.originalfirstthunk < img_len:
            imp.originalfirstthunks = struct_array(self, raw,
                                                   imp.originalfirstthunk,
                                                   Rva)
        imp.firstthunks = None
        if imp.firstthunk and imp.firstthunk < img_len:
            imp.firstthunks = struct_array(self, raw,
                                           imp.firstthunk,
                                           Rva)

        imp.impbynames = []
        # Prefer the original thunks; fall back on the first thunks
        if imp.originalfirstthunks is not None:
            tmp_thunk = imp.originalfirstthunks
        else:
            tmp_thunk = imp.firstthunks
        if tmp_thunk is None:
            # Both tables are null or outside of the image: keep the
            # descriptor, but there is nothing to resolve (and the thunks
            # of a previous descriptor must not be reused).
            log.warning("no usable thunk for import descriptor")
            continue

        for i in range(len(tmp_thunk)):
            thunk_rva = tmp_thunk[i].rva
            if thunk_rva & mask_ptr == 0:
                try:
                    entry = ImportByName.unpack(raw,
                                                thunk_rva,
                                                self.parent_head)
                except Exception:
                    # best effort: keep a placeholder so that indexes of
                    # impbynames still match the thunk indexes
                    log.warning(
                        'cannot import from add %s' % thunk_rva
                    )
                    entry = 0
                imp.impbynames.append(entry)
            else:
                imp.impbynames.append(thunk_rva & (mask_ptr - 1))
    return out, off
def get_dlldesc(self):
    """Return the imports as a list of (descriptor, functions) tuples.

    `descriptor` is a dict with the keys "name" (name held by the
    descriptor's dlldescname) and "firstthunk"; `functions` lists, for each
    item of impbynames, its name when it is an ImportByName and the item
    itself otherwise.
    """
    descriptors = []
    for impdesc in self.impdesc:
        header = {
            "name": impdesc.dlldescname.name,
            "firstthunk": impdesc.firstthunk,
        }
        functions = [
            imp.name if isinstance(imp, ImportByName) else imp
            for imp in impdesc.impbynames
        ]
        descriptors.append((header, functions))
    return descriptors
self.__class__.__name__] + for i, entry in enumerate(self.impdesc): + out = "%2d %-25s %s" % (i, repr(entry.dlldescname), repr(entry)) + rep.append(out) + for index, imp in enumerate(entry.impbynames): + out = " %2d %-16s" % (index, repr(imp)) + rep.append(out) + return "\n".join(rep) + + def add_dlldesc(self, new_dll): + rva_size = self.parent_head._wsize // 8 + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 + new_impdesc = [] + of1 = None + for import_descriptor, new_functions in new_dll: + if isinstance(import_descriptor.get("name"), str): + import_descriptor["name"] = import_descriptor["name"].encode() + new_functions = [ + funcname.encode() if isinstance(funcname, str) else funcname + for funcname in new_functions + ] + for attr in ["timestamp", "forwarderchain", "originalfirstthunk"]: + if attr not in import_descriptor: + import_descriptor[attr] = 0 + entry = ImpDesc_e(self.parent_head, **import_descriptor) + if entry.firstthunk != None: + of1 = entry.firstthunk + elif of1 == None: + raise RuntimeError("set fthunk") + else: + entry.firstthunk = of1 + entry.dlldescname = DescName(self.parent_head, name=entry.name) + entry.originalfirstthunk = 0 + entry.originalfirstthunks = struct_array(self, None, + None, + Rva) + entry.firstthunks = struct_array(self, None, + None, + Rva) + + impbynames = [] + for new_function in new_functions: + rva_ofirstt = Rva(self.parent_head) + if isinstance(new_function, int_types): + rva_ofirstt.rva = mask_ptr + new_function + ibn = new_function + elif isinstance(new_function, bytes): + rva_ofirstt.rva = True + ibn = ImportByName(self.parent_head) + ibn.name = new_function + ibn.hint = 0 + else: + raise RuntimeError('unknown func type %s' % new_function) + impbynames.append(ibn) + entry.originalfirstthunks.append(rva_ofirstt) + rva_func = Rva(self.parent_head) + if isinstance(ibn, ImportByName): + rva_func.rva = 0xDEADBEEF # default func addr + else: + # 
def get_funcvirt(self, dllname, funcname):
    """Return the virtual address of the thunk of @funcname in @dllname.

    This is the result of get_funcrva() converted with the parent head's
    rva2virt(); None is returned when get_funcrva() finds nothing.
    """
    rva = self.get_funcrva(dllname, funcname)
    # Identity test: a null rva is a value, only None means "not found"
    if rva is None:
        return None
    return self.parent_head.rva2virt(rva)
def build_content(self, raw):
    """Write the export directory into @raw.

    The export descriptor, the dll name, the function address table and,
    when their address is not null, the name and name-ordinal tables are
    stored at the offsets obtained from their rva; then every exported name
    is stored at its own rva. A warning is logged when the exported names
    are not in sorted order. Nothing is written when there is no export
    descriptor.
    """
    if self.expdesc is None:  # No Export
        return

    rva2off = self.parent_head.rva2off
    direxp = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_EXPORT]
    expdesc = self.expdesc

    raw[rva2off(direxp.rva)] = bytes(expdesc)
    raw[rva2off(expdesc.name)] = bytes(self.dlldescname)
    raw[rva2off(expdesc.addressoffunctions)] = bytes(self.f_address)
    if expdesc.addressofnames != 0:
        raw[rva2off(expdesc.addressofnames)] = bytes(self.f_names)
    if expdesc.addressofordinals != 0:
        raw[rva2off(expdesc.addressofordinals)] = bytes(self.f_nameordinals)
    for func in self.f_names:
        raw[rva2off(func.rva)] = bytes(func.name)

    # XXX BUG names must be alphanumeric ordered
    # Compare the raw names (not the DescName wrappers) with their sorted
    # version; names are normalized to bytes so they can be ordered.
    names = [force_bytes(func.name.name) for func in self.f_names]
    if names != sorted(names):
        log.warning("unsorted export names, may bug")
def get_funcrva(self, f_str):
    """Return the rva of the function exported under the name @f_str.

    The first entry of f_names whose name equals @f_str is used: the
    ordinal stored at the same index of f_nameordinals selects the entry of
    f_address whose rva is returned. None is returned when there is no
    export descriptor or no matching name.
    """
    if self.expdesc is None:
        return None

    index = next(
        (i for i, entry in enumerate(self.f_names)
         if entry.name.name == f_str),
        None
    )
    if index is None:
        return None

    # name index -> ordinal -> address table entry
    ordinal = self.f_nameordinals[index].ordinal
    return self.f_address[ordinal].rva
("boundiat", "u32"), + ("unloadiat", "u32"), + ("timestamp", "u32"), + ] + + +class DirDelay(CStruct): + _fields = [("delaydesc", (lambda c, raw, off:c.gete(raw, off), + lambda c, value:c.sete(value)))] + + def gete(self, raw, off): + if not off: + return None, off + + ofend = off + \ + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_DELAY_IMPORT].size + out = [] + while off < ofend: + if off >= len(raw): + log.warn('warning bad reloc offset') + break + + delaydesc, length = Delaydesc_e.unpack_l(raw, + off, + self.parent_head) + if raw[off:off+length] == b'\x00' * length: + # Special case + break + off += length + out.append(delaydesc) + + if self.parent_head._wsize == 32: + mask_ptr = 0x80000000 + elif self.parent_head._wsize == 64: + mask_ptr = 0x8000000000000000 + + parent = self.parent_head + for entry in out: + isfromva = (entry.attrs & 1) == 0 + if isfromva: + isfromva = lambda x: parent.virt2rva(x) + else: + isfromva = lambda x: x + entry.dlldescname = DescName.unpack(raw, isfromva(entry.name), + self.parent_head) + if entry.originalfirstthunk: + addr = isfromva(entry.originalfirstthunk) + if not 0 <= addr < len(raw): + log.warning("Bad delay") + break + entry.originalfirstthunks = struct_array(self, raw, + addr, + Rva) + else: + entry.originalfirstthunks + + if entry.firstthunk: + entry.firstthunks = struct_array(self, raw, + isfromva(entry.firstthunk), + Rva) + else: + entry.firstthunk = None + + entry.impbynames = [] + if entry.originalfirstthunk and self.parent_head.rva2off(isfromva(entry.originalfirstthunk)): + tmp_thunk = entry.originalfirstthunks + elif entry.firstthunk: + tmp_thunk = entry.firstthunks + else: + print(ValueError("no thunk in delay dir!! 
def sete(self, entries):
    """
    Serialize the delay import descriptors
    @entries: iterable of descriptors, each one convertible with bytes()
    Return the concatenated descriptors followed by one all-zero
    descriptor (4 * 8 bytes)
    """
    # The separator has to be bytes: joining bytes objects with a str
    # separator raises TypeError on Python 3
    descriptors = b"".join(bytes(entry) for entry in entries)
    return descriptors + b"\x00" * (4 * 8)  # DelayDesc_e
def build_content(self, raw):
    """
    Write the delay import directory and the tables it references in @raw
    @raw: output buffer, written at the offsets given by rva2off()
    Nothing is written when the optional header has no delay import entry
    or when that entry has a null RVA
    Raise RuntimeError for a descriptor which has no thunk table
    """
    head = self.parent_head
    optentries = head.NThdr.optentries
    # optentries[DIRECTORY_ENTRY_DELAY_IMPORT] only exists when the table
    # holds strictly more than DIRECTORY_ENTRY_DELAY_IMPORT entries
    if len(optentries) <= DIRECTORY_ENTRY_DELAY_IMPORT:
        return
    of1 = optentries[DIRECTORY_ENTRY_DELAY_IMPORT].rva
    if not of1:  # No Delay Import
        return
    raw[head.rva2off(of1)] = bytes(self)
    for entry in self.delaydesc:
        raw[head.rva2off(entry.name)] = bytes(entry.dlldescname)
        # The original first thunk table is used only if its RVA is set and
        # maps to a non null offset
        has_oft = (entry.originalfirstthunk and
                   head.rva2off(entry.originalfirstthunk))
        if has_oft:
            raw[head.rva2off(entry.originalfirstthunk)] = bytes(entry.originalfirstthunks)
        if entry.firstthunk:
            raw[head.rva2off(entry.firstthunk)] = bytes(entry.firstthunks)
        if has_oft:
            tmp_thunk = entry.originalfirstthunks
        elif entry.firstthunk:
            tmp_thunk = entry.firstthunks
        else:
            raise RuntimeError("No thunk!")
        # Imports by name are stored where their thunk points to; imports
        # by ordinal have no extra data
        for j, imp in enumerate(entry.impbynames):
            if isinstance(imp, ImportByName):
                raw[head.rva2off(tmp_thunk[j].rva)] = bytes(imp)
new_functions in new_dll: + if isinstance(import_descriptor.get("name"), str): + import_descriptor["name"] = import_descriptor["name"].encode() + new_functions = [ + funcname.encode() if isinstance(funcname, str) else funcname + for funcname in new_functions + ] + for attr in ["attrs", "name", "hmod", "firstthunk", "originalfirstthunk", "boundiat", "unloadiat", "timestamp"]: + if not attr in import_descriptor: + import_descriptor[attr] = 0 + entry = Delaydesc_e(self.parent_head, **import_descriptor) + # entry.cstr.__dict__.update(import_descriptor) + if entry.firstthunk != None: + of1 = entry.firstthunk + elif of1 == None: + raise RuntimeError("set fthunk") + else: + entry.firstthunk = of1 + entry.dlldescname = DescName(self.parent_head, name=entry.name) + entry.originalfirstthunk = 0 + entry.originalfirstthunks = struct_array(self, None, + None, + Rva) + entry.firstthunks = struct_array(self, None, + None, + Rva) + + impbynames = [] + for new_function in new_functions: + rva_ofirstt = Rva(self.parent_head) + if isinstance(new_function, int_types): + rva_ofirstt.rva = mask_ptr + new_function + ibn = None + elif isinstance(new_function, bytes): + rva_ofirstt.rva = True + ibn = ImportByName(self.parent_head) + ibn.name = new_function + ibn.hint = 0 + else: + raise RuntimeError('unknown func type %s' % new_function) + impbynames.append(ibn) + entry.originalfirstthunks.append(rva_ofirstt) + + rva_func = Rva(self.parent_head) + if ibn != None: + rva_func.rva = 0xDEADBEEF # default func addr + else: + # ord ?XXX? 
def get_funcrva(self, func):
    """
    Get the RVA of the first thunk slot bound to the delay import @func
    @func: import name (bytes) or ordinal (int)
    Return None when no descriptor provides @func
    Raise RuntimeError for a descriptor without thunk table and ValueError
    when @func is neither bytes nor int
    """
    head = self.parent_head
    # Slot size and ordinal mask follow the word size, like the mask_ptr /
    # rva_size computations of gete(), __len__() and set_rva(); any value
    # other than 64 keeps the former 32 bit constants
    if head._wsize == 64:
        rva_size = 8
        ordinal_mask = 0x7FFFFFFFFFFFFFFF
    else:
        rva_size = 4
        ordinal_mask = 0x7FFFFFFF
    for entry in self.delaydesc:
        # attrs bit 0 clear: descriptor values go through virt2rva()
        if (entry.attrs & 1) == 0:
            to_rva = head.virt2rva
        else:
            to_rva = lambda x: x
        if entry.originalfirstthunk and head.rva2off(to_rva(entry.originalfirstthunk)):
            tmp_thunk = entry.originalfirstthunks
        elif entry.firstthunk:
            tmp_thunk = entry.firstthunks
        else:
            raise RuntimeError("No thunk!")
        if isinstance(func, bytes):
            for j, imp in enumerate(entry.impbynames):
                if isinstance(imp, ImportByName) and func == imp.name:
                    return to_rva(entry.firstthunk) + j * rva_size
        elif isinstance(func, int_types):
            for j, imp in enumerate(entry.impbynames):
                if isinstance(imp, ImportByName):
                    continue
                if to_rva(tmp_thunk[j].rva & ordinal_mask) == func:
                    return to_rva(entry.firstthunk) + j * rva_size
        else:
            raise ValueError('unknown func tpye %r' % func)
    return None
def __bytes__(self):
    """
    Serialize the relocation directory
    Return, for each descriptor of self.reldesc, the descriptor bytes
    followed by the bytes of its `rels` table; b"" when there is no
    descriptor list
    """
    # Same guard as __len__() and __repr__(): a directory without
    # descriptors is empty, not an error
    if self.reldesc is None:
        return b""
    return b"".join(
        bytes(entry) + bytes(entry.rels)
        for entry in self.reldesc
    )
def add_reloc(self, rels, rtype=3, patchrel=True):
    """
    Append relocation blocks for the addresses of @rels
    @rels: RVAs to relocate (the caller's sequence is left untouched)
    @rtype: relocation type stored with every entry
    @patchrel: value stored in the `patchrel` attribute of each new block
    One block is created per 0x1000 bytes page; its entry table is padded
    with (0, 0) entries up to a multiple of 4 entries. The size of the
    base relocation directory entry is increased accordingly.
    """
    dirrel = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_BASERELOC]
    if not rels:
        return
    # Work on a sorted copy: the grouping loop below consumes its input
    pending = sorted(rels)
    bases = sorted({value & 0xFFFFF000 for value in pending})
    rels_by_base = defaultdict(list)
    # Walk addresses from the highest to the lowest, switching to the next
    # lower page base each time an address falls under the current one
    while pending:
        reloc_value = pending.pop()
        if reloc_value < bases[-1]:
            bases.pop()
        rels_by_base[bases[-1]].append(reloc_value)
    # gete() yields None when no relocation directory was parsed
    if self.reldesc is None:
        self.reldesc = []
    for o_init, page_rels in sorted(rels_by_base.items()):
        offsets = struct_array(self, None, None, Reloc, 0)
        for reloc_value in page_rels:
            if (reloc_value & 0xFFFFF000) != o_init:
                raise RuntimeError("relocs must be in same range")
            reloc = Reloc(self.parent_head)
            reloc.rel = (rtype, reloc_value - o_init)
            offsets.append(reloc)
        while len(offsets) & 3:
            reloc = Reloc(self.parent_head)
            reloc.rel = (0, 0)
            offsets.append(reloc)
        reldesc = Rel(self.parent_head)
        reldesc.rva = o_init
        # 2 bytes per entry plus the 8 bytes (rva, size) block header
        reldesc.size = (len(offsets) * 2 + 8)
        reldesc.rels = offsets
        reldesc.patchrel = patchrel
        self.reldesc.append(reldesc)
        dirrel.size += reldesc.size
+ return None, off + if off >= len(self.parent_head.img_rva): + log.warning('cannot parse resources, %X' % off) + return None, off + + off_orig = off + ofend = off + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size + + resdesc, length = ResDesc_e.unpack_l(raw, + off, + self.parent_head) + off += length + nbr = resdesc.numberofnamedentries + resdesc.numberofidentries + + out = [] + tmp_off = off + for _ in range(nbr): + if tmp_off >= ofend: + break + if tmp_off + length >= len(raw): + log.warn('warning bad resource offset') + break + try: + entry, length = ResEntry.unpack_l(raw, tmp_off, self.parent_head) + except RuntimeError: + log.warn('bad resentry') + return None, tmp_off + out.append(entry) + tmp_off += length + resdesc.resentries = struct_array(self, raw, + off, + ResEntry, + nbr) + dir_todo = {off_orig: resdesc} + dir_done = {} + while dir_todo: + off, my_dir = dir_todo.popitem() + dir_done[off] = my_dir + for entry in my_dir.resentries: + off = entry.offsettosubdir + if not off: + # data dir + off = entry.offsettodata + if not 0 <= off < len(raw): + log.warn('bad resrouce entry') + continue + data = ResDataEntry.unpack(raw, + off, + self.parent_head) + off = data.offsettodata + data.s = StrPatchwork(raw[off:off + data.size]) + entry.data = data + continue + # subdir + if off in dir_done: + log.warn('warning recusif subdir') + continue + if not 0 <= off < len(self.parent_head.img_rva): + log.warn('bad resrouce entry') + continue + subdir, length = ResDesc_e.unpack_l(raw, + off, + self.parent_head) + nbr = subdir.numberofnamedentries + subdir.numberofidentries + try: + subdir.resentries = struct_array(self, raw, + off + length, + ResEntry, + nbr) + except RuntimeError: + log.warn('bad resrouce entry') + continue + + entry.subdir = subdir + dir_todo[off] = entry.subdir + return resdesc, off + + def build_content(self, raw): + if self.resdesc is None: + return + of1 = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva + 
raw[self.parent_head.rva2off(of1)] = bytes(self.resdesc) + dir_todo = {self.parent_head.NThdr.optentries[ + DIRECTORY_ENTRY_RESOURCE].rva: self.resdesc} + dir_done = {} + while dir_todo: + of1, my_dir = dir_todo.popitem() + dir_done[of1] = my_dir + raw[self.parent_head.rva2off(of1)] = bytes(my_dir) + of1 += len(my_dir) + of_base = of1 + for entry in my_dir.resentries: + raw[of_base] = bytes(entry) + of_base += len(entry) + if entry.name_s: + raw[self.parent_head.rva2off(entry.name)] = bytes(entry.name_s) + of1 = entry.offsettosubdir + if not of1: + raw[self.parent_head.rva2off(entry.offsettodata)] = bytes(entry.data) + raw[self.parent_head.rva2off(entry.data.offsettodata)] = bytes(entry.data.s) + continue + dir_todo[of1] = entry.subdir + + def __len__(self): + length = 0 + if self.resdesc is None: + return length + dir_todo = [self.resdesc] + dir_done = [] + while dir_todo: + my_dir = dir_todo.pop() + if my_dir in dir_done: + raise ValueError('Recursive directory') + dir_done.append(my_dir) + length += len(my_dir) + length += len(my_dir.resentries) * 8 # ResEntry size + for entry in my_dir.resentries: + if not entry.offsettosubdir: + continue + if not entry.subdir in dir_todo: + dir_todo.append(entry.subdir) + else: + raise RuntimeError("recursive dir") + + dir_todo = dir_done + while dir_todo: + my_dir = dir_todo.pop() + for entry in my_dir.resentries: + if entry.name_s: + length += len(entry.name_s) + of1 = entry.offsettosubdir + if not of1: + length += 4 * 4 # WResDataEntry size + # XXX because rva may be even rounded + length += 1 + length += entry.data.size + continue + return length + + def set_rva(self, rva, size=None): + if self.resdesc is None: + return + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva = rva + if not size: + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = len(self) + else: + self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].size = size + dir_todo = [self.resdesc] + dir_done = {} + while 
dir_todo: + my_dir = dir_todo.pop() + dir_done[rva] = my_dir + rva += len(my_dir) + rva += len(my_dir.resentries) * 8 # ResEntry size + for entry in my_dir.resentries: + if not entry.offsettosubdir: + continue + if not entry.subdir in dir_todo: + dir_todo.append(entry.subdir) + else: + raise RuntimeError("recursive dir") + dir_todo = dir_done + dir_inv = dict([(x[1], x[0]) for x in list(dir_todo.items())]) + while dir_todo: + rva_tmp, my_dir = dir_todo.popitem() + for entry in my_dir.resentries: + if entry.name_s: + entry.name = rva + rva += len(entry.name_s) + of1 = entry.offsettosubdir + if not of1: + entry.offsettodata = rva + rva += 4 * 4 # ResDataEntry size + # XXX menu rsrc must be even aligned? + if rva % 2: + rva += 1 + entry.data.offsettodata = rva + rva += entry.data.size + continue + entry.offsettosubdir = dir_inv[entry.subdir] + + def __repr__(self): + rep = ["<%s>" % (self.__class__.__name__)] + if self.resdesc is None: + return "\n".join(rep) + dir_todo = [self.resdesc] + resources = [] + index = -1 + while dir_todo: + entry = dir_todo.pop(0) + if isinstance(entry, int): + index += entry + elif isinstance(entry, ResDesc_e): + # resources.append((index, repr(entry))) + dir_todo = [1] + entry.resentries.l + [-1] + dir_todo + elif isinstance(entry, ResEntry): + if entry.offsettosubdir: + resources.append((index, repr(entry))) + dir_todo = [entry.subdir] + dir_todo + else: + resources.append((index, repr(entry))) + else: + raise RuntimeError("zarb") + for i, resource in resources: + rep.append(' ' * 4 * i + resource) + return "\n".join(rep) + + +class Ordinal(CStruct): + _fields = [("ordinal", "u16"), + ] + + +class ResDesc_e(CStruct): + _fields = [("characteristics", "u32"), + ("timestamp", "u32"), + ("majorv", "u16"), + ("minorv", "u16"), + ("numberofnamedentries", "u16"), + ("numberofidentries", "u16") + ] + + +class SUnicode(CStruct): + _fields = [("length", "u16"), + ("value", (lambda c, raw, off:c.gets(raw, off), + lambda c, value:c.sets(value))) + 
def gets(self, raw, off):
    """
    Read the string payload from @raw
    @raw: source buffer
    @off: offset of the first payload byte
    Return the payload (self.length * 2 bytes) and the offset of the byte
    which follows it
    """
    # self.length counts 2 byte units: the returned offset has to advance
    # by the number of bytes read, not by the unit count
    size = self.length * 2
    return raw[off:off + size], off + size
+ if offsettodata_o & 0x80000000: + self.offsettosubdir = offsettodata + return offsettodata, off + 4 + + def _set_offset(self, offset): + rva = self.parent_head.NThdr.optentries[DIRECTORY_ENTRY_RESOURCE].rva + offsettodata = offset - rva + if self.offsettosubdir: + offsettodata = (self.offsettosubdir - rva) + 0x80000000 + return struct.pack('I', offsettodata) + + def __repr__(self): + if self.name_s: + nameid = "%s" % repr(self.name_s) + else: + if self.name in RT: # and not self.offsettosubdir: + nameid = "ID %s" % RT[self.name] + else: + nameid = "ID %d" % self.name + if self.offsettosubdir: + offsettodata = "subdir: %x" % self.offsettosubdir + else: + offsettodata = "data: %x" % self.offsettodata + return "<%s %s>" % (nameid, offsettodata) + + +class ResDataEntry(CStruct): + _fields = [("offsettodata", "u32"), + ("size", "u32"), + ("codepage", "u32"), + ("reserved", "u32"), + ] + + +class Symb(CStruct): + _fields = [("name", "8s"), + ("res1", "u32"), + ("res2", "u32"), + ("res3", "u16")] + + +DIRECTORY_ENTRY_EXPORT = 0 +DIRECTORY_ENTRY_IMPORT = 1 +DIRECTORY_ENTRY_RESOURCE = 2 +DIRECTORY_ENTRY_EXCEPTION = 3 +DIRECTORY_ENTRY_SECURITY = 4 +DIRECTORY_ENTRY_BASERELOC = 5 +DIRECTORY_ENTRY_DEBUG = 6 +DIRECTORY_ENTRY_COPYRIGHT = 7 +DIRECTORY_ENTRY_GLOBALPTR = 8 +DIRECTORY_ENTRY_TLS = 9 +DIRECTORY_ENTRY_LOAD_CONFIG = 10 +DIRECTORY_ENTRY_BOUND_IMPORT = 11 +DIRECTORY_ENTRY_IAT = 12 +DIRECTORY_ENTRY_DELAY_IMPORT = 13 +DIRECTORY_ENTRY_COM_DESCRIPTOR = 14 +DIRECTORY_ENTRY_RESERVED = 15 + + +RT_CURSOR = 1 +RT_BITMAP = 2 +RT_ICON = 3 +RT_MENU = 4 +RT_DIALOG = 5 +RT_STRING = 6 +RT_FONTDIR = 7 +RT_FONT = 8 +RT_ACCELERATOR = 9 +RT_RCDATA = 10 +RT_MESSAGETABLE = 11 +RT_GROUP_CURSOR = 12 +RT_GROUP_ICON = 14 +RT_VERSION = 16 +RT_DLGINCLUDE = 17 +RT_PLUGPLAY = 19 +RT_VXD = 20 +RT_ANICURSOR = 21 +RT_ANIICON = 22 +RT_HTML = 23 +RT_MANIFEST = 24 + + +RT = { + RT_CURSOR: "RT_CURSOR", + RT_BITMAP: "RT_BITMAP", + RT_ICON: "RT_ICON", + RT_MENU: "RT_MENU", + RT_DIALOG: "RT_DIALOG", + 
class ContentManager(object):
    """
    Descriptor giving access to the `_content` attribute of its owner
    Every assignment or deletion is first reported to owner.resize() with
    the old and the new content length
    """

    @staticmethod
    def _size(content):
        """Length of @content, a missing (None) content counting for 0"""
        return 0 if content is None else len(content)

    def __get__(self, owner, _):
        # None when the owner has no `_content` yet, or when the descriptor
        # is read on the class itself (owner is then None)
        return getattr(owner, '_content', None)

    def __set__(self, owner, new_content):
        old_content = getattr(owner, '_content', None)
        owner.resize(self._size(old_content), self._size(new_content))
        owner._content = new_content

    def __delete__(self, owner):
        # Deleting resets the content to None: _size() keeps the resize()
        # notification working, as None has no len()
        self.__set__(owner, None)
def get(self, virt_start, virt_stop=None):
    """
    Get data in VIRTUAL view starting at @virt_start, stopping at @virt_stop
    @virt_start: virt start address
    @virt_stop: virt stop address (optional)
    The addresses are converted with parent.virt2rva() and the request is
    forwarded to parent.rva.get()
    """
    parent = self.parent
    rva_start = parent.virt2rva(virt_start)
    # Identity test: only a missing stop address is left unconverted
    rva_stop = None if virt_stop is None else parent.virt2rva(virt_stop)
    return parent.rva.get(rva_start, rva_stop)
def compute_crc(raw, olds):
    """
    Compute a checksum over @raw
    @raw: bytes to sum up
    @olds: value subtracted from the sum before it is folded
    The data is summed as native unsigned 32 bit words, the sum is folded
    down to 16 bits, then the trailing odd byte (if any) and the number of
    32 bit words are added.
    """
    out = 0
    data = raw[:]
    odd_length = len(raw) % 2
    if odd_length:
        # Slice instead of index: indexing bytes gives an int on Python 3
        # and struct.unpack() only accepts a bytes-like object
        end = struct.unpack('B', data[-1:])[0]
        data = data[:-1]
    if (len(raw) & ~0x1) % 4:
        # Consume one leading 16 bit word so the rest is a whole number of
        # 32 bit words
        out += struct.unpack('H', data[:2])[0]
        data = data[2:]
    words = array.array('I', data)
    out = sum(words, out)
    out -= olds
    while out > 0xFFFFFFFF:
        out = (out >> 32) + (out & 0xFFFFFFFF)
    while out > 0xFFFF:
        out = (out & 0xFFFF) + ((out >> 16) & 0xFFFF)
    if odd_length:
        out += end
    out += len(words)
    return out
self.SHList = pe.SHList(self) + self.SHList.shlist = [] + + self.NThdr.sizeofheaders = 0x1000 + + self.DirImport = pe.DirImport(self) + self.DirExport = pe.DirExport(self) + self.DirDelay = pe.DirDelay(self) + self.DirReloc = pe.DirReloc(self) + self.DirRes = pe.DirRes(self) + + self.Doshdr.magic = 0x5a4d + self.Doshdr.lfanew = 0xe0 + + self.NTsig.signature = 0x4550 + if wsize == 32: + self.Opthdr.magic = 0x10b + elif wsize == 64: + self.Opthdr.magic = 0x20b + else: + raise ValueError('unknown pe size %r' % wsize) + self.Opthdr.majorlinkerversion = 0x7 + self.Opthdr.minorlinkerversion = 0x0 + self.NThdr.filealignment = 0x1000 + self.NThdr.sectionalignment = 0x1000 + self.NThdr.majoroperatingsystemversion = 0x5 + self.NThdr.minoroperatingsystemversion = 0x1 + self.NThdr.MajorImageVersion = 0x5 + self.NThdr.MinorImageVersion = 0x1 + self.NThdr.majorsubsystemversion = 0x4 + self.NThdr.minorsubsystemversion = 0x0 + self.NThdr.subsystem = 0x3 + if wsize == 32: + self.NThdr.dllcharacteristics = 0x8000 + else: + self.NThdr.dllcharacteristics = 0x8000 + + # for createthread + self.NThdr.sizeofstackreserve = 0x200000 + self.NThdr.sizeofstackcommit = 0x1000 + self.NThdr.sizeofheapreserve = 0x100000 + self.NThdr.sizeofheapcommit = 0x1000 + + self.NThdr.ImageBase = 0x400000 + self.NThdr.sizeofheaders = 0x1000 + self.NThdr.numberofrvaandsizes = 0x10 + + self.NTsig.signature = 0x4550 + if wsize == 32: + self.Coffhdr.machine = 0x14c + elif wsize == 64: + self.Coffhdr.machine = 0x8664 + else: + raise ValueError('unknown pe size %r' % wsize) + if wsize == 32: + self.Coffhdr.characteristics = 0x10f + self.Coffhdr.sizeofoptionalheader = 0xe0 + else: + self.Coffhdr.characteristics = 0x22 # 0x2f + self.Coffhdr.sizeofoptionalheader = 0xf0 + + else: + self._content = StrPatchwork(pestr) + self.loadfrommem = loadfrommem + self.parse_content(parse_resources=parse_resources, + parse_delay=parse_delay, + parse_reloc=parse_reloc) + + def isPE(self): + if self.NTsig is None: + return False + 
return self.NTsig.signature == 0x4550 + + def parse_content(self, + parse_resources=True, + parse_delay=True, + parse_reloc=True): + off = 0 + self._sex = 0 + self._wsize = 32 + self.Doshdr = pe.Doshdr.unpack(self.content, off, self) + off = self.Doshdr.lfanew + if off > len(self.content): + log.warn('ntsig after eof!') + self.NTsig = None + return + self.NTsig = pe.NTsig.unpack(self.content, + off, self) + self.DirImport = None + self.DirExport = None + self.DirDelay = None + self.DirReloc = None + self.DirRes = None + + if self.NTsig.signature != 0x4550: + log.warn('not a valid pe!') + return + off += len(self.NTsig) + self.Coffhdr, length = pe.Coffhdr.unpack_l(self.content, + off, + self) + + off += length + self._wsize = ord(self.content[off+1]) * 32 + + if self._wsize == 32: + Opthdr = pe.Opthdr32 + else: + Opthdr = pe.Opthdr64 + + if len(self.content) < 0x200: + # Fix for very little PE + self.content += (0x200 - len(self.content)) * b'\x00' + + self.Opthdr, length = Opthdr.unpack_l(self.content, off, self) + self.NThdr = pe.NThdr.unpack(self.content, off + length, self) + self.img_rva[0] = self.content[:self.NThdr.sizeofheaders] + off += self.Coffhdr.sizeofoptionalheader + self.SHList = pe.SHList.unpack(self.content, off, self) + + # load section data + filealignment = self.NThdr.filealignment + sectionalignment = self.NThdr.sectionalignment + for section in self.SHList.shlist: + virt_size = (section.size // sectionalignment + 1) * sectionalignment + if self.loadfrommem: + section.offset = section.addr + if self.NThdr.sectionalignment > 0x1000: + raw_off = 0x200 * (section.offset // 0x200) + else: + raw_off = section.offset + if raw_off != section.offset: + log.warn('unaligned raw section (%x %x)!', raw_off, section.offset) + section.data = StrPatchwork() + + if section.rawsize == 0: + rounded_size = 0 + else: + if section.rawsize % filealignment: + rs = (section.rawsize // filealignment + 1) * filealignment + else: + rs = section.rawsize + rounded_size = rs 
+ if rounded_size > virt_size: + rounded_size = min(rounded_size, section.size) + data = self.content[raw_off:raw_off + rounded_size] + section.data = data + # Pad data to page size 0x1000 + length = len(data) + data += b"\x00" * ((((length + 0xfff)) & 0xFFFFF000) - length) + self.img_rva[section.addr] = data + # Fix img_rva + self.img_rva = self.img_rva + + try: + self.DirImport = pe.DirImport.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_IMPORT].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirImport, skipping') + self.DirImport = pe.DirImport(self) + + try: + self.DirExport = pe.DirExport.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_EXPORT].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirExport, skipping') + self.DirExport = pe.DirExport(self) + + if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_DELAY_IMPORT: + self.DirDelay = pe.DirDelay(self) + if parse_delay: + try: + self.DirDelay = pe.DirDelay.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_DELAY_IMPORT].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirDelay, skipping') + if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_BASERELOC: + self.DirReloc = pe.DirReloc(self) + if parse_reloc: + try: + self.DirReloc = pe.DirReloc.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_BASERELOC].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirReloc, skipping') + if len(self.NThdr.optentries) > pe.DIRECTORY_ENTRY_RESOURCE: + self.DirRes = pe.DirRes(self) + if parse_resources: + self.DirRes = pe.DirRes(self) + try: + self.DirRes = pe.DirRes.unpack(self.img_rva, + self.NThdr.optentries[ + pe.DIRECTORY_ENTRY_RESOURCE].rva, + self) + except pe.InvalidOffset: + log.warning('cannot parse DirRes, skipping') + + def resize(self, old, new): + pass + + def __getitem__(self, item): + return self.content[item] + + def __setitem__(self, item, data): + 
def getsectionbyrva(self, rva):
    """Return the section whose RVA range contains *rva*, or ``None``.

    The end of each section is rounded up to the next multiple of
    ``NThdr.sectionalignment`` before the comparison.  ``None`` is also
    returned when no section list has been loaded.
    """
    if self.SHList is None:
        return None
    # TODO CHECK: some binaries have import rva outside section, but
    # addresses seem to be rounded
    # The alignment mask is the same for every section: compute it once.
    mask = self.NThdr.sectionalignment - 1
    for section in self.SHList.shlist:
        if section.addr <= rva < (section.addr + section.size + mask) & ~mask:
            return section
    return None
def build_content(self):
    """Serialize the PE (headers, section table, section data and data
    directories) and return the resulting image as ``bytes``.

    Side effect: ``self.NThdr.sizeofimage`` is recomputed from the last
    section, rounded up to the section alignment.
    """
    content = StrPatchwork()
    content[0] = bytes(self.Doshdr)

    for section in self.SHList.shlist:
        content[section.offset:section.offset + section.rawsize] = bytes(section.data)

    # fix image size
    align_mask = self.NThdr.sectionalignment - 1
    section_last = self.SHList.shlist[-1]
    self.NThdr.sizeofimage = (
        section_last.addr + section_last.size + align_mask
    ) & ~align_mask

    off = self.Doshdr.lfanew
    content[off] = bytes(self.NTsig)
    off += len(self.NTsig)
    content[off] = bytes(self.Coffhdr)
    off += len(self.Coffhdr)
    # Offset of the optional header; reused below to patch the checksum.
    opthdr_off = off
    off_shlist = off + self.Coffhdr.sizeofoptionalheader
    content[off] = bytes(self.Opthdr)
    off += len(self.Opthdr)
    content[off] = bytes(self.NThdr)
    off += len(self.NThdr)
    # content[off] = bytes(self.Optehdr)

    off = off_shlist
    # Serialize the section table once; its length is needed for every
    # overlap check below.
    shlist_raw = bytes(self.SHList)
    content[off] = shlist_raw

    headers_end = off + len(shlist_raw)
    for section in self.SHList:
        if headers_end > section.offset:
            log.warning("section offset overlap pe hdr 0x%x 0x%x",
                        headers_end, section.offset)

    # parse_content leaves a directory as None when the optional header
    # has no entry for it (or parsing stopped early): skip those.
    for directory in (self.DirImport, self.DirExport, self.DirDelay,
                      self.DirReloc, self.DirRes):
        if directory is not None:
            directory.build_content(content)

    if opthdr_off % 4:
        log.warning("non aligned coffhdr, bad crc calculation")
    crcs = compute_crc(bytes(content), self.NThdr.CheckSum)
    # The checksum is written 64 bytes into the optional header.  PE fields
    # are little-endian, so the byte order is stated explicitly instead of
    # relying on the host's native order.
    content[opthdr_off + 64] = struct.pack('<I', crcs)
    return bytes(content)
# Pick the array <-> bytes conversion by probing the array API itself, so the
# helpers below do not depend on an interpreter-version flag.
if hasattr(array, "frombytes"):

    def array_frombytes(arr, value):
        """Append the raw bytes *value* to the array *arr*."""
        return arr.frombytes(value)

    def array_tobytes(arr):
        """Return the content of the array *arr* as bytes."""
        return arr.tobytes()


else:

    def array_frombytes(arr, value):
        """Append the raw bytes *value* to the array *arr*."""
        return arr.fromstring(value)

    def array_tobytes(arr):
        """Return the content of the array *arr* as bytes."""
        return arr.tostring()


class StrPatchwork(object):
    """Growable byte buffer that can be read and patched at any offset.

    Reading past the end yields ``paddingbyte``; writing past the end first
    grows the buffer with ``paddingbyte``.
    """

    def __init__(self, s=b"", paddingbyte=b"\x00"):
        s_raw = bytes(s)
        val = array("B")
        array_frombytes(val, s_raw)
        self.s = val
        # cache s to avoid rebuilding str after each find;
        # None means the cache is stale and must be rebuilt.
        self.s_cache = s_raw
        self.paddingbyte = paddingbyte

    def __bytes__(self):
        return array_tobytes(self.s)

    def __str__(self):
        # Where str is bytes (Python 2) str() is the raw content; otherwise
        # fall back to the repr.
        if str is bytes:
            return self.__bytes__()
        return repr(self)

    def __getitem__(self, item):
        """Return the byte(s) at *item* (an index or a slice) as bytes.

        A slice whose stop lies past the end is padded with
        ``paddingbyte``; an index at or past the end returns
        ``paddingbyte``.
        """
        s = self.s
        if isinstance(item, slice):
            end = item.stop
            l = len(s)
            if (end is not None and l < end) and end != maxsize:
                # XXX hack [x:] give 2GB limit
                # This is inefficient but avoids complicated maths if step is
                # not 1: pad a copy, the stored buffer is left untouched.
                s = s[:]

                tmp = array("B")
                array_frombytes(tmp, self.paddingbyte * (end - l))
                s.extend(tmp)
            return array_tobytes(s[item])

        # Valid indexes stop at len(s) - 1, so index == len(s) is already
        # out of range and must yield padding too.
        if item >= len(s):
            return self.paddingbyte
        return struct.pack("B", s[item])

    def __setitem__(self, item, val):
        """Write *val* at *item* (an index or a slice), growing the buffer
        with ``paddingbyte`` when the write ends past the current end.

        ``val`` of ``None`` is ignored.
        """
        if val is None:
            return
        val_array = array("B")
        array_frombytes(val_array, bytes(val))
        if not isinstance(item, slice):
            item = slice(item, item + len(val_array))
        end = item.stop
        l = len(self.s)
        # An open-ended slice has stop None: nothing to pad, and comparing
        # an int with None is an error on Python 3.
        if end is not None and l < end:
            tmp = array("B")
            array_frombytes(tmp, self.paddingbyte * (end - l))
            self.s.extend(tmp)
        self.s[item] = val_array
        self.s_cache = None

    def __repr__(self):
        return "<Patchwork %r>" % array_tobytes(self.s)

    def __len__(self):
        return len(self.s)

    def __contains__(self, val):
        return val in bytes(self)

    def __iadd__(self, other):
        tmp = array("B")
        array_frombytes(tmp, bytes(other))
        self.s.extend(tmp)
        # The buffer changed: the find()/rfind() cache is stale.
        self.s_cache = None
        return self

    def find(self, pattern, start=0, end=None):
        """Return the lowest offset of *pattern* in [start, end), or -1."""
        if self.s_cache is None:
            self.s_cache = array_tobytes(self.s)
        return self.s_cache.find(pattern, start, end)

    def rfind(self, pattern, start=0, end=None):
        """Return the highest offset of *pattern* in [start, end), or -1."""
        if self.s_cache is None:
            self.s_cache = array_tobytes(self.s)
        return self.s_cache.rfind(pattern, start, end)
pck32 diff --git a/setup.py b/setup.py index 3aaeaf27..81f8e4a2 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def buil_all(): "miasm/analysis", "miasm/os_dep", "miasm/os_dep/linux", - "miasm/elfesteem", + "miasm/loader", "miasm/jitter", "miasm/jitter/arch", "miasm/jitter/loader", diff --git a/test/analysis/dse.py b/test/analysis/dse.py index 21225f54..7d5998f1 100644 --- a/test/analysis/dse.py +++ b/test/analysis/dse.py @@ -3,7 +3,7 @@ from pdb import pm from future.utils import viewitems -from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.loader.strpatchwork import StrPatchwork from miasm.core import parse_asm from miasm.expression.expression import ExprCompose, ExprOp, ExprInt, ExprId from miasm.core.asmblock import asm_resolve_final diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index 9d193b47..fe59f0d8 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -7,7 +7,7 @@ from miasm.arch.aarch64.arch import mn_aarch64, base_expr, variable from miasm.core import parse_asm from miasm.expression.expression import * from miasm.core import asmblock -from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.loader.strpatchwork import StrPatchwork from miasm.analysis.machine import Machine from miasm.jitter.csts import * diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index dd95c3af..2dcaf6fc 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -7,7 +7,7 @@ from miasm.arch.mips32.arch import mn_mips32 from miasm.core import parse_asm from miasm.expression.expression import * from miasm.core import asmblock -from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.loader.strpatchwork import StrPatchwork from miasm.analysis.machine import Machine from miasm.jitter.csts import * diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index cf87ac93..6e7c55e2 100644 --- 
a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -9,7 +9,7 @@ from miasm.arch.x86.arch import mn_x86, base_expr, variable from miasm.core import parse_asm from miasm.expression.expression import * from miasm.core import asmblock -from miasm.elfesteem.strpatchwork import StrPatchwork +from miasm.loader.strpatchwork import StrPatchwork from miasm.analysis.machine import Machine from miasm.jitter.csts import * -- cgit 1.4.1