diff options
68 files changed, 3613 insertions, 1531 deletions
diff --git a/.travis.yml b/.travis.yml index c3e8b7f7..2a28f290 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,18 +5,18 @@ python: - "2.7" addons: apt: - sources: ['llvm-toolchain-trusty-5.0', 'ubuntu-toolchain-r-test'] + sources: ['llvm-toolchain-trusty-6.0', 'ubuntu-toolchain-r-test'] packages: - make - gcc - python-virtualenv - unzip - - llvm-5.0 - - llvm-5.0-dev + - llvm-6.0 + - llvm-6.0-dev - g++-5 before_script: - "cd .." -- "export LLVM_CONFIG=$(which llvm-config-5.0)" +- "export LLVM_CONFIG=$(which llvm-config-6.0)" - "export CXX=$(which g++-5)" # make virtual env - "python /usr/lib/python2.7/dist-packages/virtualenv.py virtualenv;" diff --git a/README.md b/README.md index 8b1c371e..51890054 100644 --- a/README.md +++ b/README.md @@ -108,8 +108,8 @@ Add instruction to the pool: Print current pool: ``` ->>> for lbl, b in ira.blocs.items(): -... print b +>>> for lbl, irblock in ira.blocks.items(): +... print irblock ... loc_0000000000000000:0x00000000 @@ -119,19 +119,19 @@ loc_0000000000000000:0x00000000 ``` Working with IR, for instance by getting side effects: ``` ->>> from miasm2.expression.expression import get_rw ->>> for lbl, b in ira.blocs.items(): -... for irs in b.irs: -... o_r, o_w = get_rw(irs) -... print 'read: ', [str(x) for x in o_r] -... print 'written:', [str(x) for x in o_w] -... print +>>> for lbl, irblock in ira.blocks.iteritems(): +... for assignblk in irblock: +... rw = assignblk.get_rw() +... for dst, reads in rw.iteritems(): +... print 'read: ', [str(x) for x in reads] +... print 'written:', dst +... print ... read: ['R8', 'R0'] -written: ['R2'] +written: R2 read: ['loc_0000000000000004:0x00000004'] -written: ['IRDst'] +written: IRDst ``` Emulation @@ -165,9 +165,9 @@ Disassembling the shellcode at address `0`: >>> from miasm2.analysis.machine import Machine >>> machine = Machine('x86_32') >>> mdis = machine.dis_engine(c.bin_stream) ->>> blocs = mdis.dis_multiblock(0) ->>> for b in blocs: -... print b +>>> blocks = mdis.dis_multiblock(0) +>>> for block in blocks: +... print block ... loc_0000000000000000:0x00000000 LEA ECX, DWORD PTR [ECX+0x4] @@ -198,7 +198,8 @@ Initializing the Jit engine with a stack: Add the shellcode in an arbitrary memory location: ``` >>> run_addr = 0x40000000 ->>> myjit.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, s) +>>> from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE +>>> jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, s) ``` Create a sentinelle to catch the return of the shellcode: @@ -268,22 +269,22 @@ Initializing the IR pool: ``` >>> ira = machine.ira() ->>> for b in blocs: -... ira.add_bloc(b) +>>> for block in blocks: +... ira.add_block(block) ... ``` Initializing the engine with default symbolic values: ``` ->>> from miasm2.ir.symbexec import symbexec ->>> sb = symbexec(ira, machine.mn.regs.regs_init) +>>> from miasm2.ir.symbexec import SymbolicExecutionEngine +>>> sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) ``` Launching the execution: ``` ->>> symbolic_pc = sb.emul_ir_blocs(ira, 0) +>>> symbolic_pc = sb.run_at(0) >>> print symbolic_pc ((ECX_init+0x4)[0:8]+0xFF)?(0xB,0x10) ``` @@ -291,8 +292,8 @@ Launching the execution: Same, with step logs (only changes are displayed): ``` ->>> sb = symbexec(ira, machine.mn.regs.regs_init) ->>> symbolic_pc = sb.emul_ir_blocs(ira, 0, step=True) +>>> sb = SymbolicExecutionEngine(ira, machine.mn.regs.regs_init) +>>> symbolic_pc = sb.run_at(0, step=True) ________________________________________________________________________________ ECX (ECX_init+0x4) ________________________________________________________________________________ @@ -326,7 +327,7 @@ Retry execution with a concrete ECX. Here, the symbolic / concolic execution rea ``` >>> from miasm2.expression.expression import ExprInt32 >>> sb.symbols[machine.mn.regs.ECX] = ExprInt32(-3) ->>> symbolic_pc = sb.emul_ir_blocs(ira, 0, step=True) +>>> symbolic_pc = sb.run_at(0, step=True) ________________________________________________________________________________ ECX 0x1 ________________________________________________________________________________ diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index bacb65fb..0c08a8a3 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -64,7 +64,10 @@ else: with open(args.source) as fstream: source = fstream.read() -blocks, symbol_pool = parse_asm.parse_txt(machine.mn, attrib, source) + +symbol_pool = asmblock.AsmSymbolPool() + +blocks, symbol_pool = parse_asm.parse_txt(machine.mn, attrib, source, symbol_pool) # Fix shellcode addrs symbol_pool.set_offset(symbol_pool.getby_name("main"), addr_main) diff --git a/example/disasm/single_instr.py b/example/disasm/single_instr.py index 0e29dcee..59b81de7 100644 --- a/example/disasm/single_instr.py +++ b/example/disasm/single_instr.py @@ -1,7 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.arch.x86.regs import EDX +from miasm2.core.asmblock import AsmSymbolPool -l = mn_x86.fromstring('MOV EAX, EBX', 32) +symbol_pool = AsmSymbolPool() +l = mn_x86.fromstring('MOV EAX, EBX', symbol_pool, 32) print "instruction:", l print "arg:", l.args[0] x = mn_x86.asm(l) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index b4a0773b..9e3b5caf 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -1,6 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.expression.expression import get_rw from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.core.asmblock import AsmSymbolPool + +symbol_pool = AsmSymbolPool() print """ @@ -11,17 +14,18 @@ Get read/written registers for a given instruction arch = mn_x86 ir_arch = ir_a_x86_32() -l = arch.fromstring('LODSB', 32) +l = arch.fromstring('LODSB', symbol_pool, 32) l.offset, l.l = 0, 15 ir_arch.add_instr(l) print '*' * 80 -for lbl, irblock in ir_arch.blocks.items(): +for lbl, irblock in ir_arch.blocks.iteritems(): print irblock for assignblk in irblock: - o_r, o_w = get_rw(assignblk) - print 'read: ', [str(x) for x in o_r] - print 'written:', [str(x) for x in o_w] - print + rw = assignblk.get_rw() + for dst, reads in rw.iteritems(): + print 'read: ', [str(x) for x in reads] + print 'written:', dst + print open('graph_instr.dot', 'w').write(ir_arch.graph.dot()) diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 44b73043..201d9f26 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -6,7 +6,6 @@ from pdb import pm from miasm2.analysis.machine import Machine from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ get_expr_ids, ExprAff -from miasm2.arch.x86.arch import ParseAst from miasm2.core.bin_stream import bin_stream_str from miasm2.core import asmblock from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block @@ -50,7 +49,6 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): symbexec.dump(mems=False) assert addr is not None - if isinstance(addr, ExprCond): # Create 2 states, each including complementary conditions cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)} @@ -67,15 +65,15 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): addr_b = int(addr_b.arg) states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + cond_group_a.items()))) states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + cond_group_b.items()))) + elif addr == ret_addr: + print 'Return address reached' + continue elif isinstance(addr, ExprInt): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif asmblock.expr_is_label(addr): addr = addr.name states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) - elif addr == ret_addr: - print 'Return address reached' - continue else: raise ValueError("Unsupported destination") @@ -92,32 +90,6 @@ if __name__ == '__main__': symbols_init = dict(machine.mn.regs.regs_init) - # config parser for 32 bit - reg_and_id = dict(machine.mn.regs.all_regs_ids_byname) - - def my_ast_int2expr(name): - return ExprInt(name, 32) - - # Modifify parser to avoid label creation in PUSH argc - def my_ast_id2expr(string_parsed): - if string_parsed in reg_and_id: - return reg_and_id[string_parsed] - return ExprId(string_parsed, size=32) - - my_var_parser = ParseAst(my_ast_id2expr, my_ast_int2expr) - machine.base_expr.setParseAction(my_var_parser) - - argc = ExprId('argc', 32) - argv = ExprId('argv', 32) - ret_addr = ExprId('ret_addr', 32) - reg_and_id[argc.name] = argc - reg_and_id[argv.name] = argv - reg_and_id[ret_addr.name] = ret_addr - - my_symbols = [argc, argv, ret_addr] - my_symbols = dict([(x.name, x) for x in my_symbols]) - my_symbols.update(machine.mn.regs.all_regs_ids_byname) - ir_arch = machine.ir(mdis.symbol_pool) symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) @@ -126,7 +98,17 @@ if __name__ == '__main__': PUSH argv PUSH argc PUSH ret_addr - ''') + ''', + symbol_pool=mdis.symbol_pool) + + + argc_lbl = symbol_pool.getby_name('argc') + argv_lbl = symbol_pool.getby_name('argv') + ret_addr_lbl = symbol_pool.getby_name('ret_addr') + + argc = ExprId(argc_lbl, 32) + argv = ExprId(argv_lbl, 32) + ret_addr = ExprId(ret_addr_lbl, 32) b = list(blocks)[0] diff --git a/example/ida/ctype_propagation.py b/example/ida/ctype_propagation.py index b2c7d5ab..9b9c2e95 100644 --- a/example/ida/ctype_propagation.py +++ b/example/ida/ctype_propagation.py @@ -1,6 +1,3 @@ -import os -import tempfile - import ida_kernwin import idc import ida_funcs @@ -8,17 +5,14 @@ import ida_funcs from miasm2.core.bin_stream_ida import bin_stream_ida from miasm2.expression import expression as m2_expr from miasm2.expression.simplifications import expr_simp -from miasm2.analysis.depgraph import DependencyGraph from miasm2.ir.ir import IRBlock, AssignBlock from miasm2.arch.x86.ctype import CTypeAMD64_unk, CTypeX86_unk from miasm2.arch.msp430.ctype import CTypeMSP430_unk -from miasm2.expression.expression import ExprId from miasm2.core.objc import CTypesManagerNotPacked, ExprToAccessC, CHandler from miasm2.core.ctypesmngr import CAstTypes -from miasm2.expression.expression import ExprMem, ExprId, ExprInt, ExprOp, ExprAff +from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprAff from miasm2.ir.symbexec_types import SymbExecCType from miasm2.expression.parser import str_to_expr -from miasm2.ir.symbexec import SymbolicExecutionEngine, SymbolicState from miasm2.analysis.cst_propag import add_state, propagate_cst_expr from utils import guess_machine @@ -32,27 +26,96 @@ class TypePropagationForm(ida_kernwin.Form): default_types_info = r"""ExprId("RDX", 64): char *""" archs = ["AMD64_unk", "X86_32_unk", "msp430_unk"] + func = ida_funcs.get_func(idc.ScreenEA()) + func_addr = func.startEA + + start_addr = idc.SelStart() + if start_addr == idc.BADADDR: + start_addr = idc.ScreenEA() + end_addr = idc.SelEnd() + ida_kernwin.Form.__init__(self, r"""BUTTON YES* Launch BUTTON CANCEL NONE -Dependency Graph Settings -<##Header file :{headerFile}> -<Architecture/complator:{arch}> -<Types informations:{strTypesInfo}> +Type Propagation Settings + +{FormChangeCb} +Analysis scope: +<Whole function:{rFunction}> +<From an address to the end of function:{rAddr}> +<Between two addresses:{r2Addr}>{cScope}> + +<Target function:{functionAddr}> +<Start address :{startAddr}> +<End address :{endAddr}> + +<Architecture/compilator :{arch}> + +<##Header file :{headerFile}> +<Use a file for type informations:{rTypeFile}>{cTypeFile}> +<##Types informations :{typeFile}> +<Types informations :{strTypesInfo}> + <Unalias stack:{rUnaliasStack}>{cUnalias}> """, { - 'headerFile': ida_kernwin.Form.FileInput(swidth=20, open=True), + 'FormChangeCb': ida_kernwin.Form.FormChangeCb(self.OnFormChange), + 'cScope': ida_kernwin.Form.RadGroupControl( + ("rFunction", "rAddr", "r2Addr")), + 'functionAddr': ida_kernwin.Form.NumericInput( + tp=ida_kernwin.Form.FT_RAWHEX, + value=func_addr), + 'startAddr': ida_kernwin.Form.NumericInput( + tp=ida_kernwin.Form.FT_RAWHEX, + value=start_addr), + 'endAddr': ida_kernwin.Form.NumericInput( + tp=ida_kernwin.Form.FT_RAWHEX, + value=end_addr), 'arch': ida_kernwin.Form.DropdownListControl( items=archs, readonly=False, selval=archs[0]), + 'headerFile': ida_kernwin.Form.FileInput(swidth=20, open=True), + 'cTypeFile': ida_kernwin.Form.ChkGroupControl(("rTypeFile",)), + 'typeFile': ida_kernwin.Form.FileInput(swidth=20, open=True), 'strTypesInfo': ida_kernwin.Form.MultiLineTextControl(text=default_types_info, flags=ida_kernwin.Form.MultiLineTextControl.TXTF_FIXEDFONT), 'cUnalias': ida_kernwin.Form.ChkGroupControl(("rUnaliasStack",)), }) form, args = self.Compile() form.rUnaliasStack.checked = True - + form.rTypeFile.checked = True + + def OnFormChange(self, fid): + if fid == -1: # INIT + self.EnableField(self.functionAddr, True) + self.EnableField(self.startAddr, False) + self.EnableField(self.endAddr, False) + self.EnableField(self.strTypesInfo, False) + self.EnableField(self.typeFile, True) + elif fid == self.cTypeFile.id: + if self.GetControlValue(self.cTypeFile) == 0: + self.EnableField(self.strTypesInfo, True) + self.EnableField(self.typeFile, False) + elif self.GetControlValue(self.cTypeFile) == 1: + self.EnableField(self.strTypesInfo, False) + self.EnableField(self.typeFile, True) + elif fid == self.cScope.id: + # "Whole function" scope + if self.GetControlValue(self.cScope) == 0: + self.EnableField(self.functionAddr, True) + self.EnableField(self.startAddr, False) + self.EnableField(self.endAddr, False) + # "From an address" scope + elif self.GetControlValue(self.cScope) == 1: + self.EnableField(self.functionAddr, False) + self.EnableField(self.startAddr, True) + self.EnableField(self.endAddr, False) + # "Between two addresses" scope + elif self.GetControlValue(self.cScope) == 2: + self.EnableField(self.functionAddr, False) + self.EnableField(self.startAddr, True) + self.EnableField(self.endAddr, True) + return 1 def get_types_mngr(headerFile, arch): text = open(headerFile).read() @@ -193,21 +256,25 @@ def analyse_function(): ir_arch = iraCallStackFixer(mdis.symbol_pool) - # Get the current function - func = ida_funcs.get_func(idc.ScreenEA()) - addr = func.startEA - blocks = mdis.dis_multiblock(addr) - # Generate IR - for block in blocks: - ir_arch.add_block(block) - - # Get settings settings = TypePropagationForm(ir_arch) ret = settings.Execute() if not ret: return + if settings.cScope.value == 0: + addr = settings.functionAddr.value + else: + addr = settings.startAddr.value + if settings.cScope.value == 2: + end = settings.endAddr + mdis.dont_dis = [end] + + blocks = mdis.dis_multiblock(addr) + # Generate IR + for block in blocks: + ir_arch.add_block(block) + cst_propag_link = {} if settings.cUnalias.value: init_infos = {ir_arch.sp: ir_arch.arch.regs.regs_init[ir_arch.sp] } @@ -217,7 +284,14 @@ def analyse_function(): types_mngr = get_types_mngr(settings.headerFile.value, settings.arch.value) mychandler = MyCHandler(types_mngr, {}) infos_types = {} - for line in settings.strTypesInfo.value.split('\n'): + infos_types_raw = [] + + if settings.cTypeFile.value: + infos_types_raw = open(settings.typeFile.value).read().split('\n') + else: + infos_types_raw = settings.strTypesInfo.value.split('\n') + + for line in infos_types_raw: if not line: continue expr_str, ctype_str = line.split(':') diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 7e303aac..6dfa1f7d 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -121,6 +121,10 @@ def build_graph(verbose=False, simplify=False): # print hex(ad), repr(name) if name is None: continue + if (mdis.symbol_pool.getby_offset(addr) or + mdis.symbol_pool.getby_name(name)): + # Symbol alias + continue mdis.symbol_pool.add_label(name, addr) if verbose: diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index e5637ad8..22a48fc6 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -2,18 +2,22 @@ from miasm2.core.bin_stream import bin_stream_str from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.analysis.machine import Machine +from miasm2.core.asmblock import AsmSymbolPool START_ADDR = 0 machine = Machine("x86_32") +symbol_pool = AsmSymbolPool() + + # Assemble and disassemble a MOV ## Ensure that attributes 'offset' and 'l' are set -line = machine.mn.fromstring("MOV EAX, EBX", 32) +line = machine.mn.fromstring("MOV EAX, EBX", symbol_pool, 32) asm = machine.mn.asm(line)[0] # Get back block bin_stream = bin_stream_str(asm) -mdis = machine.dis_engine(bin_stream) +mdis = machine.dis_engine(bin_stream, symbol_pool=symbol_pool) mdis.lines_wd = 1 asm_block = mdis.dis_block(START_ADDR) diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 130d45a4..bceb0bd8 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -16,10 +16,6 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): current_nodes = {} for i, assignblk in enumerate(irb): dict_rw = assignblk.get_rw(cst_read=True) - if irb.label.offset == 0x13: - print irb.label - print i - print dict_rw current_nodes.update(out_nodes) # gen mem arg to mem node links diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 0a5445a6..427a8bd0 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -192,7 +192,25 @@ class DSEEngine(object): def attach(self, emulator): """Attach the DSE to @emulator - @emulator: jitload (or API equivalent) instance""" + @emulator: jitload (or API equivalent) instance + + To attach *DURING A BREAKPOINT*, one may consider using the following snippet: + + def breakpoint(self, jitter): + ... + dse.attach(jitter) + dse.update... + ... + # Additionnal call to the exec callback is necessary, as breakpoints are + # honored AFTER exec callback + jitter.exec_cb(jitter) + + return True + + Without it, one may encounteer a DriftException error due to a + "desynchronization" between jitter and dse states. Indeed, on 'handle' + call, the jitter must be one instruction AFTER the dse. + """ self.jitter = emulator self.prepare() diff --git a/miasm2/analysis/machine.py b/miasm2/analysis/machine.py index f361b412..73b90aef 100644 --- a/miasm2/analysis/machine.py +++ b/miasm2/analysis/machine.py @@ -79,6 +79,11 @@ class Machine(object): mn = arch.mn_armt from miasm2.arch.arm.ira import ir_a_armtl as ira from miasm2.arch.arm.sem import ir_armtl as ir + try: + from miasm2.arch.arm import jit + jitter = jit.jitter_armtl + except ImportError: + pass elif machine_name == "armtb": from miasm2.arch.arm.disasm import dis_armtb as dis_engine from miasm2.arch.arm import arch diff --git a/miasm2/analysis/sandbox.py b/miasm2/analysis/sandbox.py index 8ffdb4ac..f29d1776 100644 --- a/miasm2/analysis/sandbox.py +++ b/miasm2/analysis/sandbox.py @@ -415,6 +415,34 @@ class Arch_armb(Arch): self.jitter.init_stack() +class Arch_armtl(Arch): + _ARCH_ = "armtl" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_armtl, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + +class Arch_mips32b(Arch): + _ARCH_ = "mips32b" + STACK_SIZE = 0x100000 + STACK_BASE = 0x100000 + + def __init__(self, **kwargs): + super(Arch_mips32b, self).__init__(**kwargs) + + # Init stack + self.jitter.stack_size = self.STACK_SIZE + self.jitter.stack_base = self.STACK_BASE + self.jitter.init_stack() + + class Arch_aarch64l(Arch): _ARCH_ = "aarch64l" STACK_SIZE = 0x100000 @@ -647,6 +675,9 @@ class Sandbox_Linux_arml(Sandbox, Arch_arml, OS_Linux): self.jitter.vm.set_mem(ptr, arg) argv_ptrs.append(ptr) + # Round SP to 4 + self.jitter.cpu.SP = self.jitter.cpu.SP & ~ 3 + self.jitter.push_uint32_t(0) for ptr in reversed(env_ptrs): self.jitter.push_uint32_t(ptr) @@ -675,6 +706,110 @@ class Sandbox_Linux_arml(Sandbox, Arch_arml, OS_Linux): super(self.__class__, self).call(prepare_cb, addr, *args) +class Sandbox_Linux_armtl(Sandbox, Arch_armtl, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env += "\x00" + self.jitter.cpu.SP -= len(env) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg += "\x00" + self.jitter.cpu.SP -= len(arg) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + # Round SP to 4 + self.jitter.cpu.SP = self.jitter.cpu.SP & ~ 3 + + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(len(self.argv)) + + self.jitter.cpu.LR = self.CALL_FINISH_ADDR + + # Set the runtime guard + self.jitter.add_breakpoint(self.CALL_FINISH_ADDR, self.__class__.code_sentinelle) + + def run(self, addr=None): + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_armtl, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + + +class Sandbox_Linux_mips32b(Sandbox, Arch_mips32b, OS_Linux): + + def __init__(self, *args, **kwargs): + Sandbox.__init__(self, *args, **kwargs) + + # Pre-stack some arguments + if self.options.mimic_env: + env_ptrs = [] + for env in self.envp: + env += "\x00" + self.jitter.cpu.SP -= len(env) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, env) + env_ptrs.append(ptr) + argv_ptrs = [] + for arg in self.argv: + arg += "\x00" + self.jitter.cpu.SP -= len(arg) + ptr = self.jitter.cpu.SP + self.jitter.vm.set_mem(ptr, arg) + argv_ptrs.append(ptr) + + self.jitter.push_uint32_t(0) + for ptr in reversed(env_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(0) + for ptr in reversed(argv_ptrs): + self.jitter.push_uint32_t(ptr) + self.jitter.push_uint32_t(len(self.argv)) + + self.jitter.cpu.RA = 0x1337beef + + # Set the runtime guard + self.jitter.add_breakpoint(0x1337beef, self.__class__.code_sentinelle) + + def run(self, addr=None): + if addr is None and self.options.address is None: + addr = self.entry_point + super(Sandbox_Linux_mips32b, self).run(addr) + + def call(self, addr, *args, **kwargs): + """ + Direct call of the function at @addr, with arguments @args + @addr: address of the target function + @args: arguments + """ + prepare_cb = kwargs.pop('prepare_cb', self.jitter.func_prepare_systemv) + super(self.__class__, self).call(prepare_cb, addr, *args) + + class Sandbox_Linux_armb_str(Sandbox, Arch_armb, OS_Linux_str): def __init__(self, *args, **kwargs): diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index 2712e60a..94be74fd 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -1,6 +1,7 @@ #-*- coding:utf-8 -*- import logging +import math from pyparsing import * from miasm2.expression import expression as m2_expr from miasm2.core.cpu import * @@ -11,7 +12,7 @@ from regs import * from miasm2.core.asmblock import AsmLabel from miasm2.core.cpu import log as log_cpu from miasm2.expression.modint import uint32, uint64, mod_size2int -import math +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp log = logging.getLogger("aarch64dis") console_handler = logging.StreamHandler() @@ -68,43 +69,6 @@ replace_regs = { } -variable, operand, base_expr = gen_base_expr() -_, _, base_expr32 = gen_base_expr() -_, _, base_expr64 = gen_base_expr() - - -def ast_id2expr32(t): - if t in mn_aarch64.regs.all_regs_ids_byname: - t = mn_aarch64.regs.all_regs_ids_byname[t] - if not t.size == 32: - raise StopIteration - return t - -def ast_int2expr32(a): - return m2_expr.ExprInt(a, 32) - - -def ast_id2expr64(t): - if t in mn_aarch64.regs.all_regs_ids_byname: - t = mn_aarch64.regs.all_regs_ids_byname[t] - if not t.size == 64: - raise StopIteration - return t - - -def ast_int2expr64(a): - return m2_expr.ExprInt(a, 64) - -my_var_parser32 = ParseAst(ast_id2expr32, ast_int2expr32, default_size=32) -my_var_parser64 = ParseAst(ast_id2expr64, ast_int2expr64, default_size=64) - -base_expr32.setParseAction(my_var_parser32) -base_expr64.setParseAction(my_var_parser64) - - -int_or_expr = base_expr -int_or_expr32 = base_expr32 -int_or_expr64 = base_expr64 shift2expr_dct = {'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', 'ROR': '>>>'} @@ -112,121 +76,90 @@ shift_str = ["LSL", "LSR", "ASR", "ROR"] shift_expr = ["<<", ">>", "a>>", '>>>'] -def op_shift2expr(s, l, t): - return shift2expr_dct[t[0]] - +def cb_shift(tokens): + return shift2expr_dct[tokens[0]] -def op_shift2expr_slice_at(s, l, t): - return "slice_at" +def cb_extreg(tokens): + return tokens[0] -def op_ext_reg(s, l, t): - return t[0] - -def shift2expr(t): - if len(t) == 1: - return t[0] - elif len(t) == 3: - if t[0].size == 32 and isinstance(t[2], m2_expr.ExprInt): - t[2] = m2_expr.ExprInt(int(t[2]), 32) - return m2_expr.ExprOp(t[1], t[0], t[2]) +def cb_shiftreg(tokens): + if len(tokens) == 1: + return tokens[0] + elif len(tokens) == 3: + result = AstOp(tokens[1], tokens[0], tokens[2]) + return result else: raise ValueError('bad string') -def shift2expr_sc(t): - if len(t) == 1: - return t[0] - elif len(t) == 3: - if t[0].size == 32 and isinstance(t[2], m2_expr.ExprInt): - t[2] = m2_expr.ExprInt(t[2].arg, 32) - if t[1] != '<<': +def cb_shift_sc(tokens): + if len(tokens) == 1: + return tokens[0] + elif len(tokens) == 3: + if tokens[1] != '<<': raise ValueError('bad op') - return m2_expr.ExprOp("slice_at", t[0], t[2]) + result = AstOp("slice_at", tokens[0], tokens[2]) + return result else: raise ValueError('bad string') -def extend2expr(t): - if len(t) == 1: - return t[0] - return m2_expr.ExprOp(t[1], t[0], t[2]) - +def cb_extend(tokens): + if len(tokens) == 1: + return tokens[0] + result = AstOp(tokens[1], tokens[0], tokens[2]) + return result -def shiftext2expr(t): - if len(t) == 1: - return t[0] - else: - return m2_expr.ExprOp(t[1], t[0], t[2]) -def expr_deref_pc_off(t): - t = t[0] - if len(t) == 2 and t[0] == "PC": - return ExprOp('preinc', PC, t[1]) +def cb_deref_pc_off(tokens): + if len(tokens) == 2 and tokens[0] == "PC": + result = AstOp('preinc', AstId(ExprId('PC', 64)), tokens[1]) + return result raise ValueError('bad string') -def expr_deref_pc_nooff(t): - t = t[0] - if len(t) == 1 and t[0] == "PC": - return ExprOp('preinc', PC) +def cb_deref_pc_nooff(tokens): + if len(tokens) == 1 and tokens[0] == "PC": + result = AstOp('preinc', AstId(PC)) + return result raise ValueError('bad string') -all_binaryop_lsl_t = literal_list( - shift_str).setParseAction(op_shift2expr) +all_binaryop_lsl_t = literal_list(shift_str).setParseAction(cb_shift) -all_binaryop_shiftleft_t = literal_list( - ["LSL"]).setParseAction(op_shift2expr) +all_binaryop_shiftleft_t = literal_list(["LSL"]).setParseAction(cb_shift) extend_lst = ['UXTB', 'UXTH', 'UXTW', 'UXTX', 'SXTB', 'SXTH', 'SXTW', 'SXTX'] extend2_lst = ['UXTW', 'LSL', 'SXTW', 'SXTX'] -all_extend_t = literal_list(extend_lst).setParseAction(op_ext_reg) -all_extend2_t = literal_list(extend2_lst).setParseAction(op_ext_reg) +all_extend_t = literal_list(extend_lst).setParseAction(cb_extreg) +all_extend2_t = literal_list(extend2_lst).setParseAction(cb_extreg) -gpregz32_extend = (gpregsz32_info.parser + Optional( - all_extend_t + int_or_expr32)).setParseAction(extend2expr) -gpregz64_extend = (gpregsz64_info.parser + Optional( - all_extend_t + int_or_expr64)).setParseAction(extend2expr) +gpregz32_extend = (gpregsz32_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) +gpregz64_extend = (gpregsz64_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) -shift32_off = (gpregsz32_info.parser + Optional(all_binaryop_lsl_t + - (gpregs32_info.parser | int_or_expr))).setParseAction(shift2expr) -shift64_off = (gpregsz64_info.parser + Optional(all_binaryop_lsl_t + - (gpregs64_info.parser | int_or_expr))).setParseAction(shift2expr) +shift32_off = (gpregsz32_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) +shift64_off = (gpregsz64_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) -shiftimm_imm_sc = (int_or_expr + all_binaryop_shiftleft_t + - int_or_expr).setParseAction(shift2expr_sc) +shiftimm_imm_sc = (base_expr + all_binaryop_shiftleft_t + base_expr).setParseAction(cb_shift_sc) -shiftimm_off_sc = shiftimm_imm_sc | int_or_expr +shiftimm_off_sc = shiftimm_imm_sc | base_expr shift_off = (shift32_off | shift64_off) reg_ext_off = (gpregz32_extend | gpregz64_extend) gpregs_32_64 = (gpregs32_info.parser | gpregs64_info.parser) -gpregsz_32_64 = (gpregsz32_info.parser | gpregsz64_info.parser | int_or_expr) +gpregsz_32_64 = (gpregsz32_info.parser | gpregsz64_info.parser | base_expr) -simdregs = (simd08_info.parser | simd16_info.parser | - simd32_info.parser | simd64_info.parser) +simdregs = (simd08_info.parser | simd16_info.parser | simd32_info.parser | simd64_info.parser) simdregs_h = (simd32_info.parser | simd64_info.parser | simd128_info.parser) -simdregs_h_zero = (simd32_info.parser | - simd64_info.parser | simd128_info.parser | int_or_expr) - - -def ast_id2expr(t): - if not t in mn_aarch64.regs.all_regs_ids_byname: - r = m2_expr.ExprId(AsmLabel(t), 32) - else: - r = mn_aarch64.regs.all_regs_ids_byname[t] - return r - +simdregs_h_zero = (simd32_info.parser | simd64_info.parser | simd128_info.parser | base_expr) -def ast_int2expr(a): - return m2_expr.ExprInt(a, 64) gpregs_info = {32: gpregs32_info, 64: gpregs64_info} @@ -241,72 +174,66 @@ simds_info = {8: simd08_info, 128: simd128_info} -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - -def deref2expr_nooff(t): - t = t[0] +def cb_deref_nooff(t): # XXX default - return m2_expr.ExprOp("preinc", t[0], m2_expr.ExprInt(0, 64)) + result = AstOp("preinc", t[0], AstInt(0)) + return result -def deref2expr_post(t): - t = t[0] - if t[1] in regs_module.all_regs_ids: +def cb_deref_post(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): raise StopIteration - return m2_expr.ExprOp("postinc", t[0], t[1]) + result = AstOp("postinc", *t) + return result -def deref2expr_pre(t): - t = t[0] - if t[1] in regs_module.all_regs_ids: +def cb_deref_pre(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): raise StopIteration - return m2_expr.ExprOp("preinc", t[0], t[1]) + result = AstOp("preinc", *t) + return result -def deref2expr_pre_wb(t): - t = t[0] - if t[1] in regs_module.all_regs_ids: +def cb_deref_pre_wb(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): raise StopIteration - return m2_expr.ExprOp("preinc_wb", t[0], t[1]) + result = AstOp("preinc_wb", *t) + return result + LBRACK = Suppress("[") RBRACK = Suppress("]") COMMA = Suppress(",") POSTINC = Suppress("!") -deref_nooff = Group( - LBRACK + gpregs64_info.parser + RBRACK).setParseAction(deref2expr_nooff) -deref_off_post = Group(LBRACK + gpregs64_info.parser + - RBRACK + COMMA + int_or_expr64).setParseAction(deref2expr_post) -deref_off_pre = Group(LBRACK + gpregs64_info.parser + - COMMA + int_or_expr64 + RBRACK).setParseAction(deref2expr_pre) -deref_off_pre_wb = Group(LBRACK + gpregs64_info.parser + COMMA + - int_or_expr64 + RBRACK + POSTINC).setParseAction(deref2expr_pre_wb) +deref_nooff = (LBRACK + gpregs64_info.parser + RBRACK).setParseAction(cb_deref_nooff) +deref_off_post = (LBRACK + gpregs64_info.parser + RBRACK + COMMA + base_expr).setParseAction(cb_deref_post) +deref_off_pre = (LBRACK + gpregs64_info.parser + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pre) +deref_off_pre_wb = (LBRACK + gpregs64_info.parser + COMMA + base_expr + RBRACK + POSTINC).setParseAction(cb_deref_pre_wb) deref = (deref_off_post | deref_off_pre_wb | deref_off_pre | deref_nooff) -deref_pc_off = Group(LBRACK + Literal("PC") + COMMA + int_or_expr64 + RBRACK).setParseAction(expr_deref_pc_off) -deref_pc_nooff = Group(LBRACK + Literal("PC") + RBRACK).setParseAction(expr_deref_pc_nooff) +deref_pc_off = (LBRACK + Literal("PC") + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pc_off) +deref_pc_nooff = (LBRACK + Literal("PC") + RBRACK).setParseAction(cb_deref_pc_nooff) deref_pc = (deref_pc_off | deref_pc_nooff) -def deref_ext2op(t): - t = t[0] +def cb_deref_ext2op(t): if len(t) == 4: - expr = set_imm_to_size(t[1].size, t[3]) - if expr is None: - raise StopIteration - return m2_expr.ExprOp('segm', t[0], m2_expr.ExprOp(t[2], t[1], expr)) + result = AstOp('segm', t[0], AstOp(t[2], t[1], t[3])) + return result elif len(t) == 2: - return m2_expr.ExprOp('segm', t[0], t[1]) + result = AstOp('segm', *t) + return result raise ValueError("cad deref") -deref_ext2 = Group(LBRACK + gpregs_32_64 + COMMA + gpregs_32_64 + - Optional(all_extend2_t + int_or_expr) + RBRACK).setParseAction(deref_ext2op) +deref_ext2 = (LBRACK + gpregs_32_64 + COMMA + gpregs_32_64 + Optional(all_extend2_t + base_expr) + RBRACK).setParseAction(cb_deref_ext2op) class additional_info: @@ -335,6 +262,47 @@ conds_expr, _, conds_info = gen_regs(CONDS, {}) conds_inv_expr, _, conds_inv_info = gen_regs(CONDS_INV, {}) + +class aarch64_arg(m_arg): + def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + if size_hint is None: + size_hint = 64 + if fixed_size is None: + fixed_size = set() + if isinstance(value, AstId): + if value.name in all_regs_ids_byname: + reg = all_regs_ids_byname[value.name] + fixed_size.add(reg.size) + return reg + if isinstance(value.name, ExprId): + fixed_size.add(value.name.size) + return value.name + label = symbol_pool.getby_name_create(value.name) + return ExprId(label, size_hint) + if isinstance(value, AstInt): + assert size_hint is not None + return ExprInt(value.value, size_hint) + if isinstance(value, AstOp): + if value.op == "segm": + segm = self.asm_ast_to_expr(value.args[0], symbol_pool) + ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + return ExprOp('segm', segm, ptr) + + args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + if len(fixed_size) == 0: + # No fixed size + pass + elif len(fixed_size) == 1: + # One fixed size, regen all + size = list(fixed_size)[0] + args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + else: + raise ValueError("Size conflict") + + return ExprOp(value.op, *args) + return None + + class instruction_aarch64(instruction): __slots__ = [] delayslot = 0 @@ -567,7 +535,7 @@ class aarch64_gpreg_noarg(reg_noarg): return True -class aarch64_simdreg(reg_noarg, m_arg): +class aarch64_simdreg(reg_noarg, aarch64_arg): parser = simdregs simd_size = [8, 16, 32, 64] @@ -619,7 +587,7 @@ class aarch64_simdreg_32_64_zero(aarch64_simdreg_32_64): return super(aarch64_simdreg_32_64_zero, self).encode() -class aarch64_gpreg_isf(reg_noarg, m_arg): +class aarch64_gpreg_isf(reg_noarg, aarch64_arg): parser = gpregs_32_64 def decode(self, v): @@ -635,7 +603,7 @@ class aarch64_gpreg_isf(reg_noarg, m_arg): return True -class aarch64_gpreg(aarch64_gpreg_noarg, m_arg): +class aarch64_gpreg(aarch64_gpreg_noarg, aarch64_arg): pass @@ -651,12 +619,12 @@ class aarch64_gpreg_n1(aarch64_gpreg): return self.value != 0b11111 -class aarch64_gpregz(aarch64_gpreg_noarg, m_arg): +class aarch64_gpregz(aarch64_gpreg_noarg, aarch64_arg): parser = gpregsz_32_64 gpregs_info = gpregsz_info -class aarch64_gpreg0(bsi, m_arg): +class aarch64_gpreg0(bsi, aarch64_arg): parser = gpregsz_32_64 gpregs_info = gpregsz_info @@ -684,7 +652,7 @@ class aarch64_gpreg0(bsi, m_arg): return True -class aarch64_crreg(reg_noarg, m_arg): +class aarch64_crreg(reg_noarg, aarch64_arg): reg_info = cr_info parser = reg_info.parser @@ -702,7 +670,7 @@ class aarch64_gpreg32_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpreg32(aarch64_gpreg32_noarg, m_arg): +class aarch64_gpreg32(aarch64_gpreg32_noarg, aarch64_arg): reg_info = gpregs32_info parser = reg_info.parser @@ -712,7 +680,7 @@ class aarch64_gpreg64_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpreg64(reg_noarg, m_arg): +class aarch64_gpreg64(reg_noarg, aarch64_arg): reg_info = gpregs64_info parser = reg_info.parser @@ -722,7 +690,7 @@ class aarch64_gpregz32_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpregz32(aarch64_gpreg32_noarg, m_arg): +class aarch64_gpregz32(aarch64_gpreg32_noarg, aarch64_arg): reg_info = gpregsz32_info parser = reg_info.parser @@ -732,7 +700,7 @@ class aarch64_gpregz64_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpregz64(reg_noarg, m_arg): +class aarch64_gpregz64(reg_noarg, aarch64_arg): reg_info = gpregsz64_info parser = reg_info.parser @@ -742,7 +710,7 @@ class aarch64_simd08_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd08(aarch64_simd08_noarg, m_arg): +class aarch64_simd08(aarch64_simd08_noarg, aarch64_arg): reg_info = simd08_info parser = reg_info.parser @@ -752,7 +720,7 @@ class aarch64_simd16_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd16(aarch64_simd16_noarg, m_arg): +class aarch64_simd16(aarch64_simd16_noarg, aarch64_arg): reg_info = simd16_info parser = reg_info.parser @@ -762,7 +730,7 @@ class aarch64_simd32_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd32(aarch64_simd32_noarg, m_arg): +class aarch64_simd32(aarch64_simd32_noarg, aarch64_arg): reg_info = simd32_info parser = reg_info.parser @@ -772,7 +740,7 @@ class aarch64_simd64_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd64(aarch64_simd64_noarg, m_arg): +class aarch64_simd64(aarch64_simd64_noarg, aarch64_arg): reg_info = simd64_info parser = reg_info.parser @@ -782,12 +750,12 @@ class aarch64_simd128_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd128(aarch64_simd128_noarg, m_arg): +class aarch64_simd128(aarch64_simd128_noarg, aarch64_arg): reg_info = simd128_info parser = reg_info.parser -class aarch64_imm_32(imm_noarg, m_arg): +class aarch64_imm_32(imm_noarg, aarch64_arg): parser = base_expr @@ -810,7 +778,7 @@ class aarch64_uint64_noarg(imm_noarg): int2expr = lambda self, x: m2_expr.ExprInt(x, 64) -class aarch64_uint64(aarch64_uint64_noarg, m_arg): +class aarch64_uint64(aarch64_uint64_noarg, aarch64_arg): parser = base_expr @@ -829,8 +797,8 @@ def set_imm_to_size(size, expr): class aarch64_imm_sf(imm_noarg): parser = base_expr - def fromstring(self, s, parser_result=None): - start, stop = super(aarch64_imm_sf, self).fromstring(s, parser_result) + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(aarch64_imm_sf, self).fromstring(text, symbol_pool, parser_result) if start is None: return start, stop size = self.parent.args[0].expr.size @@ -861,7 +829,7 @@ class aarch64_imm_sf(imm_noarg): return True -class aarch64_imm_sft(aarch64_imm_sf, m_arg): +class aarch64_imm_sft(aarch64_imm_sf, aarch64_arg): def encode(self): if not isinstance(self.expr, m2_expr.ExprInt): @@ -895,7 +863,7 @@ OPTION2SIZE = [32, 32, 32, 64, 32, 32, 32, 64] -class aarch64_gpreg_ext(reg_noarg, m_arg): +class aarch64_gpreg_ext(reg_noarg, aarch64_arg): parser = reg_ext_off def encode(self): @@ -934,7 +902,7 @@ EXT2_OP = {0b010: 'UXTW', EXT2_OP_INV = dict([(items[1], items[0]) for items in EXT2_OP.items()]) -class aarch64_gpreg_ext2(reg_noarg, m_arg): +class aarch64_gpreg_ext2(reg_noarg, aarch64_arg): parser = deref_ext2 def get_size(self): @@ -946,6 +914,12 @@ class aarch64_gpreg_ext2(reg_noarg, m_arg): if len(self.expr.args) != 2: return False arg0, arg1 = self.expr.args + if (self.expr.is_op("preinc") and arg0.is_id() and arg1.is_id()): + self.parent.shift.value = 0 + self.parent.rn.value = self.parent.rn.reg_info.expr.index(arg0) + self.value = gpregs_info[arg1.size].expr.index(arg1) + self.parent.option.value = 0b011 + return True if not (isinstance(self.expr, m2_expr.ExprOp) and self.expr.op == 'segm'): return False if not arg0 in self.parent.rn.reg_info.expr: @@ -1022,7 +996,7 @@ def test_set_sf(parent, size): return psize == size -class aarch64_gpreg_sftimm(reg_noarg, m_arg): +class aarch64_gpreg_sftimm(reg_noarg, aarch64_arg): reg_info = gpregsz_info parser = shift_off @@ -1266,7 +1240,7 @@ def EncodeBitMasks(wmask): return immr, imms, immn -class aarch64_imm_nsr(aarch64_imm_sf, m_arg): +class aarch64_imm_nsr(aarch64_imm_sf, aarch64_arg): parser = base_expr def decode(self, v): @@ -1347,7 +1321,7 @@ class aarch64_immhi_page(aarch64_imm_32): return True -class aarch64_imm_hw(m_arg): +class aarch64_imm_hw(aarch64_arg): parser = base_expr shift_op = '<<' @@ -1373,7 +1347,7 @@ class aarch64_imm_hw(m_arg): return False -class aarch64_imm_hw_sc(m_arg): +class aarch64_imm_hw_sc(aarch64_arg): parser = shiftimm_off_sc shift_op = 'slice_at' @@ -1415,7 +1389,7 @@ class aarch64_imm_hw_sc(m_arg): return True -class aarch64_offs(imm_noarg, m_arg): +class aarch64_offs(imm_noarg, aarch64_arg): parser = base_expr def decode(self, v): @@ -1436,7 +1410,7 @@ class aarch64_offs(imm_noarg, m_arg): -class aarch64_offs_pc(imm_noarg, m_arg): +class aarch64_offs_pc(imm_noarg, aarch64_arg): parser = deref_pc def decode(self, v): @@ -1490,7 +1464,7 @@ def get_size(parent): return size -class aarch64_deref(m_arg): +class aarch64_deref(aarch64_arg): parser = deref def decode_w_size(self, off): @@ -1628,17 +1602,17 @@ modf = bs_mod_name(l=1, fname='modf', mn_mod=['', 'S']) sf = bs(l=1, fname='sf', order=-1) -class aarch64_cond_arg(reg_noarg, m_arg): +class aarch64_cond_arg(reg_noarg, aarch64_arg): reg_info = conds_info parser = reg_info.parser -class aarch64_cond_inv_arg(reg_noarg, m_arg): +class aarch64_cond_inv_arg(reg_noarg, aarch64_arg): reg_info = conds_inv_info parser = reg_info.parser -class aarch64_b40(m_arg): +class aarch64_b40(aarch64_arg): parser = base_expr def decode(self, v): @@ -1745,19 +1719,19 @@ imm_sft_12 = bs(l=12, cls=(aarch64_imm_sft,)) imm32_3 = bs(l=3, fname="imm") imm6 = bs(l=6, fname="imm", order=-1) imm3 = bs(l=3, fname="imm", order=-1) -simm6 = bs(l=6, cls=(aarch64_int64_noarg, m_arg), fname="imm", order=-1) +simm6 = bs(l=6, cls=(aarch64_int64_noarg, aarch64_arg), fname="imm", order=-1) simm9 = bs(l=9, cls=(aarch64_int64_noarg,), fname="imm", order=-1) simm7 = bs(l=7, cls=(aarch64_int64_noarg,), fname="imm", order=-1) -nzcv = bs(l=4, cls=(aarch64_uint64_noarg, m_arg), fname="nzcv", order=-1) -uimm5 = bs(l=5, cls=(aarch64_uint64_noarg, m_arg), fname="imm", order=-1) +nzcv = bs(l=4, cls=(aarch64_uint64_noarg, aarch64_arg), fname="nzcv", order=-1) +uimm5 = bs(l=5, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) uimm12 = bs(l=12, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) -uimm16 = bs(l=16, cls=(aarch64_uint64_noarg, m_arg), fname="imm", order=-1) +uimm16 = bs(l=16, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) uimm7 = bs(l=7, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) uimm8 = bs(l=8, cls=(aarch64_uint64,), fname="imm", order=-1) -op1 = bs(l=3, cls=(aarch64_uint64, m_arg), fname="op1") -op2 = bs(l=3, cls=(aarch64_uint64, m_arg), fname="op2") +op1 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op1") +op2 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op2") imm16 = bs(l=16, fname="imm", order=-1) @@ -1787,8 +1761,8 @@ imm16_hw_sc = bs(l=16, cls=(aarch64_imm_hw_sc,), fname='imm') hw = bs(l=2, fname='hw') -a_imms = bs(l=6, cls=(aarch64_imm_sf, m_arg), fname="imm1", order=-1) -a_immr = bs(l=6, cls=(aarch64_imm_sf, m_arg), fname="imm1", order=-1) +a_imms = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) +a_immr = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) diff --git a/miasm2/arch/aarch64/regs.py b/miasm2/arch/aarch64/regs.py index f2655ea7..c9da0653 100644 --- a/miasm2/arch/aarch64/regs.py +++ b/miasm2/arch/aarch64/regs.py @@ -45,9 +45,13 @@ simd128_expr, simd128_init, simd128_info = gen_regs( simd128_str, globals(), 128) -PC, PC_init = gen_reg("PC", globals(), 64) -WZR, WZR_init = gen_reg("WZR", globals(), 32) -XZR, XZR_init = gen_reg("XZR", globals(), 64) +PC, _ = gen_reg("PC", 64) +WZR, _ = gen_reg("WZR", 32) +XZR, _ = gen_reg("XZR", 64) + +PC_init = ExprId("PC_init", 64) +WZR_init = ExprId("WZR_init", 32) +XZR_init = ExprId("XZR_init", 64) reg_zf = 'zf' reg_nf = 'nf' @@ -87,11 +91,12 @@ all_regs_ids = [ X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, LR, SP, - exception_flags, interrupt_num, + exception_flags, + interrupt_num, PC, WZR, + XZR, zf, nf, of, cf, - XZR ] @@ -105,22 +110,7 @@ attrib_to_regs = { all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) -all_regs_ids_init = (simd08_init + - simd16_init + - simd32_init + - simd64_init + - simd128_init + - gpregs32_init + - gpregs64_init + - [ - ExprInt(0, 32), - PC_init, - WZR_init, - XZR_init, - zf_init, nf_init, of_init, cf_init, - ExprInt(0, 64), ExprInt(0, 32), - ] - ) +all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] regs_init = {} for i, r in enumerate(all_regs_ids): diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index 2799df7a..88b0d0a7 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -124,9 +124,32 @@ def extend_arg(dst, arg): return arg op, (reg, shift) = arg.op, arg.args - if op == 'SXTW': + if op == "SXTB": + base = reg[:8].signExtend(dst.size) + op = "<<" + elif op == "SXTH": + base = reg[:16].signExtend(dst.size) + op = "<<" + elif op == 'SXTW': + base = reg[:32].signExtend(dst.size) + op = "<<" + elif op == "SXTX": base = reg.signExtend(dst.size) op = "<<" + + elif op == "UXTB": + base = reg[:8].zeroExtend(dst.size) + op = "<<" + elif op == "UXTH": + base = reg[:16].zeroExtend(dst.size) + op = "<<" + elif op == 'UXTW': + base = reg[:32].zeroExtend(dst.size) + op = "<<" + elif op == "UXTX": + base = reg.zeroExtend(dst.size) + op = "<<" + elif op in ['<<', '>>', '<<a', 'a>>', '<<<', '>>>']: base = reg.zeroExtend(dst.size) else: diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 39b4cb14..35574a84 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -8,6 +8,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.arm.regs as regs_module from miasm2.arch.arm.regs import * +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp # A1 encoding @@ -20,7 +21,7 @@ log.setLevel(logging.DEBUG) # arm regs ############## reg_dum = ExprId('DumReg', 32) -gen_reg('PC', globals()) +PC, _ = gen_reg('PC') # GP regs_str = ['R%d' % r for r in xrange(0x10)] @@ -40,6 +41,9 @@ gpregs_nosppc = reg_info(regs_str[:13] + [str(reg_dum), regs_str[14]], gpregs_nopc = reg_info(regs_str[:14], regs_expr[:14]) +gpregs_nosp = reg_info(regs_str[:13] + [str(reg_dum), regs_str[14], regs_str[15]], + regs_expr[:13] + [reg_dum, regs_expr[14], regs_expr[15]]) + # psr sr_flags = "cxsf" @@ -81,15 +85,33 @@ conditional_branch = ["BEQ", "BNE", "BCS", "BCC", "BMI", "BPL", "BVS", unconditional_branch = ["B", "BX", "BL", "BLX"] +barrier_expr = { + 0b1111: ExprId("SY", 32), + 0b1110: ExprId("ST", 32), + 0b1101: ExprId("LD", 32), + 0b1011: ExprId("ISH", 32), + 0b1010: ExprId("ISHST", 32), + 0b1001: ExprId("ISHLD", 32), + 0b0111: ExprId("NSH", 32), + 0b0110: ExprId("NSHST", 32), + 0b0011: ExprId("OSH", 32), + 0b0010: ExprId("OSHST", 32), + 0b0001: ExprId("OSHLD", 32), +} + +barrier_info = reg_info_dct(barrier_expr) + + + # parser helper ########### -def tok_reg_duo(s, l, t): - t = t[0] - i1 = gpregs.expr.index(t[0]) - i2 = gpregs.expr.index(t[1]) +def cb_tok_reg_duo(tokens): + tokens = tokens[0] + i1 = gpregs.expr.index(tokens[0].name) + i2 = gpregs.expr.index(tokens[1].name) o = [] for i in xrange(i1, i2 + 1): - o.append(gpregs.expr[i]) + o.append(AstId(gpregs.expr[i])) return o LPARENTHESIS = Literal("(") @@ -103,14 +125,14 @@ CIRCUNFLEX = Literal("^") def check_bounds(left_bound, right_bound, value): if left_bound <= value and value <= right_bound: - return ExprInt(value, 32) + return AstInt(value) else: raise ValueError('shift operator immediate value out of bound') def check_values(values, value): if value in values: - return ExprInt(value, 32) + return AstInt(value) else: raise ValueError('shift operator immediate value out of bound') @@ -120,11 +142,11 @@ int_1_32 = str_int.copy().setParseAction(lambda v: check_bounds(1, 32, v[0])) int_8_16_24 = str_int.copy().setParseAction(lambda v: check_values([8, 16, 24], v[0])) -def reglistparse(s, l, t): - t = t[0] - if t[-1] == "^": - return ExprOp('sbit', ExprOp('reglist', *t[:-1])) - return ExprOp('reglist', *t) +def cb_reglistparse(tokens): + tokens = tokens[0] + if tokens[-1] == "^": + return AstOp('sbit', AstOp('reglist', *tokens[:-1])) + return AstOp('reglist', *tokens) allshifts = ['<<', '>>', 'a>>', '>>>', 'rrx'] @@ -136,15 +158,15 @@ shift2expr_dct = {'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', expr2shift_dct = dict([(x[1], x[0]) for x in shift2expr_dct.items()]) -def op_shift2expr(s, l, t): - return shift2expr_dct[t[0]] +def op_shift2expr(tokens): + return shift2expr_dct[tokens[0]] reg_duo = Group(gpregs.parser + MINUS + - gpregs.parser).setParseAction(tok_reg_duo) + gpregs.parser).setParseAction(cb_tok_reg_duo) reg_or_duo = reg_duo | gpregs.parser gpreg_list = Group(LACC + delimitedList( reg_or_duo, delim=',') + RACC + Optional(CIRCUNFLEX)) -gpreg_list.setParseAction(reglistparse) +gpreg_list.setParseAction(cb_reglistparse) LBRACK = Suppress("[") RBRACK = Suppress("]") @@ -166,111 +188,132 @@ gpreg_p = gpregs.parser psr_p = cpsr_regs.parser | spsr_regs.parser -def shift2expr(t): - if len(t) == 1: - return t[0] - elif len(t) == 2: - return ExprOp(t[1], t[0]) - elif len(t) == 3: - return ExprOp(t[1], t[0], t[2]) +def cb_shift(tokens): + if len(tokens) == 1: + ret = tokens[0] + elif len(tokens) == 2: + ret = AstOp(tokens[1], tokens[0]) + elif len(tokens) == 3: + ret = AstOp(tokens[1], tokens[0], tokens[2]) + else: + raise ValueError("Bad arg") + return ret -variable, operand, base_expr = gen_base_expr() +shift_off = (gpregs.parser + Optional( + (all_unaryop_shifts_t) | + (all_binaryop_1_31_shifts_t + (gpregs.parser | int_1_31)) | + (all_binaryop_1_32_shifts_t + (gpregs.parser | int_1_32)) +)).setParseAction(cb_shift) +shift_off |= base_expr -int_or_expr = base_expr +rot2_expr = (gpregs.parser + Optional( + (ror_shifts_t + (int_8_16_24)) +)).setParseAction(cb_shift) -def ast_id2expr(t): - return mn_arm.regs.all_regs_ids_byname.get(t, t) +OP_LSL = Suppress("LSL") -def ast_int2expr(a): - return ExprInt(a, 32) +def cb_deref_reg_reg(tokens): + if len(tokens) != 2: + raise ValueError("Bad mem format") + return AstMem(AstOp('+', tokens[0], tokens[1]), 8) +def cb_deref_reg_reg_lsl_1(tokens): + if len(tokens) != 3: + raise ValueError("Bad mem format") + reg1, reg2, index = tokens + if not isinstance(index, AstInt) or index.value != 1: + raise ValueError("Bad index") + ret = AstMem(AstOp('+', reg1, AstOp('<<', reg2, index)), 16) + return ret -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) +deref_reg_reg = (LBRACK + gpregs.parser + COMMA + gpregs.parser + RBRACK).setParseAction(cb_deref_reg_reg) +deref_reg_reg_lsl_1 = (LBRACK + gpregs.parser + COMMA + gpregs.parser + OP_LSL + base_expr + RBRACK).setParseAction(cb_deref_reg_reg_lsl_1) -shift_off = (gpregs.parser + Optional( - (all_unaryop_shifts_t) | - (all_binaryop_1_31_shifts_t + (gpregs.parser | int_1_31)) | - (all_binaryop_1_32_shifts_t + (gpregs.parser | int_1_32)) -)).setParseAction(shift2expr) -shift_off |= base_expr -rot2_expr = (gpregs.parser + Optional( +(gpregs.parser + Optional( (ror_shifts_t + (int_8_16_24)) -)).setParseAction(shift2expr) +)).setParseAction(cb_shift) -def deref2expr_nooff(s, l, t): - t = t[0] +reg_or_base = gpregs.parser | base_expr + +def deref2expr_nooff(tokens): + tokens = tokens[0] # XXX default - return ExprOp("preinc", t[0], ExprInt(0, 32)) + return ExprOp("preinc", tokens[0], ExprInt(0, 32)) -def deref2expr_pre(s, l, t): - t = t[0] - if len(t) == 1: - return ExprOp("preinc", t[0], ExprInt(0, 32)) - elif len(t) == 2: - return ExprOp("preinc", t[0], t[1]) +def cb_deref_preinc(tokens): + tokens = tokens[0] + if len(tokens) == 1: + return AstOp("preinc", tokens[0], AstInt(0)) + elif len(tokens) == 2: + return AstOp("preinc", tokens[0], tokens[1]) else: - raise NotImplementedError('len(t) > 2') + raise NotImplementedError('len(tokens) > 2') -def deref2expr_pre_mem(s, l, t): - t = t[0] - if len(t) == 1: - return ExprMem(ExprOp("preinc", t[0], ExprInt(0, 32)), 32) - elif len(t) == 2: - return ExprMem(ExprOp("preinc", t[0], t[1]), 32) +def cb_deref_pre_mem(tokens): + tokens = tokens[0] + if len(tokens) == 1: + return AstMem(AstOp("preinc", tokens[0], AstInt(0)), 32) + elif len(tokens) == 2: + return AstMem(AstOp("preinc", tokens[0], tokens[1]), 32) else: - raise NotImplementedError('len(t) > 2') + raise NotImplementedError('len(tokens) > 2') -def deref2expr_post(s, l, t): - t = t[0] - return ExprOp("postinc", t[0], t[1]) +def cb_deref_post(tokens): + tokens = tokens[0] + return AstOp("postinc", tokens[0], tokens[1]) -def deref_wb(s, l, t): - t = t[0] - if t[-1] == '!': - return ExprMem(ExprOp('wback', *t[:-1]), 32) - return ExprMem(t[0], 32) +def cb_deref_wb(tokens): + tokens = tokens[0] + if tokens[-1] == '!': + return AstMem(AstOp('wback', *tokens[:-1]), 32) + return AstMem(tokens[0], 32) # shift_off.setParseAction(deref_off) deref_nooff = Group( LBRACK + gpregs.parser + RBRACK).setParseAction(deref2expr_nooff) deref_pre = Group(LBRACK + gpregs.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_preinc) deref_post = Group(LBRACK + gpregs.parser + RBRACK + - COMMA + shift_off).setParseAction(deref2expr_post) + COMMA + shift_off).setParseAction(cb_deref_post) deref = Group((deref_post | deref_pre | deref_nooff) - + Optional('!')).setParseAction(deref_wb) + + Optional('!')).setParseAction(cb_deref_wb) -def parsegpreg_wb(s, l, t): - t = t[0] - if t[-1] == '!': - return ExprOp('wback', *t[:-1]) - return t[0] +def cb_gpreb_wb(tokens): + assert len(tokens) == 1 + tokens = tokens[0] + if tokens[-1] == '!': + return AstOp('wback', *tokens[:-1]) + return tokens[0] -gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(parsegpreg_wb) +gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(cb_gpreb_wb) -# +cond_list_full = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', + 'HI', 'LS', 'GE', 'LT', 'GT', 'LE', 'NV'] cond_list = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', 'HI', 'LS', 'GE', 'LT', 'GT', 'LE', ''] # , 'NV'] cond_dct = dict([(x[1], x[0]) for x in enumerate(cond_list)]) -# default_prio = 0x1337 +bm_cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) + + + +cond_dct_barmt = dict([(x[0], x[1]) for x in enumerate(cond_list) if x[0] & 0b1110 != 0b1110]) +bm_cond_barmt = bs_mod_name(l=4, fname='cond', mn_mod=cond_dct_barmt) -bm_cond = bs_mod_name(l=4, fname='cond', mn_mod=cond_list) # cond_dct) def permut_args(order, args): @@ -457,7 +500,11 @@ class instruction_armt(instruction_arm): if self.name == 'BLX': ad = e.arg + (self.offset & 0xfffffffc) elif self.name == 'BL': - ad = e.arg + self.offset + self.l + ad = e.arg + self.offset + elif self.name.startswith('BP'): + ad = e.arg + self.offset + elif self.name.startswith('CB'): + ad = e.arg + self.offset + self.l + 2 else: ad = e.arg + self.offset l = symbol_pool.getby_offset_create(ad) @@ -468,7 +515,7 @@ class instruction_armt(instruction_arm): self.args[0] = s def breakflow(self): - if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ"]: + if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: return True if self.name.startswith("LDM") and PC in self.args[1].args: return True @@ -720,7 +767,31 @@ class mn_armt(cls_mn): return 32 -class arm_reg(reg_noarg, m_arg): +class arm_arg(m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class arm_reg(reg_noarg, arm_arg): pass @@ -760,7 +831,7 @@ class arm_reg_wb(arm_reg): return True -class arm_psr(m_arg): +class arm_psr(arm_arg): parser = psr_p def decode(self, v): @@ -796,7 +867,7 @@ class arm_preg(arm_reg): parser = reg_info.parser -class arm_imm(imm_noarg, m_arg): +class arm_imm(imm_noarg, arm_arg): parser = base_expr @@ -840,7 +911,7 @@ class arm_offs(arm_imm): return True -class arm_imm8_12(m_arg): +class arm_imm8_12(arm_arg): parser = deref def decode(self, v): @@ -896,8 +967,8 @@ class arm_imm8_12(m_arg): return True -class arm_imm_4_12(m_arg): - parser = base_expr +class arm_imm_4_12(arm_arg): + parser = reg_or_base def decode(self, v): v = v & self.lmask @@ -916,7 +987,7 @@ class arm_imm_4_12(m_arg): return True -class arm_imm_12_4(m_arg): +class arm_imm_12_4(arm_arg): parser = base_expr def decode(self, v): @@ -936,7 +1007,7 @@ class arm_imm_12_4(m_arg): return True -class arm_op2(m_arg): +class arm_op2(arm_arg): parser = shift_off def str_to_imm_rot_form(self, s, neg=False): @@ -1108,7 +1179,7 @@ class arm_op2imm(arm_imm8_12): # if len(v) <1: # raise ValueError('cannot parse', s) - self.parent.rn.fromstring(e.args[0]) + self.parent.rn.expr = e.args[0] if len(e.args) == 1: self.parent.immop.value = 0 self.value = 0 @@ -1169,7 +1240,7 @@ def reglist2str(rlist): return "{" + ", ".join(out) + '}' -class arm_rlist(m_arg): +class arm_rlist(arm_arg): parser = gpreg_list def encode(self): @@ -1272,6 +1343,18 @@ class bs_lnk(bs_mod_name): return name[:1] + self.args['mn_mod'][i] + name[1:] +class armt_rm_cp(bsi): + + def decode(self, v): + if v != gpregs.expr.index(self.parent.rm.expr): + return False + return True + + def encode(self): + self.value = gpregs.expr.index(self.parent.rm.expr) + return True + + accum = bs(l=1) scc = bs_mod_name(l=1, fname='scc', mn_mod=['', 'S']) dumscc = bs("1") @@ -1280,7 +1363,13 @@ rdl = bs(l=4, cls=(arm_gpreg,)) rn = bs(l=4, cls=(arm_gpreg,), fname="rn") rs = bs(l=4, cls=(arm_gpreg,)) -rm = bs(l=4, cls=(arm_gpreg,)) +rm = bs(l=4, cls=(arm_gpreg,), fname='rm') +ra = bs(l=4, cls=(arm_gpreg,)) +rt = bs(l=4, cls=(arm_gpreg,), fname='rt') +rt2 = bs(l=4, cls=(arm_gpreg,)) + +rm_cp = bs(l=4, cls=(armt_rm_cp,)) + op2 = bs(l=12, cls=(arm_op2,)) lnk = bs_lnk(l=1, fname='lnk', mn_mod=['', 'L']) offs = bs(l=24, cls=(arm_offs,), fname="offs") @@ -1333,8 +1422,10 @@ imm16 = bs(l=16, cls=(arm_imm, m_arg)) imm12_off = bs(l=12, fname="imm") +imm2_noarg = bs(l=2, fname="imm") imm4_noarg = bs(l=4, fname="imm4") + imm_4_12 = bs(l=12, cls=(arm_imm_4_12,)) imm12_noarg = bs(l=12, fname="imm") @@ -1356,7 +1447,7 @@ class mul_part_y(bs_mod_name): mul_x = mul_part_x(l=1, fname='x', mn_mod=['B', 'T']) mul_y = mul_part_y(l=1, fname='y', mn_mod=['B', 'T']) -class arm_immed(m_arg): +class arm_immed(arm_arg): parser = deref def decode(self, v): @@ -1429,7 +1520,7 @@ immedL = bs(l=4, cls=(arm_immed, m_arg), fname='immedL') hb = bs(l=1) -class armt2_rot_rm(m_arg): +class armt2_rot_rm(arm_arg): parser = shift_off def decode(self, v): r = self.parent.rm.expr @@ -1450,7 +1541,7 @@ class armt2_rot_rm(m_arg): rot_rm = bs(l=2, cls=(armt2_rot_rm,), fname="rot_rm") -class arm_mem_rn_imm(m_arg): +class arm_mem_rn_imm(arm_arg): parser = deref def decode(self, v): value = self.parent.imm.value @@ -1550,25 +1641,16 @@ bs_mr_name = bs_name(l=1, name=mr_name) bs_addi = bs(l=1, fname="add_imm") bs_rw = bs_mod_name(l=1, fname='rw', mn_mod=['W', '']) -armop("mul", [bs('000000'), bs('0'), scc, rd, - bs('0000'), rs, bs('1001'), rm], [rd, rm, rs]) -armop("umull", [bs('000010'), - bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("umlal", [bs('000010'), - bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("smull", [bs('000011'), bs('0'), scc, rd, - rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("smlal", [bs('000011'), bs('1'), scc, rd, - rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) -armop("mla", [bs('000000'), bs('1'), scc, rd, - rn, rs, bs('1001'), rm], [rd, rm, rs, rn]) -armop("mrs", [bs('00010'), psr, bs('00'), - psr_field, rd, bs('000000000000')], [rd, psr]) -armop("msr", [bs('00010'), psr, bs('10'), psr_field, - bs('1111'), bs('0000'), bs('0000'), rm], [psr_field, rm]) +armop("mul", [bs('000000'), bs('0'), scc, rd, bs('0000'), rs, bs('1001'), rm], [rd, rm, rs]) +armop("umull", [bs('000010'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("umlal", [bs('000010'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smull", [bs('000011'), bs('0'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("smlal", [bs('000011'), bs('1'), scc, rd, rdl, rs, bs('1001'), rm], [rdl, rd, rm, rs]) +armop("mla", [bs('000000'), bs('1'), scc, rd, rn, rs, bs('1001'), rm], [rd, rm, rs, rn]) +armop("mrs", [bs('00010'), psr, bs('00'), psr_field, rd, bs('000000000000')], [rd, psr]) +armop("msr", [bs('00010'), psr, bs('10'), psr_field, bs('1111'), bs('0000'), bs('0000'), rm], [psr_field, rm]) armop("data", [bs('00'), immop, bs_data_name, scc, rn, rd, op2], [rd, rn, op2]) -armop("data_mov", - [bs('00'), immop, bs_data_mov_name, scc, bs('0000'), rd, op2], [rd, op2]) +armop("data_mov", [bs('00'), immop, bs_data_mov_name, scc, bs('0000'), rd, op2], [rd, op2]) armop("data_test", [bs('00'), immop, bs_data_test_name, dumscc, rn, dumr, op2]) armop("b", [bs('101'), lnk, offs]) @@ -1576,40 +1658,27 @@ armop("smul", [bs('00010110'), rd, bs('0000'), rs, bs('1'), mul_y, mul_x, bs('0' # TODO TEST #armop("und", [bs('011'), imm20, bs('1'), imm4]) -armop("transfer", [bs('01'), immop, ppi, updown, trb, wback_no_t, - bs_transfer_name, rn_noarg, rd, op2imm], [rd, op2imm]) -armop("transferh", [bs('000'), ppi, updown, immop, wback_no_t, - bs_transferh_name, rn_noarg, rd, immedH, bs('1011'), immedL], [rd, immedL]) -armop("ldrd", [bs('000'), ppi, updown, immop, wback_no_t, bs_transfer_ldr_name, - rn_noarg, rd, immedH, bs('1101'), immedL], [rd, immedL]) -armop("ldrsh", [bs('000'), ppi, updown, immop, wback_no_t, bs('1'), rn_noarg, - rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) -armop("strd", [bs('000'), ppi, updown, immop, wback_no_t, bs('0'), rn_noarg, - rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) -armop("btransfersp", [bs('100'), ppi_b_sp, updown_b_sp, sbit, wback_no_t, - bs_btransfer_name, rn_sp, rlist]) -armop("btransfer", [bs('100'), ppi_b_nosp, updown_b_nosp, sbit, wback_no_t, - bs_btransfer_name, rn_wb, rlist]) +armop("transfer", [bs('01'), immop, ppi, updown, trb, wback_no_t, bs_transfer_name, rn_noarg, rd, op2imm], [rd, op2imm]) +armop("transferh", [bs('000'), ppi, updown, immop, wback_no_t, bs_transferh_name, rn_noarg, rd, immedH, bs('1011'), immedL], [rd, immedL]) +armop("ldrd", [bs('000'), ppi, updown, immop, wback_no_t, bs_transfer_ldr_name, rn_noarg, rd, immedH, bs('1101'), immedL], [rd, immedL]) +armop("ldrsh", [bs('000'), ppi, updown, immop, wback_no_t, bs('1'), rn_noarg, rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("strd", [bs('000'), ppi, updown, immop, wback_no_t, bs('0'), rn_noarg, rd, immedH, bs('1'), bs('1'), bs('1'), bs('1'), immedL], [rd, immedL]) +armop("btransfersp", [bs('100'), ppi_b_sp, updown_b_sp, sbit, wback_no_t, bs_btransfer_name, rn_sp, rlist]) +armop("btransfer", [bs('100'), ppi_b_nosp, updown_b_nosp, sbit, wback_no_t, bs_btransfer_name, rn_wb, rlist]) # TODO: TEST armop("swp", [bs('00010'), trb, bs('00'), rn, rd, bs('0000'), bs('1001'), rm]) armop("svc", [bs('1111'), swi_i]) -armop("cdp", [bs('1110'), opc, crn, crd, cpnum, cp, bs('0'), crm], - [cpnum, opc, crd, crn, crm, cp]) -armop("cdata", [bs('110'), ppi, updown, tl, wback_no_t, bs_ctransfer_name, - rn_noarg, crd, cpnum, imm8_12], [cpnum, crd, imm8_12]) -armop("mr", [bs('1110'), cpopc, bs_mr_name, crn, rd, cpnum, cp, bs('1'), crm], - [cpnum, cpopc, rd, crn, crm, cp]) +armop("cdp", [bs('1110'), opc, crn, crd, cpnum, cp, bs('0'), crm], [cpnum, opc, crd, crn, crm, cp]) +armop("cdata", [bs('110'), ppi, updown, tl, wback_no_t, bs_ctransfer_name, rn_noarg, crd, cpnum, imm8_12], [cpnum, crd, imm8_12]) +armop("mr", [bs('1110'), cpopc, bs_mr_name, crn, rd, cpnum, cp, bs('1'), crm], [cpnum, cpopc, rd, crn, crm, cp]) armop("bkpt", [bs('00010010'), imm12_noarg, bs('0111'), imm_12_4]) armop("bx", [bs('000100101111111111110001'), rn]) armop("mov", [bs('00110000'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) armop("movt", [bs('00110100'), imm4_noarg, rd, imm_4_12], [rd, imm_4_12]) -armop("blx", [bs('00010010'), bs('1111'), - bs('1111'), bs('1111'), bs('0011'), rm], [rm]) +armop("blx", [bs('00010010'), bs('1111'), bs('1111'), bs('1111'), bs('0011'), rm], [rm]) armop("blx", [fix_cond, bs('101'), lowb, offs_blx], [offs_blx]) -armop("clz", [bs('00010110'), bs('1111'), - rd, bs('1111'), bs('0001'), rm], [rd, rm]) -armop("qadd", - [bs('00010000'), rn, rd, bs('0000'), bs('0101'), rm], [rd, rm, rn]) +armop("clz", [bs('00010110'), bs('1111'), rd, bs('1111'), bs('0001'), rm], [rd, rm]) +armop("qadd", [bs('00010000'), rn, rd, bs('0000'), bs('0101'), rm], [rd, rm, rn]) armop("uxtb", [bs('01101110'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) armop("uxth", [bs('01101111'), bs('1111'), rd, rot_rm, bs('00'), bs('0111'), rm_noarg]) @@ -1631,11 +1700,13 @@ class arm_widthm1(arm_imm, m_arg): if not isinstance(self.expr, ExprInt): return False v = int(self.expr) + -1 + if v > self.lmask: + return False self.value = v return True -class arm_rm_rot2(m_arg): +class arm_rm_rot2(arm_arg): parser = rot2_expr def decode(self, v): expr = gpregs.expr[v] @@ -1662,18 +1733,47 @@ class arm_rm_rot2(m_arg): self.parent.rot2.value = value / 8 return True -class arm_gpreg_nopc(arm_reg): +class arm_gpreg_nopc(reg_noarg): reg_info = gpregs_nopc parser = reg_info.parser + def decode(self, v): + ret = super(arm_gpreg_nopc, self).decode(v) + if ret is False: + return False + if self.expr == reg_dum: + return False + return True + + +class arm_gpreg_nosp(reg_noarg): + reg_info = gpregs_nosp + parser = reg_info.parser + + def decode(self, v): + ret = super(arm_gpreg_nosp, self).decode(v) + if ret is False: + return False + if self.expr == reg_dum: + return False + return True + + rm_rot2 = bs(l=4, cls=(arm_rm_rot2,), fname="rm") rot2 = bs(l=2, fname="rot2") widthm1 = bs(l=5, cls=(arm_widthm1, m_arg)) lsb = bs(l=5, cls=(arm_imm, m_arg)) -rn_nopc = bs(l=4, cls=(arm_gpreg_nopc,), fname="rn") +rd_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rd") +rn_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rn") +ra_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="ra") +rt_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rt") + +rn_nosp = bs(l=4, cls=(arm_gpreg_nosp, arm_arg), fname="rn") + +rn_nopc_noarg = bs(l=4, cls=(arm_gpreg_nopc,), fname="rn") armop("ubfx", [bs('0111111'), widthm1, rd, lsb, bs('101'), rn], [rd, rn, lsb, widthm1]) @@ -1693,18 +1793,23 @@ gpregs_h = reg_info(regs_str[8:], regs_expr[8:]) gpregs_sppc = reg_info(regs_str[-1:] + regs_str[13:14], regs_expr[-1:] + regs_expr[13:14]) +deref_reg_imm = Group(LBRACK + gpregs.parser + Optional( + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) deref_low = Group(LBRACK + gpregs_l.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre_mem) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) deref_pc = Group(LBRACK + gpregs_pc.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre_mem) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) deref_sp = Group(LBRACK + gpregs_sp.parser + COMMA + - shift_off + RBRACK).setParseAction(deref2expr_pre_mem) + shift_off + RBRACK).setParseAction(cb_deref_pre_mem) gpregs_l_wb = Group( - gpregs_l.parser + Optional('!')).setParseAction(parsegpreg_wb) + gpregs_l.parser + Optional('!')).setParseAction(cb_gpreb_wb) -class arm_offreg(m_arg): +gpregs_l_13 = reg_info(regs_str[:13], regs_expr[:13]) + + +class arm_offreg(arm_arg): parser = deref_pc def decodeval(self, v): @@ -1815,9 +1920,8 @@ class arm_off7(arm_imm): def encodeval(self, v): return v >> 2 - -class arm_deref(m_arg): - parser = deref_low +class arm_deref_reg_imm(arm_arg): + parser = deref_reg_imm def decode(self, v): v = v & self.lmask @@ -1827,6 +1931,7 @@ class arm_deref(m_arg): return True def encode(self): + self.parent.off.expr = None e = self.expr if not isinstance(e, ExprMem): return False @@ -1848,6 +1953,9 @@ class arm_deref(m_arg): return False return True +class arm_derefl(arm_deref_reg_imm): + parser = deref_low + class arm_offbw(imm_noarg): @@ -1867,6 +1975,26 @@ class arm_offbw(imm_noarg): log.debug('off must be aligned %r', v) return False v >>= 2 + if v > self.lmask: + return False + self.value = v + return True + + + +class arm_off(imm_noarg): + + def decode(self, v): + v = v & self.lmask + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = int(self.expr) + if v > self.lmask: + return False self.value = v return True @@ -1887,11 +2015,13 @@ class arm_offh(imm_noarg): log.debug('off must be aligned %r', v) return False v >>= 1 + if v > self.lmask: + return False self.value = v return True -class armt_rlist(m_arg): +class armt_rlist(arm_arg): parser = gpreg_list def encode(self): @@ -1916,6 +2046,102 @@ class armt_rlist(m_arg): return True +class armt_rlist13(armt_rlist): + parser = gpreg_list + + def encode(self): + e = self.expr + rlist = [] + reg_l = list(e.args) + + self.parent.pclr.value = 0 + if self.parent.name.startswith('PUSH'): + if regs_expr[14] in reg_l: + reg_l.remove(regs_expr[14]) + self.parent.pclr.value = 1 + else: + if regs_expr[15] in reg_l: + reg_l.remove(regs_expr[15]) + self.parent.pclr.value = 1 + + for reg in reg_l: + if reg not in gpregs_l_13.expr: + return False + rlist.append(gpregs_l_13.expr.index(reg)) + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(13): + if 1 << i & v: + out.append(gpregs_l_13.expr[i]) + + if self.parent.pclr.value == 1: + if self.parent.name.startswith("PUSH"): + out += [regs_expr[14]] + else: + out += [regs_expr[15]] + + if not out: + return False + e = ExprOp('reglist', *out) + self.expr = e + return True + + + +class armt_rlist13_pc_lr(armt_rlist): + parser = gpreg_list + + def encode(self): + e = self.expr + rlist = [] + reg_l = list(e.args) + + self.parent.pc_in.value = 0 + self.parent.lr_in.value = 0 + if regs_expr[14] in reg_l: + reg_l.remove(regs_expr[14]) + self.parent.lr_in.value = 1 + if regs_expr[15] in reg_l: + reg_l.remove(regs_expr[15]) + self.parent.pc_in.value = 1 + + for reg in reg_l: + if reg not in gpregs_l_13.expr: + return False + rlist.append(gpregs_l_13.expr.index(reg)) + v = 0 + for r in rlist: + v |= 1 << r + self.value = v + return True + + def decode(self, v): + v = v & self.lmask + out = [] + for i in xrange(13): + if 1 << i & v: + out.append(gpregs_l_13.expr[i]) + + if self.parent.lr_in.value == 1: + out += [regs_expr[14]] + if self.parent.pc_in.value == 1: + out += [regs_expr[15]] + + if not out: + return False + e = ExprOp('reglist', *out) + self.expr = e + return True + + + class armt_rlist_pclr(armt_rlist): def encode(self): @@ -1936,6 +2162,8 @@ class armt_rlist_pclr(armt_rlist): v = 0 for r in rlist: v |= 1 << r + if v > self.lmask: + return False self.value = v return True @@ -2016,7 +2244,7 @@ rsl = bs(l=3, cls=(arm_gpreg_l,), fname="rs") rml = bs(l=3, cls=(arm_gpreg_l,), fname="rm") rol = bs(l=3, cls=(arm_gpreg_l,), fname="ro") rbl = bs(l=3, cls=(arm_gpreg_l,), fname="rb") -rbl_deref = bs(l=3, cls=(arm_deref,), fname="rb") +rbl_deref = bs(l=3, cls=(arm_derefl,), fname="rb") dumrh = bs(l=3, default_val="000") rdh = bs(l=3, cls=(arm_gpreg_h,), fname="rd") @@ -2030,8 +2258,16 @@ off5bw = bs(l=5, cls=(arm_offbw,), fname="off") off5h = bs(l=5, cls=(arm_offh,), fname="off") sppc = bs(l=1, cls=(arm_sppc,)) +off12 = bs(l=12, cls=(arm_off,), fname="off", order=-1) +rn_deref = bs(l=4, cls=(arm_deref_reg_imm,), fname="rt") + -pclr = bs(l=1, fname='pclr') + +pclr = bs(l=1, fname='pclr', order=-2) + + +pc_in = bs(l=1, fname='pc_in', order=-2) +lr_in = bs(l=1, fname='lr_in', order=-2) sp = bs(l=0, cls=(arm_sp,)) @@ -2040,6 +2276,9 @@ sp = bs(l=0, cls=(arm_sp,)) off8s = bs(l=8, cls=(arm_offs,), fname="offs") trlistpclr = bs(l=8, cls=(armt_rlist_pclr,)) trlist = bs(l=8, cls=(armt_rlist,), fname="trlist", order = -1) +trlist13 = bs(l=13, cls=(armt_rlist13,), fname="trlist", order = -1) +trlist13pclr = bs(l=13, cls=(armt_rlist13_pc_lr,), fname="trlist", order = -1) + rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') @@ -2097,43 +2336,29 @@ bs_br_name = bs_name(l=4, name=br_name) armtop("mshift", [bs('000'), bs_mshift_name, off5, rsl, rdl], [rdl, rsl, off5]) -armtop("addsubr", - [bs('000110'), bs_addsub_name, rnl, rsl, rdl], [rdl, rsl, rnl]) -armtop("addsubi", - [bs('000111'), bs_addsub_name, off3, rsl, rdl], [rdl, rsl, off3]) +armtop("addsubr", [bs('000110'), bs_addsub_name, rnl, rsl, rdl], [rdl, rsl, rnl]) +armtop("addsubi", [bs('000111'), bs_addsub_name, off3, rsl, rdl], [rdl, rsl, off3]) armtop("mcas", [bs('001'), bs_mov_cmp_add_sub_name, rnl, off8]) armtop("alu", [bs('010000'), bs_alu_name, rsl, rdl], [rdl, rsl]) # should not be used ?? -armtop("hiregop00", - [bs('010001'), bs_hiregop_name, bs('00'), rsl, rdl], [rdl, rsl]) -armtop("hiregop01", - [bs('010001'), bs_hiregop_name, bs('01'), rsh, rdl], [rdl, rsh]) -armtop("hiregop10", - [bs('010001'), bs_hiregop_name, bs('10'), rsl, rdh], [rdh, rsl]) -armtop("hiregop11", - [bs('010001'), bs_hiregop_name, bs('11'), rsh, rdh], [rdh, rsh]) +armtop("hiregop00", [bs('010001'), bs_hiregop_name, bs('00'), rsl, rdl], [rdl, rsl]) +armtop("hiregop01", [bs('010001'), bs_hiregop_name, bs('01'), rsh, rdl], [rdl, rsh]) +armtop("hiregop10", [bs('010001'), bs_hiregop_name, bs('10'), rsl, rdh], [rdh, rsl]) +armtop("hiregop11", [bs('010001'), bs_hiregop_name, bs('11'), rsh, rdh], [rdh, rsh]) armtop("bx", [bs('010001'), bs('11'), bs('00'), rsl, dumrh]) armtop("bx", [bs('010001'), bs('11'), bs('01'), rsh, dumrh]) armtop("ldr", [bs('01001'), rdl, offpc8]) -armtop("ldrstr", [bs('0101'), bs_ldr_str_name, - trb, bs('0'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("strh", [bs('0101'), bs('00'), bs('1'), - rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldrh", [bs('0101'), bs('10'), bs('1'), - rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldsb", [bs('0101'), bs('01'), bs('1'), - rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldsh", [bs('0101'), bs('11'), bs('1'), - rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldst", [bs('011'), trb, - bs_ldr_str_name, off5bw, rbl_deref, rdl], [rdl, rbl_deref]) -armtop("ldhsth", - [bs('1000'), bs_ldrh_strh_name, off5h, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldrstr", [bs('0101'), bs_ldr_str_name, trb, bs('0'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("strh", [bs('0101'), bs('00'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldrh", [bs('0101'), bs('10'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsb", [bs('0101'), bs('01'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldsh", [bs('0101'), bs('11'), bs('1'), rol_noarg, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldst", [bs('011'), trb, bs_ldr_str_name, off5bw, rbl_deref, rdl], [rdl, rbl_deref]) +armtop("ldhsth", [bs('1000'), bs_ldrh_strh_name, off5h, rbl_deref, rdl], [rdl, rbl_deref]) armtop("ldstsp", [bs('1001'), bs_ldstsp_name, rdl, offsp8], [rdl, offsp8]) armtop("add", [bs('1010'), sppc, rdl, off8sppc], [rdl, sppc, off8sppc]) armtop("addsp", [bs('10110000'), bs_addsubsp_name, sp, off7], [sp, off7]) -armtop("pushpop", - [bs('1011'), bs_pushpop_name, bs('10'), pclr, trlistpclr], [trlistpclr]) +armtop("pushpop", [bs('1011'), bs_pushpop_name, bs('10'), pclr, trlistpclr], [trlistpclr]) armtop("btransfersp", [bs('1100'), bs_tbtransfer_name, rbl_wb, trlist]) armtop("br", [bs('1101'), bs_br_name, offs8]) armtop("blx", [bs("01000111"), bs('10'), rnl, bs('000')]) @@ -2141,6 +2366,7 @@ armtop("svc", [bs('11011111'), imm8]) armtop("b", [bs('11100'), offs11]) armtop("und", [bs('1101'), bs('1110'), imm8_d1]) +armtop("rev", [bs('10111010'), bs('00'), rsl, rdl], [rdl, rsl]) armtop("uxtb", [bs('10110010'), bs('11'), rml, rdl], [rdl, rml]) armtop("uxth", [bs('10110010'), bs('10'), rml, rdl], [rdl, rml]) @@ -2149,18 +2375,25 @@ armtop("sxth", [bs('10110010'), bs('00'), rml, rdl], [rdl, rml]) # thumb2 ###################### # - # ARM Architecture Reference Manual Thumb-2 Supplement -armt_gpreg_shift_off = Group( - gpregs_nosppc.parser + allshifts_t_armt + base_expr -).setParseAction(shift2expr) +armt_gpreg_shift_off = (gpregs_nosppc.parser + allshifts_t_armt + (gpregs.parser | int_1_31)).setParseAction(cb_shift) + + armt_gpreg_shift_off |= gpregs_nosppc.parser class arm_gpreg_nosppc(arm_reg): reg_info = gpregs_nosppc + parser = reg_info.parser + def decode(self, v): + ret = super(arm_gpreg_nosppc, self).decode(v) + if ret is False: + return False + if self.expr == reg_dum: + return False + return True class armt_gpreg_rm_shift_off(arm_reg): @@ -2171,6 +2404,8 @@ class armt_gpreg_rm_shift_off(arm_reg): if v >= len(gpregs_nosppc.expr): return False r = gpregs_nosppc.expr[v] + if r == reg_dum: + return False i = int(self.parent.imm5_3.value) << 2 i |= int(self.parent.imm5_2.value) @@ -2185,11 +2420,15 @@ class armt_gpreg_rm_shift_off(arm_reg): def encode(self): e = self.expr if isinstance(e, ExprId): - self.value = gpregs_nosppc.index(e) + if e not in gpregs_nosppc.expr: + return False + self.value = gpregs_nosppc.expr.index(e) self.parent.stype.value = 0 self.parent.imm5_3.value = 0 self.parent.imm5_2.value = 0 return True + if not e.is_op(): + return False shift = e.op r = gpregs_nosppc.expr.index(e.args[0]) self.value = r @@ -2239,11 +2478,13 @@ class armt2_imm12(arm_imm): self.expr = ExprInt((v << 24) | (v << 16) | (v << 8) | v, 32) return True r = v >> 7 - v = v & 0xFF + v = 0x80 | (v & 0x7F) self.expr = ExprInt(myror32(v, r), 32) return True def encode(self): + if not self.expr.is_int(): + return False v = int(self.expr) value = None # simple encoding @@ -2265,8 +2506,8 @@ class armt2_imm12(arm_imm): # rol encoding for i in xrange(32): o = myrol32(v, i) - if 0 <= o < 0x100 and o & 0x80: - value = (i << 7) | o + if 0x80 <= o <= 0xFF: + value = (i << 7) | (o & 0x7F) break if value is None: log.debug('cannot encode imm12') @@ -2277,6 +2518,108 @@ class armt2_imm12(arm_imm): return True + + +class armt4_imm12(arm_imm): + + def decode(self, v): + v = v & self.lmask + v |= int(self.parent.imm12_3.value) << 8 + v |= int(self.parent.imm12_1.value) << 11 + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not self.expr.is_int(): + return False + value = int(self.expr) + self.value = value & self.lmask + self.parent.imm12_3.value = (value >> 8) & self.parent.imm12_3.lmask + self.parent.imm12_1.value = (value >> 11) & self.parent.imm12_1.lmask + return True + + + + +class armt2_imm16(arm_imm): + + def decode(self, v): + v = v & self.lmask + v |= int(self.parent.imm16_3.value) << 8 + v |= int(self.parent.imm16_1.value) << 11 + v |= int(self.parent.imm16_4.value) << 12 + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not self.expr.is_int(): + return False + value = int(self.expr) + self.value = value & self.lmask + self.parent.imm16_3.value = (value >> 8) & self.parent.imm16_3.lmask + self.parent.imm16_1.value = (value >> 11) & self.parent.imm16_1.lmask + self.parent.imm16_4.value = (value >> 12) & self.parent.imm16_4.lmask + return True + + +class armt2_lsb5(arm_imm): + + def decode(self, v): + v = v & self.lmask + v |= int(self.parent.lsb5_3.value) << 2 + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not self.expr.is_int(): + return False + value = int(self.expr) + self.value = value & self.lmask + self.parent.lsb5_3.value = (value >> 2) & self.parent.lsb5_3.lmask + return True + + +class armt_widthm1(arm_imm): + parser = base_expr + + def decodeval(self, v): + return v + 1 + + def encodeval(self, v): + if v <= 0: + return False + return v - 1 + + + + +class armt2_off20(arm_imm): + + def decode(self, v): + v = v & self.lmask + v <<= 1 + v |= int(self.parent.off20_6.value) << 12 + v |= int(self.parent.off20_j1.value) << 18 + v |= int(self.parent.off20_j2.value) << 19 + v |= int(self.parent.off20_s.value) << 20 + self.expr = ExprInt(v, 32) + return True + + def encode(self): + if not self.expr.is_int(): + return False + value = int(self.expr) + if value & 1: + return False + self.value = (value >> 1) & self.lmask + self.parent.off20_6.value = (value >> 12) & self.parent.off20_6.lmask + self.parent.off20_j1.value = (value >> 18) & self.parent.off20_j1.lmask + self.parent.off20_j2.value = (value >> 19) & self.parent.off20_j2.lmask + self.parent.off20_s.value = (value >> 20) & self.parent.off20_s.lmask + return True + + + class armt2_imm10l(arm_imm): def decode(self, v): @@ -2289,8 +2632,7 @@ class armt2_imm10l(arm_imm): i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1 - v = (s << 24) | (i1 << 23) | ( - i2 << 22) | (imm10h << 12) | (imm10l << 2) + v = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10h << 12) | (imm10l << 2) v = sign_ext(v, 25, 32) self.expr = ExprInt(v, 32) return True @@ -2305,8 +2647,7 @@ class armt2_imm10l(arm_imm): v = (-v) & 0xffffffff if v > (1 << 26): return False - i1, i2, imm10h, imm10l = (v >> 23) & 1, ( - v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 2) & 0x3ff + i1, i2, imm10h, imm10l = (v >> 23) & 1, (v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 2) & 0x3ff j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1 self.parent.sign.value = s self.parent.j1.value = j1 @@ -2328,24 +2669,24 @@ class armt2_imm11l(arm_imm): i1, i2 = j1 ^ s ^ 1, j2 ^ s ^ 1 - v = (s << 24) | (i1 << 23) | ( - i2 << 22) | (imm10h << 12) | (imm11l << 1) + v = (s << 24) | (i1 << 23) | (i2 << 22) | (imm10h << 12) | (imm11l << 1) v = sign_ext(v, 25, 32) - self.expr = ExprInt(v, 32) + self.expr = ExprInt(v + 4, 32) return True def encode(self): if not isinstance(self.expr, ExprInt): return False - v = self.expr.arg.arg + v = self.expr.arg.arg - 4 s = 0 if v & 0x80000000: s = 1 v = (-v) & 0xffffffff - if v > (1 << 26): + if v >= (1 << 26): + return False + if v & 1: return False - i1, i2, imm10h, imm11l = (v >> 23) & 1, ( - v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 1) & 0x7ff + i1, i2, imm10h, imm11l = (v >> 23) & 1, (v >> 22) & 1, (v >> 12) & 0x3ff, (v >> 1) & 0x7ff j1, j2 = i1 ^ s ^ 1, i2 ^ s ^ 1 self.parent.sign.value = s self.parent.j1.value = j1 @@ -2355,22 +2696,86 @@ class armt2_imm11l(arm_imm): return True + +class armt2_imm6_11l(arm_imm): + + def decode(self, v): + v = v & self.lmask + s = self.parent.sign.value + j1 = self.parent.j1.value + j2 = self.parent.j2.value + imm6h = self.parent.imm6h.value + imm11l = v + + v = (s << 20) | (j2 << 19) | (j1 << 18) | (imm6h << 12) | (imm11l << 1) + v = sign_ext(v, 21, 32) + self.expr = ExprInt(v + 4, 32) + return True + + def encode(self): + if not isinstance(self.expr, ExprInt): + return False + v = self.expr.arg.arg - 4 + s = 0 + if v & 0x80000000: + s = 1 + v = (-v) & 0xffffffff + if v >= (1 << 22): + return False + if v & 1: + return False + i2, i1, imm6h, imm11l = (v >> 19) & 1, (v >> 18) & 1, (v >> 12) & 0x3f, (v >> 1) & 0x7ff + self.parent.sign.value = s + self.parent.j1.value = i1 + self.parent.j2.value = i2 + self.parent.imm6h.value = imm6h + self.value = imm11l + return True + + + imm12_1 = bs(l=1, fname="imm12_1", order=1) imm12_3 = bs(l=3, fname="imm12_3", order=1) imm12_8 = bs(l=8, cls=(armt2_imm12,), fname="imm", order=2) +imm12_8_t4 = bs(l=8, cls=(armt4_imm12,), fname="imm", order=2) + + +imm16_1 = bs(l=1, fname="imm16_1", order=1) +imm16_3 = bs(l=3, fname="imm16_3", order=1) +imm16_4 = bs(l=4, fname="imm16_4", order=1) +imm16_8 = bs(l=8, cls=(armt2_imm16,), fname="imm", order=2) + + imm5_3 = bs(l=3, fname="imm5_3") imm5_2 = bs(l=2, fname="imm5_2") imm_stype = bs(l=2, fname="stype") +imm_stype_00 = bs('00', fname="stype") +imm_stype_11 = bs('11', fname="stype") + + imm1 = bs(l=1, fname="imm1") + +off20_6 = bs(l=6, fname="off20_6", order=1) +off20_11 = bs(l=11, cls=(armt2_off20,), fname="imm", order=2) + + + +lsb5_3 = bs(l=3, fname="lsb5_3", order=1) +lsb5_2 = bs(l=2, cls=(armt2_lsb5,), fname="imm", order=2) + +widthm1 = bs(l=5, cls=(armt_widthm1,), fname="imm", order=2) + + + class armt_imm5_1(arm_imm): def decode(self, v): - v = sign_ext(((self.parent.imm1.value << 5) | v) << 1, 7, 32) + v = ((self.parent.imm1.value << 5) | v) << 1 self.expr = ExprInt(v, 32) return True @@ -2378,8 +2783,8 @@ class armt_imm5_1(arm_imm): if not isinstance(self.expr, ExprInt): return False v = self.expr.arg.arg - if v & 0x80000000: - v &= (1 << 7) - 1 + if v & 0x1: + return False self.parent.imm1.value = (v >> 6) & 1 self.value = (v >> 1) & 0x1f return True @@ -2389,7 +2794,7 @@ aif_expr = [ExprId(x, 32) if x != None else None for x in aif_str] aif_reg = reg_info(aif_str, aif_expr) -class armt_aif(reg_noarg, m_arg): +class armt_aif(reg_noarg, arm_arg): reg_info = aif_reg parser = reg_info.parser @@ -2404,12 +2809,292 @@ class armt_aif(reg_noarg, m_arg): return ret return self.value != 0 - def fromstring(self, s, parser_result=None): - start, stop = super(armt_aif, self).fromstring(s, parser_result) + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(armt_aif, self).fromstring(text, symbol_pool, parser_result) if self.expr.name == "X": return None, None return start, stop + +class armt_it_arg(arm_arg): + arg_E = ExprId('E', 1) + arg_NE = ExprId('NE', 1) + + def decode(self, v): + if v: + return self.arg_E + else: + return self.arg_NE + + def encode(self): + if self.expr == self.arg_E: + return 1 + elif self.expr == self.arg_NE: + return 0 + +class armt_itmask(bs_divert): + prio = 2 + + def divert(self, i, candidates): + out = [] + for cls, _, bases, dct, fields in candidates: + for value in xrange(1, 0x10): + nfields = fields[:] + s = int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f = bs(**args) + nfields[i] = f + inv = nfields[-2].value + ndct = dict(dct) + ndct['name'] = self.modname(ndct['name'], value, inv) + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + def modname(self, name, value, inv): + count = 0 + while value & (1 << count) == 0: + count += 1 + out = [] + values = ['E', 'T'] + if inv== 1: + values.reverse() + for index in xrange(3 - count): + if value & (1 << (3 - index)): + out.append(values[0]) + else: + out.append(values[1]) + return name + "".join(out) + + + +class armt_cond_lsb(bs_divert): + prio = 2 + + def divert(self, i, candidates): + out = [] + for cls, _, bases, dct, fields in candidates: + for value in xrange(2): + nfields = fields[:] + s = int2bin(value, self.args['l']) + args = dict(self.args) + args.update({'strbits': s}) + f = bs(**args) + nfields[i] = f + ndct = dict(dct) + out.append((cls, ndct['name'], bases, ndct, nfields)) + return out + + +cond_expr = [ExprId(x, 32) for x in cond_list_full] +cond_info = reg_info(cond_list_full, cond_expr) + +class armt_cond_arg(arm_arg): + parser = cond_info.parser + + def decode(self, v): + v = (v << 1) | self.parent.condlsb.value + self.expr = ExprId(cond_list_full[v], 32) + return True + + def encode(self): + index = cond_list_full.index(self.expr.name) + self.value = index >> 1 + if index & 1 != self.parent.condlsb.value: + return False + return True + + +class armt_op2imm(arm_imm8_12): + parser = deref + + def str_to_imm_rot_form(self, s, neg=False): + if neg: + s = -s & 0xffffffff + if 0 <= s < (1 << 12): + return s + return None + + def decodeval(self, v): + return v + + def encodeval(self, v): + return v + + def decode(self, v): + val = v & self.lmask + val = self.decodeval(val) + if val is False: + return False + imm = val + if self.parent.updown.value == 0: + imm = -imm + if self.parent.ppi.value == 0 and self.parent.wback.value == 0: + return False + if self.parent.ppi.value: + e = ExprOp('preinc', self.parent.rn.expr, ExprInt(imm, 32)) + if self.parent.wback.value == 1: + e = ExprOp('wback', e) + else: + e = ExprOp('postinc', self.parent.rn.expr, ExprInt(imm, 32)) + self.expr = ExprMem(e, 32) + return True + + def encode(self): + self.parent.updown.value = 1 + self.parent.wback.value = 0 + + e = self.expr + assert(isinstance(e, ExprMem)) + e = e.arg + if e.op == 'wback': + self.parent.wback.value = 1 + e = e.args[0] + if e.op == "postinc": + self.parent.ppi.value = 0 + self.parent.wback.value = 1 + elif e.op == "preinc": + self.parent.ppi.value = 1 + else: + # XXX default + self.parent.ppi.value = 1 + + self.parent.rn.expr = e.args[0] + + if len(e.args) == 1: + self.value = 0 + return True + # pure imm + if isinstance(e.args[1], ExprInt): + val = self.str_to_imm_rot_form(int(e.args[1])) + if val is None: + val = self.str_to_imm_rot_form(int(e.args[1]), True) + if val is None: + log.debug('cannot encode inm') + return False + self.parent.updown.value = 0 + val = self.encodeval(val) + if val is False: + return False + self.value = val + return True + # pure reg + if isinstance(e.args[1], ExprId): + rm = gpregs.expr.index(e.args[1]) + shift_kind = 0 + shift_type = 0 + amount = 0 + val = (((((amount << 2) | shift_type) << 1) | shift_kind) << 4) | rm + val = self.encodeval(val) + if val is False: + return False + self.value = val + return True + return False + + +class armt_op2imm00(armt_op2imm): + + def decodeval(self, v): + return v << 2 + + def encodeval(self, v): + if v & 3: + return False + return v >> 2 + + +class armt_deref_reg(arm_imm8_12): + parser = deref + + def decode(self, v): + base = self.parent.rn.expr + off = gpregs.expr[v] + if self.parent.imm.value != 0: + off = off << ExprInt(self.parent.imm.value, 32) + e = ExprMem(ExprOp('preinc', base, off), 8) + self.expr = e + return True + + def encode(self): + if not isinstance(self.expr, ExprMem): + return False + ptr = self.expr.arg + if not ptr.is_op('preinc'): + return False + if len(ptr.args) != 2: + return False + base, off = ptr.args + if base.is_id() and off.is_id(): + self.parent.rn.expr = base + self.parent.imm.value = 0 + self.value = gpregs.expr.index(off) + elif off.is_int(): + return False + elif off.is_op('<<'): + if len(off.args) != 2: + return False + reg, off = off.args + self.parent.rn.expr = base + self.parent.imm.value = 0 + self.value = gpregs.expr.index(reg) + off = int(off) + if off > self.parent.imm.lmask: + return False + self.parent.imm.value = off + return True + + +class armt_deref_reg_reg(arm_arg): + parser = deref_reg_reg + reg_info = gpregs + + def decode(self, v): + expr = self.reg_info.expr[v] + expr = ExprMem(self.parent.rn.expr + expr, 8) + self.expr = expr + return True + + def encode(self): + expr = self.expr + if not expr.is_mem(): + return False + ptr = expr.arg + if not ptr.is_op('+') or len(ptr.args) != 2: + return False + reg1, reg2 = ptr.args + self.parent.rn.expr = reg1 + self.value = self.reg_info.expr.index(reg2) + return True + + +class armt_deref_reg_reg_lsl_1(arm_reg): + parser = deref_reg_reg_lsl_1 + reg_info = gpregs + + def decode(self, v): + expr = self.reg_info.expr[v] + expr = ExprMem(self.parent.rn.expr + (expr << ExprInt(1, 32)), 16) + self.expr = expr + return True + + def encode(self): + expr = self.expr + if not expr.is_mem(): + return False + ptr = expr.arg + if not ptr.is_op('+') or len(ptr.args) != 2: + return False + reg1, reg_shift = ptr.args + self.parent.rn.expr = reg1 + if not reg_shift.is_op('<<') or len(reg_shift.args) != 2: + return False + if reg_shift.args[1] != ExprInt(1, 32): + return False + self.value = self.reg_info.expr.index(reg_shift.args[0]) + return True + + aif = bs(l=3, cls=(armt_aif,)) @@ -2419,25 +3104,160 @@ tsign = bs(l=1, fname="sign") tj1 = bs(l=1, fname="j1") tj2 = bs(l=1, fname="j2") +timm6h = bs(l=6, fname="imm6h") timm10H = bs(l=10, fname="imm10h") timm10L = bs(l=10, cls=(armt2_imm10l,), fname="imm10l") timm11L = bs(l=11, cls=(armt2_imm11l,), fname="imm11l") +timm6h11l = bs(l=11, cls=(armt2_imm6_11l,), fname="imm6h11l") + +itcond = bs(l=4, fname="itcond") +itmask = armt_itmask(l=4, fname="itmask") +bs_cond_arg_msb = bs(l=3, cls=(armt_cond_arg,)) + + +condlsb = armt_cond_lsb(l=1, fname="condlsb") + +deref_immpuw = bs(l=8, cls=(armt_op2imm,)) +deref_immpuw00 = bs(l=8, cls=(armt_op2imm00,)) + -armtop("adc", [bs('11110'), imm12_1, bs('0'), bs('1010'), scc, rn_nosppc, - bs('0'), imm12_3, rd_nosppc, imm12_8]) -armtop("adc", [bs('11101'), bs('01'), bs('1010'), scc, rn_nosppc, - bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh]) -armtop("bl", [bs('11110'), tsign, timm10H, - bs('11'), tj1, bs('1'), tj2, timm11L]) -armtop("blx", [bs('11110'), tsign, timm10H, - bs('11'), tj1, bs('0'), tj2, timm10L, bs('0')]) +rm_deref_reg = bs(l=4, cls=(armt_deref_reg,)) + +bs_deref_reg_reg = bs(l=4, cls=(armt_deref_reg_reg,)) +bs_deref_reg_reg_lsl_1 = bs(l=4, cls=(armt_deref_reg_reg_lsl_1,)) + + +class armt_barrier_option(reg_noarg, arm_arg): + reg_info = barrier_info + parser = reg_info.parser + + def decode(self, v): + v = v & self.lmask + if v not in self.reg_info.dct_expr: + return False + self.expr = self.reg_info.dct_expr[v] + return True + + def encode(self): + if not self.expr in self.reg_info.dct_expr_inv: + log.debug("cannot encode reg %r", self.expr) + return False + self.value = self.reg_info.dct_expr_inv[self.expr] + if self.value > self.lmask: + log.debug("cannot encode field value %x %x", + self.value, self.lmask) + return False + return True + + def check_fbits(self, v): + return v & self.fmask == self.fbits + +barrier_option = bs(l=4, cls=(armt_barrier_option,)) + +armtop("adc", [bs('11110'), imm12_1, bs('0'), bs('1010'), scc, rn_nosppc, bs('0'), imm12_3, rd_nosppc, imm12_8]) +armtop("adc", [bs('11101'), bs('01'), bs('1010'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh]) +armtop("bl", [bs('11110'), tsign, timm10H, bs('11'), tj1, bs('1'), tj2, timm11L]) +armtop("blx", [bs('11110'), tsign, timm10H, bs('11'), tj1, bs('0'), tj2, timm10L, bs('0')]) armtop("cbz", [bs('101100'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) armtop("cbnz", [bs('101110'), imm1, bs('1'), imm5_off, rnl], [rnl, imm5_off]) armtop("bkpt", [bs('1011'), bs('1110'), imm8]) + +armtop("it", [bs('10111111'), bs_cond_arg_msb, condlsb, itmask]) + + armtop("nop", [bs8(0xBF),bs8(0x0)]) armtop("wfi", [bs8(0xBF),bs8(0x30)]) armtop("cpsid", [bs8(0xB6),bs('0111'), bs('0'), aif], [aif]) armtop("cpsie", [bs8(0xB6),bs('0110'), bs('0'), aif], [aif]) + +armtop("push", [bs('1110100'), bs('10'), bs('0'), bs('1'), bs('0'), bs('1101'), bs('0'), pclr, bs('0'), trlist13], [trlist13]) +armtop("pop", [bs('1110100'), bs('01'), bs('0'), bs('1'), bs('1'), bs('1101'), pc_in, lr_in, bs('0'), trlist13pclr], [trlist13pclr]) +armtop("mov", [bs('11110'), imm12_1, bs('00010'), scc, bs('1111'), bs('0'), imm12_3, rd_nosppc, imm12_8]) +armtop("asr", [bs('11111010'), bs('0100'), rm, bs('1111'), rd, bs('0000'), rs], [rd, rm, rs]) +armtop("lsl", [bs('11111010'), bs('0000'), rm, bs('1111'), rd, bs('0000'), rs], [rd, rm, rs]) +armtop("sel", [bs('11111010'), bs('1010'), rm, bs('1111'), rd, bs('1000'), rs], [rd, rm, rs]) +armtop("rev", [bs('11111010'), bs('1001'), rm, bs('1111'), rd, bs('1000'), rm_cp], [rd, rm]) +armtop("uadd8", [bs('111110101000'), rn, bs('1111'), rd, bs('0100'), rm], [rd, rn, rm]) +armtop("mvn", [bs('11101010011'), scc, bs('11110'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh] ) +armtop("and", [bs('11101010000'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("orr", [bs('11101010010'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("bic", [bs('11101010001'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("add", [bs('11101011000'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("sub", [bs('11101011101'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("eor", [bs('11101010100'), scc, rn_nosppc, bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype, rm_sh], [rd_nosppc, rn_nosppc, rm_sh] ) +armtop("rsb", [bs('11101011110'), scc, rn, bs('0'), imm5_3, rd, imm5_2, imm_stype, rm_sh], [rd, rn, rm_sh] ) +armtop("orn", [bs('11101010011'), scc, rn_nopc, bs('0'), imm5_3, rd, imm5_2, imm_stype, rm_sh], [rd, rn_nopc, rm_sh] ) +# lsl +armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_00, rm_sh], [rd_nosppc, rm_sh] ) +armtop("mov", [bs('11101010010'), scc, bs('1111'), bs('0'), imm5_3, rd_nosppc, imm5_2, imm_stype_11, rm_sh], [rd_nosppc, rm_sh] ) + + +armtop("orr", [bs('11110'), imm12_1, bs('00010'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8] ) +armtop("add", [bs('11110'), imm12_1, bs('01000'), scc, rn, bs('0'), imm12_3, rd, imm12_8], [rd, rn, imm12_8]) +armtop("bic", [bs('11110'), imm12_1, bs('00001'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8], [rd, rn_nosppc, imm12_8]) +armtop("and", [bs('11110'), imm12_1, bs('00000'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("sub", [bs('11110'), imm12_1, bs('01101'), scc, rn, bs('0'), imm12_3, rd_nopc, imm12_8], [rd_nopc, rn, imm12_8]) +armtop("add", [bs('11110'), imm12_1, bs('10000'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8_t4], [rd, rn_nosppc, imm12_8_t4]) +armtop("cmp", [bs('11110'), imm12_1, bs('01101'), bs('1'), rn, bs('0'), imm12_3, bs('1111'), imm12_8] ) + + +armtop("mvn", [bs('11110'), imm12_1, bs('00011'), scc, bs('1111'), bs('0'), imm12_3, rd, imm12_8]) +armtop("rsb", [bs('11110'), imm12_1, bs('01110'), scc, rn_nosppc, bs('0'), imm12_3, rd, imm12_8], [rd, rn_nosppc, imm12_8]) +armtop("sub", [bs('11110'), imm12_1, bs('101010'), rn_nosppc, bs('0'), imm12_3, rd, imm12_8_t4], [rd, rn_nosppc, imm12_8_t4]) +armtop("tst", [bs('11110'), imm12_1, bs('000001'), rn, bs('0'), imm12_3, bs('1111'), imm12_8], [rn, imm12_8]) + +armtop("mov", [bs('11110'), imm16_1, bs('100100'), imm16_4, bs('0'), imm16_3, rd, imm16_8] ) +armtop("movt", [bs('11110'), imm16_1, bs('101100'), imm16_4, bs('0'), imm16_3, rd, imm16_8] ) + +armtop("sdiv", [bs('111110111001'), rn, bs('1111'), rd, bs('1111'), rm], [rd, rn, rm] ) +armtop("udiv", [bs('111110111011'), rn, bs('1111'), rd, bs('1111'), rm], [rd, rn, rm] ) +armtop("mls", [bs('111110110000'), rn, ra, rd, bs('0001'), rm], [rd, rn, rm, ra] ) +armtop("mla", [bs('111110110000'), rn, ra_nopc, rd, bs('0000'), rm], [rd, rn, rm, ra_nopc] ) +armtop("mul", [bs('111110110000'), rn, bs('1111'), rd, bs('0000'), rm], [rd, rn, rm] ) + +armtop("smlabb", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('00'), rm], [rd, rn, rm, ra_nopc]) +armtop("smlabt", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('01'), rm], [rd, rn, rm, ra_nopc]) +armtop("smlatb", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('10'), rm], [rd, rn, rm, ra_nopc]) +armtop("smlatt", [bs('111110110001'), rn, ra_nopc, rd, bs('00'), bs('11'), rm], [rd, rn, rm, ra_nopc]) + +armtop("b", [bs('11110'), tsign, bm_cond_barmt, timm6h, bs('10'), tj1, bs('0'), tj2, timm6h11l], [timm6h11l]) +armtop("b", [bs('11110'), tsign, timm10H, bs('10'), tj1, bs('1'), tj2, timm11L], [timm11L]) + +armtop("ubfx", [bs('111100111100'), rn, bs('0'), lsb5_3, rd, lsb5_2, bs('0'), widthm1], [rd, rn, lsb5_2, widthm1]) +armtop("uxth", [bs('111110100001'), bs('1111'), bs('1111'), rd, bs('10'), rot2, rm_rot2], [rd, rm_rot2]) + + + +armtop("str", [bs('111110001100'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("str", [bs('111110000100'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) +armtop("str", [bs('111110000100'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("strb", [bs('111110001000'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("strb", [bs('111110000000'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("strh", [bs('111110001010'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("strh", [bs('111110000010'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) + +armtop("strd", [bs('1110100'), ppi, updown, bs('1'), wback_no_t, bs('0'), rn_nopc_noarg, rt, rt2, deref_immpuw00], [rt, rt2, deref_immpuw00]) +armtop("ldrd", [bs('1110100'), ppi, updown, bs('1'), wback_no_t, bs('1'), rn_nopc_noarg, rt, rt2, deref_immpuw00], [rt, rt2, deref_immpuw00]) + + +armtop("ldr", [bs('111110001101'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldr", [bs('111110000101'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("ldr", [bs('111110000101'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) +armtop("ldrb", [bs('111110000001'), rn_noarg, rt, bs('000000'), imm2_noarg, rm_deref_reg], [rt, rm_deref_reg]) +armtop("ldrb", [bs('111110000001'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) +armtop("ldrb", [bs('111110001001'), rn_deref, rt_nopc, off12], [rt_nopc, rn_deref]) +armtop("ldrsb",[bs('111110011001'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldrsh",[bs('111110011011'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldrh", [bs('111110001011'), rn_deref, rt, off12], [rt, rn_deref]) +armtop("ldrh", [bs('111110000011'), rn_noarg, rt, bs('1'), ppi, updown, wback_no_t, deref_immpuw], [rt, deref_immpuw]) + +armtop("pld", [bs('111110001001'), rn_deref, bs('1111'), off12], [rn_deref]) +armtop("pldw", [bs('111110001011'), rn_deref, bs('1111'), off12], [rn_deref]) + +armtop("clz", [bs('111110101011'), rm, bs('1111'), rd, bs('1000'), rm_cp], [rd, rm]) +armtop("tbb", [bs('111010001101'), rn_noarg, bs('11110000000'), bs('0'), bs_deref_reg_reg], [bs_deref_reg_reg]) +armtop("tbh", [bs('111010001101'), rn_noarg, bs('11110000000'), bs('1'), bs_deref_reg_reg_lsl_1], [bs_deref_reg_reg_lsl_1]) +armtop("dsb", [bs('111100111011'), bs('1111'), bs('1000'), bs('1111'), bs('0100'), barrier_option]) diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py index bfa9bad2..cfcb294c 100644 --- a/miasm2/arch/arm/ira.py +++ b/miasm2/arch/arm/ira.py @@ -47,11 +47,11 @@ class ir_a_armb(ir_a_armb_base, ir_a_arml): class ir_a_armtl(ir_armtl, ir_a_arml): - def __init__(self, symbol_pool): + def __init__(self, symbol_pool=None): ir_armtl.__init__(self, symbol_pool) self.ret_reg = self.arch.regs.R0 class ir_a_armtb(ir_a_armtl, ir_armtb, ir_a_armb): - def __init__(self, symbol_pool): + def __init__(self, symbol_pool=None): ir_armtb.__init__(self, symbol_pool) self.ret_reg = self.arch.regs.R0 diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index b07f2a38..1a37b7f1 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -3,7 +3,10 @@ import logging from miasm2.jitter.jitload import jitter, named_arguments from miasm2.core import asmblock from miasm2.core.utils import pck32, upck32 -from miasm2.arch.arm.sem import ir_armb, ir_arml +from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_inv, tab_cond +from miasm2.jitter.codegen import CGen +from miasm2.expression.expression import ExprId, ExprAff, ExprCond +from miasm2.ir.ir import IRBlock, AssignBlock log = logging.getLogger('jit_arm') hnd = logging.StreamHandler() @@ -11,7 +14,49 @@ hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s")) log.addHandler(hnd) log.setLevel(logging.CRITICAL) + + +class arm_CGen(CGen): + def __init__(self, ir_arch): + self.ir_arch = ir_arch + self.PC = self.ir_arch.arch.regs.PC + self.init_arch_C() + + + def block2assignblks(self, block): + """ + Return the list of irblocks for a native @block + @block: AsmBlock + """ + irblocks_list = [] + index = -1 + while index + 1 < len(block.lines): + index += 1 + instr = block.lines[index] + + if instr.name.startswith("IT"): + assignments = [] + label = self.ir_arch.get_instr_label(instr) + irblocks = [] + index, irblocks = self.ir_arch.do_it_block(label, index, block, assignments, True) + irblocks_list += irblocks + continue + + + assignblk_head, assignblks_extra = self.ir_arch.instr2ir(instr) + # Keep result in ordered list as first element is the assignblk head + # The remainings order is not really important + irblock_head = self.assignblk_to_irbloc(instr, assignblk_head) + irblocks = [irblock_head] + assignblks_extra + + for irblock in irblocks: + assert irblock.dst is not None + irblocks_list.append(irblocks) + return irblocks_list + + class jitter_arml(jitter): + C_Gen = arm_CGen def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() @@ -69,9 +114,20 @@ class jitter_arml(jitter): jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc + class jitter_armb(jitter_arml): + C_Gen = arm_CGen def __init__(self, *args, **kwargs): sp = asmblock.AsmSymbolPool() jitter.__init__(self, ir_armb(sp), *args, **kwargs) self.vm.set_big_endian() + + +class jitter_armtl(jitter_arml): + C_Gen = arm_CGen + + def __init__(self, *args, **kwargs): + sp = asmblock.AsmSymbolPool() + jitter.__init__(self, ir_armtl(sp), *args, **kwargs) + self.vm.set_little_endian() diff --git a/miasm2/arch/arm/regs.py b/miasm2/arch/arm/regs.py index 8587d7c2..dce4cb98 100644 --- a/miasm2/arch/arm/regs.py +++ b/miasm2/arch/arm/regs.py @@ -63,9 +63,27 @@ of_init = ExprId("of_init", size=1) cf_init = ExprId("cf_init", size=1) +reg_ge0 = 'ge0' +reg_ge1 = 'ge1' +reg_ge2 = 'ge2' +reg_ge3 = 'ge3' + +ge0 = ExprId(reg_ge0, size=1) +ge1 = ExprId(reg_ge1, size=1) +ge2 = ExprId(reg_ge2, size=1) +ge3 = ExprId(reg_ge3, size=1) + +ge0_init = ExprId("ge0_init", size=1) +ge1_init = ExprId("ge1_init", size=1) +ge2_init = ExprId("ge2_init", size=1) +ge3_init = ExprId("ge3_init", size=1) + +ge_regs = [ge0, ge1, ge2, ge3] + all_regs_ids = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, SP, LR, PC, zf, nf, of, cf, + ge0, ge1, ge2, ge3, exception_flags, bp_num ] @@ -83,6 +101,7 @@ all_regs_ids_init = [R0_init, R1_init, R2_init, R3_init, R8_init, R9_init, R10_init, R11_init, R12_init, SP_init, LR_init, PC_init, zf_init, nf_init, of_init, cf_init, + ge0_init, ge1_init, ge2_init, ge3_init, ExprInt(0, 32), ExprInt(0, 32) ] diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 395eb1cb..9e4da3f6 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -3,6 +3,7 @@ from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock from miasm2.arch.arm.arch import mn_arm, mn_armt from miasm2.arch.arm.regs import * +from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO # liris.cnrs.fr/~mmrissa/lib/exe/fetch.php?media=armv7-a-r-manual.pdf EXCEPT_SOFT_BP = (1 << 1) @@ -114,7 +115,7 @@ def adc(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def add(ir, instr, a, b, c=None): @@ -129,7 +130,7 @@ def add(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def l_and(ir, instr, a, b, c=None): @@ -143,7 +144,7 @@ def l_and(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def sub(ir, instr, a, b, c=None): @@ -155,7 +156,7 @@ def sub(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def subs(ir, instr, a, b, c=None): @@ -169,7 +170,7 @@ def subs(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def eor(ir, instr, a, b, c=None): @@ -181,7 +182,7 @@ def eor(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def eors(ir, instr, a, b, c=None): @@ -194,7 +195,7 @@ def eors(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def rsb(ir, instr, a, b, c=None): @@ -206,7 +207,7 @@ def rsb(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def rsbs(ir, instr, a, b, c=None): @@ -220,7 +221,7 @@ def rsbs(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def sbc(ir, instr, a, b, c=None): @@ -232,7 +233,7 @@ def sbc(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def sbcs(ir, instr, a, b, c=None): @@ -246,7 +247,7 @@ def sbcs(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def rsc(ir, instr, a, b, c=None): @@ -258,7 +259,7 @@ def rsc(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def rscs(ir, instr, a, b, c=None): @@ -273,16 +274,14 @@ def rscs(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] -def tst(ir, instr, a, b, c=None): +def tst(ir, instr, a, b): e = [] - if c is None: - b, c = a, b - r = b & c + r = a & b e += update_flag_logic(r) - return e + return e, [] def teq(ir, instr, a, b, c=None): @@ -291,7 +290,7 @@ def teq(ir, instr, a, b, c=None): b, c = a, b r = b ^ c e += update_flag_logic(r) - return e + return e, [] def l_cmp(ir, instr, a, b, c=None): @@ -301,7 +300,7 @@ def l_cmp(ir, instr, a, b, c=None): r = b - c e += update_flag_arith(r) e += update_flag_sub(b, c, r) - return e + return e, [] def cmn(ir, instr, a, b, c=None): @@ -311,7 +310,7 @@ def cmn(ir, instr, a, b, c=None): r = b + c e += update_flag_arith(r) e += update_flag_add(b, c, r) - return e + return e, [] def orr(ir, instr, a, b, c=None): @@ -323,7 +322,19 @@ def orr(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] + + +def orn(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + r = ~(b | c) + e.append(ExprAff(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAff(ir.IRDst, r)) + return e, [] def orrs(ir, instr, a, b, c=None): @@ -336,7 +347,7 @@ def orrs(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def mov(ir, instr, a, b): @@ -344,7 +355,7 @@ def mov(ir, instr, a, b): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, b)) - return e + return e, [] def movt(ir, instr, a, b): @@ -353,7 +364,7 @@ def movt(ir, instr, a, b): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def movs(ir, instr, a, b): @@ -364,7 +375,7 @@ def movs(ir, instr, a, b): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, b)) - return e + return e, [] def mvn(ir, instr, a, b): @@ -373,7 +384,7 @@ def mvn(ir, instr, a, b): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def mvns(ir, instr, a, b): @@ -385,7 +396,7 @@ def mvns(ir, instr, a, b): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def neg(ir, instr, a, b): @@ -395,11 +406,10 @@ def neg(ir, instr, a, b): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def negs(ir, instr, a, b): - e = subs(ir, instr, a, ExprInt(0, b.size), b) - return e + return subs(ir, instr, a, ExprInt(0, b.size), b) def bic(ir, instr, a, b, c=None): e = [] @@ -410,7 +420,7 @@ def bic(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def bics(ir, instr, a, b, c=None): @@ -423,7 +433,70 @@ def bics(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] + + +def sdiv(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + + lbl_div = ExprId(ir.gen_label(), ir.IRDst.size) + lbl_except = ExprId(ir.gen_label(), ir.IRDst.size) + lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + + e.append(ExprAff(ir.IRDst, ExprCond(c, lbl_div, lbl_except))) + + do_except = [] + do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(ExprAff(ir.IRDst, lbl_next)) + blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + + + + r = ExprOp("idiv", b, c) + do_div = [] + do_div.append(ExprAff(a, r)) + dst = get_dst(a) + if dst is not None: + do_div.append(ExprAff(ir.IRDst, r)) + + do_div.append(ExprAff(ir.IRDst, lbl_next)) + blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + + return e, [blk_div, blk_except] + + +def udiv(ir, instr, a, b, c=None): + e = [] + if c is None: + b, c = a, b + + + + lbl_div = ExprId(ir.gen_label(), ir.IRDst.size) + lbl_except = ExprId(ir.gen_label(), ir.IRDst.size) + lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + + e.append(ExprAff(ir.IRDst, ExprCond(c, lbl_div, lbl_except))) + + do_except = [] + do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(ExprAff(ir.IRDst, lbl_next)) + blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + + + r = ExprOp("udiv", b, c) + do_div = [] + do_div.append(ExprAff(a, r)) + dst = get_dst(a) + if dst is not None: + do_div.append(ExprAff(ir.IRDst, r)) + + do_div.append(ExprAff(ir.IRDst, lbl_next)) + blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + + return e, [blk_div, blk_except] def mla(ir, instr, a, b, c, d): @@ -433,7 +506,7 @@ def mla(ir, instr, a, b, c, d): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def mlas(ir, instr, a, b, c, d): @@ -444,10 +517,20 @@ def mlas(ir, instr, a, b, c, d): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] + + +def mls(ir, instr, a, b, c, d): + e = [] + r = d - (b * c) + e.append(ExprAff(a, r)) + dst = get_dst(a) + if dst is not None: + e.append(ExprAff(ir.IRDst, r)) + return e, [] -def mul(ir, instr, a, b, c = None): +def mul(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -456,10 +539,10 @@ def mul(ir, instr, a, b, c = None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] -def muls(ir, instr, a, b, c = None): +def muls(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -469,7 +552,7 @@ def muls(ir, instr, a, b, c = None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def umull(ir, instr, a, b, c, d): e = [] @@ -477,7 +560,7 @@ def umull(ir, instr, a, b, c, d): e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output - return e + return e, [] def umlal(ir, instr, a, b, c, d): e = [] @@ -485,7 +568,7 @@ def umlal(ir, instr, a, b, c, d): e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output - return e + return e, [] def smull(ir, instr, a, b, c, d): e = [] @@ -493,7 +576,7 @@ def smull(ir, instr, a, b, c, d): e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output - return e + return e, [] def smlal(ir, instr, a, b, c, d): e = [] @@ -501,13 +584,13 @@ def smlal(ir, instr, a, b, c, d): e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output - return e + return e, [] def b(ir, instr, a): e = [] e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) - return e + return e, [] def bl(ir, instr, a): @@ -516,14 +599,14 @@ def bl(ir, instr, a): e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) e.append(ExprAff(LR, l)) - return e + return e, [] def bx(ir, instr, a): e = [] e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) - return e + return e, [] def blx(ir, instr, a): @@ -532,10 +615,10 @@ def blx(ir, instr, a): e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) e.append(ExprAff(LR, l)) - return e + return e, [] -def st_ld_r(ir, instr, a, b, store=False, size=32, s_ext=False, z_ext=False): +def st_ld_r(ir, instr, a, a2, b, store=False, size=32, s_ext=False, z_ext=False): e = [] wb = False b = b.copy() @@ -558,6 +641,9 @@ def st_ld_r(ir, instr, a, b, store=False, size=32, s_ext=False, z_ext=False): else: ad = base + off + # PC base lookup uses PC 4 byte alignemnt + ad = ad.replace_expr({PC: PC & ExprInt(0xFFFFFFFC, 32)}) + dmem = False if size in [8, 16]: if store: @@ -573,9 +659,9 @@ def st_ld_r(ir, instr, a, b, store=False, size=32, s_ext=False, z_ext=False): m = ExprMem(ad, size=size) pass elif size == 64: + assert a2 is not None m = ExprMem(ad, size=32) dmem = True - a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] size = 32 else: raise ValueError('the size DOES matter') @@ -596,55 +682,53 @@ def st_ld_r(ir, instr, a, b, store=False, size=32, s_ext=False, z_ext=False): # XXX TODO check multiple write cause by wb if wb or postinc: e.append(ExprAff(base, base + off)) - return e + return e, [] def ldr(ir, instr, a, b): - return st_ld_r(ir, instr, a, b, store=False) + return st_ld_r(ir, instr, a, None, b, store=False) -def ldrd(ir, instr, a, b): - e = st_ld_r(ir, instr, a, b, store=False, size=64) - return e +def ldrd(ir, instr, a, b, c=None): + if c is None: + a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] + else: + a2 = b + b = c + return st_ld_r(ir, instr, a, a2, b, store=False, size=64) def l_str(ir, instr, a, b): - return st_ld_r(ir, instr, a, b, store=True) + return st_ld_r(ir, instr, a, None, b, store=True) -def l_strd(ir, instr, a, b): - e = st_ld_r(ir, instr, a, b, store=True, size=64) - return e - +def l_strd(ir, instr, a, b, c=None): + if c is None: + a2 = ir.arch.regs.all_regs_ids[ir.arch.regs.all_regs_ids.index(a) + 1] + else: + a2 = b + b = c + return st_ld_r(ir, instr, a, a2, b, store=True, size=64) def ldrb(ir, instr, a, b): - e = st_ld_r(ir, instr, a, b, store=False, size=8, z_ext=True) - return e + return st_ld_r(ir, instr, a, None, b, store=False, size=8, z_ext=True) def ldrsb(ir, instr, a, b): - e = st_ld_r( - ir, instr, a, b, store=False, size=8, s_ext=True, z_ext=False) - return e + return st_ld_r(ir, instr, a, None, b, store=False, size=8, s_ext=True, z_ext=False) def strb(ir, instr, a, b): - e = st_ld_r(ir, instr, a, b, store=True, size=8) - return e - + return st_ld_r(ir, instr, a, None, b, store=True, size=8) def ldrh(ir, instr, a, b): - e = st_ld_r(ir, instr, a, b, store=False, size=16, z_ext=True) - return e + return st_ld_r(ir, instr, a, None, b, store=False, size=16, z_ext=True) def strh(ir, instr, a, b): - e = st_ld_r(ir, instr, a, b, store=True, size=16, z_ext=True) - return e + return st_ld_r(ir, instr, a, None, b, store=True, size=16, z_ext=True) def ldrsh(ir, instr, a, b): - e = st_ld_r( - ir, instr, a, b, store=False, size=16, s_ext=True, z_ext=False) - return e + return st_ld_r(ir, instr, a, None, b, store=False, size=16, s_ext=True, z_ext=False) def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): @@ -688,7 +772,7 @@ def st_ld_m(ir, instr, a, b, store=False, postinc=False, updown=False): else: assert(isinstance(b, ExprOp) and b.op == "reglist") - return e + return e, [] def ldmia(ir, instr, a, b): @@ -727,16 +811,16 @@ def svc(ir, instr, a): # XXX TODO implement e = [ ExprAff(exception_flags, ExprInt(EXCEPT_PRIV_INSN, 32))] - return e + return e, [] def und(ir, instr, a, b): # XXX TODO implement e = [] - return e + return e, [] # TODO XXX implement correct CF for shifters -def lsr(ir, instr, a, b, c = None): +def lsr(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -745,10 +829,10 @@ def lsr(ir, instr, a, b, c = None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] -def lsrs(ir, instr, a, b, c = None): +def lsrs(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -758,7 +842,7 @@ def lsrs(ir, instr, a, b, c = None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def asr(ir, instr, a, b, c=None): e = [] @@ -769,9 +853,9 @@ def asr(ir, instr, a, b, c=None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] -def asrs(ir, instr, a, b, c): +def asrs(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -781,9 +865,9 @@ def asrs(ir, instr, a, b, c): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] -def lsl(ir, instr, a, b, c = None): +def lsl(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -792,10 +876,10 @@ def lsl(ir, instr, a, b, c = None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] -def lsls(ir, instr, a, b, c = None): +def lsls(ir, instr, a, b, c=None): e = [] if c is None: b, c = a, b @@ -805,18 +889,29 @@ def lsls(ir, instr, a, b, c = None): dst = get_dst(a) if dst is not None: e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] + + +def rors(ir, instr, a, b): + e = [] + r = ExprOp(">>>", a, b) + e.append(ExprAff(a, r)) + e += update_flag_logic(r) + dst = get_dst(a) + if dst is not None: + e.append(ExprAff(ir.IRDst, r)) + return e, [] def push(ir, instr, a): e = [] regs = list(a.args) for i in xrange(len(regs)): - r = SP + ExprInt(-4 * (i + 1), 32) + r = SP + ExprInt(-4 * len(regs) + 4 * i, 32) e.append(ExprAff(ExprMem(r, 32), regs[i])) r = SP + ExprInt(-4 * len(regs), 32) e.append(ExprAff(SP, r)) - return e + return e, [] def pop(ir, instr, a): @@ -832,21 +927,21 @@ def pop(ir, instr, a): e.append(ExprAff(SP, r)) if dst is not None: e.append(ExprAff(ir.IRDst, dst)) - return e + return e, [] def cbz(ir, instr, a, b): e = [] lbl_next = ExprId(ir.get_next_label(instr), 32) e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next, b))) - return e + return e, [] def cbnz(ir, instr, a, b): e = [] lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ir.IRDst, ExprCond(a, b, lbl_next)) - return e + e.append(ExprAff(ir.IRDst, ExprCond(a, b, lbl_next))) + return e, [] @@ -858,7 +953,7 @@ def uxtb(ir, instr, a, b): if PC in a.get_r(): dst = PC e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def uxth(ir, instr, a, b): e = [] @@ -868,7 +963,7 @@ def uxth(ir, instr, a, b): if PC in a.get_r(): dst = PC e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def sxtb(ir, instr, a, b): e = [] @@ -878,7 +973,7 @@ def sxtb(ir, instr, a, b): if PC in a.get_r(): dst = PC e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def sxth(ir, instr, a, b): e = [] @@ -888,7 +983,7 @@ def sxth(ir, instr, a, b): if PC in a.get_r(): dst = PC e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def ubfx(ir, instr, a, b, c, d): @@ -901,7 +996,7 @@ def ubfx(ir, instr, a, b, c, d): if PC in a.get_r(): dst = PC e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def bfc(ir, instr, a, b, c): e = [] @@ -923,34 +1018,41 @@ def bfc(ir, instr, a, b, c): if PC in a.get_r(): dst = PC e.append(ExprAff(ir.IRDst, r)) - return e + return e, [] def rev(ir, instr, a, b): e = [] c = ExprCompose(b[24:32], b[16:24], b[8:16], b[:8]) e.append(ExprAff(a, c)) - return e + return e, [] def pld(ir, instr, a): - return [] + e = [] + return e, [] + + +def pldw(ir, instr, a): + e = [] + return e, [] def clz(ir, instr, a, b): e = [] e.append(ExprAff(a, ExprOp('clz', b))) - return e + return e, [] def uxtab(ir, instr, a, b, c): e = [] e.append(ExprAff(a, b + (c & ExprInt(0xff, 32)))) - return e + return e, [] def bkpt(ir, instr, a): e = [] e.append(ExprAff(exception_flags, ExprInt(EXCEPT_SOFT_BP, 32))) e.append(ExprAff(bp_num, a)) - return e + return e, [] + def _extract_s16(arg, part): if part == 'B': # bottom 16 bits @@ -958,12 +1060,131 @@ def _extract_s16(arg, part): elif part == 'T': # top 16 bits return arg[16:32] + def smul(ir, instr, a, b, c): - return [ExprAff(a, _extract_s16(b, instr.name[4]).signExtend(32) * _extract_s16(c, instr.name[5]).signExtend(32))] + e = [] + e.append(ExprAff(a, _extract_s16(b, instr.name[4]).signExtend(32) * _extract_s16(c, instr.name[5]).signExtend(32))) + return e, [] + def smulw(ir, instr, a, b, c): + e = [] prod = b.signExtend(48) * _extract_s16(c, instr.name[5]).signExtend(48) - return [ExprAff(a, prod[16:48])] # signed most significant 32 bits of the 48-bit result + e.append(ExprAff(a, prod[16:48])) + return e, [] # signed most significant 32 bits of the 48-bit result + + +def tbb(ir, instr, a): + e = [] + dst = PC + ExprInt(2, 32) * a.zeroExtend(32) + e.append(ExprAff(PC, dst)) + e.append(ExprAff(ir.IRDst, dst)) + return e, [] + + +def tbh(ir, instr, a): + e = [] + dst = PC + ExprInt(2, 32) * a.zeroExtend(32) + e.append(ExprAff(PC, dst)) + e.append(ExprAff(ir.IRDst, dst)) + return e, [] + + +def smlabb(ir, instr, a, b, c, d): + e = [] + result = (b[:16].signExtend(32) * c[:16].signExtend(32)) + d + e.append(ExprAff(a, result)) + return e, [] + + +def smlabt(ir, instr, a, b, c, d): + e = [] + result = (b[:16].signExtend(32) * c[16:32].signExtend(32)) + d + e.append(ExprAff(a, result)) + return e, [] + + +def smlatb(ir, instr, a, b, c, d): + e = [] + result = (b[16:32].signExtend(32) * c[:16].signExtend(32)) + d + e.append(ExprAff(a, result)) + return e, [] + + +def smlatt(ir, instr, a, b, c, d): + e = [] + result = (b[16:32].signExtend(32) * c[16:32].signExtend(32)) + d + e.append(ExprAff(a, result)) + return e, [] + + +def uadd8(ir, instr, a, b, c): + e = [] + sums = [] + ges = [] + for i in xrange(0, 32, 8): + sums.append(b[i:i+8] + c[i:i+8]) + ges.append((b[i:i+8].zeroExtend(9) + c[i:i+8].zeroExtend(9))[8:9]) + + e.append(ExprAff(a, ExprCompose(*sums))) + + for i, value in enumerate(ges): + e.append(ExprAff(ge_regs[i], value)) + return e, [] + + +def sel(ir, instr, a, b, c): + e = [] + cond = nf ^ of ^ ExprInt(1, 1) + parts = [] + for i in xrange(4): + parts.append(ExprCond(ge_regs[i], b[i*8:(i+1)*8], c[i*8:(i+1)*8])) + result = ExprCompose(*parts) + e.append(ExprAff(a, result)) + return e, [] + + +def rev(ir, instr, a, b): + e = [] + result = ExprCompose(b[24:32], b[16:24], b[8:16], b[:8]) + e.append(ExprAff(a, result)) + return e, [] + + +def nop(ir, instr): + e = [] + return e, [] + + +def dsb(ir, instr, a): + # XXX TODO + e = [] + return e, [] + + +def cpsie(ir, instr, a): + # XXX TODO + e = [] + return e, [] + + +def cpsid(ir, instr, a): + # XXX TODO + e = [] + return e, [] + + +def wfe(ir, instr): + # XXX TODO + e = [] + return e, [] + + +def wfi(ir, instr): + # XXX TODO + e = [] + return e, [] + COND_EQ = 0 COND_NE = 1 @@ -1001,6 +1222,7 @@ cond_dct = { # COND_NV: "NV", } +cond_dct_inv = dict((name, num) for num, name in cond_dct.iteritems()) tab_cond = {COND_EQ: zf, COND_NE: ExprCond(zf, ExprInt(0, 1), ExprInt(1, 1)), @@ -1035,9 +1257,9 @@ def is_pc_written(ir, instr_ir): return False, None -def add_condition_expr(ir, instr, cond, instr_ir): +def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): if cond == COND_AL: - return instr_ir, [] + return instr_ir, extra_ir if not cond in tab_cond: raise ValueError('unknown condition %r' % cond) cond = tab_cond[cond] @@ -1057,7 +1279,7 @@ def add_condition_expr(ir, instr, cond, instr_ir): instr_ir.append(ExprAff(ir.IRDst, lbl_next)) e_do = IRBlock(lbl_do.name, [AssignBlock(instr_ir, instr)]) e = [ExprAff(ir.IRDst, dst_cond)] - return e, [e_do] + return e, [e_do] + extra_ir mnemo_func = {} mnemo_func_cond = {} @@ -1081,6 +1303,9 @@ mnemo_condm0 = {'add': add, 'mvn': mvn, 'neg': neg, + 'sdiv': sdiv, + 'udiv': udiv, + 'mul': mul, 'umull': umull, 'umlal': umlal, @@ -1134,6 +1359,7 @@ mnemo_condm1 = {'adds': add, 'negs': negs, 'muls': muls, + 'mls': mls, 'mlas': mlas, 'blx': blx, @@ -1170,6 +1396,7 @@ mnemo_nocond = {'lsr': lsr, 'lsrs': lsrs, 'lsl': lsl, 'lsls': lsls, + 'rors': rors, 'push': push, 'pop': pop, 'asr': asr, @@ -1177,7 +1404,24 @@ mnemo_nocond = {'lsr': lsr, 'cbz': cbz, 'cbnz': cbnz, 'pld': pld, + 'pldw': pldw, + 'tbb': tbb, + 'tbh': tbh, + 'nop': nop, + 'dsb': dsb, + 'cpsie': cpsie, + 'cpsid': cpsid, + 'wfe': wfe, + 'wfi': wfi, + 'orn': orn, + 'smlabb': smlabb, + 'smlabt': smlabt, + 'smlatb': smlatb, + 'smlatt': smlatt, + 'uadd8': uadd8, + 'sel': sel, } + mn_cond_x = [mnemo_condm0, mnemo_condm1, mnemo_condm2] @@ -1215,8 +1459,8 @@ def get_mnemo_expr(ir, instr, *args): if not instr.name.lower() in mnemo_func_cond: raise ValueError('unknown mnemo %s' % instr) cond, mf = mnemo_func_cond[instr.name.lower()] - instr_ir = mf(ir, instr, *args) - instr, extra_ir = add_condition_expr(ir, instr, cond, instr_ir) + instr_ir, extra_ir = mf(ir, instr, *args) + instr, extra_ir = add_condition_expr(ir, instr, cond, instr_ir, extra_ir) return instr, extra_ir get_arm_instr_expr = get_mnemo_expr @@ -1235,6 +1479,24 @@ class ir_arml(IntermediateRepresentation): self.IRDst = ExprId('IRDst', 32) self.addrsize = 32 + + + def mod_pc(self, instr, instr_ir, extra_ir): + # fix PC (+8 for arm) + pc_fixed = {self.pc: ExprInt(instr.offset + 8, 32)} + + for i, expr in enumerate(instr_ir): + dst, src = expr.dst, expr.src + if dst != self.pc: + dst = dst.replace_expr(pc_fixed) + src = src.replace_expr(pc_fixed) + instr_ir[i] = ExprAff(dst, src) + + for idx, irblock in enumerate(extra_ir): + extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ + if expr != self.pc else expr, + lambda expr: expr.replace_expr(pc_fixed)) + def get_ir(self, instr): args = instr.args # ir = get_mnemo_expr(self, self.name.lower(), *args) @@ -1248,14 +1510,120 @@ class ir_arml(IntermediateRepresentation): args[-1].args[-1][:8].zeroExtend(32)) instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) - pc_fixed = {self.pc: ExprInt(instr.offset + 8, 32)} - for i, expr in enumerate(instr_ir): - instr_ir[i] = ExprAff(expr.dst, expr.src.replace_expr(pc_fixed)) - - new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(pc_fixed)) - for irblock in extra_ir] + self.mod_pc(instr, instr_ir, extra_ir) + return instr_ir, extra_ir + + def parse_itt(self, instr): + name = instr.name + assert name.startswith('IT') + name = name[1:] + out = [] + for hint in name: + if hint == 'T': + out.append(0) + elif hint == "E": + out.append(1) + else: + raise ValueError("IT name invalid %s" % instr) + return out, instr.args[0] + + def do_it_block(self, label, index, block, assignments, gen_pc_updt): + instr = block.lines[index] + it_hints, it_cond = self.parse_itt(instr) + cond_num = cond_dct_inv[it_cond.name] + cond_eq = tab_cond[cond_num] + + if not index + len(it_hints) <= len(block.lines): + raise NotImplementedError("Splitted IT block non supported yet") + + ir_blocks_all = [] + + # Gen dummy irblock for IT instr + label_next = self.get_next_label(instr) + dst = ExprAff(self.IRDst, ExprId(label_next, 32)) + dst_blk = AssignBlock([dst], instr) + assignments.append(dst_blk) + irblock = IRBlock(label, assignments) + ir_blocks_all.append([irblock]) + + label = label_next + assignments = [] + for hint in it_hints: + irblocks = [] + index += 1 + instr = block.lines[index] + + # Add conditionnal jump to current irblock + label_do = self.symbol_pool.gen_label() + label_next = self.get_next_label(instr) + + if hint: + local_cond = ~cond_eq + else: + local_cond = cond_eq + dst = ExprAff(self.IRDst, ExprCond(local_cond, ExprId(label_do, 32), ExprId(label_next, 32))) + dst_blk = AssignBlock([dst], instr) + assignments.append(dst_blk) + irblock = IRBlock(label, assignments) + + irblocks.append(irblock) + + assignments = [] + label = label_do + split = self.add_instr_to_irblock(block, instr, assignments, + irblocks, gen_pc_updt) + if split: + raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) + + dst = ExprAff(self.IRDst, ExprId(label_next, 32)) + dst_blk = AssignBlock([dst], instr) + assignments.append(dst_blk) + irblock = IRBlock(label, assignments) + irblocks.append(irblock) + label = label_next + assignments = [] + ir_blocks_all.append(irblocks) + return index, ir_blocks_all + + def add_block(self, block, gen_pc_updt=False): + """ + Add a native block to the current IR + @block: native assembly block + @gen_pc_updt: insert PC update effects between instructions + """ + + it_hints = None + it_cond = None + label = None + ir_blocks_all = [] + index = -1 + while index + 1 < len(block.lines): + index += 1 + instr = block.lines[index] + if label is None: + assignments = [] + label = self.get_instr_label(instr) + if instr.name.startswith("IT"): + index, irblocks_it = self.do_it_block(label, index, block, assignments, gen_pc_updt) + for irblocks in irblocks_it: + ir_blocks_all += irblocks + label = None + continue + + split = self.add_instr_to_irblock(block, instr, assignments, + ir_blocks_all, gen_pc_updt) + if split: + ir_blocks_all.append(IRBlock(label, assignments)) + label = None + assignments = [] + if label is not None: + ir_blocks_all.append(IRBlock(label, assignments)) + + new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) + for irblock in new_ir_blocks_all: + self.blocks[irblock.label] = irblock + return new_ir_blocks_all - return instr_ir, new_extra_ir class ir_armb(ir_arml): @@ -1266,7 +1634,8 @@ class ir_armb(ir_arml): self.IRDst = ExprId('IRDst', 32) self.addrsize = 32 -class ir_armtl(IntermediateRepresentation): + +class ir_armtl(ir_arml): def __init__(self, symbol_pool=None): IntermediateRepresentation.__init__(self, mn_armt, "l", symbol_pool) self.pc = PC @@ -1274,8 +1643,23 @@ class ir_armtl(IntermediateRepresentation): self.IRDst = ExprId('IRDst', 32) self.addrsize = 32 - def get_ir(self, instr): - return get_mnemo_expr(self, instr, *instr.args) + + def mod_pc(self, instr, instr_ir, extra_ir): + # fix PC (+4 for thumb) + pc_fixed = {self.pc: ExprInt(instr.offset + 4, 32)} + + for i, expr in enumerate(instr_ir): + dst, src = expr.dst, expr.src + if dst != self.pc: + dst = dst.replace_expr(pc_fixed) + src = src.replace_expr(pc_fixed) + instr_ir[i] = ExprAff(dst, src) + + for idx, irblock in enumerate(extra_ir): + extra_ir[idx] = irblock.modify_exprs(lambda expr: expr.replace_expr(pc_fixed) \ + if expr != self.pc else expr, + lambda expr: expr.replace_expr(pc_fixed)) + class ir_armtb(ir_armtl): def __init__(self, symbol_pool=None): diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 3abdc053..15c59cf0 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -5,11 +5,13 @@ from collections import defaultdict from pyparsing import Literal, Group, Optional -from miasm2.expression.expression import ExprMem, ExprInt, ExprId +from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp + log = logging.getLogger("mips32dis") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -20,48 +22,26 @@ log.setLevel(logging.DEBUG) gpregs = cpu.reg_info(regs.regs32_str, regs.regs32_expr) - LPARENTHESIS = Literal("(") RPARENTHESIS = Literal(")") -def deref2expr(s, l, t): - t = t[0] - if len(t) != 4: +def cb_deref(tokens): + if len(tokens) != 4: raise NotImplementedError("TODO") + return AstMem(tokens[2] + tokens[0], 32) - return ExprMem(t[2] + t[0], 32) - -def deref2expr_nooff(s, l, t): - t = t[0] - if len(t) != 3: +def cb_deref_nooff(tokens): + if len(tokens) != 3: raise NotImplementedError("TODO") - return ExprMem(t[1], 32) + return AstMem(tokens[1], 32) base_expr = cpu.base_expr -deref_off = Group(Optional(cpu.base_expr) + LPARENTHESIS + gpregs.parser + \ - RPARENTHESIS).setParseAction(deref2expr) -deref_nooff = Group(LPARENTHESIS + gpregs.parser + \ - RPARENTHESIS).setParseAction(deref2expr_nooff) +deref_off = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref) +deref_nooff = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_nooff) deref = deref_off | deref_nooff -variable, operand, base_expr = cpu.gen_base_expr() - -int_or_expr = base_expr - - -def ast_id2expr(t): - return mn_mips32.regs.all_regs_ids_byname.get(t, t) - - -def ast_int2expr(a): - return ExprInt(a, 32) - - -my_var_parser = cpu.ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - class additional_info: def __init__(self): self.except_on_instr = False @@ -274,8 +254,31 @@ def mips32op(name, fields, args=None, alias=False): type(name, (mn_mips32,), dct) #type(name, (mn_mips32b,), dct) - -class mips32_reg(cpu.reg_noarg, cpu.m_arg): +class mips32_arg(cpu.m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class mips32_reg(cpu.reg_noarg, mips32_arg): pass class mips32_gpreg(mips32_reg): @@ -335,14 +338,14 @@ class mips32_soff_noarg(mips32_imm): return True -class mips32_s16imm(mips32_s16imm_noarg, cpu.m_arg): +class mips32_s16imm(mips32_s16imm_noarg, mips32_arg): pass -class mips32_soff(mips32_soff_noarg, cpu.m_arg): +class mips32_soff(mips32_soff_noarg, mips32_arg): pass -class mips32_instr_index(mips32_imm, cpu.m_arg): +class mips32_instr_index(mips32_imm, mips32_arg): def decode(self, v): v = v & self.lmask self.expr = ExprInt(v<<2, 32) @@ -361,7 +364,7 @@ class mips32_instr_index(mips32_imm, cpu.m_arg): return True -class mips32_u16imm(mips32_imm, cpu.m_arg): +class mips32_u16imm(mips32_imm, mips32_arg): def decode(self, v): v = v & self.lmask self.expr = ExprInt(v, 32) @@ -375,7 +378,7 @@ class mips32_u16imm(mips32_imm, cpu.m_arg): self.value = v return True -class mips32_dreg_imm(cpu.m_arg): +class mips32_dreg_imm(mips32_arg): parser = deref def decode(self, v): imm = self.parent.imm.expr @@ -408,7 +411,7 @@ class mips32_dreg_imm(cpu.m_arg): assert(len(arg.args) == 2 and arg.op == '+') return "%s(%s)"%(arg.args[1], arg.args[0]) -class mips32_esize(mips32_imm, cpu.m_arg): +class mips32_esize(mips32_imm, mips32_arg): def decode(self, v): v = v & self.lmask self.expr = ExprInt(v+1, 32) @@ -422,7 +425,7 @@ class mips32_esize(mips32_imm, cpu.m_arg): self.value = v return True -class mips32_eposh(mips32_imm, cpu.m_arg): +class mips32_eposh(mips32_imm, mips32_arg): def decode(self, v): self.expr = ExprInt(v-int(self.parent.epos.expr)+1, 32) return True @@ -437,7 +440,7 @@ class mips32_eposh(mips32_imm, cpu.m_arg): -class mips32_cpr(cpu.m_arg): +class mips32_cpr(mips32_arg): parser = regs.regs_cpr0_info.parser def decode(self, v): index = int(self.parent.cpr0.expr) << 3 @@ -605,6 +608,8 @@ bs_bcc = cpu.bs_name(l=5, name = {"BGEZ": 0b00001, }) +bs_code = cpu.bs(l=10) + mips32op("addi", [cpu.bs('001000'), rs, rt, s16imm], [rt, rs, s16imm]) mips32op("addiu", [cpu.bs('001001'), rs, rt, s16imm], [rt, rs, s16imm]) @@ -740,3 +745,7 @@ mips32op("tlbp", [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19), cpu.bs('001000')]) mips32op("tlbwi", [cpu.bs('010000'), cpu.bs('1'), cpu.bs('0'*19), cpu.bs('000010')]) + + +mips32op("teq", [cpu.bs('000000'), rs, rt, bs_code, cpu.bs('110100')], + [rs, rt]) diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index 1d2ec483..16d88067 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -1,6 +1,6 @@ import logging -from miasm2.jitter.jitload import jitter +from miasm2.jitter.jitload import jitter, named_arguments from miasm2.core import asmblock from miasm2.core.utils import pck32, upck32 from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b @@ -104,6 +104,42 @@ class jitter_mips32l(jitter): jitter.init_run(self, *args, **kwargs) self.cpu.PC = self.pc + # calling conventions + + @named_arguments + def func_args_stdcall(self, n_args): + args = [self.get_arg_n_stdcall(i) for i in xrange(n_args)] + ret_ad = self.cpu.RA + return ret_ad, args + + def func_ret_stdcall(self, ret_addr, ret_value1=None, ret_value2=None): + self.pc = self.cpu.PC = ret_addr + if ret_value1 is not None: + self.cpu.V0 = ret_value1 + if ret_value2 is not None: + self.cpu.V1 = ret_value2 + return True + + def func_prepare_stdcall(self, ret_addr, *args): + for index in xrange(min(len(args), 4)): + setattr(self.cpu, 'A%d' % index, args[index]) + for index in xrange(4, len(args)): + self.vm.set_mem(self.cpu.SP + 4 * (index - 4), pck32(args[index])) + self.cpu.RA = ret_addr + + def get_arg_n_stdcall(self, index): + if index < 4: + arg = getattr(self.cpu, 'A%d' % index) + else: + arg = self.get_stack_arg(index-4) + return arg + + + func_args_systemv = func_args_stdcall + func_ret_systemv = func_ret_stdcall + func_prepare_systemv = func_prepare_stdcall + get_arg_n_systemv = get_arg_n_stdcall + class jitter_mips32b(jitter_mips32l): diff --git a/miasm2/arch/mips32/regs.py b/miasm2/arch/mips32/regs.py index afade869..7ff949f2 100644 --- a/miasm2/arch/mips32/regs.py +++ b/miasm2/arch/mips32/regs.py @@ -4,11 +4,11 @@ from miasm2.expression.expression import ExprId from miasm2.core.cpu import gen_reg, gen_regs -gen_reg('PC', globals()) -gen_reg('PC_FETCH', globals()) +PC, _ = gen_reg('PC') +PC_FETCH, _ = gen_reg('PC_FETCH') -gen_reg('R_LO', globals()) -gen_reg('R_HI', globals()) +R_LO, _ = gen_reg('R_LO') +R_HI, _ = gen_reg('R_HI') exception_flags = ExprId('exception_flags', 32) @@ -55,11 +55,10 @@ regs_flt_expr, regs_flt_init, fltregs = gen_regs(regs_flt_str, globals(), sz=64) regs_fcc_expr, regs_fcc_init, fccregs = gen_regs(regs_fcc_str, globals()) -all_regs_ids = [PC, PC_FETCH, R_LO, R_HI] + gpregs_expr + regs_flt_expr + \ +all_regs_ids = [PC, PC_FETCH, R_LO, R_HI, exception_flags] + gpregs_expr + regs_flt_expr + \ regs_fcc_expr + regs_cpr0_expr all_regs_ids_byname = dict([(x.name, x) for x in all_regs_ids]) -all_regs_ids_init = [PC_init, PC_FETCH_init, R_LO_init, R_HI_init] + \ - gpregs_init + regs_flt_init + regs_fcc_init + regs_cpr0_init +all_regs_ids_init = [ExprId("%s_init" % reg.name, reg.size) for reg in all_regs_ids] all_regs_ids_no_alias = all_regs_ids[:] attrib_to_regs = { diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index 789491f6..99c81a33 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -1,8 +1,9 @@ import miasm2.expression.expression as m2_expr -from miasm2.ir.ir import IntermediateRepresentation, IRBlock +from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock from miasm2.arch.mips32.arch import mn_mips32 -from miasm2.arch.mips32.regs import R_LO, R_HI, PC, RA +from miasm2.arch.mips32.regs import R_LO, R_HI, PC, RA, exception_flags from miasm2.core.sembuilder import SemBuilder +from miasm2.jitter.csts import EXCEPT_DIV_BY_ZERO # SemBuilder context @@ -377,6 +378,18 @@ def multu(arg1, arg2): R_HI = result[32:] @sbuild.parse +def div(arg1, arg2): + """Divide (signed) @arg1 by @arg2 and stores the remaining/result in $R_HI/$R_LO""" + R_LO = ExprOp('idiv' ,arg1, arg2) + R_HI = ExprOp('imod', arg1, arg2) + +@sbuild.parse +def divu(arg1, arg2): + """Divide (unsigned) @arg1 by @arg2 and stores the remaining/result in $R_HI/$R_LO""" + R_LO = ExprOp('udiv', arg1, arg2) + R_HI = ExprOp('umod', arg1, arg2) + +@sbuild.parse def mfhi(arg1): "The contents of register $R_HI are moved to the specified register @arg1." arg1 = R_HI @@ -398,6 +411,30 @@ def ei(arg1): def ehb(arg1): "NOP" + +def teq(ir, instr, arg1, arg2): + e = [] + + lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprId(lbl_next, ir.IRDst.size) + + do_except = [] + do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( + EXCEPT_DIV_BY_ZERO, exception_flags.size))) + do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_except = IRBlock(lbl_except, [AssignBlock(do_except, instr)]) + + cond = arg1 - arg2 + + + e = [] + e.append(m2_expr.ExprAff(ir.IRDst, + m2_expr.ExprCond(cond, lbl_next_expr, lbl_except_expr))) + + return e, [blk_except] + + mnemo_func = sbuild.functions mnemo_func.update({ 'add.d': add_d, @@ -423,6 +460,7 @@ mnemo_func.update({ 'subu': l_sub, 'xor': l_xor, 'xori': l_xor, + 'teq': teq, }) def get_mnemo_expr(ir, instr, *args): diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 9728d776..e4d03edb 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -8,6 +8,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.msp430.regs as regs_module from miasm2.arch.msp430.regs import * +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp log = logging.getLogger("msp430dis") console_handler = logging.StreamHandler() @@ -19,79 +20,70 @@ conditional_branch = ['jnz', 'jz', 'jnc', 'jc', 'jn', 'jge', 'jl'] unconditional_branch = ['jmp'] -def deref2expr_nooff(s, l, t): - t = t[0] - if len(t) == 1 and isinstance(t[0], ExprId): - return ExprMem(t[0], 16) - elif len(t) == 1 and isinstance(t[0], ExprInt): - return ExprMem(t[0], 16) - raise NotImplementedError('not fully functional') +def cb_deref_nooff(tokens): + assert len(tokens) == 1 + result = AstMem(tokens[0], 16) + return result -def deref2expr_pinc(s, l, t): - t = t[0] - if len(t) == 1 and isinstance(t[0], ExprId): - return ExprOp('autoinc', t[0]) - raise NotImplementedError('not fully functional') +def cb_deref_pinc(tokens): + assert len(tokens) == 1 + result = AstOp('autoinc', *tokens) + return result -def deref2expr_off(s, l, t): - t = t[0] - if len(t) == 2 and isinstance(t[1], ExprId): - return ExprMem(t[1] + t[0], 16) - raise NotImplementedError('not fully functional') +def cb_deref_off(tokens): + assert len(tokens) == 2 + result = AstMem(tokens[1] + tokens[0], 16) + return result -def deref_expr(s, l, t): - t = t[0] - assert(len(t) == 1) - t = t[0] - if isinstance(t, ExprId) or \ - isinstance(t, ExprInt) or \ - isinstance(t, ExprMem) or \ - (isinstance(t, ExprOp) and t.op == "autoinc"): - return t - raise NotImplementedError('not fully functional') +def cb_expr(tokens): + assert(len(tokens) == 1) + result = tokens[0] + return result -def f_reg2expr(t): - t = t[0] - i = regs16_str.index(t) - r = regs16_expr[i] - return r - -# gpregs.parser.setParseAction(f_reg2expr) - ARO = Suppress("@") LPARENT = Suppress("(") RPARENT = Suppress(")") PINC = Suppress("+") - -def ast_id2expr(t): - return mn_msp430.regs.all_regs_ids_byname.get(t, t) - - -def ast_int2expr(a): - return ExprInt(a, 16) - - -variable, operand, base_expr = gen_base_expr() - -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - - -deref_nooff = Group(ARO + base_expr).setParseAction(deref2expr_nooff) -deref_pinc = Group(ARO + base_expr + PINC).setParseAction(deref2expr_pinc) -deref_off = Group(base_expr + LPARENT + - gpregs.parser + RPARENT).setParseAction(deref2expr_off) - - -sreg_p = Group(deref_pinc | deref_nooff | - deref_off | base_expr).setParseAction(deref_expr) +deref_nooff = (ARO + base_expr).setParseAction(cb_deref_nooff) +deref_pinc = (ARO + base_expr + PINC).setParseAction(cb_deref_pinc) +deref_off = (base_expr + LPARENT + gpregs.parser + RPARENT).setParseAction(cb_deref_off) +sreg_p = (deref_pinc | deref_nooff | deref_off | base_expr).setParseAction(cb_expr) + + + +class msp430_arg(m_arg): + def asm_ast_to_expr(self, value, symbol_pool): + if isinstance(value, AstId): + name = value.name + if isinstance(name, Expr): + return name + assert isinstance(name, str) + if name in gpregs.str: + index = gpregs.str.index(name) + reg = gpregs.expr[index] + return reg + label = symbol_pool.getby_name_create(value.name) + return ExprId(label, 16) + if isinstance(value, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] + if None in args: + return None + return ExprOp(value.op, *args) + if isinstance(value, AstInt): + return ExprInt(value.value, 16) + if isinstance(value, AstMem): + ptr = self.asm_ast_to_expr(value.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, value.size) + return None class additional_info: @@ -310,7 +302,7 @@ class bw_mn(bs_mod_name): mn_mod = ['.w', '.b'] -class msp430_sreg_arg(reg_noarg, m_arg): +class msp430_sreg_arg(reg_noarg, msp430_arg): prio = default_prio + 1 reg_info = gpregs parser = sreg_p @@ -512,7 +504,7 @@ class bs_cond_off_d(bs_cond_off_s): raise NotImplementedError("unknown value v[a_d] = %d" % v['a_d']) -class msp430_offs(imm_noarg, m_arg): +class msp430_offs(imm_noarg, msp430_arg): parser = base_expr def int2expr(self, v): diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 672c0c6a..945824a0 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -8,7 +8,7 @@ from miasm2.core.bin_stream import bin_stream from miasm2.core.asmblock import asm_label import miasm2.arch.ppc.regs as regs_module from miasm2.arch.ppc.regs import * -from pdb import pm +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp log = logging.getLogger("ppcdis") console_handler = logging.StreamHandler() @@ -19,37 +19,43 @@ log.setLevel(logging.DEBUG) LPARENTHESIS = Suppress(Literal("(")) RPARENTHESIS = Suppress(Literal(")")) -def deref2expr_imm_reg(s, l, t): - t = t[0] - if len(t) == 1: - return ExprMem(t[0]) - elif len(t) == 2: - return ExprMem(t[1] + t[0]) +def cb_deref_imm_reg(tokens): + if len(tokens) == 1: + return AstMem(tokens[0], 32) + elif len(tokens) == 2: + return AstMem(tokens[1] + tokens[0], 32) else: - raise NotImplementedError('len(t) > 2') + raise NotImplementedError('len(tokens) > 2') -variable, operand, base_expr = gen_base_expr() -int_or_expr = base_expr - - -def ast_id2expr(t): - if not t in mn_ppc.regs.all_regs_ids_byname: - r = ExprId(asm_label(t)) - else: - r = mn_ppc.regs.all_regs_ids_byname[t] - return r - -def ast_int2expr(a): - return ExprInt(a, 32) - -deref_reg_disp = Group(Optional(int_or_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(deref2expr_imm_reg) -deref_reg = Group(LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(deref2expr_imm_reg) +deref_reg_disp = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) +deref_reg = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) deref = deref_reg | deref_reg_disp -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) + +class ppc_arg(m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None class additional_info: @@ -278,7 +284,7 @@ class mn_ppc(cls_mn): return 32 -class ppc_reg(reg_noarg, m_arg): +class ppc_reg(reg_noarg, ppc_arg): pass @@ -324,7 +330,7 @@ class ppc_crfreg(ppc_reg): reg_info = crfregs parser = reg_info.parser -class ppc_imm(imm_noarg, m_arg): +class ppc_imm(imm_noarg, ppc_arg): parser = base_expr class ppc_s14imm_branch(ppc_imm): @@ -491,7 +497,7 @@ class ppc_divert_conditional_branch(bs_divert): return out -class ppc_deref32(m_arg): +class ppc_deref32(ppc_arg): parser = deref def decode(self, v): diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index ecfc9502..14f46265 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -7,6 +7,9 @@ from collections import defaultdict import miasm2.arch.sh4.regs as regs_module from miasm2.arch.sh4.regs import * + +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp + jra = ExprId('jra', 32) jrb = ExprId('jrb', 32) jrc = ExprId('jrc', 32) @@ -25,99 +28,112 @@ LPARENT = Suppress("(") RPARENT = Suppress(")") -def parse_deref_pcimm(t): - t = t[0] - return t[0] + t[1] - - -def parse_pcandimmimm(t): - t = t[0] - return (t[0] & t[1]) + t[2] - -def ast_id2expr(t): - return mn_sh4.regs.all_regs_ids_byname.get(t, t) +def cb_deref_pcimm(tokens): + return tokens[0] + tokens[1] -def ast_int2expr(a): - return ExprInt(a, 32) +def cb_pcandimmimm(tokens): + return (tokens[0] & tokens[1]) + tokens[2] -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) -int_or_expr = base_expr -ref_pc = Group(LPARENT + regi_pc.parser + COMMA + - int_or_expr + RPARENT).setParseAction(parse_deref_pcimm) -ref_pcandimm = Group( - LPARENT + regi_pc.parser + AND + int_or_expr + - COMMA + int_or_expr + RPARENT).setParseAction(parse_pcandimmimm) - - -pcdisp = Group(regi_pc.parser + AND + int_or_expr + - PLUS + int_or_expr).setParseAction(parse_pcandimmimm) +ref_pc = (LPARENT + reg_info_pc.parser + COMMA + base_expr + RPARENT).setParseAction(cb_deref_pcimm) +ref_pcandimm = (LPARENT + reg_info_pc.parser + AND + base_expr + COMMA + base_expr + RPARENT).setParseAction(cb_pcandimmimm) +pcdisp = (reg_info_pc.parser + AND + base_expr + PLUS + base_expr).setParseAction(cb_pcandimmimm) PTR = Suppress('PTR') -def parse_deref_mem(s, l, t): - t = t[0] - e = ExprMem(t[0], 32) - return e +def cb_deref_mem(tokens): + assert len(tokens) == 1 + result = AstMem(tokens[0], 32) + return result -def parse_predec(s, l, t): - t = t[0] - e = ExprMem(ExprOp('predec', t[0]), 32) - return e +def cb_predec(tokens): + assert len(tokens) == 1 + result = AstMem(AstOp('predec', tokens[0]), 32) + return result -def parse_postinc(s, l, t): - t = t[0] - e = ExprMem(ExprOp('postinc', t[0]), 32) - return e +def cb_postinc(tokens): + assert len(tokens) == 1 + result = AstMem(AstOp('postinc', tokens[0]), 32) + return result -def parse_regdisp(t): - t = t[0] - e = ExprMem(t[0] + t[1], 32) - return e +def cb_regdisp(tokens): + assert len(tokens) == 2 + result = AstMem(tokens[0] + tokens[1], 32) + return result -def parse_regreg(t): - t = t[0] - e = ExprMem(t[0] + t[1], 32) - return e +def cb_regreg(tokens): + assert len(tokens) == 2 + result = AstMem(tokens[0] + tokens[1], 32) + return result -deref_pc = Group(DEREF + ref_pc).setParseAction(parse_deref_mem) -deref_pcimm = Group(DEREF + ref_pcandimm).setParseAction(parse_deref_mem) +deref_pc = (DEREF + ref_pc).setParseAction(cb_deref_mem) +deref_pcimm = (DEREF + ref_pcandimm).setParseAction(cb_deref_mem) -dgpregs_base = Group(DEREF + gpregs.parser).setParseAction(parse_deref_mem) -dgpregs_predec = Group( - DEREF + MINUS + gpregs.parser).setParseAction(parse_predec) -dgpregs_postinc = Group( - DEREF + gpregs.parser + PLUS).setParseAction(parse_postinc) +dgpregs_base = (DEREF + gpregs.parser).setParseAction(cb_deref_mem) +dgpregs_predec = (DEREF + MINUS + gpregs.parser).setParseAction(cb_predec) +dgpregs_postinc = (DEREF + gpregs.parser + PLUS).setParseAction(cb_postinc) dgpregs = dgpregs_base | dgpregs_predec | dgpregs_postinc -d_gpreg_gpreg = Group(DEREF + - LPARENT + gpregs.parser + COMMA + gpregs.parser + RPARENT - ).setParseAction(parse_regdisp) +d_gpreg_gpreg = (DEREF + LPARENT + gpregs.parser + COMMA + gpregs.parser + RPARENT).setParseAction(cb_regdisp) dgpregs_p = dgpregs_predec | dgpregs_postinc -dgpregs_ir = Group(DEREF + LPARENT + gpregs.parser + - COMMA + int_or_expr + RPARENT).setParseAction(parse_regdisp) +dgpregs_ir = (DEREF + LPARENT + gpregs.parser + COMMA + base_expr + RPARENT).setParseAction(cb_regdisp) dgpregs_ir |= d_gpreg_gpreg -dgbr_imm = Group(DEREF + LPARENT + regi_gbr.parser + - COMMA + int_or_expr + RPARENT).setParseAction(parse_regdisp) - -dgbr_reg = Group(DEREF + LPARENT + regi_gbr.parser + - COMMA + gpregs.parser + RPARENT).setParseAction(parse_regreg) - - -class sh4_reg(reg_noarg, m_arg): +dgbr_imm = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + base_expr + RPARENT).setParseAction(cb_regdisp) + +dgbr_reg = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + gpregs.parser + RPARENT).setParseAction(cb_regreg) + + +class sh4_arg(m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +_, bs_pr = gen_reg_bs('PR', reg_info_pr, (m_reg, sh4_arg,)) +_, bs_r0 = gen_reg_bs('R0', reg_info_r0, (m_reg, sh4_arg,)) +_, bs_sr = gen_reg_bs('SR', reg_info_sr, (m_reg, sh4_arg,)) +_, bs_gbr = gen_reg_bs('GBR', reg_info_gbr, (m_reg, sh4_arg,)) +_, bs_vbr = gen_reg_bs('VBR', reg_info_vbr, (m_reg, sh4_arg,)) +_, bs_ssr = gen_reg_bs('SSR', reg_info_ssr, (m_reg, sh4_arg,)) +_, bs_spc = gen_reg_bs('SPC', reg_info_spc, (m_reg, sh4_arg,)) +_, bs_sgr = gen_reg_bs('SGR', reg_info_sgr, (m_reg, sh4_arg,)) +_, bs_dbr = gen_reg_bs('dbr', reg_info_dbr, (m_reg, sh4_arg,)) +_, bs_mach = gen_reg_bs('mach', reg_info_mach, (m_reg, sh4_arg,)) +_, bs_macl = gen_reg_bs('macl', reg_info_macl, (m_reg, sh4_arg,)) +_, bs_fpul = gen_reg_bs('fpul', reg_info_fpul, (m_reg, sh4_arg,)) +_, bs_fr0 = gen_reg_bs('fr0', reg_info_fr0, (m_reg, sh4_arg,)) + +class sh4_reg(reg_noarg, sh4_arg): pass @@ -146,12 +162,12 @@ class sh4_freg(sh4_reg): parser = reg_info.parser -class sh4_dgpreg(m_arg): +class sh4_dgpreg(sh4_arg): parser = dgpregs_base - def fromstring(self, s, parser_result=None): - start, stop = super(sh4_dgpreg, self).fromstring(s, parser_result) - if start is None: + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(sh4_dgpreg, self).fromstring(text, symbol_pool, parser_result) + if start is None or self.expr == [None]: return start, stop self.expr = ExprMem(self.expr.arg, self.sz) return start, stop @@ -172,12 +188,12 @@ class sh4_dgpreg(m_arg): return True -class sh4_dgpregpinc(m_arg): +class sh4_dgpregpinc(sh4_arg): parser = dgpregs_p - def fromstring(self, s, parser_result=None): - start, stop = super(sh4_dgpregpinc, self).fromstring(s, parser_result) - if self.expr is None: + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(sh4_dgpregpinc, self).fromstring(text, symbol_pool, parser_result) + if self.expr == [None]: return None, None if not isinstance(self.expr.arg, ExprOp): return None, None @@ -207,7 +223,7 @@ class sh4_dgpregpinc(m_arg): return True -class sh4_dgpregpdec(m_arg): +class sh4_dgpregpdec(sh4_arg): parser = dgpregs_postinc op = "preinc" @@ -252,7 +268,7 @@ class sh4_dgpreg_imm(sh4_dgpreg): return True -class sh4_imm(imm_noarg, m_arg): +class sh4_imm(imm_noarg, sh4_arg): parser = base_expr pass @@ -354,7 +370,7 @@ class sh4_dpc32imm(sh4_dpc16imm): return True -class sh4_pc32imm(m_arg): +class sh4_pc32imm(sh4_arg): parser = pcdisp def decode(self, v): @@ -666,11 +682,11 @@ addop("mov_w", [bs('0110', fname="opc"), rn, d16rmpinc, bs('0101')], [d16rmpinc, rn]) addop("mov_l", [bs('0110', fname="opc"), rn, d32rmpinc, bs('0110')], [d32rmpinc, rn]) -addop("mov_b", [bs('10000000', fname='opc'), bsr0, d08rnimm, dimm4]) -addop("mov_w", [bs('10000001', fname='opc'), bsr0, d16rnimm, dimm4]) +addop("mov_b", [bs('10000000', fname='opc'), bs_r0, d08rnimm, dimm4]) +addop("mov_w", [bs('10000001', fname='opc'), bs_r0, d16rnimm, dimm4]) addop("mov_l", [bs('0001', fname='opc'), d32rnimm, rm, dimm4], [rm, d32rnimm]) -addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bsr0]) -addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bsr0]) +addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bs_r0]) +addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bs_r0]) addop("mov_l", [bs('0101', fname='opc'), rn, d32rmimm, dimm4], [d32rmimm, rn]) addop("mov_b", [bs('0000', fname='opc'), bd08r0gp, rm, bs('0100')], [rm, bd08r0gp]) @@ -685,15 +701,15 @@ addop("mov_w", addop("mov_l", [bs('0000', fname='opc'), rn, bd32r0gp, bs('1110')], [bd32r0gp, rn]) -addop("mov_b", [bs('11000000'), bsr0, d08gbrimm8]) -addop("mov_w", [bs('11000001'), bsr0, d16gbrimm8]) -addop("mov_l", [bs('11000010'), bsr0, d32gbrimm8]) +addop("mov_b", [bs('11000000'), bs_r0, d08gbrimm8]) +addop("mov_w", [bs('11000001'), bs_r0, d16gbrimm8]) +addop("mov_l", [bs('11000010'), bs_r0, d32gbrimm8]) -addop("mov_b", [bs('11000100'), d08gbrimm8, bsr0]) -addop("mov_w", [bs('11000101'), d16gbrimm8, bsr0]) -addop("mov_l", [bs('11000110'), d32gbrimm8, bsr0]) +addop("mov_b", [bs('11000100'), d08gbrimm8, bs_r0]) +addop("mov_w", [bs('11000101'), d16gbrimm8, bs_r0]) +addop("mov_l", [bs('11000110'), d32gbrimm8, bs_r0]) -addop("mov", [bs('11000111'), pc32imm, bsr0]) +addop("mov", [bs('11000111'), pc32imm, bs_r0]) addop("swapb", [bs('0110'), rn, rm, bs('1000')], [rm, rn]) addop("swapw", [bs('0110'), rn, rm, bs('1001')], [rm, rn]) @@ -706,7 +722,7 @@ addop("addc", [bs('0011'), rn, rm, bs('1110')], [rm, rn]) addop("addv", [bs('0011'), rn, rm, bs('1111')], [rm, rn]) -addop("cmpeq", [bs('10001000'), s08imm, bsr0]) +addop("cmpeq", [bs('10001000'), s08imm, bs_r0]) addop("cmpeq", [bs('0011'), rn, rm, bs('0000')], [rm, rn]) @@ -754,24 +770,24 @@ addop("subc", [bs('0011'), rn, rm, bs('1010')], [rm, rn]) addop("subv", [bs('0011'), rn, rm, bs('1011')], [rm, rn]) addop("and", [bs('0010'), rn, rm, bs('1001')], [rm, rn]) -addop("and", [bs('11001001'), u08imm, bsr0]) +addop("and", [bs('11001001'), u08imm, bs_r0]) addop("and_b", [bs('11001101'), u08imm, dr0gbr]) addop("not", [bs('0110'), rn, rm, bs('0111')], [rm, rn]) addop("or", [bs('0010'), rn, rm, bs('1011')], [rm, rn]) -addop("or", [bs('11001011'), u08imm, bsr0]) +addop("or", [bs('11001011'), u08imm, bs_r0]) addop("or_b", [bs('11001111'), u08imm, dr0gbr]) addop("tas_b", [bs('0100'), d08gpreg, bs('00011011')]) addop("tst", [bs('0010'), rn, rm, bs('1000')], [rm, rn]) -addop("tst", [bs('11001000'), u08imm, bsr0]) +addop("tst", [bs('11001000'), u08imm, bs_r0]) addop("tst_b", [bs('11001100'), u08imm, dr0gbr]) addop("xor", [bs('0010'), rn, rm, bs('1010')], [rm, rn]) -addop("xor", [bs('11001010'), u08imm, bsr0]) +addop("xor", [bs('11001010'), u08imm, bs_r0]) addop("xor_b", [bs('11001110'), u08imm, dr0gbr]) addop("rotl", [bs('0100'), rn, bs('00000100')]) @@ -883,29 +899,29 @@ addop("clrs", [bs('0000000001001000')]) addop("clrt", [bs('0000000000001000')]) -addop("ldc", [bs('0100'), rm, bssr, bs('00001110')]) -addop("ldc", [bs('0100'), rm, bsgbr, bs('00011110')]) -addop("ldc", [bs('0100'), rm, bsvbr, bs('00101110')]) -addop("ldc", [bs('0100'), rm, bsssr, bs('00111110')]) -addop("ldc", [bs('0100'), rm, bsspc, bs('01001110')]) -addop("ldc", [bs('0100'), rm, bsdbr, bs('11111010')]) +addop("ldc", [bs('0100'), rm, bs_sr, bs('00001110')]) +addop("ldc", [bs('0100'), rm, bs_gbr, bs('00011110')]) +addop("ldc", [bs('0100'), rm, bs_vbr, bs('00101110')]) +addop("ldc", [bs('0100'), rm, bs_ssr, bs('00111110')]) +addop("ldc", [bs('0100'), rm, bs_spc, bs('01001110')]) +addop("ldc", [bs('0100'), rm, bs_dbr, bs('11111010')]) addop("ldc", [bs('0100'), rm, bs('1'), brn, bs('1110')], [rm, brn]) -addop("ldc_l", [bs('0100'), d32rmpinc, bssr, bs('00000111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsgbr, bs('00010111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsvbr, bs('00100111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsssr, bs('00110111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsspc, bs('01000111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsdbr, bs('11110110')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_sr, bs('00000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_gbr, bs('00010111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_vbr, bs('00100111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_ssr, bs('00110111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_spc, bs('01000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_dbr, bs('11110110')]) addop("ldc_l", [bs('0100'), d32rmpinc, bs('1'), brn, bs('0111')]) -addop("lds", [bs('0100'), rm, bsmach, bs('00001010')]) -addop("lds", [bs('0100'), rm, bsmacl, bs('00011010')]) -addop("lds", [bs('0100'), rm, bspr, bs('00101010')]) -addop("lds_l", [bs('0100'), d32rmpinc, bsmach, bs('00000110')]) -addop("lds_l", [bs('0100'), d32rmpinc, bsmacl, bs('00010110')]) -addop("lds_l", [bs('0100'), d32rmpinc, bspr, bs('00100110')]) +addop("lds", [bs('0100'), rm, bs_mach, bs('00001010')]) +addop("lds", [bs('0100'), rm, bs_macl, bs('00011010')]) +addop("lds", [bs('0100'), rm, bs_pr, bs('00101010')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_mach, bs('00000110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_macl, bs('00010110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_pr, bs('00100110')]) addop("ldtlb", [bs('0000000000111000')]) -addop("movca_l", [bs('0000'), bsr0, d32gpreg, bs('11000011')]) +addop("movca_l", [bs('0000'), bs_r0, d32gpreg, bs('11000011')]) addop("nop", [bs('0000000000001001')]) addop("ocbi_l", [bs('0000'), d32gpreg, bs('10010011')]) addop("ocbp_l", [bs('0000'), d32gpreg, bs('10100011')]) @@ -917,33 +933,33 @@ addop("rte", [bs('0000000000101011')]) addop("sets", [bs('0000000001011000')]) addop("sett", [bs('0000000000011000')]) addop("sleep", [bs('0000000000011011')]) -addop("stc", [bs('0000'), bssr, rn, bs('00000010')]) -addop("stc", [bs('0000'), bsgbr, rn, bs('00010010')]) -addop("stc", [bs('0000'), bsvbr, rn, bs('00100010')]) -addop("stc", [bs('0000'), bsssr, rn, bs('00110010')]) -addop("stc", [bs('0000'), bsspc, rn, bs('01000010')]) -addop("stc", [bs('0000'), bssgr, rn, bs('00111010')]) -addop("stc", [bs('0000'), bsdbr, rn, bs('11111010')]) +addop("stc", [bs('0000'), bs_sr, rn, bs('00000010')]) +addop("stc", [bs('0000'), bs_gbr, rn, bs('00010010')]) +addop("stc", [bs('0000'), bs_vbr, rn, bs('00100010')]) +addop("stc", [bs('0000'), bs_ssr, rn, bs('00110010')]) +addop("stc", [bs('0000'), bs_spc, rn, bs('01000010')]) +addop("stc", [bs('0000'), bs_sgr, rn, bs('00111010')]) +addop("stc", [bs('0000'), bs_dbr, rn, bs('11111010')]) addop("stc", [bs('0000'), rn, bs('1'), brm, bs('0010')], [brm, rn]) -addop("stc_l", [bs('0100'), bssr, d32rmpdec, bs('00000011')]) -addop("stc_l", [bs('0100'), bsgbr, d32rmpdec, bs('00010011')]) -addop("stc_l", [bs('0100'), bsvbr, d32rmpdec, bs('00100011')]) -addop("stc_l", [bs('0100'), bsssr, d32rmpdec, bs('00110011')]) -addop("stc_l", [bs('0100'), bsspc, d32rmpdec, bs('01000011')]) -addop("stc_l", [bs('0100'), bssgr, d32rmpdec, bs('00110010')]) -addop("stc_l", [bs('0100'), bsdbr, d32rmpdec, bs('11110010')]) +addop("stc_l", [bs('0100'), bs_sr, d32rmpdec, bs('00000011')]) +addop("stc_l", [bs('0100'), bs_gbr, d32rmpdec, bs('00010011')]) +addop("stc_l", [bs('0100'), bs_vbr, d32rmpdec, bs('00100011')]) +addop("stc_l", [bs('0100'), bs_ssr, d32rmpdec, bs('00110011')]) +addop("stc_l", [bs('0100'), bs_spc, d32rmpdec, bs('01000011')]) +addop("stc_l", [bs('0100'), bs_sgr, d32rmpdec, bs('00110010')]) +addop("stc_l", [bs('0100'), bs_dbr, d32rmpdec, bs('11110010')]) addop("stc_l", [bs('0100'), d32rnpdec, bs('1'), brm, bs('0011')], [brm, d32rnpdec]) # float -addop("sts", [bs('0000'), bsmach, rm, bs('00001010')]) -addop("sts", [bs('0000'), bsmacl, rm, bs('00011010')]) -addop("sts", [bs('0000'), bspr, rm, bs('00101010')]) -addop("sts_l", [bs('0100'), bsmach, d32rmpdec, bs('00000010')]) -addop("sts_l", [bs('0100'), bsmacl, d32rmpdec, bs('00010010')]) +addop("sts", [bs('0000'), bs_mach, rm, bs('00001010')]) +addop("sts", [bs('0000'), bs_macl, rm, bs('00011010')]) +addop("sts", [bs('0000'), bs_pr, rm, bs('00101010')]) +addop("sts_l", [bs('0100'), bs_mach, d32rmpdec, bs('00000010')]) +addop("sts_l", [bs('0100'), bs_macl, d32rmpdec, bs('00010010')]) addop("sts_l", - [bs('0100'), d32rnpdec, bspr, bs('00100010')], [bspr, d32rnpdec]) + [bs('0100'), d32rnpdec, bs_pr, bs('00100010')], [bs_pr, d32rnpdec]) addop("trapa", [bs('11000011'), u08imm]) addop("fldi0", [bs('1111'), frn, bs('10001101')]) @@ -956,18 +972,18 @@ addop("fmov_s", [bs('1111'), d32gpreg, frm, bs('1010')], [frm, d32gpreg]) addop("fmov_s", [bs('1111'), d32rnpdec, frm, bs('1011')], [frm, d32rnpdec]) addop("fmov_s", [bs('1111'), bd32r0gp, frm, bs('0111')], [frm, bd32r0gp]) -addop("flds", [bs('1111'), frm, bsfpul, bs('00011101')]) -addop("fsts", [bs('1111'), bsfpul, frm, bs('00001101')]) +addop("flds", [bs('1111'), frm, bs_fpul, bs('00011101')]) +addop("fsts", [bs('1111'), bs_fpul, frm, bs('00001101')]) addop("fabs", [bs('1111'), frn, bs('01011101')]) addop("fadd", [bs('1111'), frn, frm, bs('0000')], [frm, frn]) addop("fcmpeq", [bs('1111'), frn, frm, bs('0100')], [frm, frn]) addop("fcmpgt", [bs('1111'), frn, frm, bs('0101')], [frm, frn]) addop("fdiv", [bs('1111'), frn, frm, bs('0011')], [frm, frn]) -addop("float", [bs('1111'), bsfpul, frn, bs('00101101')]) -addop("fmac", [bs('1111'), bsfr0, frn, frm, bs('1110')], [bsfr0, frm, frn]) +addop("float", [bs('1111'), bs_fpul, frn, bs('00101101')]) +addop("fmac", [bs('1111'), bs_fr0, frn, frm, bs('1110')], [bs_fr0, frm, frn]) addop("fmul", [bs('1111'), frn, frm, bs('0010')], [frm, frn]) addop("fneg", [bs('1111'), frn, bs('01001101')]) addop("fsqrt", [bs('1111'), frn, bs('01101101')]) addop("fsub", [bs('1111'), frn, frm, bs('0001')], [frm, frn]) -addop("ftrc", [bs('1111'), frm, bsfpul, bs('00111101')]) +addop("ftrc", [bs('1111'), frm, bs_fpul, bs('00111101')]) diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py index 89ff42f8..148e74ba 100644 --- a/miasm2/arch/sh4/regs.py +++ b/miasm2/arch/sh4/regs.py @@ -19,20 +19,20 @@ dregs_expr = [ExprId(x, 32) for x in dregs_str] dregs = reg_info(dregs_str, dregs_expr) -gen_reg('PC', globals()) -gen_reg('PR', globals()) -gen_reg('R0', globals()) -gen_reg('GBR', globals()) -gen_reg('SR', globals()) -gen_reg('VBR', globals()) -gen_reg('SSR', globals()) -gen_reg('SPC', globals()) -gen_reg('SGR', globals()) -gen_reg('DBR', globals()) -gen_reg('MACH', globals()) -gen_reg('MACL', globals()) -gen_reg('FPUL', globals()) -gen_reg('FR0', globals()) +PC, reg_info_pc = gen_reg('PC') +PR, reg_info_pr = gen_reg('PR') +R0, reg_info_r0 = gen_reg('R0') +GBR, reg_info_gbr = gen_reg('GBR') +SR, reg_info_sr = gen_reg('SR') +VBR, reg_info_vbr = gen_reg('VBR') +SSR, reg_info_ssr = gen_reg('SSR') +SPC, reg_info_spc = gen_reg('SPC') +SGR, reg_info_sgr = gen_reg('SGR') +DBR, reg_info_dbr = gen_reg('DBR') +MACH, reg_info_mach = gen_reg('MACH') +MACL, reg_info_macl = gen_reg('MACL') +FPUL, reg_info_fpul = gen_reg('FPUL') +FR0, reg_info_fr0 = gen_reg('FR0') R0 = gpregs_expr[0] R1 = gpregs_expr[1] diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 29303fdf..fc3a5882 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -8,6 +8,8 @@ from collections import defaultdict import miasm2.arch.x86.regs as regs_module from miasm2.arch.x86.regs import * from miasm2.core.asmblock import AsmLabel +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp + log = logging.getLogger("x86_arch") console_handler = logging.StreamHandler() @@ -121,148 +123,82 @@ replace_regs = {16: replace_regs16, 64: replace_regs64} -# parser helper ########### -PLUS = Suppress("+") -MULT = Suppress("*") - -COLON = Suppress(":") - - -LBRACK = Suppress("[") -RBRACK = Suppress("]") - -dbreg = Group(gpregs16.parser | gpregs32.parser | gpregs64.parser) -gpreg = (gpregs08.parser | gpregs08_64.parser | gpregs16.parser | - gpregs32.parser | gpregs64.parser | gpregs_xmm.parser | - gpregs_mm.parser | gpregs_bnd.parser) - - -def reg2exprid(r): - if not r.name in all_regs_ids_byname: - raise ValueError('unknown reg') - return all_regs_ids_byname[r.name] - - -def parse_deref_reg(s, l, t): - t = t[0][0] - return t[0] - - -def parse_deref_int(s, l, t): - t = t[0] - return t[0] - - -def parse_deref_regint(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - i1 = ExprInt(t[1].arg, r1.size) - return r1 + i1 - - -def parse_deref_regreg(s, l, t): - t = t[0] - return t[0][0] + t[1][0] - - -def parse_deref_regregint(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - r2 = reg2exprid(t[1][0]) - i1 = ExprInt(t[2].arg, r1.size) - return r1 + r2 + i1 +segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} +enc2segm = dict([(x[1], x[0]) for x in segm2enc.items()]) +segm_info = reg_info_dct(enc2segm) -def parse_deref_reg_intmreg(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - r2 = reg2exprid(t[1][0]) - i1 = ExprInt(t[2].arg, r1.size) - return r1 + (r2 * i1) -def parse_deref_reg_intmreg_int(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - r2 = reg2exprid(t[1][0]) - i1 = ExprInt(t[2].arg, r1.size) - i2 = ExprInt(t[3].arg, r1.size) - return r1 + (r2 * i1) + i2 +enc2crx = { + 0: cr0, + 1: cr1, + 2: cr2, + 3: cr3, + 4: cr4, + 5: cr5, + 6: cr6, + 7: cr7, +} +crx_info = reg_info_dct(enc2crx) -def parse_deref_intmreg(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - i1 = ExprInt(t[1].arg, r1.size) - return r1 * i1 +enc2drx = { + 0: dr0, + 1: dr1, + 2: dr2, + 3: dr3, + 4: dr4, + 5: dr5, + 6: dr6, + 7: dr7, +} -def parse_deref_intmregint(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - i1 = ExprInt(t[1].arg, r1.size) - i2 = ExprInt(t[1].arg, r1.size) - return (r1 * i1) + i2 +drx_info = reg_info_dct(enc2drx) -def getreg(s, l, t): - t = t[0] - return t[0] +# parser helper ########### +PLUS = Suppress("+") +MULT = Suppress("*") -def parse_deref_ptr(s, l, t): - t = t[0] - return ExprMem(ExprOp('segm', t[0], t[1])) +COLON = Suppress(":") -def parse_deref_segmoff(s, l, t): - t = t[0] - return ExprOp('segm', t[0], t[1]) +LBRACK = Suppress("[") +RBRACK = Suppress("]") -variable, operand, base_expr = gen_base_expr() +gpreg = ( + gpregs08.parser | + gpregs08_64.parser | + gpregs16.parser | + gpregs32.parser | + gpregs64.parser | + gpregs_xmm.parser | + gpregs_mm.parser | + gpregs_bnd.parser +) -def ast_id2expr(t): - return mn_x86.regs.all_regs_ids_byname.get(t, t) -def ast_int2expr(a): - return ExprInt(a, 64) +def cb_deref_segmoff(tokens): + assert len(tokens) == 2 + return AstOp('segm', tokens[0], tokens[1]) -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) -int_or_expr = base_expr +def cb_deref_base_expr(tokens): + tokens = tokens[0] + assert isinstance(tokens, AstNode) + addr = tokens + return addr -deref_mem_ad = Group(LBRACK + dbreg + RBRACK).setParseAction(parse_deref_reg) -deref_mem_ad |= Group( - LBRACK + int_or_expr + RBRACK).setParseAction(parse_deref_int) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + - int_or_expr + RBRACK).setParseAction(parse_deref_regint) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + - dbreg + RBRACK).setParseAction(parse_deref_regreg) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + dbreg + PLUS + - int_or_expr + RBRACK).setParseAction(parse_deref_regregint) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + dbreg + MULT + - int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + dbreg + MULT + int_or_expr + - PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg_int) -deref_mem_ad |= Group( - LBRACK + dbreg + MULT + - int_or_expr + RBRACK).setParseAction(parse_deref_intmreg) -deref_mem_ad |= Group( - LBRACK + dbreg + MULT + int_or_expr + - PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_intmregint) +deref_mem_ad = (LBRACK + base_expr + RBRACK).setParseAction(cb_deref_base_expr) -deref_ptr = Group(int_or_expr + COLON + - int_or_expr).setParseAction(parse_deref_segmoff) +deref_ptr = (base_expr + COLON + base_expr).setParseAction(cb_deref_segmoff) PTR = Suppress('PTR') @@ -282,68 +218,120 @@ MEMPREFIX2SIZE = {'BYTE': 8, 'WORD': 16, 'DWORD': 32, SIZE2MEMPREFIX = dict((x[1], x[0]) for x in MEMPREFIX2SIZE.items()) -def parse_deref_mem(s, l, t): - t = t[0] - if len(t) == 2: - s, ptr = t - return ExprMem(ptr, MEMPREFIX2SIZE[s[0]]) - elif len(t) == 3: - s, segm, ptr = t - return ExprMem(ExprOp('segm', segm[0], ptr), MEMPREFIX2SIZE[s[0]]) - else: - raise ValueError('len(t) > 3') +def cb_deref_mem(tokens): + if len(tokens) == 2: + s, ptr = tokens + assert isinstance(ptr, AstNode) + return AstMem(ptr, MEMPREFIX2SIZE[s]) + elif len(tokens) == 3: + s, segm, ptr = tokens + return AstMem(AstOp('segm', segm, ptr), MEMPREFIX2SIZE[s]) + raise ValueError('len(tokens) > 3') + +mem_size = (BYTE | DWORD | QWORD | WORD | TBYTE | XMMWORD) +deref_mem = (mem_size + PTR + Optional((base_expr + COLON))+ deref_mem_ad).setParseAction(cb_deref_mem) + + +rmarg = ( + gpregs08.parser | + gpregs08_64.parser | + gpregs16.parser | + gpregs32.parser | + gpregs64.parser | + gpregs_mm.parser | + gpregs_xmm.parser | + gpregs_bnd.parser +) -mem_size = Group(BYTE | DWORD | QWORD | WORD | TBYTE | XMMWORD) -deref_mem = Group(mem_size + PTR + Optional(Group(int_or_expr + COLON)) - + deref_mem_ad).setParseAction(parse_deref_mem) +rmarg |= deref_mem -rmarg = Group(gpregs08.parser | - gpregs08_64.parser | - gpregs16.parser | - gpregs32.parser | - gpregs64.parser | - gpregs_mm.parser | - gpregs_xmm.parser | - gpregs_bnd.parser - ).setParseAction(getreg) +mem_far = FAR + deref_mem -rmarg |= deref_mem +cl_or_imm = r08_ecx.parser +cl_or_imm |= base_expr -mem_far = FAR + deref_mem -cl_or_imm = Group(r08_ecx.parser).setParseAction(getreg) -cl_or_imm |= int_or_expr +class x86_arg(m_arg): + def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + if size_hint is None: + size_hint = self.parent.v_opmode() + if fixed_size is None: + fixed_size = set() + if isinstance(value, AstId): + if value.name in all_regs_ids_byname: + reg = all_regs_ids_byname[value.name] + fixed_size.add(reg.size) + return reg + if isinstance(value.name, ExprId): + fixed_size.add(value.name.size) + return value.name + if value.name in MEMPREFIX2SIZE: + return None + if value.name in ["FAR"]: + return None + label = symbol_pool.getby_name_create(value.name) + return ExprId(label, size_hint) + if isinstance(value, AstOp): + # First pass to retreive fixed_size + if value.op == "segm": + segm = self.asm_ast_to_expr(value.args[0], symbol_pool) + ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + return ExprOp('segm', segm, ptr) + args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + if len(fixed_size) == 0: + # No fixed size + pass + elif len(fixed_size) == 1: + # One fixed size, regen all + size = list(fixed_size)[0] + args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + else: + raise ValueError("Size conflict") + if None in args: + return None + return ExprOp(value.op, *args) + if isinstance(value, AstInt): + if 1 << size_hint < value.value: + size_hint *= 2 + return ExprInt(value.value, size_hint) + if isinstance(value, AstMem): + fixed_size.add(value.size) + ptr = self.asm_ast_to_expr(value.ptr, symbol_pool, None, set()) + if ptr is None: + return None + return ExprMem(ptr, value.size) + return None -class r_al(reg_noarg, m_arg): +class r_al(reg_noarg, x86_arg): reg_info = r08_eax parser = reg_info.parser -class r_ax(reg_noarg, m_arg): +class r_ax(reg_noarg, x86_arg): reg_info = r16_eax parser = reg_info.parser -class r_dx(reg_noarg, m_arg): +class r_dx(reg_noarg, x86_arg): reg_info = r16_edx parser = reg_info.parser -class r_eax(reg_noarg, m_arg): +class r_eax(reg_noarg, x86_arg): reg_info = r32_eax parser = reg_info.parser -class r_rax(reg_noarg, m_arg): +class r_rax(reg_noarg, x86_arg): reg_info = r64_eax parser = reg_info.parser -class r_cl(reg_noarg, m_arg): +class r_cl(reg_noarg, x86_arg): reg_info = r08_ecx parser = reg_info.parser @@ -442,9 +430,6 @@ repeat_mn = ["INS", "OUTS", "CMPSB", "CMPSW", "CMPSD", "CMPSQ", ] -segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} -enc2segm = dict([(x[1], x[0]) for x in segm2enc.items()]) - class group: @@ -685,19 +670,19 @@ class mn_x86(cls_mn): return [(subcls, name, bases, dct, fields)] @classmethod - def fromstring(cls, s, mode): + def fromstring(cls, text, symbol_pool, mode): pref = 0 - prefix, new_s = get_prefix(s) + prefix, new_s = get_prefix(text) if prefix == "LOCK": pref |= 1 - s = new_s + text = new_s elif prefix == "REPNE": pref |= 2 - s = new_s + text = new_s elif prefix == "REPE": pref |= 4 - s = new_s - c = super(mn_x86, cls).fromstring(s, mode) + text = new_s + c = super(mn_x86, cls).fromstring(text, symbol_pool, mode) c.additional_info.g1.value = pref return c @@ -1065,10 +1050,12 @@ class x86_imm_fix_08(imm_noarg): def decodeval(self, v): return self.ival - def encodeval(self, v): + def encode(self): + v = self.expr2int(self.expr) if v != self.ival: return False - return self.ival + self.value = 0 + return True class x86_08(x86_imm): @@ -1222,7 +1209,7 @@ class x86_s32to64(x86_s08to32): return ExprInt(x, 64) -class bs_eax(m_arg): +class bs_eax(x86_arg): reg_info = r_eax_all rindex = 0 parser = reg_info.parser @@ -1262,7 +1249,7 @@ class bs_eax(m_arg): return False return False -class bs_seg(m_arg): +class bs_seg(x86_arg): reg_info = r_eax_all rindex = 0 parser = reg_info.parser @@ -1324,7 +1311,7 @@ class bs_gs(bs_seg): parser = reg_info.parser -class x86_reg_st(reg_noarg, m_arg): +class x86_reg_st(reg_noarg, x86_arg): reg_info = r_st_all parser = reg_info.parser @@ -1932,15 +1919,14 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): return expr -class x86_rm_arg(m_arg): +class x86_rm_arg(x86_arg): parser = rmarg - def fromstring(self, s, parser_result=None): - start, stop = super(x86_rm_arg, self).fromstring(s, parser_result) + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(x86_rm_arg, self).fromstring(text, symbol_pool, parser_result) p = self.parent if start is None: return None, None - s = self.expr.size return start, stop def get_modrm(self): @@ -2072,9 +2058,9 @@ class x86_rm_arg(m_arg): yield x class x86_rm_mem(x86_rm_arg): - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem, self).fromstring(s, parser_result) + start, stop = super(x86_rm_mem, self).fromstring(text, symbol_pool, parser_result) if not isinstance(self.expr, ExprMem): return None, None return start, stop @@ -2082,9 +2068,9 @@ class x86_rm_mem(x86_rm_arg): class x86_rm_mem_far(x86_rm_arg): parser = mem_far - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem_far, self).fromstring(s, parser_result) + start, stop = super(x86_rm_mem_far, self).fromstring(text, symbol_pool, parser_result) if not isinstance(self.expr, ExprMem): return None, None self.expr = ExprOp('far', self.expr) @@ -2454,24 +2440,28 @@ class x86_rm_reg_noarg(object): parser = gpreg - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"): self.parent.w8.value = 1 if parser_result: - e, start, stop = parser_result[self.parser] - if e is None: + result, start, stop = parser_result[self.parser] + if result == [None]: return None, None - self.expr = e + self.expr = result if self.expr.size == 8: if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): return None, None self.parent.w8.value = 0 return start, stop try: - v, start, stop = self.parser.scanString(s).next() + result, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + expr = self.asm_ast_to_expr(result[0], symbol_pool) + if expr is None: + return None, None + + self.expr = expr if self.expr.size == 0: if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): return None, None @@ -2540,7 +2530,7 @@ class x86_rm_reg_noarg(object): return True -class x86_rm_reg_mm(x86_rm_reg_noarg, m_arg): +class x86_rm_reg_mm(x86_rm_reg_noarg, x86_arg): selreg = gpregs_mm def decode(self, v): if self.parent.mode == 64 and self.getrexsize(): @@ -2570,7 +2560,7 @@ class x86_rm_reg_xmm(x86_rm_reg_mm): class x86_rm_reg_bnd(x86_rm_reg_mm): selreg = gpregs_bnd -class x86_rm_reg(x86_rm_reg_noarg, m_arg): +class x86_rm_reg(x86_rm_reg_noarg, x86_arg): pass @@ -2602,25 +2592,25 @@ class x86_reg_noarg(x86_rm_reg_noarg): self.parent.rex_b.value = v -class x86_rm_segm(reg_noarg, m_arg): +class x86_rm_segm(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = segmreg parser = reg_info.parser -class x86_rm_cr(reg_noarg, m_arg): +class x86_rm_cr(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = crregs parser = reg_info.parser -class x86_rm_dr(reg_noarg, m_arg): +class x86_rm_dr(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = drregs parser = reg_info.parser -class x86_rm_flt(reg_noarg, m_arg): +class x86_rm_flt(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = fltregs parser = reg_info.parser @@ -2633,7 +2623,7 @@ class bs_fbit(bsi): return True -class bs_cl1(bsi, m_arg): +class bs_cl1(bsi, x86_arg): parser = cl_or_imm def decode(self, v): @@ -2750,16 +2740,16 @@ class bs_cond_disp(bs_cond): return True -class bs_cond_imm(bs_cond_scale, m_arg): - parser = int_or_expr +class bs_cond_imm(bs_cond_scale, x86_arg): + parser = base_expr max_size = 32 - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: try: - expr, start, stop = self.parser.scanString(s).next() + expr, start, stop = self.parser.scanString(text).next() except StopIteration: expr = None self.expr = expr @@ -2774,7 +2764,7 @@ class bs_cond_imm(bs_cond_scale, m_arg): self.expr = ExprInt(v & mask, l) if self.expr is None: - log.debug('cannot fromstring int %r', s) + log.debug('cannot fromstring int %r', text) return None, None return start, stop @@ -2879,14 +2869,14 @@ class bs_cond_imm64(bs_cond_imm): class bs_rel_off(bs_cond_imm): - parser = int_or_expr + parser = base_expr - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: try: - expr, start, stop = self.parser.scanString(s).next() + expr, start, stop = self.parser.scanString(text).next() except StopIteration: expr = None self.expr = expr @@ -2939,7 +2929,7 @@ class bs_rel_off(bs_cond_imm): return True class bs_s08(bs_rel_off): - parser = int_or_expr + parser = base_expr @classmethod def flen(cls, mode, v): @@ -3020,10 +3010,10 @@ class bs_moff(bsi): return True -class bs_movoff(m_arg): +class bs_movoff(x86_arg): parser = deref_mem - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: @@ -3035,14 +3025,14 @@ class bs_movoff(m_arg): return None, None return start, stop try: - v, start, stop = self.parser.scanString(s).next() + v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None if not isinstance(e, ExprMem): return None, None self.expr = v[0] if self.expr is None: - log.debug('cannot fromstring int %r', s) + log.debug('cannot fromstring int %r', text) return None, None return start, stop @@ -3087,10 +3077,10 @@ class bs_movoff(m_arg): return True -class bs_msegoff(m_arg): +class bs_msegoff(x86_arg): parser = deref_ptr - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: @@ -3100,12 +3090,12 @@ class bs_msegoff(m_arg): return None, None return start, stop try: - v, start, stop = self.parser.scanString(s).next() + v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None self.expr = v[0] if self.expr is None: - log.debug('cannot fromstring int %r', s) + log.debug('cannot fromstring int %r', text) return None, None return start, stop @@ -3171,13 +3161,13 @@ disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") s08 = bs(l=8, cls=(bs_s08, )) -u08 = bs(l=8, cls=(x86_08, m_arg)) -u07 = bs(l=7, cls=(x86_08, m_arg)) -u16 = bs(l=16, cls=(x86_16, m_arg)) -u32 = bs(l=32, cls=(x86_32, m_arg)) -s3264 = bs(l=32, cls=(x86_s32to64, m_arg)) +u08 = bs(l=8, cls=(x86_08, x86_arg)) +u07 = bs(l=7, cls=(x86_08, x86_arg)) +u16 = bs(l=16, cls=(x86_16, x86_arg)) +u32 = bs(l=32, cls=(x86_32, x86_arg)) +s3264 = bs(l=32, cls=(x86_s32to64, x86_arg)) -u08_3 = bs(l=0, cls=(x86_imm_fix_08, m_arg), ival = 3) +u08_3 = bs(l=0, cls=(x86_imm_fix_08, x86_arg), ival = 3) d0 = bs("000", fname='reg') d1 = bs("001", fname='reg') diff --git a/miasm2/core/asm_ast.py b/miasm2/core/asm_ast.py new file mode 100644 index 00000000..3b06ce62 --- /dev/null +++ b/miasm2/core/asm_ast.py @@ -0,0 +1,91 @@ +class AstNode(object): + """ + Ast node object + """ + def __neg__(self): + if isinstance(self, AstInt): + value = AstInt(-self.value) + else: + value = AstOp('-', self) + return value + + def __add__(self, other): + return AstOp('+', self, other) + + def __sub__(self, other): + return AstOp('-', self, other) + + def __div__(self, other): + return AstOp('/', self, other) + + def __mod__(self, other): + return AstOp('%', self, other) + + def __mul__(self, other): + return AstOp('*', self, other) + + def __lshift__(self, other): + return AstOp('<<', self, other) + + def __rshift__(self, other): + return AstOp('>>', self, other) + + def __xor__(self, other): + return AstOp('^', self, other) + + def __or__(self, other): + return AstOp('|', self, other) + + def __and__(self, other): + return AstOp('&', self, other) + + +class AstInt(AstNode): + """ + Ast integer + """ + def __init__(self, value): + self.value = value + + def __str__(self): + return "%s" % self.value + + +class AstId(AstNode): + """ + Ast Id + """ + def __init__(self, name): + self.name = name + + def __str__(self): + return "%s" % self.name + + +class AstMem(AstNode): + """ + Ast memory deref + """ + def __init__(self, ptr, size): + assert isinstance(ptr, AstNode) + assert isinstance(size, (int, long)) + self.ptr = ptr + self.size = size + + def __str__(self): + return "@%d[%s]" % (self.size, self.ptr) + + +class AstOp(AstNode): + """ + Ast operator + """ + def __init__(self, op, *args): + assert all(isinstance(arg, AstNode) for arg in args) + self.op = op + self.args = args + + def __str__(self): + if len(self.args) == 1: + return "(%s %s)" % (self.op, self.args[0]) + return '(' + ("%s" % self.op).join(str(x) for x in self.args) + ')' diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 7d18c4f5..8740aeb7 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -334,9 +334,20 @@ class asm_block_bad(AsmBlockBad): class AsmSymbolPool(object): + """ + Store symbols. + + A symbol links a name to an (optional) offset + + Rules and limitations: + - two different symbols cannot have the same offset + - two different symbols cannot have the same name + - symbols manipulation (comparison, creation ...) can only be done on + symbols generated by the same symbol pool + """ def __init__(self): - self._labels = [] + self._labels = set() self._name2label = {} self._offset2label = {} self._label_num = 0 @@ -359,7 +370,7 @@ class AsmSymbolPool(object): raise ValueError('symbol %s has same name as %s' % (label, self._name2label[label.name])) - self._labels.append(label) + self._labels.add(label) if label.offset is not None: self._offset2label[label.offset] = label if label.name != "": @@ -427,10 +438,17 @@ class AsmSymbolPool(object): self._offset2label[label.offset] = label @property - def items(self): + def labels(self): """Return all labels""" return self._labels + @property + def items(self): + """Return all labels""" + warnings.warn('DEPRECATION WARNING: use "labels" instead of "items"') + return list(self._labels) + + def __str__(self): return reduce(lambda x, y: x + str(y) + '\n', self._labels, "") @@ -446,7 +464,7 @@ class AsmSymbolPool(object): def merge(self, symbol_pool): """Merge with another @symbol_pool""" - self._labels += symbol_pool._labels + self._labels.update(symbol_pool.labels) self._name2label.update(symbol_pool._name2label) self._offset2label.update(symbol_pool._offset2label) @@ -718,6 +736,14 @@ class AsmCFG(DiGraph): yield predecessor done.add(predecessor) + def getby_offset(self, offset): + """Return block containing @offset""" + for block in self: + if block.lines[0].offset <= offset < \ + (block.lines[-1].offset + block.lines[-1].l): + return block + return None + def sanity_check(self): """Do sanity checks on blocks' constraints: * no pendings diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index aa94773f..d75b99cf 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -13,6 +13,9 @@ from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp + +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp + log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -85,12 +88,19 @@ def literal_list(l): return o -class reg_info: +class reg_info(object): def __init__(self, reg_str, reg_expr): self.str = reg_str self.expr = reg_expr - self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + self.parser = literal_list(reg_str).setParseAction(self.cb_parse) + + def cb_parse(self, tokens): + assert len(tokens) == 1 + i = self.str.index(tokens[0]) + reg = self.expr[i] + result = AstId(reg) + return result def reg2expr(self, s): i = self.str.index(s[0]) @@ -100,34 +110,54 @@ class reg_info: return self.expr.index(e) -def gen_reg(rname, env, sz=32): - """ - Gen reg expr and parser - Equivalent to: - PC = ExprId('PC') - reg_pc_str = ['PC'] - reg_pc_expr = [ExprId(x, sz) for x in reg_pc_str] - regpc = reg_info(reg_pc_str, reg_pc_expr) +class reg_info_dct(object): + + def __init__(self, reg_expr): + self.dct_str_inv = dict((v.name, k) for k, v in reg_expr.iteritems()) + self.dct_expr = reg_expr + self.dct_expr_inv = dict((v, k) for k, v in reg_expr.iteritems()) + reg_str = [v.name for v in reg_expr.itervalues()] + self.parser = literal_list(reg_str).setParseAction(self.cb_parse) + + def cb_parse(self, tokens): + assert len(tokens) == 1 + i = self.dct_str_inv[tokens[0]] + reg = self.dct_expr[i] + result = AstId(reg) + return result + + def reg2expr(self, s): + i = self.dct_str_inv[s[0]] + return self.dct_expr[i] + + def expr2regi(self, e): + return self.dct_expr_inv[e] + - class bs_rname(m_reg): - reg = regi_rname +def gen_reg(reg_name, sz=32): + """Gen reg expr and parser""" + reg_name_lower = reg_name.lower() + reg = m2_expr.ExprId(reg_name, sz) + reginfo = reg_info([reg_name], [reg]) + return reg, reginfo - bsrname = bs(l=0, cls=(bs_rname,)) +def gen_reg_bs(reg_name, reg_info, base_cls): """ - rnamel = rname.lower() - r = m2_expr.ExprId(rname, sz) - reg_str = [rname] - reg_expr = [r] - regi = reg_info(reg_str, reg_expr) - # define as global val - cname = "bs_" + rnamel - c = type(cname, (m_reg,), {'reg': regi}) - env[rname] = r - env["regi_" + rnamel] = regi - env[cname] = c - env["bs" + rnamel] = bs(l=0, cls=(c,)) - return r, regi + Generate: + class bs_reg_name(base_cls): + reg = reg_info + + bs_reg_name = bs(l=0, cls=(bs_reg_name,)) + """ + reg_name_lower = reg_name.lower() + + bs_name = "bs_%s" % reg_name + cls = type(bs_name, base_cls, {'reg': reg_info}) + + bs_obj = bs(l=0, cls=(cls,)) + + return cls, bs_obj def gen_regs(rnames, env, sz=32): @@ -150,45 +180,45 @@ LPARENTHESIS = pyparsing.Literal("(") RPARENTHESIS = pyparsing.Literal(")") -def int2expr(t): - v = t[0] +def int2expr(tokens): + v = tokens[0] return (m2_expr.ExprInt, v) -def parse_op(t): - v = t[0] +def parse_op(tokens): + v = tokens[0] return (m2_expr.ExprOp, v) -def parse_id(t): - v = t[0] +def parse_id(tokens): + v = tokens[0] return (m2_expr.ExprId, v) -def ast_parse_op(t): - if len(t) == 1: - return t[0] - if len(t) == 2: - if t[0] in ['-', '+', '!']: - return m2_expr.ExprOp(t[0], t[1]) - if len(t) == 3: - if t[1] == '-': +def ast_parse_op(tokens): + if len(tokens) == 1: + return tokens[0] + if len(tokens) == 2: + if tokens[0] in ['-', '+', '!']: + return m2_expr.ExprOp(tokens[0], tokens[1]) + if len(tokens) == 3: + if tokens[1] == '-': # a - b => a + (-b) - t[1] = '+' - t[2] = - t[2] - return m2_expr.ExprOp(t[1], t[0], t[2]) - t = t[::-1] - while len(t) >= 3: - o1, op, o2 = t.pop(), t.pop(), t.pop() + tokens[1] = '+' + tokens[2] = - tokens[2] + return m2_expr.ExprOp(tokens[1], tokens[0], tokens[2]) + tokens = tokens[::-1] + while len(tokens) >= 3: + o1, op, o2 = tokens.pop(), tokens.pop(), tokens.pop() if op == '-': # a - b => a + (-b) op = '+' o2 = - o2 e = m2_expr.ExprOp(op, o1, o2) - t.append(e) - if len(t) != 1: + tokens.append(e) + if len(tokens) != 1: raise NotImplementedError('strange op') - return t[0] + return tokens[0] def ast_id2expr(a): @@ -199,135 +229,14 @@ def ast_int2expr(a): return m2_expr.ExprInt(a, 32) - -class ParseAst(object): - - def __init__(self, id2expr, int2expr, default_size=32): - self.id2expr = id2expr - self.int2expr = int2expr - self.default_size = default_size - - def int_from_size(self, size, value): - """Transform a string into ExprInt. - * if @size is None, use provided int2expr - * else, use @size to generate integer - @size: size of int; None if not forced. - @value: string representing an integer - """ - if size is None: - return self.int2expr(value) - else: - return m2_expr.ExprInt(value, size) - - def id_from_size(self, size, value): - """Transform a string into ExprId. - * if @size is None, use provided id2expr - * else, use @size to generate id - @size: size of id; None if not forced. - @value: string representing the id - """ - value = self.id2expr(value) - if isinstance(value, m2_expr.Expr): - return value - if size is None: - size = self.default_size - assert value is not None - return m2_expr.ExprId(asmblock.AsmLabel(value), size) - - def ast_to_expr(self, size, ast): - """Transform a typed ast into a Miasm expression - @size: default size - @ast: typed ast - """ - assert(isinstance(ast, tuple)) - if ast[0] is m2_expr.ExprId: - expr = self.id_from_size(size, ast[1]) - if isinstance(expr, str): - expr = self.id_from_size(size, expr) - elif ast[0] is m2_expr.ExprInt: - expr = self.int_from_size(size, ast[1]) - elif ast[0] is m2_expr.ExprOp: - out = [] - for arg in ast[1]: - if isinstance(arg, tuple): - arg = self.ast_to_expr(size, arg) - out.append(arg) - expr = ast_parse_op(out) - else: - raise TypeError('unknown type') - return expr - - def ast_get_ids(self, ast): - """Retrieve every node of type ExprId in @ast - @ast: typed ast - """ - assert(isinstance(ast, tuple)) - if ast[0] is m2_expr.ExprId: - return set([ast[1]]) - elif ast[0] is m2_expr.ExprInt: - return set() - elif ast[0] is m2_expr.ExprOp: - out = set() - for x in ast[1]: - if isinstance(x, tuple): - out.update(self.ast_get_ids(x)) - return out - raise TypeError('unknown type') - - def _extract_ast_core(self, ast): - assert(isinstance(ast, tuple)) - if ast[0] in [m2_expr.ExprInt, m2_expr.ExprId]: - return ast - elif ast[0] is m2_expr.ExprOp: - out = [] - for arg in ast[1]: - if isinstance(arg, tuple): - arg = self._extract_ast_core(arg) - out.append(arg) - return tuple([ast[0]] + [out]) - else: - raise TypeError('unknown type') - - def extract_ast_core(self, ast): - """ - Trasform an @ast into a Miasm expression. - Use registers size to deduce label and integers sizes. - """ - ast = self._extract_ast_core(ast) - ids = self.ast_get_ids(ast) - ids_expr = [self.id2expr(x) for x in ids] - sizes = set([expr.size for expr in ids_expr - if isinstance(expr, m2_expr.Expr)]) - if not sizes: - size = None - elif len(sizes) == 1: - size = sizes.pop() - else: - # Multiple sizes in ids - raise StopIteration - return self.ast_to_expr(size, ast) - - def __call__(self, ast): - """ - Trasform an @ast into a Miasm expression. - Use registers size to deduce label and integers sizes. - """ - ast = ast[0] - if isinstance(ast, m2_expr.Expr): - return ast - return self.extract_ast_core(ast) - - -def neg_int(t): - x = -t[0] +def neg_int(tokens): + x = -tokens[0] return x -integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda _a, _b, t: - int(t[0])) +integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda tokens: int(tokens[0])) hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) -hex_int = pyparsing.Combine(hex_word).setParseAction(lambda _a, _b, t: - int(t[0], 16)) +hex_int = pyparsing.Combine(hex_word).setParseAction(lambda tokens: int(tokens[0], 16)) # str_int = (Optional('-') + (hex_int | integer)) str_int_pos = (hex_int | integer) @@ -343,29 +252,154 @@ multop = pyparsing.oneOf('* / %') plusop = pyparsing.oneOf('+ -') -def gen_base_expr(): - variable = pyparsing.Word(pyparsing.alphas + "_$.", - pyparsing.alphanums + "_") - variable.setParseAction(parse_id) - operand = str_int | variable - base_expr = pyparsing.operatorPrecedence(operand, - [("!", 1, pyparsing.opAssoc.RIGHT, parse_op), - (logicop, 2, pyparsing.opAssoc.RIGHT, - parse_op), - (signop, 1, pyparsing.opAssoc.RIGHT, - parse_op), - (multop, 2, pyparsing.opAssoc.LEFT, - parse_op), - (plusop, 2, pyparsing.opAssoc.LEFT, - parse_op), - ]) - return variable, operand, base_expr +########################## + +def literal_list(l): + l = l[:] + l.sort() + l = l[::-1] + o = pyparsing.Literal(l[0]) + for x in l[1:]: + o |= pyparsing.Literal(x) + return o + + +def cb_int(tokens): + assert len(tokens) == 1 + integer = AstInt(tokens[0]) + return integer + + +def cb_parse_id(tokens): + assert len(tokens) == 1 + reg = tokens[0] + return AstId(reg) + + +def cb_op_not(tokens): + tokens = tokens[0] + assert len(tokens) == 2 + assert tokens[0] == "!" + result = AstOp("!", tokens[1]) + return result + + +def merge_ops(tokens, op): + args = [] + if len(tokens) >= 3: + args = [tokens.pop(0)] + i = 0 + while i < len(tokens): + op_tmp = tokens[i] + arg = tokens[i+1] + i += 2 + if op_tmp != op: + raise ValueError("Bad operator") + args.append(arg) + result = AstOp(op, *args) + return result + + +def cb_op_and(tokens): + result = merge_ops(tokens[0], "&") + return result + + +def cb_op_xor(tokens): + result = merge_ops(tokens[0], "^") + return result + + +def cb_op_sign(tokens): + assert len(tokens) == 1 + op, value = tokens[0] + return -value + + +def cb_op_div(tokens): + tokens = tokens[0] + assert len(tokens) == 3 + assert tokens[1] == "/" + result = AstOp("/", tokens[0], tokens[2]) + return result + + +def cb_op_plusminus(tokens): + tokens = tokens[0] + if len(tokens) == 3: + # binary op + assert isinstance(tokens[0], AstNode) + assert isinstance(tokens[2], AstNode) + op, args = tokens[1], [tokens[0], tokens[2]] + elif len(tokens) > 3: + args = [tokens.pop(0)] + i = 0 + while i < len(tokens): + op = tokens[i] + arg = tokens[i+1] + i += 2 + if op == '-': + arg = -arg + elif op == '+': + pass + else: + raise ValueError("Bad operator") + args.append(arg) + op = '+' + else: + raise ValueError("Parsing error") + assert all(isinstance(arg, AstNode) for arg in args) + result = AstOp(op, *args) + return result + + +def cb_op_mul(tokens): + tokens = tokens[0] + assert len(tokens) == 3 + assert isinstance(tokens[0], AstNode) + assert isinstance(tokens[2], AstNode) + + # binary op + op, args = tokens[1], [tokens[0], tokens[2]] + result = AstOp(op, *args) + return result + +integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda tokens: int(tokens[0])) +hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) +hex_int = pyparsing.Combine(hex_word).setParseAction(lambda tokens: int(tokens[0], 16)) + +str_int_pos = (hex_int | integer) + +str_int = str_int_pos +str_int.setParseAction(cb_int) + +notop = pyparsing.oneOf('!') +andop = pyparsing.oneOf('&') +orop = pyparsing.oneOf('|') +xorop = pyparsing.oneOf('^') +shiftop = pyparsing.oneOf('>> <<') +rotop = pyparsing.oneOf('<<< >>>') +signop = pyparsing.oneOf('+ -') +mulop = pyparsing.oneOf('*') +plusop = pyparsing.oneOf('+ -') +divop = pyparsing.oneOf('/') + + +variable = pyparsing.Word(pyparsing.alphas + "_$.", pyparsing.alphanums + "_") +variable.setParseAction(cb_parse_id) +operand = str_int | variable -variable, operand, base_expr = gen_base_expr() +base_expr = pyparsing.operatorPrecedence(operand, + [(notop, 1, pyparsing.opAssoc.RIGHT, cb_op_not), + (andop, 2, pyparsing.opAssoc.RIGHT, cb_op_and), + (xorop, 2, pyparsing.opAssoc.RIGHT, cb_op_xor), + (signop, 1, pyparsing.opAssoc.RIGHT, cb_op_sign), + (mulop, 2, pyparsing.opAssoc.RIGHT, cb_op_mul), + (divop, 2, pyparsing.opAssoc.RIGHT, cb_op_div), + (plusop, 2, pyparsing.opAssoc.LEFT, cb_op_plusminus), + ]) -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) default_prio = 0x1337 @@ -431,13 +465,14 @@ class bs(object): self.cls = cls self.fname = fname self.order = order - self.lmask = lmask self.fbits = fbits self.fmask = fmask self.flen = flen self.value = value self.kargs = kargs + lmask = property(lambda self:(1 << self.l) - 1) + def __getitem__(self, item): return getattr(self, item) @@ -502,7 +537,6 @@ class bsi(object): self.cls = cls self.fname = fname self.order = order - self.lmask = lmask self.fbits = fbits self.fmask = fmask self.flen = flen @@ -510,6 +544,8 @@ class bsi(object): self.kargs = kargs self.__dict__.update(self.kargs) + lmask = property(lambda self:(1 << self.l) - 1) + def decode(self, v): self.value = v & self.lmask return True @@ -636,18 +672,23 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e return start, stop try: - v, start, stop = self.parser.scanString(s).next() + v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + arg = v[0] + expr = self.asm_ast_to_expr(arg, symbol_pool) + self.expr = expr return start, stop + def asm_ast_to_expr(self, arg, symbol_pool): + raise NotImplementedError("Virtual") + class m_reg(m_arg): prio = default_prio @@ -668,16 +709,18 @@ class reg_noarg(object): reg_info = None parser = None - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e return start, stop try: - v, start, stop = self.parser.scanString(s).next() + v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + arg = v[0] + expr = self.parses_to_expr(arg, symbol_pool) + self.expr = expr return start, stop def decode(self, v): @@ -1232,11 +1275,11 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, s, mode = None): + def fromstring(cls, text, symbol_pool, mode = None): global total_scans - name = re.search('(\S+)', s).groups() + name = re.search('(\S+)', text).groups() if not name: - raise ValueError('cannot find name', s) + raise ValueError('cannot find name', text) name = name[0] if not name in cls.all_mn_name: @@ -1249,7 +1292,7 @@ class cls_mn(object): for cc in clist: for c in cls.get_cls_instance(cc, mode): args_expr = [] - args_str = s[len(name):].strip(' ') + args_str = text[len(name):].strip(' ') start = 0 cannot_parse = False @@ -1271,9 +1314,12 @@ class cls_mn(object): v, start, stop = [None], None, None if start != 0: v, start, stop = [None], None, None - parsers[(i, start_i)][p] = v[0], start, stop - - start, stop = f.fromstring(args_str, parsers[(i, start_i)]) + if v != [None]: + v = f.asm_ast_to_expr(v[0], symbol_pool) + if v is None: + v, start, stop = [None], None, None + parsers[(i, start_i)][p] = v, start, stop + start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) if start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1296,7 +1342,7 @@ class cls_mn(object): break if len(out) == 0: - raise ValueError('cannot fromstring %r' % s) + raise ValueError('cannot fromstring %r' % text) if len(out) != 1: log.debug('fromstring multiple args ret default') c = out[0] @@ -1393,7 +1439,9 @@ class cls_mn(object): log.debug('cannot encode %r', f) can_encode = False break + if f.value is not None and f.l: + assert f.value <= f.lmask cur_len += f.l index += 1 if ret is True: @@ -1510,15 +1558,15 @@ class imm_noarg(object): return None return v - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: try: - e, start, stop = self.parser.scanString(s).next() + e, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - if e is None: + if e == [None]: return None, None assert(isinstance(e, m2_expr.Expr)) @@ -1529,7 +1577,7 @@ class imm_noarg(object): else: raise TypeError('zarb expr') if self.expr is None: - log.debug('cannot fromstring int %r', s) + log.debug('cannot fromstring int %r', text) return None, None return start, stop @@ -1557,6 +1605,8 @@ class imm_noarg(object): v = self.encodeval(v) if v is False: return False + if v > self.lmask: + return False self.value = v return True diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py index 4bd32cb3..e99e3829 100644 --- a/miasm2/core/ctypesmngr.py +++ b/miasm2/core/ctypesmngr.py @@ -36,6 +36,9 @@ class CTypeBase(object): def _typerepr(self): return self.__repr + def __ne__(self, other): + return not self.__eq__(other) + def eq_base(self, other): """Trivial common equality test""" return self.__class__ == other.__class__ diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index b74717da..ce17fc75 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -57,6 +57,9 @@ class DiGraph(object): return all((self._nodes == graph.nodes(), sorted(self._edges) == sorted(graph.edges()))) + def __ne__(self, other): + return not self.__eq__(other) + def add_node(self, node): """Add the node @node to the graph. If the node was already present, return False. diff --git a/miasm2/core/interval.py b/miasm2/core/interval.py index 019764d4..3fde83ad 100644 --- a/miasm2/core/interval.py +++ b/miasm2/core/interval.py @@ -120,6 +120,9 @@ class interval(object): def __eq__(self, i): return self.intervals == i.intervals + def __ne__(self, other): + return not self.__eq__(other) + def __add__(self, i): if isinstance(i, interval): i = i.intervals diff --git a/miasm2/core/objc.py b/miasm2/core/objc.py index 91ef6604..80f1e341 100644 --- a/miasm2/core/objc.py +++ b/miasm2/core/objc.py @@ -430,6 +430,9 @@ class CGen(object): return (self.__class__ == other.__class__ and self._ctype == other.ctype) + def __ne__(self, other): + return not self.__eq__(other) + def to_c(self): """Generate corresponding C""" diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 8ea0f6b1..df419680 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -3,8 +3,8 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmblock as asmblock -from miasm2.core.cpu import gen_base_expr, ParseAst -from miasm2.core.cpu import instruction +from miasm2.core.cpu import instruction, base_expr +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp declarator = {'byte': 8, 'word': 16, @@ -101,6 +101,16 @@ STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 +def asm_ast_to_expr_with_size(arg, symbol_pool, size): + if isinstance(arg, AstId): + return m2_expr.ExprId(arg.name, size) + if isinstance(arg, AstOp): + args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] + return m2_expr.ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return m2_expr.ExprInt(arg.value, size) + return None + def parse_txt(mnemo, attrib, txt, symbol_pool=None): """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where blocks is a list of asm_bloc and symbol_pool the associated AsmSymbolPool @@ -168,16 +178,12 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): expr_list = [] # parser - base_expr = gen_base_expr()[2] - my_var_parser = ParseAst(lambda x: m2_expr.ExprId(x, size), - lambda x: - m2_expr.ExprInt(x, size)) - base_expr.setParseAction(my_var_parser) for element in data_raw: element = element.strip() - element_expr = base_expr.parseString(element)[0] - expr_list.append(element_expr.canonize()) + element_parsed = base_expr.parseString(element)[0] + element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) raw_data.element_size = size @@ -216,7 +222,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, attrib) + instr = mnemo.fromstring(line, symbol_pool, attrib) # replace orphan AsmLabel with labels from symbol_pool replace_orphan_labels(instr, symbol_pool) diff --git a/miasm2/core/types.py b/miasm2/core/types.py index 374fc804..b0aca07e 100644 --- a/miasm2/core/types.py +++ b/miasm2/core/types.py @@ -340,6 +340,9 @@ class Type(object): def __neq__(self, other): return not self == other + def __ne__(self, other): + return not self.__eq__(other) + class RawStruct(Type): """Dumb struct.pack/unpack field. Mainly used to factorize code. diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index f0491f1a..54cd5a2d 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -149,7 +149,6 @@ class Expr(object): __slots__ = ["_hash", "_repr", "_size"] - all_exprs = set() args2expr = {} canon_exprs = set() use_singleton = True @@ -543,6 +542,9 @@ class ExprId(Expr): return self.__class__, state def __new__(cls, name, size=None): + if size is None: + warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') + size = 32 return Expr.get_object(cls, (name, size)) def __str__(self): @@ -814,6 +816,10 @@ class ExprMem(Expr): return self.__class__, state def __new__(cls, arg, size=None): + if size is None: + warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprMem(arg, SIZE)') + size = 32 + return Expr.get_object(cls, (arg, size)) def __str__(self): diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 64eb3463..1c6895e0 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -177,6 +177,9 @@ class AssignBlock(object): return False return all(other[dst] == src for dst, src in self.iteritems()) + def __ne__(self, other): + return not self.__eq__(other) + def __len__(self): return len(self._assigns) @@ -240,15 +243,18 @@ class AssignBlock(object): return m2_expr.ExprAff(dst, self[dst]) def simplify(self, simplifier): - """Return a new AssignBlock with expression simplified - @simplifier: ExpressionSimplifier instance""" + """ + Return a new AssignBlock with expression simplified + + @simplifier: ExpressionSimplifier instance + """ new_assignblk = {} for dst, src in self.iteritems(): if dst == src: continue - src = simplifier(src) - dst = simplifier(dst) - new_assignblk[dst] = src + new_src = simplifier(src) + new_dst = simplifier(dst) + new_assignblk[new_dst] = new_src return AssignBlock(irs=new_assignblk, instr=self.instr) @@ -687,12 +693,16 @@ class IntermediateRepresentation(object): Simplify expressions in each irblocks @simplifier: ExpressionSimplifier instance """ + modified = False for label, block in self.blocks.iteritems(): assignblks = [] for assignblk in block: new_assignblk = assignblk.simplify(simplifier) + if assignblk != new_assignblk: + modified = True assignblks.append(new_assignblk) self.blocks[label] = IRBlock(label, assignblks) + return modified def replace_expr_in_ir(self, bloc, rep): for assignblk in bloc: @@ -808,6 +818,10 @@ class IntermediateRepresentation(object): continue if not expr_is_label(assignblk[self.IRDst]): continue + dst = assignblk[self.IRDst].name + if dst == block.label: + # Infinite loop block + continue jmp_blocks.add(block.label) # Remove them, relink graph @@ -844,7 +858,7 @@ class IntermediateRepresentation(object): self.graph.add_uniq_edge(lbl, dst_label) modified = True if dst.src1 == dst.src2: - dst = src1 + dst = dst.src1 else: continue new_parent = parent.set_dst(dst) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 3cde2af7..4070f261 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -53,6 +53,9 @@ class SymbolicState(StateEngine): return False return self.symbols == other.symbols + def __ne__(self, other): + return not self.__eq__(other) + def __iter__(self): for dst, src in self._symbols: yield dst, src diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 71837ed0..1e1e76e9 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -39,6 +39,9 @@ class SymbolicStateTop(StateEngine): return (self.symbols == other.symbols and self.regstop == other.regstop) + def __ne__(self, other): + return not self.__eq__(other) + def __iter__(self): for dst, src in self._symbols: yield dst, src diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 95502a15..0e285669 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -94,12 +94,16 @@ class TranslatorC(Translator): self.from_expr(expr.args[0]), self.from_expr(expr.args[1]), size2mask(expr.args[0].size)) - elif (expr.op.startswith('cpuid') or - expr.op.startswith("fcom") or + elif expr.op == 'cpuid': + return "%s(%s, %s)" % (expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1])) + elif (expr.op.startswith("fcom") or expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", "fprem", "fprem_lsb", "fyl2x", "fpatan"]): - return "%s(%s, %s)" % (expr.op, self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "fpu_%s(%s, %s)" % (expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1])) elif expr.op == "segm": return "segm2addr(jitcpu, %s, %s)" % ( self.from_expr(expr.args[0]), self.from_expr(expr.args[1])) diff --git a/miasm2/jitter/arch/JitCore_arm.c b/miasm2/jitter/arch/JitCore_arm.c index 6b167da5..cce0997d 100644 --- a/miasm2/jitter/arch/JitCore_arm.c +++ b/miasm2/jitter/arch/JitCore_arm.c @@ -31,6 +31,11 @@ reg_dict gpreg_dict[] = { {.name = "R0", .offset = offsetof(vm_cpu_t, R0)}, {.name = "nf", .offset = offsetof(vm_cpu_t, nf)}, {.name = "of", .offset = offsetof(vm_cpu_t, of)}, {.name = "cf", .offset = offsetof(vm_cpu_t, cf)}, + + {.name = "ge0", .offset = offsetof(vm_cpu_t, ge0)}, + {.name = "ge1", .offset = offsetof(vm_cpu_t, ge1)}, + {.name = "ge2", .offset = offsetof(vm_cpu_t, ge2)}, + {.name = "ge3", .offset = offsetof(vm_cpu_t, ge3)}, }; /************************** JitCpu object **************************/ @@ -65,6 +70,11 @@ PyObject* cpu_get_gpreg(JitCpu* self) get_reg(of); get_reg(cf); + get_reg(ge0); + get_reg(ge1); + get_reg(ge2); + get_reg(ge3); + return dict; } @@ -176,6 +186,16 @@ void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) } + +UDIV(32) + +UMOD(32) + +IDIV(32) + +IMOD(32) + + void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) { vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); @@ -304,6 +324,11 @@ getset_reg_u32(nf); getset_reg_u32(of); getset_reg_u32(cf); +getset_reg_u32(ge0); +getset_reg_u32(ge1); +getset_reg_u32(ge2); +getset_reg_u32(ge3); + PyObject* get_gpreg_offset_all(void) { @@ -335,6 +360,11 @@ PyObject* get_gpreg_offset_all(void) get_reg_off(of); get_reg_off(cf); + get_reg_off(ge0); + get_reg_off(ge1); + get_reg_off(ge2); + get_reg_off(ge3); + return dict; } @@ -374,6 +404,11 @@ static PyGetSetDef JitCpu_getseters[] = { {"of", (getter)JitCpu_get_of, (setter)JitCpu_set_of, "of", NULL}, {"cf", (getter)JitCpu_get_cf, (setter)JitCpu_set_cf, "cf", NULL}, + {"ge0", (getter)JitCpu_get_ge0, (setter)JitCpu_set_ge0, "ge0", NULL}, + {"ge1", (getter)JitCpu_get_ge1, (setter)JitCpu_set_ge0, "ge1", NULL}, + {"ge2", (getter)JitCpu_get_ge2, (setter)JitCpu_set_ge0, "ge2", NULL}, + {"ge3", (getter)JitCpu_get_ge3, (setter)JitCpu_set_ge0, "ge3", NULL}, + {NULL} /* Sentinel */ }; diff --git a/miasm2/jitter/arch/JitCore_arm.h b/miasm2/jitter/arch/JitCore_arm.h index 66d17604..445ac44a 100644 --- a/miasm2/jitter/arch/JitCore_arm.h +++ b/miasm2/jitter/arch/JitCore_arm.h @@ -26,12 +26,25 @@ typedef struct { uint32_t of; uint32_t cf; + /* ge */ + uint32_t ge0; + uint32_t ge1; + uint32_t ge2; + uint32_t ge3; + uint32_t bp_num; }vm_cpu_t; void dump_gpregs(vm_cpu_t* vmcpu); + +uint32_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); +uint32_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); +int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); +int32_t imod32(vm_cpu_t* vmcpu, int32_t a, int32_t b); + + #define RETURN_PC return BlockDst; uint32_t clz(uint32_t arg); diff --git a/miasm2/jitter/arch/JitCore_mips32.c b/miasm2/jitter/arch/JitCore_mips32.c index 19b24f1f..7722c055 100644 --- a/miasm2/jitter/arch/JitCore_mips32.c +++ b/miasm2/jitter/arch/JitCore_mips32.c @@ -220,6 +220,26 @@ void check_automod(JitCpu* jitcpu, uint64_t addr, uint64_t size) } + +UDIV(16) +UDIV(32) +UDIV(64) + +UMOD(16) +UMOD(32) +UMOD(64) + + +IDIV(16) +IDIV(32) +IDIV(64) + +IMOD(16) +IMOD(32) +IMOD(64) + + + void MEM_WRITE_08(JitCpu* jitcpu, uint64_t addr, uint8_t src) { vm_MEM_WRITE_08(&((VmMngr*)jitcpu->pyvm)->vm_mngr, addr, src); diff --git a/miasm2/jitter/arch/JitCore_mips32.h b/miasm2/jitter/arch/JitCore_mips32.h index ff2045d7..b45cac2b 100644 --- a/miasm2/jitter/arch/JitCore_mips32.h +++ b/miasm2/jitter/arch/JitCore_mips32.h @@ -335,4 +335,19 @@ typedef struct { void dump_gpregs(vm_cpu_t* vmcpu); +uint64_t udiv64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); +uint64_t umod64(vm_cpu_t* vmcpu, uint64_t a, uint64_t b); +int64_t idiv64(vm_cpu_t* vmcpu, int64_t a, int64_t b); +int64_t imod64(vm_cpu_t* vmcpu, int64_t a, int64_t b); + +uint32_t udiv32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); +uint32_t umod32(vm_cpu_t* vmcpu, uint32_t a, uint32_t b); +int32_t idiv32(vm_cpu_t* vmcpu, int32_t a, int32_t b); +int32_t imod32(vm_cpu_t* vmcpu, int32_t a, int32_t b); + +uint16_t udiv16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); +uint16_t umod16(vm_cpu_t* vmcpu, uint16_t a, uint16_t b); +int16_t idiv16(vm_cpu_t* vmcpu, int16_t a, int16_t b); +int16_t imod16(vm_cpu_t* vmcpu, int16_t a, int16_t b); + #define RETURN_PC return BlockDst; diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index 9ed55f37..2c546be8 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -572,7 +572,7 @@ class CGen(object): irblocks_list = self.block2assignblks(block) out, instr_offsets = self.gen_init(block) - + assert len(block.lines) == len(irblocks_list) for instr, irblocks in zip(block.lines, irblocks_list): instr_attrib, irblocks_attributes = self.get_attributes(instr, irblocks, log_mn, log_regs) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index 499d6c47..b2e16c1d 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -156,6 +156,9 @@ class ExceptionHandle(): return False return (self.except_flag == to_cmp.except_flag) + def __ne__(self, to_cmp): + return not self.__eq__(to_cmp) + class jitter(object): @@ -182,6 +185,9 @@ class jitter(object): from miasm2.jitter.arch import JitCore_x86 as jcore elif arch_name == "arm": from miasm2.jitter.arch import JitCore_arm as jcore + elif arch_name == "armt": + from miasm2.jitter.arch import JitCore_arm as jcore + ir_arch.arch.name = 'arm' elif arch_name == "aarch64": from miasm2.jitter.arch import JitCore_aarch64 as jcore elif arch_name == "msp430": @@ -277,6 +283,13 @@ class jitter(object): self.breakpoints_handler.set_callback(addr, *args) self.jit.add_disassembly_splits(addr) + def get_breakpoint(self, addr): + """ + Return breakpoints handlers for address @addr + @addr: integer + """ + return self.breakpoints_handler.get_callbacks(addr) + def remove_breakpoints_by_callback(self, callback): """Remove callbacks associated with breakpoint. @callback: callback to remove diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 0e4368a8..35db1538 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -180,6 +180,10 @@ class LLVMContext_JIT(LLVMContext): from miasm2.arch.mips32.jit import mipsCGen self.cgen_class = mipsCGen self.has_delayslot = True + elif arch.name == "arm": + from miasm2.arch.arm.jit import arm_CGen + self.cgen_class = arm_CGen + self.has_delayslot = False else: self.cgen_class = CGen self.has_delayslot = False diff --git a/miasm2/jitter/vm_mngr.c b/miasm2/jitter/vm_mngr.c index 8e50a6dd..c39a08da 100644 --- a/miasm2/jitter/vm_mngr.c +++ b/miasm2/jitter/vm_mngr.c @@ -1027,7 +1027,7 @@ int64_t double_to_int_64(double d) } -double fadd(double a, double b) +double fpu_fadd(double a, double b) { double c; c = a + b; @@ -1038,7 +1038,7 @@ double fadd(double a, double b) return c; } -double fsub(double a, double b) +double fpu_fsub(double a, double b) { double c; c = a - b; @@ -1049,7 +1049,7 @@ double fsub(double a, double b) return c; } -double fmul(double a, double b) +double fpu_fmul(double a, double b) { double c; c = a * b; @@ -1060,7 +1060,7 @@ double fmul(double a, double b) return c; } -double fdiv(double a, double b) +double fpu_fdiv(double a, double b) { double c; c = a / b; @@ -1071,7 +1071,7 @@ double fdiv(double a, double b) return c; } -double ftan(double a) +double fpu_ftan(double a) { double b; b = tan(a); @@ -1082,7 +1082,7 @@ double ftan(double a) return b; } -double frndint(double a) +double fpu_frndint(double a) { int64_t b; double c; @@ -1095,7 +1095,7 @@ double frndint(double a) return c; } -double fsin(double a) +double fpu_fsin(double a) { double b; b = sin(a); @@ -1106,7 +1106,7 @@ double fsin(double a) return b; } -double fcos(double a) +double fpu_fcos(double a) { double b; b = cos(a); @@ -1118,7 +1118,7 @@ double fcos(double a) } -double fscale(double a, double b) +double fpu_fscale(double a, double b) { double c; c = a * exp2(trunc(b)); @@ -1129,7 +1129,7 @@ double fscale(double a, double b) return c; } -double f2xm1(double a) +double fpu_f2xm1(double a) { double b; b = exp2(a)-1; @@ -1140,7 +1140,7 @@ double f2xm1(double a) return b; } -double fsqrt(double a) +double fpu_fsqrt(double a) { double b; b = sqrt(a); @@ -1151,7 +1151,7 @@ double fsqrt(double a) return b; } -double fabs(double a) +double fpu_fabs(double a) { double b; b = abs(a); @@ -1162,7 +1162,7 @@ double fabs(double a) return b; } -double fprem(double a, double b) +double fpu_fprem(double a, double b) { double c; c = fmod(a, b); @@ -1173,7 +1173,7 @@ double fprem(double a, double b) return c; } -unsigned int fprem_lsb(double a, double b) +unsigned int fpu_fprem_lsb(double a, double b) { // Inspired from qemu/fpu_helper.c double c; @@ -1196,7 +1196,7 @@ unsigned int fprem_lsb(double a, double b) return q; } -double fchs(double a) +double fpu_fchs(double a) { double b; b = -a; @@ -1207,7 +1207,7 @@ double fchs(double a) return b; } -double fyl2x(double a, double b) +double fpu_fyl2x(double a, double b) { double c; c = b * (log(a) / log(2)); @@ -1218,7 +1218,7 @@ double fyl2x(double a, double b) return c; } -double fpatan(double a, double b) +double fpu_fpatan(double a, double b) { double c; c = atan2(b, a); @@ -1229,7 +1229,7 @@ double fpatan(double a, double b) return c; } -unsigned int fcom_c0(double a, double b) +unsigned int fpu_fcom_c0(double a, double b) { if (isnan(a) || isnan(b)) return 1; @@ -1237,18 +1237,18 @@ unsigned int fcom_c0(double a, double b) return 0; return 1; } -unsigned int fcom_c1(double a, double b) +unsigned int fpu_fcom_c1(double a, double b) { //XXX return 0; } -unsigned int fcom_c2(double a, double b) +unsigned int fpu_fcom_c2(double a, double b) { if (isnan(a) || isnan(b)) return 1; return 0; } -unsigned int fcom_c3(double a, double b) +unsigned int fpu_fcom_c3(double a, double b) { if (isnan(a) || isnan(b)) return 1; @@ -1257,7 +1257,7 @@ unsigned int fcom_c3(double a, double b) return 0; } -unsigned int fxam_c0(double a) +unsigned int fpu_fxam_c0(double a) { switch(fpclassify(a)) { case FP_NAN: @@ -1277,14 +1277,14 @@ unsigned int fxam_c0(double a) } } -unsigned int fxam_c1(double a) +unsigned int fpu_fxam_c1(double a) { if ((a < 0) || isnan(a)) return 1; return 0; } -unsigned int fxam_c2(double a) +unsigned int fpu_fxam_c2(double a) { switch(fpclassify(a)) { case FP_NAN: @@ -1304,7 +1304,7 @@ unsigned int fxam_c2(double a) } } -unsigned int fxam_c3(double a) +unsigned int fpu_fxam_c3(double a) { switch(fpclassify(a)) { case FP_NAN: diff --git a/miasm2/jitter/vm_mngr.h b/miasm2/jitter/vm_mngr.h index 8528eb3f..f050f7c0 100644 --- a/miasm2/jitter/vm_mngr.h +++ b/miasm2/jitter/vm_mngr.h @@ -20,7 +20,7 @@ #ifdef __APPLE__ #define __BYTE_ORDER __BYTE_ORDER__ -#elif defined(__NetBSD__) +#elif defined(__NetBSD__) || defined(__OpenBSD__) #define __BYTE_ORDER _BYTE_ORDER #define __BIG_ENDIAN _BIG_ENDIAN #define __LITTLE_ENDIAN _LITTLE_ENDIAN @@ -355,31 +355,31 @@ void func_getproc_fake(void); unsigned int cpuid(unsigned int a, unsigned int reg_num); double int2double(unsigned int m); -double fadd(double a, double b); -double fsub(double a, double b); -double fmul(double a, double b); -double fdiv(double a, double b); -double ftan(double a); -double frndint(double a); -double fsin(double a); -double fcos(double a); -double fscale(double a, double b); -double f2xm1(double a); -double fsqrt(double a); -double fabs(double a); -double fprem(double a, double b); -double fchs(double a); -double fyl2x(double a, double b); -double fpatan(double a, double b); -unsigned int fprem_lsb(double a, double b); -unsigned int fcom_c0(double a, double b); -unsigned int fcom_c1(double a, double b); -unsigned int fcom_c2(double a, double b); -unsigned int fcom_c3(double a, double b); -unsigned int fxam_c0(double a); -unsigned int fxam_c1(double a); -unsigned int fxam_c2(double a); -unsigned int fxam_c3(double a); +double fpu_fadd(double a, double b); +double fpu_fsub(double a, double b); +double fpu_fmul(double a, double b); +double fpu_fdiv(double a, double b); +double fpu_ftan(double a); +double fpu_frndint(double a); +double fpu_fsin(double a); +double fpu_fcos(double a); +double fpu_fscale(double a, double b); +double fpu_f2xm1(double a); +double fpu_fsqrt(double a); +double fpu_fabs(double a); +double fpu_fprem(double a, double b); +double fpu_fchs(double a); +double fpu_fyl2x(double a, double b); +double fpu_fpatan(double a, double b); +unsigned int fpu_fprem_lsb(double a, double b); +unsigned int fpu_fcom_c0(double a, double b); +unsigned int fpu_fcom_c1(double a, double b); +unsigned int fpu_fcom_c2(double a, double b); +unsigned int fpu_fcom_c3(double a, double b); +unsigned int fpu_fxam_c0(double a); +unsigned int fpu_fxam_c1(double a); +unsigned int fpu_fxam_c2(double a); +unsigned int fpu_fxam_c3(double a); double mem_32_to_double(unsigned int m); @@ -390,7 +390,6 @@ double int_64_to_double(uint64_t m); int16_t double_to_int_16(double d); int32_t double_to_int_32(double d); int64_t double_to_int_64(double d); -double fadd(double a, double b); unsigned int double_to_mem_32(double d); uint64_t double_to_mem_64(double d); diff --git a/miasm2/os_dep/win_api_x86_32.py b/miasm2/os_dep/win_api_x86_32.py index c90d7939..a88f4a8a 100644 --- a/miasm2/os_dep/win_api_x86_32.py +++ b/miasm2/os_dep/win_api_x86_32.py @@ -1024,22 +1024,20 @@ def kernel32_IsWow64Process(jitter): jitter.func_ret_stdcall(ret_ad, 1) -def kernel32_GetCommandLineA(jitter): +def kernel32_GetCommandLine(jitter, set_str): ret_ad, _ = jitter.func_args_stdcall(0) - s = winobjs.module_path + '\x00' - s = '"%s"' % s alloc_addr = winobjs.heap.alloc(jitter, 0x1000) + s = set_str('"%s"' % winobjs.module_path) jitter.vm.set_mem(alloc_addr, s) jitter.func_ret_stdcall(ret_ad, alloc_addr) +def kernel32_GetCommandLineA(jitter): + kernel32_GetCommandLine(jitter, set_str_ansi) + + def kernel32_GetCommandLineW(jitter): - ret_ad, _ = jitter.func_args_stdcall(0) - s = winobjs.module_path + '\x00' - s = jitter.set_str_unic('"%s"' % s) - alloc_addr = winobjs.heap.alloc(jitter, 0x1000) - jitter.vm.set_mem(alloc_addr, s) - jitter.func_ret_stdcall(ret_ad, alloc_addr) + kernel32_GetCommandLine(jitter, set_str_unic) def shell32_CommandLineToArgvW(jitter): @@ -1051,10 +1049,9 @@ def shell32_CommandLineToArgvW(jitter): addr_ret = winobjs.heap.alloc(jitter, 4 * (len(tks) + 1)) o = 0 for i, t in enumerate(tks): - x = jitter.set_str_unic(t) + "\x00\x00" + jitter.set_str_unic(addr + o, t) jitter.vm.set_mem(addr_ret + 4 * i, pck32(addr + o)) - jitter.vm.set_mem(addr + o, x) - o += len(x) + 2 + o += len(t)*2 + 2 jitter.vm.set_mem(addr_ret + 4 * i, pck32(0)) jitter.vm.set_mem(args.pnumargs, pck32(len(tks))) @@ -1442,8 +1439,7 @@ def my_strcpy(jitter, funcname, get_str, set_str): def kernel32_lstrcpyW(jitter): - my_strcpy(jitter, whoami(), jitter.get_str_unic, - jitter.set_str_unic) + my_strcpy(jitter, whoami(), jitter.get_str_unic, jitter.set_str_unic) def kernel32_lstrcpyA(jitter): @@ -1591,6 +1587,20 @@ def my_GetEnvironmentVariable(jitter, funcname, get_str, set_str, mylen): jitter.func_ret_stdcall(ret_ad, mylen(v)) +def kernel32_GetEnvironmentVariableA(jitter): + my_GetEnvironmentVariable(jitter, whoami(), + jitter.get_str_ansi, + jitter.set_str_ansi, + len) + + +def kernel32_GetEnvironmentVariableW(jitter): + my_GetEnvironmentVariable(jitter, whoami(), + jitter.get_str_unic, + jitter.set_str_ansi, + len) + + def my_GetSystemDirectory(jitter, funcname, set_str): ret_ad, args = jitter.func_args_stdcall(["lpbuffer", "usize"]) s = "c:\\windows\\system32" @@ -1599,6 +1609,7 @@ def my_GetSystemDirectory(jitter, funcname, set_str): jitter.func_ret_stdcall(ret_ad, l) + def kernel32_GetSystemDirectoryA(jitter): my_GetSystemDirectory(jitter, whoami(), jitter.set_str_ansi) @@ -1621,19 +1632,6 @@ def kernel32_CreateDirectoryA(jitter): my_CreateDirectory(jitter, whoami(), jitter.get_str_ansi) -def kernel32_GetEnvironmentVariableA(jitter): - my_GetEnvironmentVariable(jitter, whoami(), - jitter.get_str_ansi, - jitter.set_str_ansi, - len) - - -def kernel32_GetEnvironmentVariableW(jitter): - my_GetEnvironmentVariable(jitter, whoami(), - jitter.get_str_unic, - jitter.set_str_ansi, - len) - def my_CreateEvent(jitter, funcname, get_str): ret_ad, args = jitter.func_args_stdcall(["lpeventattributes", diff --git a/test/arch/aarch64/arch.py b/test/arch/aarch64/arch.py index 8364fcf1..a6aa7ba5 100644 --- a/test/arch/aarch64/arch.py +++ b/test/arch/aarch64/arch.py @@ -1,6 +1,10 @@ import sys import time +from pdb import pm from miasm2.arch.aarch64.arch import * +from miasm2.core.asmblock import AsmSymbolPool + +symbol_pool = AsmSymbolPool() reg_tests_aarch64 = [ ("XXXXXXXX MOV W1, WZR", @@ -1810,7 +1814,7 @@ for s, l in reg_tests_aarch64[:]: print s print mn assert(str(mn) == s) - l = mn_aarch64.fromstring(s, 'l') + l = mn_aarch64.fromstring(s, symbol_pool, 'l') assert(str(l) == s) a = mn_aarch64.asm(l) print [x for x in a] diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index ddb8a08c..ca27ef9d 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -1,7 +1,6 @@ import sys import os -from miasm2.core.cpu import ParseAst from miasm2.arch.aarch64.arch import mn_aarch64, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index 3ba2dbd4..90d137d0 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -1,6 +1,10 @@ import time from miasm2.arch.arm.arch import * +from miasm2.core.asmblock import AsmSymbolPool +from pdb import pm + +symbol_pool = AsmSymbolPool() if 0: a = bs('00') b = bs('01') @@ -266,7 +270,7 @@ for s, l in reg_tests_arm: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_arm.fromstring(s, 'l') + l = mn_arm.fromstring(s, symbol_pool, 'l') # print l assert(str(l) == s) a = mn_arm.asm(l) @@ -314,6 +318,9 @@ reg_tests_armt = [ # adcs # sbcs # rors + ("003258b6 RORS R3, R2", + "D341"), + ("0017b754 TST R0, R2", "1042"), ("0006e3fc NEGS R5, R5", @@ -401,6 +408,8 @@ reg_tests_armt = [ "0cb0"), ("0006c1b0 SUB SP, 0x18", "86b0"), + ("0006ff5c SUB SP, SP, 0x670", + "ADF5CE6D"), ("0006aeee POP {R4, PC}", @@ -435,7 +444,7 @@ reg_tests_armt = [ "f2e7"), ("C010163C BLX 0x1F916C", "F9F1B6E8"), - ("C01015E8 BL 0x1F8D5C", + ("C01015E8 BL 0x1F8D60", "F8F1AEFE"), @@ -489,6 +498,215 @@ reg_tests_armt = [ ("xxxxxxxx WFI ", "30bf"), + + ("xxxxxxxx PUSH {R4-R8, LR}", + "2DE9F041"), + ("xxxxxxxx POP {R4-R8, PC}", + "BDE8F081"), + ("xxxxxxxx MOV R12, 0x3", + "4FF0030C"), + ("xxxxxxxx MOVS R12, 0x3", + "5FF0030C"), + ("xxxxxxxx ASR R7, R3, R6", + "43FA06F7"), + ("xxxxxxxx LSL LR, R12, R7", + "0CFA07FE"), + ("xxxxxxxx MVN LR, LR", + "6FEA0E0E"), + ("xxxxxxxx AND R5, LR, R5", + "0EEA0505"), + ("xxxxxxxx ORR R5, R8, R5", + "48EA0505"), + ("xxxxxxxx ORR R5, R8, R5", + "48EA0505"), + ("xxxxxxxx MOV R0, 0x600", + "4FF4C060"), + ("xxxxxxxx MOV R0, 0x811", + "40F61100"), + ("xxxxxxxx MOV R1, R1 LSL 0x10", + "4FEA0141"), + + + ("xxxxxxxx ADD R1, R4, 0x30", + "04F13001"), + + ("xxxxxxxx SDIV R3, R5, R2", + "95FBF2F3"), + + ("xxxxxxxx MLS R5, R2, R3, R5", + "02FB1355"), + + ("xxxxxxxx SMLABB R2, R2, R3, R1", + "12FB0312"), + ("xxxxxxxx SMLABT R2, R2, R3, R1", + "12FB1312"), + ("xxxxxxxx SMLATB R2, R2, R3, R1", + "12FB2312"), + ("xxxxxxxx SMLATT R2, R2, R3, R1", + "12FB3312"), + + ("xxxxxxxx BIC R1, R1, R3", + "21EA0301"), + ("xxxxxxxx BIC R4, R0, 0x400", + "20F48064"), + ("xxxxxxxx ADD R3, R1, R3 LSL 0x1", + "01EB4303"), + ("xxxxxxxx SUB R3, R0, 0x22", + "A0F12203"), + ("xxxxxxxx UDIV R3, R3, R1", + "B3FBF1F3"), + ("xxxxxxxx MLA R2, R6, R3, R2", + "06FB0322"), + + ("xxxxxxxx SUB LR, R3, R2", + "A3EB020E"), + + ("xxxxxxxx ADD R3, R3, 0x908", + "03F60813"), + + ("xxxxxxxx ADD R3, R3, 0x23800", + "03F50E33"), + + + + + ("xxxxxxxx B 0x4", + "00F000B8"), + #("xxxxxxxx BEQ 0x4", + # "00F000A8"), + ("xxxxxxxx BEQ 0x1D4", + "00F0E880"), + + + ("xxxxxxxx UBFX R1, R1, 0x0, 0x9", + "C1F30801"), + ("xxxxxxxx UXTH R9, R8", + "1FFA88F9"), + + ("xxxxxxxx AND R2, R0, 0x1F", + "00F01F02"), + ("xxxxxxxx RSB R3, R3, 0x4", + "C3F10403"), + ("xxxxxxxx RSB R9, R9, R9 LSL 0x4", + "C9EB0919"), + + + ("xxxxxxxx ITT EQ", + "04BF"), + ("xxxxxxxx ITE EQ", + "0CBF"), + ("xxxxxxxx ITT HI", + "84BF"), + ("xxxxxxxx ITTT LT", + "BEBF"), + ("xxxxxxxx ITE NE", + "14BF"), + + ("xxxxxxxx STR R5, [R0, 0xDC]", + "C0F8DC50"), + ("xxxxxxxx STRB R1, [R5, 0x4C]", + "85F84C10"), + ("xxxxxxxx STRB R2, [R3], 0x1", + "03F8012B"), + ("xxxxxxxx STRH R3, [R0, 0xE0]", + "A0F8E030"), + ("xxxxxxxx STRH R3, [R0], 0x2", + "20F8023B"), + + + ("xxxxxxxx LDR R3, [R0, 0xDC]", + "D0F8DC30"), + ("xxxxxxxx LDR R4, [SP], 0x4", + "5DF8044B"), + ("xxxxxxxx LDRH R3, [SP, 0x20]", + "BDF82030"), + + ("xxxxxxxx LDRB R3, [R3, 0xFFFFFFF8]", + "13F8083C"), + ("xxxxxxxx LDRB R2, [R3, 0x30]", + "93F83020"), + ("xxxxxxxx LDRB R5, [R8, R6]", + "18F80650"), + ("xxxxxxxx LDR R3, [R4, R3 LSL 0x2]", + "54F82330"), + ("xxxxxxxx LDRSB R2, [R4, 0x30]", + "94F93020"), + ("xxxxxxxx LDRH R3, [R1], 0x2", + "31F8023B"), + ("xxxxxxxx LDRH R9, [SP, 0x14]", + "BDF81490"), + + ("xxxxxxxx STR R3, [R2, 0xFFFFFFE4]", + "42F81C3C"), + + + + ("xxxxxxxx STR R1, [R0, R3 LSL 0x2]", + "40F82310"), + + ("xxxxxxxx CLZ R3, R3", + "B3FA83F3"), + + ("xxxxxxxx MOV R0, 0x603", + "40F20360"), + ("xxxxxxxx TBB [PC, R0]", + "DFE800F0"), + ("xxxxxxxx TBH [PC, R0 LSL 0x1]", + "DFE810F0"), + + + ("xxxxxxxx STRD R5, R5, [R2, 0xFFFFFFF0]", + "42E90455"), + + ("xxxxxxxx MOV R3, R3 ROR 0x19", + "4FEA7363"), + ("xxxxxxxx MOV R5, R5 LSL 0x3", + "4FEAC505"), + + + ("xxxxxxxx SUB R3, R3, 0x6BE", + "A3F2BE63"), + + ("xxxxxxxx PLD [R0]", + "90F800F0"), + + ("xxxxxxxx LDRD R2, R3, [R1]", + "D1E90023"), + + ("xxxxxxxx TST R4, 0x4", + "14F0040F"), + + ("xxxxxxxx ORN R2, R2, R5", + "62EA0502"), + + ("xxxxxxxx UADD8 R2, R2, R12", + "82FA4CF2"), + + ("xxxxxxxx SEL R2, R4, R12", + "A4FA8CF2"), + + ("xxxxxxxx REV R2, R2", + "12BA"), + + ("xxxxxxxx ADD R8, SP, 0xC8", + "0DF1C808"), + + ("xxxxxxxx CMP R9, 0x80", + "B9F1800F"), + + ("xxxxxxxx MUL R2, R1, R2", + "01FB02F2"), + + ("xxxxxxxx LDRSH R3, [R4, 0xC]", + "B4F90C30"), + + ("xxxxxxxx EOR R3, R3, R1", + "83EA0103"), + + ("xxxxxxxx DSB SY", + "bff34f8f"), + + ] print "#" * 40, 'armthumb', '#' * 40 @@ -504,9 +722,10 @@ for s, l in reg_tests_armt: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_armt.fromstring(s, 'l') + l = mn_armt.fromstring(s, symbol_pool, 'l') # print l assert(str(l) == s) + print 'Asm..', l a = mn_armt.asm(l) print [x for x in a] print repr(b) diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index 1b14214e..d9e6aa76 100755 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -9,11 +9,13 @@ from miasm2.arch.arm.arch import mn_arm as mn from miasm2.arch.arm.sem import ir_arml as ir_arch from miasm2.arch.arm.regs import * from miasm2.expression.expression import * +from miasm2.core.asmblock import AsmSymbolPool from pdb import pm logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_arch().IRDst]) +symbol_pool = AsmSymbolPool() def M(addr): return ExprMem(ExprInt(addr, 16), 16) @@ -24,7 +26,7 @@ def compute(asm, inputstate={}, debug=False): sympool.update({k: ExprInt(v, k.size) for k, v in inputstate.iteritems()}) interm = ir_arch() symexec = SymbolicExecutionEngine(interm, sympool) - instr = mn.fromstring(asm, "l") + instr = mn.fromstring(asm, symbol_pool, "l") code = mn.asm(instr)[0] instr = mn.dis(code, "l") instr.offset = inputstate.get(PC, 0) diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index b28e2583..c6b68c0c 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -1,8 +1,11 @@ import time from pdb import pm +from miasm2.core.asmblock import AsmSymbolPool from miasm2.arch.mips32.arch import * +symbol_pool = AsmSymbolPool() + reg_tests_mips32 = [ ("004496D8 ADDU GP, GP, T9", "0399E021"), @@ -227,7 +230,7 @@ for s, l in reg_tests_mips32: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_mips32.fromstring(s, 'b') + l = mn_mips32.fromstring(s, symbol_pool, 'b') # print l assert(str(l) == s) a = mn_mips32.asm(l, 'b') diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index 9281f1b6..f03a32d7 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -1,8 +1,7 @@ import sys import os -from miasm2.core.cpu import ParseAst -from miasm2.arch.mips32.arch import mn_mips32, base_expr, variable +from miasm2.arch.mips32.arch import mn_mips32 from miasm2.core import parse_asm from miasm2.expression.expression import * from miasm2.core import asmblock diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py index b3dbac82..3df2becb 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -1,6 +1,9 @@ import time +from pdb import pm from miasm2.arch.msp430.arch import * +from miasm2.core.asmblock import AsmSymbolPool +symbol_pool = AsmSymbolPool() def h2i(s): return s.replace(' ', '').decode('hex') @@ -94,7 +97,7 @@ for s, l in reg_tests_msp: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_msp430.fromstring(s, None) + l = mn_msp430.fromstring(s, symbol_pool, None) # print l assert(str(l) == s) a = mn_msp430.asm(l) diff --git a/test/arch/sh4/arch.py b/test/arch/sh4/arch.py index 4d173add..574dcf49 100644 --- a/test/arch/sh4/arch.py +++ b/test/arch/sh4/arch.py @@ -1,6 +1,10 @@ import time +from pdb import pm from sys import stderr from miasm2.arch.sh4.arch import * +from miasm2.core.asmblock import AsmSymbolPool + +symbol_pool = AsmSymbolPool() def h2i(s): return s.replace(' ', '').decode('hex') @@ -396,7 +400,7 @@ for s, l in reg_tests_sh4: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_sh4.fromstring(s, None) + l = mn_sh4.fromstring(s, symbol_pool, None) # print l assert(str(l) == s) a = mn_sh4.asm(l) diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 7b834e2c..05b31815 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -1,19 +1,13 @@ import time +from pdb import pm import miasm2.expression.expression as m2_expr -from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, ParseAst, ast_int2expr, \ +from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, \ base_expr, rmarg, print_size from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.core.bin_stream import bin_stream_str +from miasm2.core.asmblock import AsmSymbolPool -for s in ["[EAX]", - "[0x10]", - "[EBX + 0x10]", - "[EBX + ECX*0x10]", - "[EBX + ECX*0x10 + 0x1337]"]: - (e, a, b) = deref_mem_ad.scanString(s).next() - print 'expr', e[0] - -print '---' +symbol_pool = AsmSymbolPool() mylabel16 = m2_expr.ExprId('mylabel16', 16) mylabel32 = m2_expr.ExprId('mylabel32', 32) @@ -26,32 +20,6 @@ reg_and_id.update({'mylabel16': mylabel16, }) -def my_ast_id2expr(t): - r = reg_and_id.get(t, m2_expr.ExprId(t, size=32)) - return r - -my_var_parser = ParseAst(my_ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - -for s in ['EAX', - "BYTE PTR [EAX]", - "WORD PTR [EAX]", - "DWORD PTR [ECX+0x1337]", - "QWORD PTR [RAX+4*RCX + 0x1337]", - "DWORD PTR [EAX+EBX]", - "QWORD PTR [RAX+RBX+0x55667788]", - "BYTE PTR CS:[EAX]", - "QWORD PTR [RAX+RBX+mylabel64]", - "BYTE PTR [RAX+RBX+mylabel64]", - "BYTE PTR [AX+BX+mylabel16]", - "BYTE PTR [mylabel32]", - ]: - print '*' * 80 - print s - (e, a, b) = rmarg.scanString(s).next() - print 'expr', e[0] - e[0].visit(print_size) - def h2i(s): return int(s.replace(' ', '').decode('hex')[::].encode('hex'), 16) @@ -3080,27 +3048,9 @@ reg_tests = [ ] - # mode = 64 - # l = mn_x86.dis('\x4D\x11\x7c\x18\x00', mode) - # print l - #""" - # mode = 64 - # l = mn_x86.fromstring("ADC DWORD PTR [RAX], 0x11223344", mode) - # print 'xx' - # t= time.time() - # import cProfile - # def f(): - # x = l.asm(mode) - # print x - # cProfile.run('f()') - # l.asm(mode) - # print time.time()-t -# reg_tests = reg_tests[-1:] - test_file = {16: open('regression_test16_ia32.bin', 'w'), 32: open('regression_test32_ia32.bin', 'w'), 64: open('regression_test64_ia32.bin', 'w')} - # 64: open('testmnemo', 'r+')} ts = time.time() for mode, s, l, in reg_tests: print "-" * 80 @@ -3115,7 +3065,7 @@ for mode, s, l, in reg_tests: # print hex(b) # print [str(x.get()) for x in mn.args] print 'fromstring', repr(s) - l = mn_x86.fromstring(s, mode) + l = mn_x86.fromstring(s, symbol_pool, mode) # print l print 'str args', [(str(x), x.size) for x in l.args] assert(str(l).strip(' ') == s) diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index eb3c15c0..b3b7e940 100755 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -14,11 +14,14 @@ from miasm2.arch.x86.regs import * from miasm2.expression.expression import * from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm, asmblock +from miasm2.core.asmblock import AsmSymbolPool logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_32().IRDst, ir_64().IRDst]) +symbol_pool = AsmSymbolPool() + m32 = 32 m64 = 64 @@ -35,7 +38,7 @@ def symb_exec(interm, inputstate, debug): if k not in EXCLUDE_REGS and regs_init.get(k, None) != v} def compute(ir, mode, asm, inputstate={}, debug=False): - instr = mn.fromstring(asm, mode) + instr = mn.fromstring(asm, symbol_pool, mode) code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(EIP, 0) diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index 8a6b215c..961967f9 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -1,7 +1,6 @@ import sys import os -from miasm2.core.cpu import ParseAst from miasm2.arch.x86.arch import mn_x86, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * diff --git a/test/core/asmblock.py b/test/core/asmblock.py index eb7b54b2..7f0dbc5f 100644 --- a/test/core/asmblock.py +++ b/test/core/asmblock.py @@ -41,6 +41,8 @@ assert len(blocks.pendings) == 0 assert len(blocks.nodes()) == 17 assert len(blocks.edges2constraint) == len(blocks.edges()) assert len(blocks.edges()) == 24 +assert blocks.getby_offset(0x63).lines[0].offset == 0x5f +assert blocks.getby_offset(0x69).lines[0].offset == 0x69 ## Convert to dot open("graph.dot", "w").write(blocks.dot()) diff --git a/test/core/test_types.py b/test/core/test_types.py index ab1d47c4..0b5f6baa 100755 --- a/test/core/test_types.py +++ b/test/core/test_types.py @@ -465,7 +465,7 @@ assert Union([("f2", Num("B")), ("f2", Num("H"))]) != \ assert Union([("f1", Num("B")), ("f2", Num("H"))]) != \ Union([("f1", Num("I")), ("f2", Num("H"))]) assert Bits(Num("I"), 3, 8) == Bits(Num("I"), 3, 8) -assert Bits(Num("I"), 3, 8) != Bits(Num("I"), 3, 8) +assert (Bits(Num("I"), 3, 8) != Bits(Num("I"), 3, 8)) is False assert Bits(Num("H"), 2, 8) != Bits(Num("I"), 3, 8) assert Bits(Num("I"), 3, 7) != Bits(Num("I"), 3, 8) assert BitField(Num("B"), [("f1", 2), ("f2", 4), ("f3", 1)]) == \ diff --git a/test/ir/ir2C.py b/test/ir/ir2C.py index a966938b..c84473c3 100755 --- a/test/ir/ir2C.py +++ b/test/ir/ir2C.py @@ -40,7 +40,7 @@ class TestIrIr2C(unittest.TestCase): self.translationTest( ExprOp('bsr', *args[:1]), r'x86_bsr(0x0, 0x20)') self.translationTest( - ExprOp('cpuid0', *args[:2]), r'cpuid0(0x0, 0x1)') + ExprOp('cpuid', *args[:2]), r'cpuid(0x0, 0x1)') self.translationTest( ExprOp('fcom0', *args[:2]), r'fcom0(0x0, 0x1)') self.translationTest( |