diff options
29 files changed, 1113 insertions, 1030 deletions
diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index bacb65fb..0c08a8a3 100755 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -64,7 +64,10 @@ else: with open(args.source) as fstream: source = fstream.read() -blocks, symbol_pool = parse_asm.parse_txt(machine.mn, attrib, source) + +symbol_pool = asmblock.AsmSymbolPool() + +blocks, symbol_pool = parse_asm.parse_txt(machine.mn, attrib, source, symbol_pool) # Fix shellcode addrs symbol_pool.set_offset(symbol_pool.getby_name("main"), addr_main) diff --git a/example/disasm/single_instr.py b/example/disasm/single_instr.py index 0e29dcee..59b81de7 100644 --- a/example/disasm/single_instr.py +++ b/example/disasm/single_instr.py @@ -1,7 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.arch.x86.regs import EDX +from miasm2.core.asmblock import AsmSymbolPool -l = mn_x86.fromstring('MOV EAX, EBX', 32) +symbol_pool = AsmSymbolPool() +l = mn_x86.fromstring('MOV EAX, EBX', symbol_pool, 32) print "instruction:", l print "arg:", l.args[0] x = mn_x86.asm(l) diff --git a/example/expression/get_read_write.py b/example/expression/get_read_write.py index d107cfa2..9e3b5caf 100644 --- a/example/expression/get_read_write.py +++ b/example/expression/get_read_write.py @@ -1,6 +1,9 @@ from miasm2.arch.x86.arch import mn_x86 from miasm2.expression.expression import get_rw from miasm2.arch.x86.ira import ir_a_x86_32 +from miasm2.core.asmblock import AsmSymbolPool + +symbol_pool = AsmSymbolPool() print """ @@ -11,7 +14,7 @@ Get read/written registers for a given instruction arch = mn_x86 ir_arch = ir_a_x86_32() -l = arch.fromstring('LODSB', 32) +l = arch.fromstring('LODSB', symbol_pool, 32) l.offset, l.l = 0, 15 ir_arch.add_instr(l) diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 44b73043..201d9f26 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -6,7 +6,6 @@ from pdb import pm from miasm2.analysis.machine import Machine from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ get_expr_ids, ExprAff -from miasm2.arch.x86.arch import ParseAst from miasm2.core.bin_stream import bin_stream_str from miasm2.core import asmblock from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block @@ -50,7 +49,6 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): symbexec.dump(mems=False) assert addr is not None - if isinstance(addr, ExprCond): # Create 2 states, each including complementary conditions cond_group_a = {addr.cond: ExprInt(0, addr.cond.size)} @@ -67,15 +65,15 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): addr_b = int(addr_b.arg) states_todo.add((addr_a, symbexec.symbols.copy(), tuple(list(conds) + cond_group_a.items()))) states_todo.add((addr_b, symbexec.symbols.copy(), tuple(list(conds) + cond_group_b.items()))) + elif addr == ret_addr: + print 'Return address reached' + continue elif isinstance(addr, ExprInt): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif asmblock.expr_is_label(addr): addr = addr.name states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) - elif addr == ret_addr: - print 'Return address reached' - continue else: raise ValueError("Unsupported destination") @@ -92,32 +90,6 @@ if __name__ == '__main__': symbols_init = dict(machine.mn.regs.regs_init) - # config parser for 32 bit - reg_and_id = dict(machine.mn.regs.all_regs_ids_byname) - - def my_ast_int2expr(name): - return ExprInt(name, 32) - - # Modifify parser to avoid label creation in PUSH argc - def my_ast_id2expr(string_parsed): - if string_parsed in reg_and_id: - return reg_and_id[string_parsed] - return ExprId(string_parsed, size=32) - - my_var_parser = ParseAst(my_ast_id2expr, my_ast_int2expr) - machine.base_expr.setParseAction(my_var_parser) - - argc = ExprId('argc', 32) - argv = ExprId('argv', 32) - ret_addr = ExprId('ret_addr', 32) - reg_and_id[argc.name] = argc - reg_and_id[argv.name] = argv - reg_and_id[ret_addr.name] = ret_addr - - my_symbols = [argc, argv, ret_addr] - my_symbols = dict([(x.name, x) for x in my_symbols]) - my_symbols.update(machine.mn.regs.all_regs_ids_byname) - ir_arch = machine.ir(mdis.symbol_pool) symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) @@ -126,7 +98,17 @@ if __name__ == '__main__': PUSH argv PUSH argc PUSH ret_addr - ''') + ''', + symbol_pool=mdis.symbol_pool) + + + argc_lbl = symbol_pool.getby_name('argc') + argv_lbl = symbol_pool.getby_name('argv') + ret_addr_lbl = symbol_pool.getby_name('ret_addr') + + argc = ExprId(argc_lbl, 32) + argv = ExprId(argv_lbl, 32) + ret_addr = ExprId(ret_addr_lbl, 32) b = list(blocks)[0] diff --git a/example/symbol_exec/single_instr.py b/example/symbol_exec/single_instr.py index e5637ad8..22a48fc6 100644 --- a/example/symbol_exec/single_instr.py +++ b/example/symbol_exec/single_instr.py @@ -2,18 +2,22 @@ from miasm2.core.bin_stream import bin_stream_str from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.analysis.machine import Machine +from miasm2.core.asmblock import AsmSymbolPool START_ADDR = 0 machine = Machine("x86_32") +symbol_pool = AsmSymbolPool() + + # Assemble and disassemble a MOV ## Ensure that attributes 'offset' and 'l' are set -line = machine.mn.fromstring("MOV EAX, EBX", 32) +line = machine.mn.fromstring("MOV EAX, EBX", symbol_pool, 32) asm = machine.mn.asm(line)[0] # Get back block bin_stream = bin_stream_str(asm) -mdis = machine.dis_engine(bin_stream) +mdis = machine.dis_engine(bin_stream, symbol_pool=symbol_pool) mdis.lines_wd = 1 asm_block = mdis.dis_block(START_ADDR) diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index c47d15d4..06e73ab4 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -1,6 +1,7 @@ #-*- coding:utf-8 -*- import logging +import math from pyparsing import * from miasm2.expression import expression as m2_expr from miasm2.core.cpu import * @@ -11,7 +12,7 @@ from regs import * from miasm2.core.asmblock import AsmLabel from miasm2.core.cpu import log as log_cpu from miasm2.expression.modint import uint32, uint64, mod_size2int -import math +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp log = logging.getLogger("aarch64dis") console_handler = logging.StreamHandler() @@ -68,43 +69,6 @@ replace_regs = { } -variable, operand, base_expr = gen_base_expr() -_, _, base_expr32 = gen_base_expr() -_, _, base_expr64 = gen_base_expr() - - -def ast_id2expr32(t): - if t in mn_aarch64.regs.all_regs_ids_byname: - t = mn_aarch64.regs.all_regs_ids_byname[t] - if not t.size == 32: - raise StopIteration - return t - -def ast_int2expr32(a): - return m2_expr.ExprInt(a, 32) - - -def ast_id2expr64(t): - if t in mn_aarch64.regs.all_regs_ids_byname: - t = mn_aarch64.regs.all_regs_ids_byname[t] - if not t.size == 64: - raise StopIteration - return t - - -def ast_int2expr64(a): - return m2_expr.ExprInt(a, 64) - -my_var_parser32 = ParseAst(ast_id2expr32, ast_int2expr32, default_size=32) -my_var_parser64 = ParseAst(ast_id2expr64, ast_int2expr64, default_size=64) - -base_expr32.setParseAction(my_var_parser32) -base_expr64.setParseAction(my_var_parser64) - - -int_or_expr = base_expr -int_or_expr32 = base_expr32 -int_or_expr64 = base_expr64 shift2expr_dct = {'LSL': '<<', 'LSR': '>>', 'ASR': 'a>>', 'ROR': '>>>'} @@ -112,122 +76,91 @@ shift_str = ["LSL", "LSR", "ASR", "ROR"] shift_expr = ["<<", ">>", "a>>", '>>>'] -def op_shift2expr(s, l, t): +def cb_shift(t): return shift2expr_dct[t[0]] -def op_shift2expr_slice_at(s, l, t): - return "slice_at" - - -def op_ext_reg(s, l, t): +def cb_extreg(t): return t[0] -def shift2expr(t): +def cb_shiftreg(t): if len(t) == 1: return t[0] elif len(t) == 3: - if t[0].size == 32 and isinstance(t[2], m2_expr.ExprInt): - t[2] = m2_expr.ExprInt(int(t[2]), 32) - return m2_expr.ExprOp(t[1], t[0], t[2]) + result = AstOp(t[1], t[0], t[2]) + return result else: raise ValueError('bad string') -def shift2expr_sc(t): +def cb_shift_sc(t): if len(t) == 1: return t[0] elif len(t) == 3: - if t[0].size == 32 and isinstance(t[2], m2_expr.ExprInt): - t[2] = m2_expr.ExprInt(t[2].arg, 32) if t[1] != '<<': raise ValueError('bad op') - return m2_expr.ExprOp("slice_at", t[0], t[2]) + result = AstOp("slice_at", t[0], t[2]) + return result else: raise ValueError('bad string') -def extend2expr(t): +def cb_extend(t): if len(t) == 1: return t[0] - return m2_expr.ExprOp(t[1], t[0], t[2]) + result = AstOp(t[1], t[0], t[2]) + return result -def shiftext2expr(t): - if len(t) == 1: - return t[0] - else: - return m2_expr.ExprOp(t[1], t[0], t[2]) - -def expr_deref_pc_off(t): - t = t[0] +def cb_deref_pc_off(t): if len(t) == 2 and t[0] == "PC": - return ExprOp('preinc', PC, t[1]) + result = AstOp('preinc', AstId(ExprId('PC', 64)), t[1]) + return result raise ValueError('bad string') -def expr_deref_pc_nooff(t): - t = t[0] +def cb_deref_pc_nooff(t): if len(t) == 1 and t[0] == "PC": - return ExprOp('preinc', PC) + result = AstOp('preinc', AstId(PC)) + return result raise ValueError('bad string') -all_binaryop_lsl_t = literal_list( - shift_str).setParseAction(op_shift2expr) +all_binaryop_lsl_t = literal_list(shift_str).setParseAction(cb_shift) -all_binaryop_shiftleft_t = literal_list( - ["LSL"]).setParseAction(op_shift2expr) +all_binaryop_shiftleft_t = literal_list(["LSL"]).setParseAction(cb_shift) extend_lst = ['UXTB', 'UXTH', 'UXTW', 'UXTX', 'SXTB', 'SXTH', 'SXTW', 'SXTX'] extend2_lst = ['UXTW', 'LSL', 'SXTW', 'SXTX'] -all_extend_t = literal_list(extend_lst).setParseAction(op_ext_reg) -all_extend2_t = literal_list(extend2_lst).setParseAction(op_ext_reg) +all_extend_t = literal_list(extend_lst).setParseAction(cb_extreg) +all_extend2_t = literal_list(extend2_lst).setParseAction(cb_extreg) -gpregz32_extend = (gpregsz32_info.parser + Optional( - all_extend_t + int_or_expr32)).setParseAction(extend2expr) -gpregz64_extend = (gpregsz64_info.parser + Optional( - all_extend_t + int_or_expr64)).setParseAction(extend2expr) +gpregz32_extend = (gpregsz32_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) +gpregz64_extend = (gpregsz64_info.parser + Optional(all_extend_t + base_expr)).setParseAction(cb_extend) -shift32_off = (gpregsz32_info.parser + Optional(all_binaryop_lsl_t + - (gpregs32_info.parser | int_or_expr))).setParseAction(shift2expr) -shift64_off = (gpregsz64_info.parser + Optional(all_binaryop_lsl_t + - (gpregs64_info.parser | int_or_expr))).setParseAction(shift2expr) +shift32_off = (gpregsz32_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) +shift64_off = (gpregsz64_info.parser + Optional(all_binaryop_lsl_t + base_expr)).setParseAction(cb_shiftreg) -shiftimm_imm_sc = (int_or_expr + all_binaryop_shiftleft_t + - int_or_expr).setParseAction(shift2expr_sc) +shiftimm_imm_sc = (base_expr + all_binaryop_shiftleft_t + base_expr).setParseAction(cb_shift_sc) -shiftimm_off_sc = shiftimm_imm_sc | int_or_expr +shiftimm_off_sc = shiftimm_imm_sc | base_expr shift_off = (shift32_off | shift64_off) reg_ext_off = (gpregz32_extend | gpregz64_extend) gpregs_32_64 = (gpregs32_info.parser | gpregs64_info.parser) -gpregsz_32_64 = (gpregsz32_info.parser | gpregsz64_info.parser | int_or_expr) +gpregsz_32_64 = (gpregsz32_info.parser | gpregsz64_info.parser | base_expr) -simdregs = (simd08_info.parser | simd16_info.parser | - simd32_info.parser | simd64_info.parser) +simdregs = (simd08_info.parser | simd16_info.parser | simd32_info.parser | simd64_info.parser) simdregs_h = (simd32_info.parser | simd64_info.parser | simd128_info.parser) -simdregs_h_zero = (simd32_info.parser | - simd64_info.parser | simd128_info.parser | int_or_expr) - - -def ast_id2expr(t): - if not t in mn_aarch64.regs.all_regs_ids_byname: - r = m2_expr.ExprId(AsmLabel(t), 32) - else: - r = mn_aarch64.regs.all_regs_ids_byname[t] - return r +simdregs_h_zero = (simd32_info.parser | simd64_info.parser | simd128_info.parser | base_expr) -def ast_int2expr(a): - return m2_expr.ExprInt(a, 64) - gpregs_info = {32: gpregs32_info, 64: gpregs64_info} gpregsz_info = {32: gpregsz32_info, @@ -241,72 +174,66 @@ simds_info = {8: simd08_info, 128: simd128_info} -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - -def deref2expr_nooff(t): - t = t[0] +def cb_deref_nooff(t): # XXX default - return m2_expr.ExprOp("preinc", t[0], m2_expr.ExprInt(0, 64)) + result = AstOp("preinc", t[0], AstInt(0)) + return result -def deref2expr_post(t): - t = t[0] - if t[1] in regs_module.all_regs_ids: +def cb_deref_post(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): raise StopIteration - return m2_expr.ExprOp("postinc", t[0], t[1]) + result = AstOp("postinc", *t) + return result -def deref2expr_pre(t): - t = t[0] - if t[1] in regs_module.all_regs_ids: +def cb_deref_pre(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): raise StopIteration - return m2_expr.ExprOp("preinc", t[0], t[1]) + result = AstOp("preinc", *t) + return result -def deref2expr_pre_wb(t): - t = t[0] - if t[1] in regs_module.all_regs_ids: +def cb_deref_pre_wb(t): + assert len(t) == 2 + if isinstance(t[1], AstId) and isinstance(t[1].name, ExprId): raise StopIteration - return m2_expr.ExprOp("preinc_wb", t[0], t[1]) + result = AstOp("preinc_wb", *t) + return result + LBRACK = Suppress("[") RBRACK = Suppress("]") COMMA = Suppress(",") POSTINC = Suppress("!") -deref_nooff = Group( - LBRACK + gpregs64_info.parser + RBRACK).setParseAction(deref2expr_nooff) -deref_off_post = Group(LBRACK + gpregs64_info.parser + - RBRACK + COMMA + int_or_expr64).setParseAction(deref2expr_post) -deref_off_pre = Group(LBRACK + gpregs64_info.parser + - COMMA + int_or_expr64 + RBRACK).setParseAction(deref2expr_pre) -deref_off_pre_wb = Group(LBRACK + gpregs64_info.parser + COMMA + - int_or_expr64 + RBRACK + POSTINC).setParseAction(deref2expr_pre_wb) +deref_nooff = (LBRACK + gpregs64_info.parser + RBRACK).setParseAction(cb_deref_nooff) +deref_off_post = (LBRACK + gpregs64_info.parser + RBRACK + COMMA + base_expr).setParseAction(cb_deref_post) +deref_off_pre = (LBRACK + gpregs64_info.parser + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pre) +deref_off_pre_wb = (LBRACK + gpregs64_info.parser + COMMA + base_expr + RBRACK + POSTINC).setParseAction(cb_deref_pre_wb) deref = (deref_off_post | deref_off_pre_wb | deref_off_pre | deref_nooff) -deref_pc_off = Group(LBRACK + Literal("PC") + COMMA + int_or_expr64 + RBRACK).setParseAction(expr_deref_pc_off) -deref_pc_nooff = Group(LBRACK + Literal("PC") + RBRACK).setParseAction(expr_deref_pc_nooff) +deref_pc_off = (LBRACK + Literal("PC") + COMMA + base_expr + RBRACK).setParseAction(cb_deref_pc_off) +deref_pc_nooff = (LBRACK + Literal("PC") + RBRACK).setParseAction(cb_deref_pc_nooff) deref_pc = (deref_pc_off | deref_pc_nooff) -def deref_ext2op(t): - t = t[0] +def cb_deref_ext2op(t): if len(t) == 4: - expr = set_imm_to_size(t[1].size, t[3]) - if expr is None: - raise StopIteration - return m2_expr.ExprOp('segm', t[0], m2_expr.ExprOp(t[2], t[1], expr)) + result = AstOp('segm', t[0], AstOp(t[2], t[1], t[3])) + return result elif len(t) == 2: - return m2_expr.ExprOp('segm', t[0], t[1]) + result = AstOp('segm', *t) + return result raise ValueError("cad deref") -deref_ext2 = Group(LBRACK + gpregs_32_64 + COMMA + gpregs_32_64 + - Optional(all_extend2_t + int_or_expr) + RBRACK).setParseAction(deref_ext2op) +deref_ext2 = (LBRACK + gpregs_32_64 + COMMA + gpregs_32_64 + Optional(all_extend2_t + base_expr) + RBRACK).setParseAction(cb_deref_ext2op) class additional_info: @@ -335,6 +262,47 @@ conds_expr, _, conds_info = gen_regs(CONDS, {}) conds_inv_expr, _, conds_inv_info = gen_regs(CONDS_INV, {}) + +class aarch64_arg(m_arg): + def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + if size_hint is None: + size_hint = 64 + if fixed_size is None: + fixed_size = set() + if isinstance(value, AstId): + if value.name in all_regs_ids_byname: + reg = all_regs_ids_byname[value.name] + fixed_size.add(reg.size) + return reg + if isinstance(value.name, ExprId): + fixed_size.add(value.name.size) + return value.name + label = symbol_pool.getby_name_create(value.name) + return ExprId(label, size_hint) + if isinstance(value, AstInt): + assert size_hint is not None + return ExprInt(value.value, size_hint) + if isinstance(value, AstOp): + if value.op == "segm": + segm = self.asm_ast_to_expr(value.args[0], symbol_pool) + ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + return ExprOp('segm', segm, ptr) + + args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + if len(fixed_size) == 0: + # No fixed size + pass + elif len(fixed_size) == 1: + # One fixed size, regen all + size = list(fixed_size)[0] + args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + else: + raise ValueError("Size conflict") + + return ExprOp(value.op, *args) + return None + + class instruction_aarch64(instruction): __slots__ = [] delayslot = 0 @@ -567,7 +535,7 @@ class aarch64_gpreg_noarg(reg_noarg): return True -class aarch64_simdreg(reg_noarg, m_arg): +class aarch64_simdreg(reg_noarg, aarch64_arg): parser = simdregs simd_size = [8, 16, 32, 64] @@ -619,7 +587,7 @@ class aarch64_simdreg_32_64_zero(aarch64_simdreg_32_64): return super(aarch64_simdreg_32_64_zero, self).encode() -class aarch64_gpreg_isf(reg_noarg, m_arg): +class aarch64_gpreg_isf(reg_noarg, aarch64_arg): parser = gpregs_32_64 def decode(self, v): @@ -635,7 +603,7 @@ class aarch64_gpreg_isf(reg_noarg, m_arg): return True -class aarch64_gpreg(aarch64_gpreg_noarg, m_arg): +class aarch64_gpreg(aarch64_gpreg_noarg, aarch64_arg): pass @@ -651,12 +619,12 @@ class aarch64_gpreg_n1(aarch64_gpreg): return self.value != 0b11111 -class aarch64_gpregz(aarch64_gpreg_noarg, m_arg): +class aarch64_gpregz(aarch64_gpreg_noarg, aarch64_arg): parser = gpregsz_32_64 gpregs_info = gpregsz_info -class aarch64_gpreg0(bsi, m_arg): +class aarch64_gpreg0(bsi, aarch64_arg): parser = gpregsz_32_64 gpregs_info = gpregsz_info @@ -684,7 +652,7 @@ class aarch64_gpreg0(bsi, m_arg): return True -class aarch64_crreg(reg_noarg, m_arg): +class aarch64_crreg(reg_noarg, aarch64_arg): reg_info = cr_info parser = reg_info.parser @@ -702,7 +670,7 @@ class aarch64_gpreg32_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpreg32(aarch64_gpreg32_noarg, m_arg): +class aarch64_gpreg32(aarch64_gpreg32_noarg, aarch64_arg): reg_info = gpregs32_info parser = reg_info.parser @@ -712,7 +680,7 @@ class aarch64_gpreg64_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpreg64(reg_noarg, m_arg): +class aarch64_gpreg64(reg_noarg, aarch64_arg): reg_info = gpregs64_info parser = reg_info.parser @@ -722,7 +690,7 @@ class aarch64_gpregz32_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpregz32(aarch64_gpreg32_noarg, m_arg): +class aarch64_gpregz32(aarch64_gpreg32_noarg, aarch64_arg): reg_info = gpregsz32_info parser = reg_info.parser @@ -732,7 +700,7 @@ class aarch64_gpregz64_noarg(reg_noarg): parser = reg_info.parser -class aarch64_gpregz64(reg_noarg, m_arg): +class aarch64_gpregz64(reg_noarg, aarch64_arg): reg_info = gpregsz64_info parser = reg_info.parser @@ -742,7 +710,7 @@ class aarch64_simd08_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd08(aarch64_simd08_noarg, m_arg): +class aarch64_simd08(aarch64_simd08_noarg, aarch64_arg): reg_info = simd08_info parser = reg_info.parser @@ -752,7 +720,7 @@ class aarch64_simd16_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd16(aarch64_simd16_noarg, m_arg): +class aarch64_simd16(aarch64_simd16_noarg, aarch64_arg): reg_info = simd16_info parser = reg_info.parser @@ -762,7 +730,7 @@ class aarch64_simd32_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd32(aarch64_simd32_noarg, m_arg): +class aarch64_simd32(aarch64_simd32_noarg, aarch64_arg): reg_info = simd32_info parser = reg_info.parser @@ -772,7 +740,7 @@ class aarch64_simd64_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd64(aarch64_simd64_noarg, m_arg): +class aarch64_simd64(aarch64_simd64_noarg, aarch64_arg): reg_info = simd64_info parser = reg_info.parser @@ -782,12 +750,12 @@ class aarch64_simd128_noarg(reg_noarg): parser = reg_info.parser -class aarch64_simd128(aarch64_simd128_noarg, m_arg): +class aarch64_simd128(aarch64_simd128_noarg, aarch64_arg): reg_info = simd128_info parser = reg_info.parser -class aarch64_imm_32(imm_noarg, m_arg): +class aarch64_imm_32(imm_noarg, aarch64_arg): parser = base_expr @@ -810,7 +778,7 @@ class aarch64_uint64_noarg(imm_noarg): int2expr = lambda self, x: m2_expr.ExprInt(x, 64) -class aarch64_uint64(aarch64_uint64_noarg, m_arg): +class aarch64_uint64(aarch64_uint64_noarg, aarch64_arg): parser = base_expr @@ -829,8 +797,8 @@ def set_imm_to_size(size, expr): class aarch64_imm_sf(imm_noarg): parser = base_expr - def fromstring(self, text, parser_result=None): - start, stop = super(aarch64_imm_sf, self).fromstring(text, parser_result) + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(aarch64_imm_sf, self).fromstring(text, symbol_pool, parser_result) if start is None: return start, stop size = self.parent.args[0].expr.size @@ -861,7 +829,7 @@ class aarch64_imm_sf(imm_noarg): return True -class aarch64_imm_sft(aarch64_imm_sf, m_arg): +class aarch64_imm_sft(aarch64_imm_sf, aarch64_arg): def encode(self): if not isinstance(self.expr, m2_expr.ExprInt): @@ -895,7 +863,7 @@ OPTION2SIZE = [32, 32, 32, 64, 32, 32, 32, 64] -class aarch64_gpreg_ext(reg_noarg, m_arg): +class aarch64_gpreg_ext(reg_noarg, aarch64_arg): parser = reg_ext_off def encode(self): @@ -934,7 +902,7 @@ EXT2_OP = {0b010: 'UXTW', EXT2_OP_INV = dict([(items[1], items[0]) for items in EXT2_OP.items()]) -class aarch64_gpreg_ext2(reg_noarg, m_arg): +class aarch64_gpreg_ext2(reg_noarg, aarch64_arg): parser = deref_ext2 def get_size(self): @@ -946,6 +914,12 @@ class aarch64_gpreg_ext2(reg_noarg, m_arg): if len(self.expr.args) != 2: return False arg0, arg1 = self.expr.args + if (self.expr.is_op("preinc") and arg0.is_id() and arg1.is_id()): + self.parent.shift.value = 0 + self.parent.rn.value = self.parent.rn.reg_info.expr.index(arg0) + self.value = gpregs_info[arg1.size].expr.index(arg1) + self.parent.option.value = 0b011 + return True if not (isinstance(self.expr, m2_expr.ExprOp) and self.expr.op == 'segm'): return False if not arg0 in self.parent.rn.reg_info.expr: @@ -1022,7 +996,7 @@ def test_set_sf(parent, size): return psize == size -class aarch64_gpreg_sftimm(reg_noarg, m_arg): +class aarch64_gpreg_sftimm(reg_noarg, aarch64_arg): reg_info = gpregsz_info parser = shift_off @@ -1266,7 +1240,7 @@ def EncodeBitMasks(wmask): return immr, imms, immn -class aarch64_imm_nsr(aarch64_imm_sf, m_arg): +class aarch64_imm_nsr(aarch64_imm_sf, aarch64_arg): parser = base_expr def decode(self, v): @@ -1347,7 +1321,7 @@ class aarch64_immhi_page(aarch64_imm_32): return True -class aarch64_imm_hw(m_arg): +class aarch64_imm_hw(aarch64_arg): parser = base_expr shift_op = '<<' @@ -1373,7 +1347,7 @@ class aarch64_imm_hw(m_arg): return False -class aarch64_imm_hw_sc(m_arg): +class aarch64_imm_hw_sc(aarch64_arg): parser = shiftimm_off_sc shift_op = 'slice_at' @@ -1415,7 +1389,7 @@ class aarch64_imm_hw_sc(m_arg): return True -class aarch64_offs(imm_noarg, m_arg): +class aarch64_offs(imm_noarg, aarch64_arg): parser = base_expr def decode(self, v): @@ -1436,7 +1410,7 @@ class aarch64_offs(imm_noarg, m_arg): -class aarch64_offs_pc(imm_noarg, m_arg): +class aarch64_offs_pc(imm_noarg, aarch64_arg): parser = deref_pc def decode(self, v): @@ -1490,7 +1464,7 @@ def get_size(parent): return size -class aarch64_deref(m_arg): +class aarch64_deref(aarch64_arg): parser = deref def decode_w_size(self, off): @@ -1628,17 +1602,17 @@ modf = bs_mod_name(l=1, fname='modf', mn_mod=['', 'S']) sf = bs(l=1, fname='sf', order=-1) -class aarch64_cond_arg(reg_noarg, m_arg): +class aarch64_cond_arg(reg_noarg, aarch64_arg): reg_info = conds_info parser = reg_info.parser -class aarch64_cond_inv_arg(reg_noarg, m_arg): +class aarch64_cond_inv_arg(reg_noarg, aarch64_arg): reg_info = conds_inv_info parser = reg_info.parser -class aarch64_b40(m_arg): +class aarch64_b40(aarch64_arg): parser = base_expr def decode(self, v): @@ -1745,19 +1719,19 @@ imm_sft_12 = bs(l=12, cls=(aarch64_imm_sft,)) imm32_3 = bs(l=3, fname="imm") imm6 = bs(l=6, fname="imm", order=-1) imm3 = bs(l=3, fname="imm", order=-1) -simm6 = bs(l=6, cls=(aarch64_int64_noarg, m_arg), fname="imm", order=-1) +simm6 = bs(l=6, cls=(aarch64_int64_noarg, aarch64_arg), fname="imm", order=-1) simm9 = bs(l=9, cls=(aarch64_int64_noarg,), fname="imm", order=-1) simm7 = bs(l=7, cls=(aarch64_int64_noarg,), fname="imm", order=-1) -nzcv = bs(l=4, cls=(aarch64_uint64_noarg, m_arg), fname="nzcv", order=-1) -uimm5 = bs(l=5, cls=(aarch64_uint64_noarg, m_arg), fname="imm", order=-1) +nzcv = bs(l=4, cls=(aarch64_uint64_noarg, aarch64_arg), fname="nzcv", order=-1) +uimm5 = bs(l=5, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) uimm12 = bs(l=12, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) -uimm16 = bs(l=16, cls=(aarch64_uint64_noarg, m_arg), fname="imm", order=-1) +uimm16 = bs(l=16, cls=(aarch64_uint64_noarg, aarch64_arg), fname="imm", order=-1) uimm7 = bs(l=7, cls=(aarch64_uint64_noarg,), fname="imm", order=-1) uimm8 = bs(l=8, cls=(aarch64_uint64,), fname="imm", order=-1) -op1 = bs(l=3, cls=(aarch64_uint64, m_arg), fname="op1") -op2 = bs(l=3, cls=(aarch64_uint64, m_arg), fname="op2") +op1 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op1") +op2 = bs(l=3, cls=(aarch64_uint64, aarch64_arg), fname="op2") imm16 = bs(l=16, fname="imm", order=-1) @@ -1787,8 +1761,8 @@ imm16_hw_sc = bs(l=16, cls=(aarch64_imm_hw_sc,), fname='imm') hw = bs(l=2, fname='hw') -a_imms = bs(l=6, cls=(aarch64_imm_sf, m_arg), fname="imm1", order=-1) -a_immr = bs(l=6, cls=(aarch64_imm_sf, m_arg), fname="imm1", order=-1) +a_imms = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) +a_immr = bs(l=6, cls=(aarch64_imm_sf, aarch64_arg), fname="imm1", order=-1) diff --git a/miasm2/arch/aarch64/regs.py b/miasm2/arch/aarch64/regs.py index bf1c5cef..c9da0653 100644 --- a/miasm2/arch/aarch64/regs.py +++ b/miasm2/arch/aarch64/regs.py @@ -45,9 +45,9 @@ simd128_expr, simd128_init, simd128_info = gen_regs( simd128_str, globals(), 128) -gen_reg("PC", globals(), 64) -gen_reg("WZR", globals(), 32) -gen_reg("XZR", globals(), 64) +PC, _ = gen_reg("PC", 64) +WZR, _ = gen_reg("WZR", 32) +XZR, _ = gen_reg("XZR", 64) PC_init = ExprId("PC_init", 64) WZR_init = ExprId("WZR_init", 32) diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index b607b6c2..e09619ae 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -8,6 +8,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.arm.regs as regs_module from miasm2.arch.arm.regs import * +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp # A1 encoding @@ -20,7 +21,7 @@ log.setLevel(logging.DEBUG) # arm regs ############## reg_dum = ExprId('DumReg', 32) -gen_reg('PC', globals()) +PC, _ = gen_reg('PC') # GP regs_str = ['R%d' % r for r in xrange(0x10)] @@ -104,13 +105,13 @@ barrier_info = reg_info_dct(barrier_expr) # parser helper ########### -def tok_reg_duo(s, l, t): +def cb_tok_reg_duo(t): t = t[0] - i1 = gpregs.expr.index(t[0]) - i2 = gpregs.expr.index(t[1]) + i1 = gpregs.expr.index(t[0].name) + i2 = gpregs.expr.index(t[1].name) o = [] for i in xrange(i1, i2 + 1): - o.append(gpregs.expr[i]) + o.append(AstId(gpregs.expr[i])) return o LPARENTHESIS = Literal("(") @@ -124,14 +125,14 @@ CIRCUNFLEX = Literal("^") def check_bounds(left_bound, right_bound, value): if left_bound <= value and value <= right_bound: - return ExprInt(value, 32) + return AstInt(value) else: raise ValueError('shift operator immediate value out of bound') def check_values(values, value): if value in values: - return ExprInt(value, 32) + return AstInt(value) else: raise ValueError('shift operator immediate value out of bound') @@ -141,11 +142,11 @@ int_1_32 = str_int.copy().setParseAction(lambda v: check_bounds(1, 32, v[0])) int_8_16_24 = str_int.copy().setParseAction(lambda v: check_values([8, 16, 24], v[0])) -def reglistparse(s, l, t): +def cb_reglistparse(s, l, t): t = t[0] if t[-1] == "^": - return ExprOp('sbit', ExprOp('reglist', *t[:-1])) - return ExprOp('reglist', *t) + return AstOp('sbit', AstOp('reglist', *t[:-1])) + return AstOp('reglist', *t) allshifts = ['<<', '>>', 'a>>', '>>>', 'rrx'] @@ -161,11 +162,11 @@ def op_shift2expr(s, l, t): return shift2expr_dct[t[0]] reg_duo = Group(gpregs.parser + MINUS + - gpregs.parser).setParseAction(tok_reg_duo) + gpregs.parser).setParseAction(cb_tok_reg_duo) reg_or_duo = reg_duo | gpregs.parser gpreg_list = Group(LACC + delimitedList( reg_or_duo, delim=',') + RACC + Optional(CIRCUNFLEX)) -gpreg_list.setParseAction(reglistparse) +gpreg_list.setParseAction(cb_reglistparse) LBRACK = Suppress("[") RBRACK = Suppress("]") @@ -187,130 +188,116 @@ gpreg_p = gpregs.parser psr_p = cpsr_regs.parser | spsr_regs.parser -def shift2expr(t): +def cb_shift(t): if len(t) == 1: ret = t[0] elif len(t) == 2: - ret = ExprOp(t[1], t[0]) + ret = AstOp(t[1], t[0]) elif len(t) == 3: - ret = ExprOp(t[1], t[0], t[2]) + ret = AstOp(t[1], t[0], t[2]) else: raise ValueError("Bad arg") return ret -variable, operand, base_expr = gen_base_expr() - -int_or_expr = base_expr - - -def ast_id2expr(t): - return mn_arm.regs.all_regs_ids_byname.get(t, t) - - -def ast_int2expr(a): - return ExprInt(a, 32) - - -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - - shift_off = (gpregs.parser + Optional( (all_unaryop_shifts_t) | (all_binaryop_1_31_shifts_t + (gpregs.parser | int_1_31)) | (all_binaryop_1_32_shifts_t + (gpregs.parser | int_1_32)) -)).setParseAction(shift2expr) +)).setParseAction(cb_shift) shift_off |= base_expr rot2_expr = (gpregs.parser + Optional( (ror_shifts_t + (int_8_16_24)) -)).setParseAction(shift2expr) +)).setParseAction(cb_shift) OP_LSL = Suppress("LSL") -def expr_deref_reg_reg(t): +def cb_deref_reg_reg(t): if len(t) != 2: raise ValueError("Bad mem format") - return ExprMem(t[0] + t[1], 8) + return AstMem(AstOp('+', t[0], t[1]), 8) -def expr_deref_reg_reg_lsl_1(t): +def cb_deref_reg_reg_lsl_1(t): if len(t) != 3: raise ValueError("Bad mem format") reg1, reg2, index = t - if index != ExprInt(1, 32): + if not isinstance(index, AstInt) or index.value != 1: raise ValueError("Bad index") - ret = ExprMem(reg1 + (reg2 << index), 16) + ret = AstMem(AstOp('+', reg1, AstOp('<<', reg2, index)), 16) return ret -deref_reg_reg = (LBRACK + gpregs.parser + COMMA + gpregs.parser + RBRACK).setParseAction(expr_deref_reg_reg) -deref_reg_reg_lsl_1 = (LBRACK + gpregs.parser + COMMA + gpregs.parser + OP_LSL + base_expr + RBRACK).setParseAction(expr_deref_reg_reg_lsl_1) +deref_reg_reg = (LBRACK + gpregs.parser + COMMA + gpregs.parser + RBRACK).setParseAction(cb_deref_reg_reg) +deref_reg_reg_lsl_1 = (LBRACK + gpregs.parser + COMMA + gpregs.parser + OP_LSL + base_expr + RBRACK).setParseAction(cb_deref_reg_reg_lsl_1) (gpregs.parser + Optional( (ror_shifts_t + (int_8_16_24)) -)).setParseAction(shift2expr) +)).setParseAction(cb_shift) +reg_or_base = gpregs.parser | base_expr + def deref2expr_nooff(s, l, t): t = t[0] # XXX default return ExprOp("preinc", t[0], ExprInt(0, 32)) -def deref2expr_pre(s, l, t): +def cb_deref_preinc(t): t = t[0] if len(t) == 1: - return ExprOp("preinc", t[0], ExprInt(0, 32)) + return AstOp("preinc", t[0], AstInt(0)) elif len(t) == 2: - return ExprOp("preinc", t[0], t[1]) + return AstOp("preinc", t[0], t[1]) else: raise NotImplementedError('len(t) > 2') -def deref2expr_pre_mem(s, l, t): +def cb_deref_pre_mem(t): t = t[0] if len(t) == 1: - return ExprMem(ExprOp("preinc", t[0], ExprInt(0, 32)), 32) + return AstMem(AstOp("preinc", t[0], AstInt(0)), 32) elif len(t) == 2: - return ExprMem(ExprOp("preinc", t[0], t[1]), 32) + return AstMem(AstOp("preinc", t[0], t[1]), 32) else: raise NotImplementedError('len(t) > 2') -def deref2expr_post(s, l, t): +def cb_deref_post(t): t = t[0] - return ExprOp("postinc", t[0], t[1]) + return AstOp("postinc", t[0], t[1]) -def deref_wb(s, l, t): +def cb_deref_wb(t): t = t[0] if t[-1] == '!': - return ExprMem(ExprOp('wback', *t[:-1]), 32) - return ExprMem(t[0], 32) + return AstMem(AstOp('wback', *t[:-1]), 32) + return AstMem(t[0], 32) # shift_off.setParseAction(deref_off) deref_nooff = Group( LBRACK + gpregs.parser + RBRACK).setParseAction(deref2expr_nooff) deref_pre = Group(LBRACK + gpregs.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_preinc) deref_post = Group(LBRACK + gpregs.parser + RBRACK + - COMMA + shift_off).setParseAction(deref2expr_post) + COMMA + shift_off).setParseAction(cb_deref_post) deref = Group((deref_post | deref_pre | deref_nooff) - + Optional('!')).setParseAction(deref_wb) + + Optional('!')).setParseAction(cb_deref_wb) -def parsegpreg_wb(s, l, t): +def cb_gpreb_wb(t): + assert len(t) == 1 t = t[0] if t[-1] == '!': - return ExprOp('wback', *t[:-1]) + return AstOp('wback', *t[:-1]) return t[0] -gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(parsegpreg_wb) +gpregs_wb = Group(gpregs.parser + Optional('!')).setParseAction(cb_gpreb_wb) cond_list_full = ['EQ', 'NE', 'CS', 'CC', 'MI', 'PL', 'VS', 'VC', @@ -780,7 +767,31 @@ class mn_armt(cls_mn): return 32 -class arm_reg(reg_noarg, m_arg): +class arm_arg(m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class arm_reg(reg_noarg, arm_arg): pass @@ -820,7 +831,7 @@ class arm_reg_wb(arm_reg): return True -class arm_psr(m_arg): +class arm_psr(arm_arg): parser = psr_p def decode(self, v): @@ -856,7 +867,7 @@ class arm_preg(arm_reg): parser = reg_info.parser -class arm_imm(imm_noarg, m_arg): +class arm_imm(imm_noarg, arm_arg): parser = base_expr @@ -900,7 +911,7 @@ class arm_offs(arm_imm): return True -class arm_imm8_12(m_arg): +class arm_imm8_12(arm_arg): parser = deref def decode(self, v): @@ -956,8 +967,8 @@ class arm_imm8_12(m_arg): return True -class arm_imm_4_12(m_arg): - parser = base_expr +class arm_imm_4_12(arm_arg): + parser = reg_or_base def decode(self, v): v = v & self.lmask @@ -976,7 +987,7 @@ class arm_imm_4_12(m_arg): return True -class arm_imm_12_4(m_arg): +class arm_imm_12_4(arm_arg): parser = base_expr def decode(self, v): @@ -996,7 +1007,7 @@ class arm_imm_12_4(m_arg): return True -class arm_op2(m_arg): +class arm_op2(arm_arg): parser = shift_off def str_to_imm_rot_form(self, s, neg=False): @@ -1168,7 +1179,7 @@ class arm_op2imm(arm_imm8_12): # if len(v) <1: # raise ValueError('cannot parse', s) - self.parent.rn.fromstring(e.args[0]) + self.parent.rn.expr = e.args[0] if len(e.args) == 1: self.parent.immop.value = 0 self.value = 0 @@ -1229,7 +1240,7 @@ def reglist2str(rlist): return "{" + ", ".join(out) + '}' -class arm_rlist(m_arg): +class arm_rlist(arm_arg): parser = gpreg_list def encode(self): @@ -1436,7 +1447,7 @@ class mul_part_y(bs_mod_name): mul_x = mul_part_x(l=1, fname='x', mn_mod=['B', 'T']) mul_y = mul_part_y(l=1, fname='y', mn_mod=['B', 'T']) -class arm_immed(m_arg): +class arm_immed(arm_arg): parser = deref def decode(self, v): @@ -1509,7 +1520,7 @@ immedL = bs(l=4, cls=(arm_immed, m_arg), fname='immedL') hb = bs(l=1) -class armt2_rot_rm(m_arg): +class armt2_rot_rm(arm_arg): parser = shift_off def decode(self, v): r = self.parent.rm.expr @@ -1530,7 +1541,7 @@ class armt2_rot_rm(m_arg): rot_rm = bs(l=2, cls=(armt2_rot_rm,), fname="rot_rm") -class arm_mem_rn_imm(m_arg): +class arm_mem_rn_imm(arm_arg): parser = deref def decode(self, v): value = self.parent.imm.value @@ -1695,7 +1706,7 @@ class arm_widthm1(arm_imm, m_arg): return True -class arm_rm_rot2(m_arg): +class arm_rm_rot2(arm_arg): parser = rot2_expr def decode(self, v): expr = gpregs.expr[v] @@ -1755,12 +1766,12 @@ rot2 = bs(l=2, fname="rot2") widthm1 = bs(l=5, cls=(arm_widthm1, m_arg)) lsb = bs(l=5, cls=(arm_imm, m_arg)) -rd_nopc = bs(l=4, cls=(arm_gpreg_nopc,m_arg), fname="rd") -rn_nopc = bs(l=4, cls=(arm_gpreg_nopc,m_arg), fname="rn") -ra_nopc = bs(l=4, cls=(arm_gpreg_nopc,m_arg), fname="ra") -rt_nopc = bs(l=4, cls=(arm_gpreg_nopc,m_arg), fname="rt") +rd_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rd") +rn_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rn") +ra_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="ra") +rt_nopc = bs(l=4, cls=(arm_gpreg_nopc, arm_arg), fname="rt") -rn_nosp = bs(l=4, cls=(arm_gpreg_nosp,m_arg), fname="rn") +rn_nosp = bs(l=4, cls=(arm_gpreg_nosp, arm_arg), fname="rn") rn_nopc_noarg = bs(l=4, cls=(arm_gpreg_nopc,), fname="rn") @@ -1783,22 +1794,22 @@ gpregs_sppc = reg_info(regs_str[-1:] + regs_str[13:14], regs_expr[-1:] + regs_expr[13:14]) deref_reg_imm = Group(LBRACK + gpregs.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre_mem) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) deref_low = Group(LBRACK + gpregs_l.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre_mem) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) deref_pc = Group(LBRACK + gpregs_pc.parser + Optional( - COMMA + shift_off) + RBRACK).setParseAction(deref2expr_pre_mem) + COMMA + shift_off) + RBRACK).setParseAction(cb_deref_pre_mem) deref_sp = Group(LBRACK + gpregs_sp.parser + COMMA + - shift_off + RBRACK).setParseAction(deref2expr_pre_mem) + shift_off + RBRACK).setParseAction(cb_deref_pre_mem) gpregs_l_wb = Group( - gpregs_l.parser + Optional('!')).setParseAction(parsegpreg_wb) + gpregs_l.parser + Optional('!')).setParseAction(cb_gpreb_wb) gpregs_l_13 = reg_info(regs_str[:13], regs_expr[:13]) -class arm_offreg(m_arg): +class arm_offreg(arm_arg): parser = deref_pc def decodeval(self, v): @@ -1909,7 +1920,7 @@ class arm_off7(arm_imm): def encodeval(self, v): return v >> 2 -class arm_deref_reg_imm(m_arg): +class arm_deref_reg_imm(arm_arg): parser = deref_reg_imm def decode(self, v): @@ -2010,7 +2021,7 @@ class arm_offh(imm_noarg): return True -class armt_rlist(m_arg): +class armt_rlist(arm_arg): parser = gpreg_list def encode(self): @@ -2366,7 +2377,7 @@ armtop("sxth", [bs('10110010'), bs('00'), rml, rdl], [rdl, rml]) # # ARM Architecture Reference Manual Thumb-2 Supplement -armt_gpreg_shift_off = (gpregs_nosppc.parser + allshifts_t_armt + (gpregs.parser | int_1_31)).setParseAction(shift2expr) +armt_gpreg_shift_off = (gpregs_nosppc.parser + allshifts_t_armt + (gpregs.parser | int_1_31)).setParseAction(cb_shift) armt_gpreg_shift_off |= gpregs_nosppc.parser @@ -2783,7 +2794,7 @@ aif_expr = [ExprId(x, 32) if x != None else None for x in aif_str] aif_reg = reg_info(aif_str, aif_expr) -class armt_aif(reg_noarg, m_arg): +class armt_aif(reg_noarg, arm_arg): reg_info = aif_reg parser = reg_info.parser @@ -2798,14 +2809,14 @@ class armt_aif(reg_noarg, m_arg): return ret return self.value != 0 - def fromstring(self, text, parser_result=None): - start, stop = super(armt_aif, self).fromstring(text, parser_result) + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(armt_aif, self).fromstring(text, symbol_pool, parser_result) if self.expr.name == "X": return None, None return start, stop -class armt_it_arg(m_arg): +class armt_it_arg(arm_arg): arg_E = ExprId('E', 1) arg_NE = ExprId('NE', 1) @@ -2878,7 +2889,7 @@ class armt_cond_lsb(bs_divert): cond_expr = [ExprId(x, 32) for x in cond_list_full] cond_info = reg_info(cond_list_full, cond_expr) -class armt_cond_arg(m_arg): +class armt_cond_arg(arm_arg): parser = cond_info.parser def decode(self, v): @@ -2948,7 +2959,8 @@ class armt_op2imm(arm_imm8_12): # XXX default self.parent.ppi.value = 1 - self.parent.rn.fromstring(e.args[0]) + self.parent.rn.expr = e.args[0] + if len(e.args) == 1: self.value = 0 return True @@ -3033,7 +3045,7 @@ class armt_deref_reg(arm_imm8_12): return True -class armt_deref_reg_reg(m_arg): +class armt_deref_reg_reg(arm_arg): parser = deref_reg_reg reg_info = gpregs @@ -3116,7 +3128,7 @@ bs_deref_reg_reg = bs(l=4, cls=(armt_deref_reg_reg,)) bs_deref_reg_reg_lsl_1 = bs(l=4, cls=(armt_deref_reg_reg_lsl_1,)) -class armt_barrier_option(reg_noarg, m_arg): +class armt_barrier_option(reg_noarg, arm_arg): reg_info = barrier_info parser = reg_info.parser diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 86e91dee..817954b6 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -5,11 +5,13 @@ from collections import defaultdict from pyparsing import Literal, Group, Optional -from miasm2.expression.expression import ExprMem, ExprInt, ExprId +from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp + log = logging.getLogger("mips32dis") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -20,48 +22,26 @@ log.setLevel(logging.DEBUG) gpregs = cpu.reg_info(regs.regs32_str, regs.regs32_expr) - LPARENTHESIS = Literal("(") RPARENTHESIS = Literal(")") -def deref2expr(s, l, t): - t = t[0] +def cb_deref(t): if len(t) != 4: raise NotImplementedError("TODO") + return AstMem(t[2] + t[0], 32) - return ExprMem(t[2] + t[0], 32) - -def deref2expr_nooff(s, l, t): - t = t[0] +def cb_deref_nooff(t): if len(t) != 3: raise NotImplementedError("TODO") - return ExprMem(t[1], 32) + return AstMem(t[1], 32) base_expr = cpu.base_expr -deref_off = Group(Optional(cpu.base_expr) + LPARENTHESIS + gpregs.parser + \ - RPARENTHESIS).setParseAction(deref2expr) -deref_nooff = Group(LPARENTHESIS + gpregs.parser + \ - RPARENTHESIS).setParseAction(deref2expr_nooff) +deref_off = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref) +deref_nooff = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_nooff) deref = deref_off | deref_nooff -variable, operand, base_expr = cpu.gen_base_expr() - -int_or_expr = base_expr - - -def ast_id2expr(t): - return mn_mips32.regs.all_regs_ids_byname.get(t, t) - - -def ast_int2expr(a): - return ExprInt(a, 32) - - -my_var_parser = cpu.ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - class additional_info: def __init__(self): self.except_on_instr = False @@ -274,8 +254,31 @@ def mips32op(name, fields, args=None, alias=False): type(name, (mn_mips32,), dct) #type(name, (mn_mips32b,), dct) - -class mips32_reg(cpu.reg_noarg, cpu.m_arg): +class mips32_arg(cpu.m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +class mips32_reg(cpu.reg_noarg, mips32_arg): pass class mips32_gpreg(mips32_reg): @@ -335,14 +338,14 @@ class mips32_soff_noarg(mips32_imm): return True -class mips32_s16imm(mips32_s16imm_noarg, cpu.m_arg): +class mips32_s16imm(mips32_s16imm_noarg, mips32_arg): pass -class mips32_soff(mips32_soff_noarg, cpu.m_arg): +class mips32_soff(mips32_soff_noarg, mips32_arg): pass -class mips32_instr_index(mips32_imm, cpu.m_arg): +class mips32_instr_index(mips32_imm, mips32_arg): def decode(self, v): v = v & self.lmask self.expr = ExprInt(v<<2, 32) @@ -361,7 +364,7 @@ class mips32_instr_index(mips32_imm, cpu.m_arg): return True -class mips32_u16imm(mips32_imm, cpu.m_arg): +class mips32_u16imm(mips32_imm, mips32_arg): def decode(self, v): v = v & self.lmask self.expr = ExprInt(v, 32) @@ -375,7 +378,7 @@ class mips32_u16imm(mips32_imm, cpu.m_arg): self.value = v return True -class mips32_dreg_imm(cpu.m_arg): +class mips32_dreg_imm(mips32_arg): parser = deref def decode(self, v): imm = self.parent.imm.expr @@ -408,7 +411,7 @@ class mips32_dreg_imm(cpu.m_arg): assert(len(arg.args) == 2 and arg.op == '+') return "%s(%s)"%(arg.args[1], arg.args[0]) -class mips32_esize(mips32_imm, cpu.m_arg): +class mips32_esize(mips32_imm, mips32_arg): def decode(self, v): v = v & self.lmask self.expr = ExprInt(v+1, 32) @@ -422,7 +425,7 @@ class mips32_esize(mips32_imm, cpu.m_arg): self.value = v return True -class mips32_eposh(mips32_imm, cpu.m_arg): +class mips32_eposh(mips32_imm, mips32_arg): def decode(self, v): self.expr = ExprInt(v-int(self.parent.epos.expr)+1, 32) return True @@ -437,7 +440,7 @@ class mips32_eposh(mips32_imm, cpu.m_arg): -class mips32_cpr(cpu.m_arg): +class mips32_cpr(mips32_arg): parser = regs.regs_cpr0_info.parser def decode(self, v): index = int(self.parent.cpr0.expr) << 3 diff --git a/miasm2/arch/mips32/regs.py b/miasm2/arch/mips32/regs.py index fadf7118..7ff949f2 100644 --- a/miasm2/arch/mips32/regs.py +++ b/miasm2/arch/mips32/regs.py @@ -4,11 +4,11 @@ from miasm2.expression.expression import ExprId from miasm2.core.cpu import gen_reg, gen_regs -gen_reg('PC', globals()) -gen_reg('PC_FETCH', globals()) +PC, _ = gen_reg('PC') +PC_FETCH, _ = gen_reg('PC_FETCH') -gen_reg('R_LO', globals()) -gen_reg('R_HI', globals()) +R_LO, _ = gen_reg('R_LO') +R_HI, _ = gen_reg('R_HI') exception_flags = ExprId('exception_flags', 32) diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 9728d776..e27d714e 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -8,6 +8,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.msp430.regs as regs_module from miasm2.arch.msp430.regs import * +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp log = logging.getLogger("msp430dis") console_handler = logging.StreamHandler() @@ -19,49 +20,30 @@ conditional_branch = ['jnz', 'jz', 'jnc', 'jc', 'jn', 'jge', 'jl'] unconditional_branch = ['jmp'] -def deref2expr_nooff(s, l, t): - t = t[0] - if len(t) == 1 and isinstance(t[0], ExprId): - return ExprMem(t[0], 16) - elif len(t) == 1 and isinstance(t[0], ExprInt): - return ExprMem(t[0], 16) - raise NotImplementedError('not fully functional') +def cb_deref_nooff(t): + assert len(t) == 1 + result = AstMem(t[0], 16) + return result -def deref2expr_pinc(s, l, t): - t = t[0] - if len(t) == 1 and isinstance(t[0], ExprId): - return ExprOp('autoinc', t[0]) - raise NotImplementedError('not fully functional') +def cb_deref_pinc(t): + assert len(t) == 1 + result = AstOp('autoinc', *t) + return result -def deref2expr_off(s, l, t): - t = t[0] - if len(t) == 2 and isinstance(t[1], ExprId): - return ExprMem(t[1] + t[0], 16) - raise NotImplementedError('not fully functional') +def cb_deref_off(t): + assert len(t) == 2 + result = AstMem(t[1] + t[0], 16) + return result -def deref_expr(s, l, t): - t = t[0] - assert(len(t) == 1) - t = t[0] - if isinstance(t, ExprId) or \ - isinstance(t, ExprInt) or \ - isinstance(t, ExprMem) or \ - (isinstance(t, ExprOp) and t.op == "autoinc"): - return t - - raise NotImplementedError('not fully functional') +def cb_expr(t): + assert(len(t) == 1) + result = t[0] + return result -def f_reg2expr(t): - t = t[0] - i = regs16_str.index(t) - r = regs16_expr[i] - return r - -# gpregs.parser.setParseAction(f_reg2expr) ARO = Suppress("@") LPARENT = Suppress("(") @@ -69,29 +51,39 @@ RPARENT = Suppress(")") PINC = Suppress("+") - -def ast_id2expr(t): - return mn_msp430.regs.all_regs_ids_byname.get(t, t) - - -def ast_int2expr(a): - return ExprInt(a, 16) - - -variable, operand, base_expr = gen_base_expr() - -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - - -deref_nooff = Group(ARO + base_expr).setParseAction(deref2expr_nooff) -deref_pinc = Group(ARO + base_expr + PINC).setParseAction(deref2expr_pinc) -deref_off = Group(base_expr + LPARENT + - gpregs.parser + RPARENT).setParseAction(deref2expr_off) - - -sreg_p = Group(deref_pinc | deref_nooff | - deref_off | base_expr).setParseAction(deref_expr) +deref_nooff = (ARO + base_expr).setParseAction(cb_deref_nooff) +deref_pinc = (ARO + base_expr + PINC).setParseAction(cb_deref_pinc) +deref_off = (base_expr + LPARENT + gpregs.parser + RPARENT).setParseAction(cb_deref_off) +sreg_p = (deref_pinc | deref_nooff | deref_off | base_expr).setParseAction(cb_expr) + + + +class msp430_arg(m_arg): + def asm_ast_to_expr(self, value, symbol_pool): + if isinstance(value, AstId): + name = value.name + if isinstance(name, Expr): + return name + assert isinstance(name, str) + if name in gpregs.str: + index = gpregs.str.index(name) + reg = gpregs.expr[index] + return reg + label = symbol_pool.getby_name_create(value.name) + return ExprId(label, 16) + if isinstance(value, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] + if None in args: + return None + return ExprOp(value.op, *args) + if isinstance(value, AstInt): + return ExprInt(value.value, 16) + if isinstance(value, AstMem): + ptr = self.asm_ast_to_expr(value.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, value.size) + return None class additional_info: @@ -310,7 +302,7 @@ class bw_mn(bs_mod_name): mn_mod = ['.w', '.b'] -class msp430_sreg_arg(reg_noarg, m_arg): +class msp430_sreg_arg(reg_noarg, msp430_arg): prio = default_prio + 1 reg_info = gpregs parser = sreg_p @@ -512,7 +504,7 @@ class bs_cond_off_d(bs_cond_off_s): raise NotImplementedError("unknown value v[a_d] = %d" % v['a_d']) -class msp430_offs(imm_noarg, m_arg): +class msp430_offs(imm_noarg, msp430_arg): parser = base_expr def int2expr(self, v): diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 672c0c6a..d6a2b912 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -8,7 +8,7 @@ from miasm2.core.bin_stream import bin_stream from miasm2.core.asmblock import asm_label import miasm2.arch.ppc.regs as regs_module from miasm2.arch.ppc.regs import * -from pdb import pm +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp log = logging.getLogger("ppcdis") console_handler = logging.StreamHandler() @@ -19,37 +19,43 @@ log.setLevel(logging.DEBUG) LPARENTHESIS = Suppress(Literal("(")) RPARENTHESIS = Suppress(Literal(")")) -def deref2expr_imm_reg(s, l, t): - t = t[0] +def cb_deref_imm_reg(t): if len(t) == 1: - return ExprMem(t[0]) + return AstMem(t[0], 32) elif len(t) == 2: - return ExprMem(t[1] + t[0]) + return AstMem(t[1] + t[0], 32) else: raise NotImplementedError('len(t) > 2') -variable, operand, base_expr = gen_base_expr() -int_or_expr = base_expr - - -def ast_id2expr(t): - if not t in mn_ppc.regs.all_regs_ids_byname: - r = ExprId(asm_label(t)) - else: - r = mn_ppc.regs.all_regs_ids_byname[t] - return r - -def ast_int2expr(a): - return ExprInt(a, 32) - -deref_reg_disp = Group(Optional(int_or_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(deref2expr_imm_reg) -deref_reg = Group(LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(deref2expr_imm_reg) +deref_reg_disp = (Optional(base_expr) + LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) +deref_reg = (LPARENTHESIS + gpregs.parser + RPARENTHESIS).setParseAction(cb_deref_imm_reg) deref = deref_reg | deref_reg_disp -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) + +class ppc_arg(m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None class additional_info: @@ -278,7 +284,7 @@ class mn_ppc(cls_mn): return 32 -class ppc_reg(reg_noarg, m_arg): +class ppc_reg(reg_noarg, ppc_arg): pass @@ -324,7 +330,7 @@ class ppc_crfreg(ppc_reg): reg_info = crfregs parser = reg_info.parser -class ppc_imm(imm_noarg, m_arg): +class ppc_imm(imm_noarg, ppc_arg): parser = base_expr class ppc_s14imm_branch(ppc_imm): @@ -491,7 +497,7 @@ class ppc_divert_conditional_branch(bs_divert): return out -class ppc_deref32(m_arg): +class ppc_deref32(ppc_arg): parser = deref def decode(self, v): diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index 6d43bd3e..59e363f6 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -7,6 +7,9 @@ from collections import defaultdict import miasm2.arch.sh4.regs as regs_module from miasm2.arch.sh4.regs import * + +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp + jra = ExprId('jra', 32) jrb = ExprId('jrb', 32) jrc = ExprId('jrc', 32) @@ -25,99 +28,112 @@ LPARENT = Suppress("(") RPARENT = Suppress(")") -def parse_deref_pcimm(t): - t = t[0] +def cb_deref_pcimm(t): return t[0] + t[1] -def parse_pcandimmimm(t): - t = t[0] +def cb_pcandimmimm(t): return (t[0] & t[1]) + t[2] -def ast_id2expr(t): - return mn_sh4.regs.all_regs_ids_byname.get(t, t) - -def ast_int2expr(a): - return ExprInt(a, 32) - - -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) -int_or_expr = base_expr -ref_pc = Group(LPARENT + regi_pc.parser + COMMA + - int_or_expr + RPARENT).setParseAction(parse_deref_pcimm) -ref_pcandimm = Group( - LPARENT + regi_pc.parser + AND + int_or_expr + - COMMA + int_or_expr + RPARENT).setParseAction(parse_pcandimmimm) - - -pcdisp = Group(regi_pc.parser + AND + int_or_expr + - PLUS + int_or_expr).setParseAction(parse_pcandimmimm) +ref_pc = (LPARENT + reg_info_pc.parser + COMMA + base_expr + RPARENT).setParseAction(cb_deref_pcimm) +ref_pcandimm = (LPARENT + reg_info_pc.parser + AND + base_expr + COMMA + base_expr + RPARENT).setParseAction(cb_pcandimmimm) +pcdisp = (reg_info_pc.parser + AND + base_expr + PLUS + base_expr).setParseAction(cb_pcandimmimm) PTR = Suppress('PTR') -def parse_deref_mem(s, l, t): - t = t[0] - e = ExprMem(t[0], 32) - return e +def cb_deref_mem(t): + assert len(t) == 1 + result = AstMem(t[0], 32) + return result -def parse_predec(s, l, t): - t = t[0] - e = ExprMem(ExprOp('predec', t[0]), 32) - return e +def cb_predec(t): + assert len(t) == 1 + result = AstMem(AstOp('predec', t[0]), 32) + return result -def parse_postinc(s, l, t): - t = t[0] - e = ExprMem(ExprOp('postinc', t[0]), 32) - return e +def cb_postinc(t): + assert len(t) == 1 + result = AstMem(AstOp('postinc', t[0]), 32) + return result -def parse_regdisp(t): - t = t[0] - e = ExprMem(t[0] + t[1], 32) - return e +def cb_regdisp(t): + assert len(t) == 2 + result = AstMem(t[0] + t[1], 32) + return result -def parse_regreg(t): - t = t[0] - e = ExprMem(t[0] + t[1], 32) - return e +def cb_regreg(t): + assert len(t) == 2 + result = AstMem(t[0] + t[1], 32) + return result -deref_pc = Group(DEREF + ref_pc).setParseAction(parse_deref_mem) -deref_pcimm = Group(DEREF + ref_pcandimm).setParseAction(parse_deref_mem) +deref_pc = (DEREF + ref_pc).setParseAction(cb_deref_mem) +deref_pcimm = (DEREF + ref_pcandimm).setParseAction(cb_deref_mem) -dgpregs_base = Group(DEREF + gpregs.parser).setParseAction(parse_deref_mem) -dgpregs_predec = Group( - DEREF + MINUS + gpregs.parser).setParseAction(parse_predec) -dgpregs_postinc = Group( - DEREF + gpregs.parser + PLUS).setParseAction(parse_postinc) +dgpregs_base = (DEREF + gpregs.parser).setParseAction(cb_deref_mem) +dgpregs_predec = (DEREF + MINUS + gpregs.parser).setParseAction(cb_predec) +dgpregs_postinc = (DEREF + gpregs.parser + PLUS).setParseAction(cb_postinc) dgpregs = dgpregs_base | dgpregs_predec | dgpregs_postinc -d_gpreg_gpreg = Group(DEREF + - LPARENT + gpregs.parser + COMMA + gpregs.parser + RPARENT - ).setParseAction(parse_regdisp) +d_gpreg_gpreg = (DEREF + LPARENT + gpregs.parser + COMMA + gpregs.parser + RPARENT).setParseAction(cb_regdisp) dgpregs_p = dgpregs_predec | dgpregs_postinc -dgpregs_ir = Group(DEREF + LPARENT + gpregs.parser + - COMMA + int_or_expr + RPARENT).setParseAction(parse_regdisp) +dgpregs_ir = (DEREF + LPARENT + gpregs.parser + COMMA + base_expr + RPARENT).setParseAction(cb_regdisp) dgpregs_ir |= d_gpreg_gpreg -dgbr_imm = Group(DEREF + LPARENT + regi_gbr.parser + - COMMA + int_or_expr + RPARENT).setParseAction(parse_regdisp) - -dgbr_reg = Group(DEREF + LPARENT + regi_gbr.parser + - COMMA + gpregs.parser + RPARENT).setParseAction(parse_regreg) - - -class sh4_reg(reg_noarg, m_arg): +dgbr_imm = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + base_expr + RPARENT).setParseAction(cb_regdisp) + +dgbr_reg = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + gpregs.parser + RPARENT).setParseAction(cb_regreg) + + +class sh4_arg(m_arg): + def asm_ast_to_expr(self, arg, symbol_pool): + if isinstance(arg, AstId): + if isinstance(arg.name, ExprId): + return arg.name + if arg.name in gpregs.str: + return None + label = symbol_pool.getby_name_create(arg.name) + return ExprId(label, 32) + if isinstance(arg, AstOp): + args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + if None in args: + return None + return ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return ExprInt(arg.value, 32) + if isinstance(arg, AstMem): + ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + if ptr is None: + return None + return ExprMem(ptr, arg.size) + return None + + +_, bs_pr = gen_reg_bs('PR', reg_info_pr, (m_reg, sh4_arg,)) +_, bs_r0 = gen_reg_bs('R0', reg_info_r0, (m_reg, sh4_arg,)) +_, bs_sr = gen_reg_bs('SR', reg_info_sr, (m_reg, sh4_arg,)) +_, bs_gbr = gen_reg_bs('GBR', reg_info_gbr, (m_reg, sh4_arg,)) +_, bs_vbr = gen_reg_bs('VBR', reg_info_vbr, (m_reg, sh4_arg,)) +_, bs_ssr = gen_reg_bs('SSR', reg_info_ssr, (m_reg, sh4_arg,)) +_, bs_spc = gen_reg_bs('SPC', reg_info_spc, (m_reg, sh4_arg,)) +_, bs_sgr = gen_reg_bs('SGR', reg_info_sgr, (m_reg, sh4_arg,)) +_, bs_dbr = gen_reg_bs('dbr', reg_info_dbr, (m_reg, sh4_arg,)) +_, bs_mach = gen_reg_bs('mach', reg_info_mach, (m_reg, sh4_arg,)) +_, bs_macl = gen_reg_bs('macl', reg_info_macl, (m_reg, sh4_arg,)) +_, bs_fpul = gen_reg_bs('fpul', reg_info_fpul, (m_reg, sh4_arg,)) +_, bs_fr0 = gen_reg_bs('fr0', reg_info_fr0, (m_reg, sh4_arg,)) + +class sh4_reg(reg_noarg, sh4_arg): pass @@ -146,12 +162,12 @@ class sh4_freg(sh4_reg): parser = reg_info.parser -class sh4_dgpreg(m_arg): +class sh4_dgpreg(sh4_arg): parser = dgpregs_base - def fromstring(self, text, parser_result=None): - start, stop = super(sh4_dgpreg, self).fromstring(text, parser_result) - if start is None: + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(sh4_dgpreg, self).fromstring(text, symbol_pool, parser_result) + if start is None or self.expr == [None]: return start, stop self.expr = ExprMem(self.expr.arg, self.sz) return start, stop @@ -172,12 +188,12 @@ class sh4_dgpreg(m_arg): return True -class sh4_dgpregpinc(m_arg): +class sh4_dgpregpinc(sh4_arg): parser = dgpregs_p - def fromstring(self, text, parser_result=None): - start, stop = super(sh4_dgpregpinc, self).fromstring(text, parser_result) - if self.expr is None: + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(sh4_dgpregpinc, self).fromstring(text, symbol_pool, parser_result) + if self.expr == [None]: return None, None if not isinstance(self.expr.arg, ExprOp): return None, None @@ -207,7 +223,7 @@ class sh4_dgpregpinc(m_arg): return True -class sh4_dgpregpdec(m_arg): +class sh4_dgpregpdec(sh4_arg): parser = dgpregs_postinc op = "preinc" @@ -252,7 +268,7 @@ class sh4_dgpreg_imm(sh4_dgpreg): return True -class sh4_imm(imm_noarg, m_arg): +class sh4_imm(imm_noarg, sh4_arg): parser = base_expr pass @@ -354,7 +370,7 @@ class sh4_dpc32imm(sh4_dpc16imm): return True -class sh4_pc32imm(m_arg): +class sh4_pc32imm(sh4_arg): parser = pcdisp def decode(self, v): @@ -666,11 +682,11 @@ addop("mov_w", [bs('0110', fname="opc"), rn, d16rmpinc, bs('0101')], [d16rmpinc, rn]) addop("mov_l", [bs('0110', fname="opc"), rn, d32rmpinc, bs('0110')], [d32rmpinc, rn]) -addop("mov_b", [bs('10000000', fname='opc'), bsr0, d08rnimm, dimm4]) -addop("mov_w", [bs('10000001', fname='opc'), bsr0, d16rnimm, dimm4]) +addop("mov_b", [bs('10000000', fname='opc'), bs_r0, d08rnimm, dimm4]) +addop("mov_w", [bs('10000001', fname='opc'), bs_r0, d16rnimm, dimm4]) addop("mov_l", [bs('0001', fname='opc'), d32rnimm, rm, dimm4], [rm, d32rnimm]) -addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bsr0]) -addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bsr0]) +addop("mov_b", [bs('10000100', fname='opc'), d08rmimm, dimm4, bs_r0]) +addop("mov_w", [bs('10000101', fname='opc'), d16rmimm, dimm4, bs_r0]) addop("mov_l", [bs('0101', fname='opc'), rn, d32rmimm, dimm4], [d32rmimm, rn]) addop("mov_b", [bs('0000', fname='opc'), bd08r0gp, rm, bs('0100')], [rm, bd08r0gp]) @@ -685,15 +701,15 @@ addop("mov_w", addop("mov_l", [bs('0000', fname='opc'), rn, bd32r0gp, bs('1110')], [bd32r0gp, rn]) -addop("mov_b", [bs('11000000'), bsr0, d08gbrimm8]) -addop("mov_w", [bs('11000001'), bsr0, d16gbrimm8]) -addop("mov_l", [bs('11000010'), bsr0, d32gbrimm8]) +addop("mov_b", [bs('11000000'), bs_r0, d08gbrimm8]) +addop("mov_w", [bs('11000001'), bs_r0, d16gbrimm8]) +addop("mov_l", [bs('11000010'), bs_r0, d32gbrimm8]) -addop("mov_b", [bs('11000100'), d08gbrimm8, bsr0]) -addop("mov_w", [bs('11000101'), d16gbrimm8, bsr0]) -addop("mov_l", [bs('11000110'), d32gbrimm8, bsr0]) +addop("mov_b", [bs('11000100'), d08gbrimm8, bs_r0]) +addop("mov_w", [bs('11000101'), d16gbrimm8, bs_r0]) +addop("mov_l", [bs('11000110'), d32gbrimm8, bs_r0]) -addop("mov", [bs('11000111'), pc32imm, bsr0]) +addop("mov", [bs('11000111'), pc32imm, bs_r0]) addop("swapb", [bs('0110'), rn, rm, bs('1000')], [rm, rn]) addop("swapw", [bs('0110'), rn, rm, bs('1001')], [rm, rn]) @@ -706,7 +722,7 @@ addop("addc", [bs('0011'), rn, rm, bs('1110')], [rm, rn]) addop("addv", [bs('0011'), rn, rm, bs('1111')], [rm, rn]) -addop("cmpeq", [bs('10001000'), s08imm, bsr0]) +addop("cmpeq", [bs('10001000'), s08imm, bs_r0]) addop("cmpeq", [bs('0011'), rn, rm, bs('0000')], [rm, rn]) @@ -754,24 +770,24 @@ addop("subc", [bs('0011'), rn, rm, bs('1010')], [rm, rn]) addop("subv", [bs('0011'), rn, rm, bs('1011')], [rm, rn]) addop("and", [bs('0010'), rn, rm, bs('1001')], [rm, rn]) -addop("and", [bs('11001001'), u08imm, bsr0]) +addop("and", [bs('11001001'), u08imm, bs_r0]) addop("and_b", [bs('11001101'), u08imm, dr0gbr]) addop("not", [bs('0110'), rn, rm, bs('0111')], [rm, rn]) addop("or", [bs('0010'), rn, rm, bs('1011')], [rm, rn]) -addop("or", [bs('11001011'), u08imm, bsr0]) +addop("or", [bs('11001011'), u08imm, bs_r0]) addop("or_b", [bs('11001111'), u08imm, dr0gbr]) addop("tas_b", [bs('0100'), d08gpreg, bs('00011011')]) addop("tst", [bs('0010'), rn, rm, bs('1000')], [rm, rn]) -addop("tst", [bs('11001000'), u08imm, bsr0]) +addop("tst", [bs('11001000'), u08imm, bs_r0]) addop("tst_b", [bs('11001100'), u08imm, dr0gbr]) addop("xor", [bs('0010'), rn, rm, bs('1010')], [rm, rn]) -addop("xor", [bs('11001010'), u08imm, bsr0]) +addop("xor", [bs('11001010'), u08imm, bs_r0]) addop("xor_b", [bs('11001110'), u08imm, dr0gbr]) addop("rotl", [bs('0100'), rn, bs('00000100')]) @@ -883,29 +899,29 @@ addop("clrs", [bs('0000000001001000')]) addop("clrt", [bs('0000000000001000')]) -addop("ldc", [bs('0100'), rm, bssr, bs('00001110')]) -addop("ldc", [bs('0100'), rm, bsgbr, bs('00011110')]) -addop("ldc", [bs('0100'), rm, bsvbr, bs('00101110')]) -addop("ldc", [bs('0100'), rm, bsssr, bs('00111110')]) -addop("ldc", [bs('0100'), rm, bsspc, bs('01001110')]) -addop("ldc", [bs('0100'), rm, bsdbr, bs('11111010')]) +addop("ldc", [bs('0100'), rm, bs_sr, bs('00001110')]) +addop("ldc", [bs('0100'), rm, bs_gbr, bs('00011110')]) +addop("ldc", [bs('0100'), rm, bs_vbr, bs('00101110')]) +addop("ldc", [bs('0100'), rm, bs_ssr, bs('00111110')]) +addop("ldc", [bs('0100'), rm, bs_spc, bs('01001110')]) +addop("ldc", [bs('0100'), rm, bs_dbr, bs('11111010')]) addop("ldc", [bs('0100'), rm, bs('1'), brn, bs('1110')], [rm, brn]) -addop("ldc_l", [bs('0100'), d32rmpinc, bssr, bs('00000111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsgbr, bs('00010111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsvbr, bs('00100111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsssr, bs('00110111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsspc, bs('01000111')]) -addop("ldc_l", [bs('0100'), d32rmpinc, bsdbr, bs('11110110')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_sr, bs('00000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_gbr, bs('00010111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_vbr, bs('00100111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_ssr, bs('00110111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_spc, bs('01000111')]) +addop("ldc_l", [bs('0100'), d32rmpinc, bs_dbr, bs('11110110')]) addop("ldc_l", [bs('0100'), d32rmpinc, bs('1'), brn, bs('0111')]) -addop("lds", [bs('0100'), rm, bsmach, bs('00001010')]) -addop("lds", [bs('0100'), rm, bsmacl, bs('00011010')]) -addop("lds", [bs('0100'), rm, bspr, bs('00101010')]) -addop("lds_l", [bs('0100'), d32rmpinc, bsmach, bs('00000110')]) -addop("lds_l", [bs('0100'), d32rmpinc, bsmacl, bs('00010110')]) -addop("lds_l", [bs('0100'), d32rmpinc, bspr, bs('00100110')]) +addop("lds", [bs('0100'), rm, bs_mach, bs('00001010')]) +addop("lds", [bs('0100'), rm, bs_macl, bs('00011010')]) +addop("lds", [bs('0100'), rm, bs_pr, bs('00101010')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_mach, bs('00000110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_macl, bs('00010110')]) +addop("lds_l", [bs('0100'), d32rmpinc, bs_pr, bs('00100110')]) addop("ldtlb", [bs('0000000000111000')]) -addop("movca_l", [bs('0000'), bsr0, d32gpreg, bs('11000011')]) +addop("movca_l", [bs('0000'), bs_r0, d32gpreg, bs('11000011')]) addop("nop", [bs('0000000000001001')]) addop("ocbi_l", [bs('0000'), d32gpreg, bs('10010011')]) addop("ocbp_l", [bs('0000'), d32gpreg, bs('10100011')]) @@ -917,33 +933,33 @@ addop("rte", [bs('0000000000101011')]) addop("sets", [bs('0000000001011000')]) addop("sett", [bs('0000000000011000')]) addop("sleep", [bs('0000000000011011')]) -addop("stc", [bs('0000'), bssr, rn, bs('00000010')]) -addop("stc", [bs('0000'), bsgbr, rn, bs('00010010')]) -addop("stc", [bs('0000'), bsvbr, rn, bs('00100010')]) -addop("stc", [bs('0000'), bsssr, rn, bs('00110010')]) -addop("stc", [bs('0000'), bsspc, rn, bs('01000010')]) -addop("stc", [bs('0000'), bssgr, rn, bs('00111010')]) -addop("stc", [bs('0000'), bsdbr, rn, bs('11111010')]) +addop("stc", [bs('0000'), bs_sr, rn, bs('00000010')]) +addop("stc", [bs('0000'), bs_gbr, rn, bs('00010010')]) +addop("stc", [bs('0000'), bs_vbr, rn, bs('00100010')]) +addop("stc", [bs('0000'), bs_ssr, rn, bs('00110010')]) +addop("stc", [bs('0000'), bs_spc, rn, bs('01000010')]) +addop("stc", [bs('0000'), bs_sgr, rn, bs('00111010')]) +addop("stc", [bs('0000'), bs_dbr, rn, bs('11111010')]) addop("stc", [bs('0000'), rn, bs('1'), brm, bs('0010')], [brm, rn]) -addop("stc_l", [bs('0100'), bssr, d32rmpdec, bs('00000011')]) -addop("stc_l", [bs('0100'), bsgbr, d32rmpdec, bs('00010011')]) -addop("stc_l", [bs('0100'), bsvbr, d32rmpdec, bs('00100011')]) -addop("stc_l", [bs('0100'), bsssr, d32rmpdec, bs('00110011')]) -addop("stc_l", [bs('0100'), bsspc, d32rmpdec, bs('01000011')]) -addop("stc_l", [bs('0100'), bssgr, d32rmpdec, bs('00110010')]) -addop("stc_l", [bs('0100'), bsdbr, d32rmpdec, bs('11110010')]) +addop("stc_l", [bs('0100'), bs_sr, d32rmpdec, bs('00000011')]) +addop("stc_l", [bs('0100'), bs_gbr, d32rmpdec, bs('00010011')]) +addop("stc_l", [bs('0100'), bs_vbr, d32rmpdec, bs('00100011')]) +addop("stc_l", [bs('0100'), bs_ssr, d32rmpdec, bs('00110011')]) +addop("stc_l", [bs('0100'), bs_spc, d32rmpdec, bs('01000011')]) +addop("stc_l", [bs('0100'), bs_sgr, d32rmpdec, bs('00110010')]) +addop("stc_l", [bs('0100'), bs_dbr, d32rmpdec, bs('11110010')]) addop("stc_l", [bs('0100'), d32rnpdec, bs('1'), brm, bs('0011')], [brm, d32rnpdec]) # float -addop("sts", [bs('0000'), bsmach, rm, bs('00001010')]) -addop("sts", [bs('0000'), bsmacl, rm, bs('00011010')]) -addop("sts", [bs('0000'), bspr, rm, bs('00101010')]) -addop("sts_l", [bs('0100'), bsmach, d32rmpdec, bs('00000010')]) -addop("sts_l", [bs('0100'), bsmacl, d32rmpdec, bs('00010010')]) +addop("sts", [bs('0000'), bs_mach, rm, bs('00001010')]) +addop("sts", [bs('0000'), bs_macl, rm, bs('00011010')]) +addop("sts", [bs('0000'), bs_pr, rm, bs('00101010')]) +addop("sts_l", [bs('0100'), bs_mach, d32rmpdec, bs('00000010')]) +addop("sts_l", [bs('0100'), bs_macl, d32rmpdec, bs('00010010')]) addop("sts_l", - [bs('0100'), d32rnpdec, bspr, bs('00100010')], [bspr, d32rnpdec]) + [bs('0100'), d32rnpdec, bs_pr, bs('00100010')], [bs_pr, d32rnpdec]) addop("trapa", [bs('11000011'), u08imm]) addop("fldi0", [bs('1111'), frn, bs('10001101')]) @@ -956,18 +972,18 @@ addop("fmov_s", [bs('1111'), d32gpreg, frm, bs('1010')], [frm, d32gpreg]) addop("fmov_s", [bs('1111'), d32rnpdec, frm, bs('1011')], [frm, d32rnpdec]) addop("fmov_s", [bs('1111'), bd32r0gp, frm, bs('0111')], [frm, bd32r0gp]) -addop("flds", [bs('1111'), frm, bsfpul, bs('00011101')]) -addop("fsts", [bs('1111'), bsfpul, frm, bs('00001101')]) +addop("flds", [bs('1111'), frm, bs_fpul, bs('00011101')]) +addop("fsts", [bs('1111'), bs_fpul, frm, bs('00001101')]) addop("fabs", [bs('1111'), frn, bs('01011101')]) addop("fadd", [bs('1111'), frn, frm, bs('0000')], [frm, frn]) addop("fcmpeq", [bs('1111'), frn, frm, bs('0100')], [frm, frn]) addop("fcmpgt", [bs('1111'), frn, frm, bs('0101')], [frm, frn]) addop("fdiv", [bs('1111'), frn, frm, bs('0011')], [frm, frn]) -addop("float", [bs('1111'), bsfpul, frn, bs('00101101')]) -addop("fmac", [bs('1111'), bsfr0, frn, frm, bs('1110')], [bsfr0, frm, frn]) +addop("float", [bs('1111'), bs_fpul, frn, bs('00101101')]) +addop("fmac", [bs('1111'), bs_fr0, frn, frm, bs('1110')], [bs_fr0, frm, frn]) addop("fmul", [bs('1111'), frn, frm, bs('0010')], [frm, frn]) addop("fneg", [bs('1111'), frn, bs('01001101')]) addop("fsqrt", [bs('1111'), frn, bs('01101101')]) addop("fsub", [bs('1111'), frn, frm, bs('0001')], [frm, frn]) -addop("ftrc", [bs('1111'), frm, bsfpul, bs('00111101')]) +addop("ftrc", [bs('1111'), frm, bs_fpul, bs('00111101')]) diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py index 89ff42f8..148e74ba 100644 --- a/miasm2/arch/sh4/regs.py +++ b/miasm2/arch/sh4/regs.py @@ -19,20 +19,20 @@ dregs_expr = [ExprId(x, 32) for x in dregs_str] dregs = reg_info(dregs_str, dregs_expr) -gen_reg('PC', globals()) -gen_reg('PR', globals()) -gen_reg('R0', globals()) -gen_reg('GBR', globals()) -gen_reg('SR', globals()) -gen_reg('VBR', globals()) -gen_reg('SSR', globals()) -gen_reg('SPC', globals()) -gen_reg('SGR', globals()) -gen_reg('DBR', globals()) -gen_reg('MACH', globals()) -gen_reg('MACL', globals()) -gen_reg('FPUL', globals()) -gen_reg('FR0', globals()) +PC, reg_info_pc = gen_reg('PC') +PR, reg_info_pr = gen_reg('PR') +R0, reg_info_r0 = gen_reg('R0') +GBR, reg_info_gbr = gen_reg('GBR') +SR, reg_info_sr = gen_reg('SR') +VBR, reg_info_vbr = gen_reg('VBR') +SSR, reg_info_ssr = gen_reg('SSR') +SPC, reg_info_spc = gen_reg('SPC') +SGR, reg_info_sgr = gen_reg('SGR') +DBR, reg_info_dbr = gen_reg('DBR') +MACH, reg_info_mach = gen_reg('MACH') +MACL, reg_info_macl = gen_reg('MACL') +FPUL, reg_info_fpul = gen_reg('FPUL') +FR0, reg_info_fr0 = gen_reg('FR0') R0 = gpregs_expr[0] R1 = gpregs_expr[1] diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 34a765e8..9310ce1d 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -8,6 +8,8 @@ from collections import defaultdict import miasm2.arch.x86.regs as regs_module from miasm2.arch.x86.regs import * from miasm2.core.asmblock import AsmLabel +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp + log = logging.getLogger("x86_arch") console_handler = logging.StreamHandler() @@ -121,148 +123,82 @@ replace_regs = {16: replace_regs16, 64: replace_regs64} -# parser helper ########### -PLUS = Suppress("+") -MULT = Suppress("*") - -COLON = Suppress(":") - - -LBRACK = Suppress("[") -RBRACK = Suppress("]") - -dbreg = Group(gpregs16.parser | gpregs32.parser | gpregs64.parser) -gpreg = (gpregs08.parser | gpregs08_64.parser | gpregs16.parser | - gpregs32.parser | gpregs64.parser | gpregs_xmm.parser | - gpregs_mm.parser | gpregs_bnd.parser) - - -def reg2exprid(r): - if not r.name in all_regs_ids_byname: - raise ValueError('unknown reg') - return all_regs_ids_byname[r.name] - - -def parse_deref_reg(s, l, t): - t = t[0][0] - return t[0] - - -def parse_deref_int(s, l, t): - t = t[0] - return t[0] - - -def parse_deref_regint(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - i1 = ExprInt(t[1].arg, r1.size) - return r1 + i1 - - -def parse_deref_regreg(s, l, t): - t = t[0] - return t[0][0] + t[1][0] - - -def parse_deref_regregint(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - r2 = reg2exprid(t[1][0]) - i1 = ExprInt(t[2].arg, r1.size) - return r1 + r2 + i1 +segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} +enc2segm = dict([(x[1], x[0]) for x in segm2enc.items()]) +segm_info = reg_info_dct(enc2segm) -def parse_deref_reg_intmreg(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - r2 = reg2exprid(t[1][0]) - i1 = ExprInt(t[2].arg, r1.size) - return r1 + (r2 * i1) -def parse_deref_reg_intmreg_int(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - r2 = reg2exprid(t[1][0]) - i1 = ExprInt(t[2].arg, r1.size) - i2 = ExprInt(t[3].arg, r1.size) - return r1 + (r2 * i1) + i2 +enc2crx = { + 0: cr0, + 1: cr1, + 2: cr2, + 3: cr3, + 4: cr4, + 5: cr5, + 6: cr6, + 7: cr7, +} +crx_info = reg_info_dct(enc2crx) -def parse_deref_intmreg(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - i1 = ExprInt(t[1].arg, r1.size) - return r1 * i1 +enc2drx = { + 0: dr0, + 1: dr1, + 2: dr2, + 3: dr3, + 4: dr4, + 5: dr5, + 6: dr6, + 7: dr7, +} -def parse_deref_intmregint(s, l, t): - t = t[0] - r1 = reg2exprid(t[0][0]) - i1 = ExprInt(t[1].arg, r1.size) - i2 = ExprInt(t[1].arg, r1.size) - return (r1 * i1) + i2 +drx_info = reg_info_dct(enc2drx) -def getreg(s, l, t): - t = t[0] - return t[0] +# parser helper ########### +PLUS = Suppress("+") +MULT = Suppress("*") -def parse_deref_ptr(s, l, t): - t = t[0] - return ExprMem(ExprOp('segm', t[0], t[1])) +COLON = Suppress(":") -def parse_deref_segmoff(s, l, t): - t = t[0] - return ExprOp('segm', t[0], t[1]) +LBRACK = Suppress("[") +RBRACK = Suppress("]") -variable, operand, base_expr = gen_base_expr() +gpreg = ( + gpregs08.parser | + gpregs08_64.parser | + gpregs16.parser | + gpregs32.parser | + gpregs64.parser | + gpregs_xmm.parser | + gpregs_mm.parser | + gpregs_bnd.parser +) -def ast_id2expr(t): - return mn_x86.regs.all_regs_ids_byname.get(t, t) -def ast_int2expr(a): - return ExprInt(a, 64) +def cb_deref_segmoff(t): + assert len(t) == 2 + return AstOp('segm', t[0], t[1]) -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) -int_or_expr = base_expr +def cb_deref_base_expr(t): + tokens = t[0] + assert isinstance(tokens, AstNode) + addr = tokens + return addr -deref_mem_ad = Group(LBRACK + dbreg + RBRACK).setParseAction(parse_deref_reg) -deref_mem_ad |= Group( - LBRACK + int_or_expr + RBRACK).setParseAction(parse_deref_int) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + - int_or_expr + RBRACK).setParseAction(parse_deref_regint) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + - dbreg + RBRACK).setParseAction(parse_deref_regreg) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + dbreg + PLUS + - int_or_expr + RBRACK).setParseAction(parse_deref_regregint) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + dbreg + MULT + - int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg) -deref_mem_ad |= Group( - LBRACK + dbreg + PLUS + dbreg + MULT + int_or_expr + - PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_reg_intmreg_int) -deref_mem_ad |= Group( - LBRACK + dbreg + MULT + - int_or_expr + RBRACK).setParseAction(parse_deref_intmreg) -deref_mem_ad |= Group( - LBRACK + dbreg + MULT + int_or_expr + - PLUS + int_or_expr + RBRACK).setParseAction(parse_deref_intmregint) +deref_mem_ad = (LBRACK + base_expr + RBRACK).setParseAction(cb_deref_base_expr) -deref_ptr = Group(int_or_expr + COLON + - int_or_expr).setParseAction(parse_deref_segmoff) +deref_ptr = (base_expr + COLON + base_expr).setParseAction(cb_deref_segmoff) PTR = Suppress('PTR') @@ -282,31 +218,30 @@ MEMPREFIX2SIZE = {'BYTE': 8, 'WORD': 16, 'DWORD': 32, SIZE2MEMPREFIX = dict((x[1], x[0]) for x in MEMPREFIX2SIZE.items()) -def parse_deref_mem(s, l, t): - t = t[0] +def cb_deref_mem(t): if len(t) == 2: s, ptr = t - return ExprMem(ptr, MEMPREFIX2SIZE[s[0]]) + assert isinstance(ptr, AstNode) + return AstMem(ptr, MEMPREFIX2SIZE[s]) elif len(t) == 3: s, segm, ptr = t - return ExprMem(ExprOp('segm', segm[0], ptr), MEMPREFIX2SIZE[s[0]]) - else: - raise ValueError('len(t) > 3') + return AstMem(AstOp('segm', segm, ptr), MEMPREFIX2SIZE[s]) + raise ValueError('len(t) > 3') -mem_size = Group(BYTE | DWORD | QWORD | WORD | TBYTE | XMMWORD) -deref_mem = Group(mem_size + PTR + Optional(Group(int_or_expr + COLON)) - + deref_mem_ad).setParseAction(parse_deref_mem) +mem_size = (BYTE | DWORD | QWORD | WORD | TBYTE | XMMWORD) +deref_mem = (mem_size + PTR + Optional((base_expr + COLON))+ deref_mem_ad).setParseAction(cb_deref_mem) -rmarg = Group(gpregs08.parser | - gpregs08_64.parser | - gpregs16.parser | - gpregs32.parser | - gpregs64.parser | - gpregs_mm.parser | - gpregs_xmm.parser | - gpregs_bnd.parser - ).setParseAction(getreg) +rmarg = ( + gpregs08.parser | + gpregs08_64.parser | + gpregs16.parser | + gpregs32.parser | + gpregs64.parser | + gpregs_mm.parser | + gpregs_xmm.parser | + gpregs_bnd.parser +) rmarg |= deref_mem @@ -314,36 +249,89 @@ rmarg |= deref_mem mem_far = FAR + deref_mem -cl_or_imm = Group(r08_ecx.parser).setParseAction(getreg) -cl_or_imm |= int_or_expr +cl_or_imm = r08_ecx.parser +cl_or_imm |= base_expr + + +class x86_arg(m_arg): + def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + if size_hint is None: + size_hint = self.parent.v_opmode() + if fixed_size is None: + fixed_size = set() + if isinstance(value, AstId): + if value.name in all_regs_ids_byname: + reg = all_regs_ids_byname[value.name] + fixed_size.add(reg.size) + return reg + if isinstance(value.name, ExprId): + fixed_size.add(value.name.size) + return value.name + if value.name in MEMPREFIX2SIZE: + return None + if value.name in ["FAR"]: + return None + + label = symbol_pool.getby_name_create(value.name) + return ExprId(label, size_hint) + if isinstance(value, AstOp): + # First pass to retreive fixed_size + if value.op == "segm": + segm = self.asm_ast_to_expr(value.args[0], symbol_pool) + ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + return ExprOp('segm', segm, ptr) + args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + if len(fixed_size) == 0: + # No fixed size + pass + elif len(fixed_size) == 1: + # One fixed size, regen all + size = list(fixed_size)[0] + args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + else: + raise ValueError("Size conflict") + if None in args: + return None + return ExprOp(value.op, *args) + if isinstance(value, AstInt): + if 1 << size_hint < value.value: + size_hint *= 2 + return ExprInt(value.value, size_hint) + if isinstance(value, AstMem): + fixed_size.add(value.size) + ptr = self.asm_ast_to_expr(value.ptr, symbol_pool, None, set()) + if ptr is None: + return None + return ExprMem(ptr, value.size) + return None -class r_al(reg_noarg, m_arg): +class r_al(reg_noarg, x86_arg): reg_info = r08_eax parser = reg_info.parser -class r_ax(reg_noarg, m_arg): +class r_ax(reg_noarg, x86_arg): reg_info = r16_eax parser = reg_info.parser -class r_dx(reg_noarg, m_arg): +class r_dx(reg_noarg, x86_arg): reg_info = r16_edx parser = reg_info.parser -class r_eax(reg_noarg, m_arg): +class r_eax(reg_noarg, x86_arg): reg_info = r32_eax parser = reg_info.parser -class r_rax(reg_noarg, m_arg): +class r_rax(reg_noarg, x86_arg): reg_info = r64_eax parser = reg_info.parser -class r_cl(reg_noarg, m_arg): +class r_cl(reg_noarg, x86_arg): reg_info = r08_ecx parser = reg_info.parser @@ -442,9 +430,6 @@ repeat_mn = ["INS", "OUTS", "CMPSB", "CMPSW", "CMPSD", "CMPSQ", ] -segm2enc = {CS: 1, SS: 2, DS: 3, ES: 4, FS: 5, GS: 6} -enc2segm = dict([(x[1], x[0]) for x in segm2enc.items()]) - class group: @@ -685,7 +670,7 @@ class mn_x86(cls_mn): return [(subcls, name, bases, dct, fields)] @classmethod - def fromstring(cls, text, mode): + def fromstring(cls, text, symbol_pool, mode): pref = 0 prefix, new_s = get_prefix(text) if prefix == "LOCK": @@ -697,7 +682,7 @@ class mn_x86(cls_mn): elif prefix == "REPE": pref |= 4 text = new_s - c = super(mn_x86, cls).fromstring(text, mode) + c = super(mn_x86, cls).fromstring(text, symbol_pool, mode) c.additional_info.g1.value = pref return c @@ -1224,7 +1209,7 @@ class x86_s32to64(x86_s08to32): return ExprInt(x, 64) -class bs_eax(m_arg): +class bs_eax(x86_arg): reg_info = r_eax_all rindex = 0 parser = reg_info.parser @@ -1264,7 +1249,7 @@ class bs_eax(m_arg): return False return False -class bs_seg(m_arg): +class bs_seg(x86_arg): reg_info = r_eax_all rindex = 0 parser = reg_info.parser @@ -1326,7 +1311,7 @@ class bs_gs(bs_seg): parser = reg_info.parser -class x86_reg_st(reg_noarg, m_arg): +class x86_reg_st(reg_noarg, x86_arg): reg_info = r_st_all parser = reg_info.parser @@ -1934,11 +1919,11 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): return expr -class x86_rm_arg(m_arg): +class x86_rm_arg(x86_arg): parser = rmarg - def fromstring(self, text, parser_result=None): - start, stop = super(x86_rm_arg, self).fromstring(text, parser_result) + def fromstring(self, text, symbol_pool, parser_result=None): + start, stop = super(x86_rm_arg, self).fromstring(text, symbol_pool, parser_result) p = self.parent if start is None: return None, None @@ -2073,9 +2058,9 @@ class x86_rm_arg(m_arg): yield x class x86_rm_mem(x86_rm_arg): - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem, self).fromstring(text, parser_result) + start, stop = super(x86_rm_mem, self).fromstring(text, symbol_pool, parser_result) if not isinstance(self.expr, ExprMem): return None, None return start, stop @@ -2083,9 +2068,9 @@ class x86_rm_mem(x86_rm_arg): class x86_rm_mem_far(x86_rm_arg): parser = mem_far - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem_far, self).fromstring(text, parser_result) + start, stop = super(x86_rm_mem_far, self).fromstring(text, symbol_pool, parser_result) if not isinstance(self.expr, ExprMem): return None, None self.expr = ExprOp('far', self.expr) @@ -2455,24 +2440,28 @@ class x86_rm_reg_noarg(object): parser = gpreg - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"): self.parent.w8.value = 1 if parser_result: - e, start, stop = parser_result[self.parser] - if e is None: + result, start, stop = parser_result[self.parser] + if result == [None]: return None, None - self.expr = e + self.expr = result if self.expr.size == 8: if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): return None, None self.parent.w8.value = 0 return start, stop try: - v, start, stop = self.parser.scanString(text).next() + result, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + expr = self.asm_ast_to_expr(result[0], symbol_pool) + if expr is None: + return None, None + + self.expr = expr if self.expr.size == 0: if hasattr(self.parent, 'sx') or not hasattr(self.parent, 'w8'): return None, None @@ -2541,7 +2530,7 @@ class x86_rm_reg_noarg(object): return True -class x86_rm_reg_mm(x86_rm_reg_noarg, m_arg): +class x86_rm_reg_mm(x86_rm_reg_noarg, x86_arg): selreg = gpregs_mm def decode(self, v): if self.parent.mode == 64 and self.getrexsize(): @@ -2571,7 +2560,7 @@ class x86_rm_reg_xmm(x86_rm_reg_mm): class x86_rm_reg_bnd(x86_rm_reg_mm): selreg = gpregs_bnd -class x86_rm_reg(x86_rm_reg_noarg, m_arg): +class x86_rm_reg(x86_rm_reg_noarg, x86_arg): pass @@ -2603,25 +2592,25 @@ class x86_reg_noarg(x86_rm_reg_noarg): self.parent.rex_b.value = v -class x86_rm_segm(reg_noarg, m_arg): +class x86_rm_segm(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = segmreg parser = reg_info.parser -class x86_rm_cr(reg_noarg, m_arg): +class x86_rm_cr(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = crregs parser = reg_info.parser -class x86_rm_dr(reg_noarg, m_arg): +class x86_rm_dr(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = drregs parser = reg_info.parser -class x86_rm_flt(reg_noarg, m_arg): +class x86_rm_flt(reg_noarg, x86_arg): prio = default_prio + 1 reg_info = fltregs parser = reg_info.parser @@ -2634,7 +2623,7 @@ class bs_fbit(bsi): return True -class bs_cl1(bsi, m_arg): +class bs_cl1(bsi, x86_arg): parser = cl_or_imm def decode(self, v): @@ -2751,11 +2740,11 @@ class bs_cond_disp(bs_cond): return True -class bs_cond_imm(bs_cond_scale, m_arg): - parser = int_or_expr +class bs_cond_imm(bs_cond_scale, x86_arg): + parser = base_expr max_size = 32 - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: @@ -2880,9 +2869,9 @@ class bs_cond_imm64(bs_cond_imm): class bs_rel_off(bs_cond_imm): - parser = int_or_expr + parser = base_expr - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: @@ -2940,7 +2929,7 @@ class bs_rel_off(bs_cond_imm): return True class bs_s08(bs_rel_off): - parser = int_or_expr + parser = base_expr @classmethod def flen(cls, mode, v): @@ -3021,10 +3010,10 @@ class bs_moff(bsi): return True -class bs_movoff(m_arg): +class bs_movoff(x86_arg): parser = deref_mem - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: @@ -3088,10 +3077,10 @@ class bs_movoff(m_arg): return True -class bs_msegoff(m_arg): +class bs_msegoff(x86_arg): parser = deref_ptr - def fromstring(self, s, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: @@ -3172,13 +3161,13 @@ disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") s08 = bs(l=8, cls=(bs_s08, )) -u08 = bs(l=8, cls=(x86_08, m_arg)) -u07 = bs(l=7, cls=(x86_08, m_arg)) -u16 = bs(l=16, cls=(x86_16, m_arg)) -u32 = bs(l=32, cls=(x86_32, m_arg)) -s3264 = bs(l=32, cls=(x86_s32to64, m_arg)) +u08 = bs(l=8, cls=(x86_08, x86_arg)) +u07 = bs(l=7, cls=(x86_08, x86_arg)) +u16 = bs(l=16, cls=(x86_16, x86_arg)) +u32 = bs(l=32, cls=(x86_32, x86_arg)) +s3264 = bs(l=32, cls=(x86_s32to64, x86_arg)) -u08_3 = bs(l=0, cls=(x86_imm_fix_08, m_arg), ival = 3) +u08_3 = bs(l=0, cls=(x86_imm_fix_08, x86_arg), ival = 3) d0 = bs("000", fname='reg') d1 = bs("001", fname='reg') diff --git a/miasm2/core/asm_ast.py b/miasm2/core/asm_ast.py new file mode 100644 index 00000000..7a365ba1 --- /dev/null +++ b/miasm2/core/asm_ast.py @@ -0,0 +1,94 @@ +class AstNode(object): + """ + Ast node object + """ + def __neg__(self): + if isinstance(self, AstInt): + value = AstInt(-self.value) + else: + value = AstOp('-', self) + return value + + def __add__(self, other): + return AstOp('+', self, other) + + def __sub__(self, other): + return AstOp('-', self, other) + + def __div__(self, other): + return AstOp('/', self, other) + + def __mod__(self, other): + return AstOp('%', self, other) + + def __mul__(self, other): + return AstOp('*', self, other) + + def __lshift__(self, other): + return AstOp('<<', self, other) + + def __rshift__(self, other): + return AstOp('>>', self, other) + + def __xor__(self, other): + return AstOp('^', self, other) + + def __or__(self, other): + return AstOp('|', self, other) + + def __and__(self, other): + return AstOp('&', self, other) + + def __neg__(self): + return AstOp('-', self) + + +class AstInt(AstNode): + """ + Ast integer + """ + def __init__(self, value): + self.value = value + + def __str__(self): + return "%s" % self.value + + +class AstId(AstNode): + """ + Ast Id + """ + def __init__(self, name): + self.name = name + + def __str__(self): + return "%s" % self.name + + +class AstMem(AstNode): + """ + Ast memory deref + """ + def __init__(self, ptr, size): + assert isinstance(ptr, AstNode) + assert isinstance(size, (int, long)) + self.ptr = ptr + self.size = size + + def __str__(self): + return "@%d[%s]" % (self.size, self.ptr) + + +class AstOp(AstNode): + """ + Ast operator + """ + def __init__(self, op, *args): + assert all(isinstance(arg, AstNode) for arg in args) + self.op = op + self.args = args + + def __str__(self): + if len(self.args) == 1: + return "(%s %s)" % (self.op, self.args[0]) + return '(' + ("%s" % self.op).join(str(x) for x in self.args) + ')' diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 061752f8..ca419458 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -13,6 +13,9 @@ from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp + +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp + log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) @@ -85,12 +88,19 @@ def literal_list(l): return o -class reg_info: +class reg_info(object): def __init__(self, reg_str, reg_expr): self.str = reg_str self.expr = reg_expr - self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + self.parser = literal_list(reg_str).setParseAction(self.cb_parse) + + def cb_parse(self, t): + assert len(t) == 1 + i = self.str.index(t[0]) + reg = self.expr[i] + result = AstId(reg) + return result def reg2expr(self, s): i = self.str.index(s[0]) @@ -100,15 +110,21 @@ class reg_info: return self.expr.index(e) - -class reg_info_dct: +class reg_info_dct(object): def __init__(self, reg_expr): self.dct_str_inv = dict((v.name, k) for k, v in reg_expr.iteritems()) self.dct_expr = reg_expr self.dct_expr_inv = dict((v, k) for k, v in reg_expr.iteritems()) reg_str = [v.name for v in reg_expr.itervalues()] - self.parser = literal_list(reg_str).setParseAction(self.reg2expr) + self.parser = literal_list(reg_str).setParseAction(self.cb_parse) + + def cb_parse(self, t): + assert len(t) == 1 + i = self.dct_str_inv[t[0]] + reg = self.dct_expr[i] + result = AstId(reg) + return result def reg2expr(self, s): i = self.dct_str_inv[s[0]] @@ -118,34 +134,30 @@ class reg_info_dct: return self.dct_expr_inv[e] -def gen_reg(rname, env, sz=32): - """ - Gen reg expr and parser - Equivalent to: - PC = ExprId('PC') - reg_pc_str = ['PC'] - reg_pc_expr = [ExprId(x, sz) for x in reg_pc_str] - regpc = reg_info(reg_pc_str, reg_pc_expr) +def gen_reg(reg_name, sz=32): + """Gen reg expr and parser""" + reg_name_lower = reg_name.lower() + reg = m2_expr.ExprId(reg_name, sz) + reginfo = reg_info([reg_name], [reg]) + return reg, reginfo - class bs_rname(m_reg): - reg = regi_rname - bsrname = bs(l=0, cls=(bs_rname,)) +def gen_reg_bs(reg_name, reg_info, base_cls): + """ + Generate: + class bs_reg_name(base_cls): + reg = reg_info + bs_reg_name = bs(l=0, cls=(bs_reg_name,)) """ - rnamel = rname.lower() - r = m2_expr.ExprId(rname, sz) - reg_str = [rname] - reg_expr = [r] - regi = reg_info(reg_str, reg_expr) - # define as global val - cname = "bs_" + rnamel - c = type(cname, (m_reg,), {'reg': regi}) - env[rname] = r - env["regi_" + rnamel] = regi - env[cname] = c - env["bs" + rnamel] = bs(l=0, cls=(c,)) - return r, regi + reg_name_lower = reg_name.lower() + + bs_name = "bs_%s" % reg_name + cls = type(bs_name, base_cls, {'reg': reg_info}) + + bs_obj = bs(l=0, cls=(cls,)) + + return cls, bs_obj def gen_regs(rnames, env, sz=32): @@ -217,125 +229,6 @@ def ast_int2expr(a): return m2_expr.ExprInt(a, 32) - -class ParseAst(object): - - def __init__(self, id2expr, int2expr, default_size=32): - self.id2expr = id2expr - self.int2expr = int2expr - self.default_size = default_size - - def int_from_size(self, size, value): - """Transform a string into ExprInt. - * if @size is None, use provided int2expr - * else, use @size to generate integer - @size: size of int; None if not forced. - @value: string representing an integer - """ - if size is None: - return self.int2expr(value) - else: - return m2_expr.ExprInt(value, size) - - def id_from_size(self, size, value): - """Transform a string into ExprId. - * if @size is None, use provided id2expr - * else, use @size to generate id - @size: size of id; None if not forced. - @value: string representing the id - """ - value = self.id2expr(value) - if isinstance(value, m2_expr.Expr): - return value - if size is None: - size = self.default_size - assert value is not None - return m2_expr.ExprId(asmblock.AsmLabel(value), size) - - def ast_to_expr(self, size, ast): - """Transform a typed ast into a Miasm expression - @size: default size - @ast: typed ast - """ - assert(isinstance(ast, tuple)) - if ast[0] is m2_expr.ExprId: - expr = self.id_from_size(size, ast[1]) - if isinstance(expr, str): - expr = self.id_from_size(size, expr) - elif ast[0] is m2_expr.ExprInt: - expr = self.int_from_size(size, ast[1]) - elif ast[0] is m2_expr.ExprOp: - out = [] - for arg in ast[1]: - if isinstance(arg, tuple): - arg = self.ast_to_expr(size, arg) - out.append(arg) - expr = ast_parse_op(out) - else: - raise TypeError('unknown type') - return expr - - def ast_get_ids(self, ast): - """Retrieve every node of type ExprId in @ast - @ast: typed ast - """ - assert(isinstance(ast, tuple)) - if ast[0] is m2_expr.ExprId: - return set([ast[1]]) - elif ast[0] is m2_expr.ExprInt: - return set() - elif ast[0] is m2_expr.ExprOp: - out = set() - for x in ast[1]: - if isinstance(x, tuple): - out.update(self.ast_get_ids(x)) - return out - raise TypeError('unknown type') - - def _extract_ast_core(self, ast): - assert(isinstance(ast, tuple)) - if ast[0] in [m2_expr.ExprInt, m2_expr.ExprId]: - return ast - elif ast[0] is m2_expr.ExprOp: - out = [] - for arg in ast[1]: - if isinstance(arg, tuple): - arg = self._extract_ast_core(arg) - out.append(arg) - return tuple([ast[0]] + [out]) - else: - raise TypeError('unknown type') - - def extract_ast_core(self, ast): - """ - Trasform an @ast into a Miasm expression. - Use registers size to deduce label and integers sizes. - """ - ast = self._extract_ast_core(ast) - ids = self.ast_get_ids(ast) - ids_expr = [self.id2expr(x) for x in ids] - sizes = set([expr.size for expr in ids_expr - if isinstance(expr, m2_expr.Expr)]) - if not sizes: - size = None - elif len(sizes) == 1: - size = sizes.pop() - else: - # Multiple sizes in ids - raise StopIteration - return self.ast_to_expr(size, ast) - - def __call__(self, ast): - """ - Trasform an @ast into a Miasm expression. - Use registers size to deduce label and integers sizes. - """ - ast = ast[0] - if isinstance(ast, m2_expr.Expr): - return ast - return self.extract_ast_core(ast) - - def neg_int(t): x = -t[0] return x @@ -361,29 +254,154 @@ multop = pyparsing.oneOf('* / %') plusop = pyparsing.oneOf('+ -') -def gen_base_expr(): - variable = pyparsing.Word(pyparsing.alphas + "_$.", - pyparsing.alphanums + "_") - variable.setParseAction(parse_id) - operand = str_int | variable - base_expr = pyparsing.operatorPrecedence(operand, - [("!", 1, pyparsing.opAssoc.RIGHT, parse_op), - (logicop, 2, pyparsing.opAssoc.RIGHT, - parse_op), - (signop, 1, pyparsing.opAssoc.RIGHT, - parse_op), - (multop, 2, pyparsing.opAssoc.LEFT, - parse_op), - (plusop, 2, pyparsing.opAssoc.LEFT, - parse_op), - ]) - return variable, operand, base_expr +########################## +def literal_list(l): + l = l[:] + l.sort() + l = l[::-1] + o = pyparsing.Literal(l[0]) + for x in l[1:]: + o |= pyparsing.Literal(x) + return o + + +def cb_int(t): + assert len(t) == 1 + integer = AstInt(t[0]) + return integer + + +def cb_parse_id(t): + assert len(t) == 1 + reg = t[0] + return AstId(reg) + + +def cb_op_not(t): + tokens = t[0] + assert len(tokens) == 2 + assert tokens[0] == "!" + result = AstOp("!", tokens[1]) + return result + + +def merge_ops(tokens, op): + args = [] + if len(tokens) >= 3: + args = [tokens.pop(0)] + i = 0 + while i < len(tokens): + op_tmp = tokens[i] + arg = tokens[i+1] + i += 2 + if op_tmp != op: + raise ValueError("Bad operator") + args.append(arg) + result = AstOp(op, *args) + return result + + +def cb_op_and(t): + result = merge_ops(t[0], "&") + return result + + +def cb_op_xor(t): + result = merge_ops(t[0], "^") + return result + + +def cb_op_sign(t): + assert len(t) == 1 + op, value = t[0] + return -value + + +def cb_op_div(t): + tokens = t[0] + assert len(tokens) == 3 + assert tokens[1] == "/" + result = AstOp("/", tokens[0], tokens[2]) + return result + + +def cb_op_plusminus(t): + tokens = t[0] + if len(tokens) == 3: + # binary op + assert isinstance(tokens[0], AstNode) + assert isinstance(tokens[2], AstNode) + op, args = tokens[1], [tokens[0], tokens[2]] + elif len(tokens) > 3: + args = [tokens.pop(0)] + i = 0 + while i < len(tokens): + op = tokens[i] + arg = tokens[i+1] + i += 2 + if op == '-': + arg = -arg + elif op == '+': + pass + else: + raise ValueError("Bad operator") + args.append(arg) + op = '+' + else: + raise ValueError("Parsing error") + assert all(isinstance(arg, AstNode) for arg in args) + result = AstOp(op, *args) + return result -variable, operand, base_expr = gen_base_expr() -my_var_parser = ParseAst(ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) +def cb_op_mul(t): + tokens = t[0] + assert len(tokens) == 3 + assert isinstance(tokens[0], AstNode) + assert isinstance(tokens[2], AstNode) + + # binary op + op, args = tokens[1], [tokens[0], tokens[2]] + result = AstOp(op, *args) + return result + + +integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: int(t[0])) +hex_word = pyparsing.Literal('0x') + pyparsing.Word(pyparsing.hexnums) +hex_int = pyparsing.Combine(hex_word).setParseAction(lambda t: int(t[0], 16)) + +str_int_pos = (hex_int | integer) + +str_int = str_int_pos +str_int.setParseAction(cb_int) + +notop = pyparsing.oneOf('!') +andop = pyparsing.oneOf('&') +orop = pyparsing.oneOf('|') +xorop = pyparsing.oneOf('^') +shiftop = pyparsing.oneOf('>> <<') +rotop = pyparsing.oneOf('<<< >>>') +signop = pyparsing.oneOf('+ -') +mulop = pyparsing.oneOf('*') +plusop = pyparsing.oneOf('+ -') +divop = pyparsing.oneOf('/') + + +variable = pyparsing.Word(pyparsing.alphas + "_$.", pyparsing.alphanums + "_") +variable.setParseAction(cb_parse_id) +operand = str_int | variable + +base_expr = pyparsing.operatorPrecedence(operand, + [(notop, 1, pyparsing.opAssoc.RIGHT, cb_op_not), + (andop, 2, pyparsing.opAssoc.RIGHT, cb_op_and), + (xorop, 2, pyparsing.opAssoc.RIGHT, cb_op_xor), + (signop, 1, pyparsing.opAssoc.RIGHT, cb_op_sign), + (mulop, 2, pyparsing.opAssoc.RIGHT, cb_op_mul), + (divop, 2, pyparsing.opAssoc.RIGHT, cb_op_div), + (plusop, 2, pyparsing.opAssoc.LEFT, cb_op_plusminus), + ]) + default_prio = 0x1337 @@ -656,7 +674,7 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -665,9 +683,14 @@ class m_arg(object): v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + arg = v[0] + expr = self.asm_ast_to_expr(arg, symbol_pool) + self.expr = expr return start, stop + def asm_ast_to_expr(self, arg, symbol_pool): + raise NotImplementedError("Virtual") + class m_reg(m_arg): prio = default_prio @@ -688,7 +711,7 @@ class reg_noarg(object): reg_info = None parser = None - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -697,7 +720,9 @@ class reg_noarg(object): v, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - self.expr = v[0] + arg = v[0] + expr = self.parses_to_expr(arg, symbol_pool) + self.expr = expr return start, stop def decode(self, v): @@ -1252,7 +1277,7 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, text, mode = None): + def fromstring(cls, text, symbol_pool, mode = None): global total_scans name = re.search('(\S+)', text).groups() if not name: @@ -1291,9 +1316,12 @@ class cls_mn(object): v, start, stop = [None], None, None if start != 0: v, start, stop = [None], None, None - parsers[(i, start_i)][p] = v[0], start, stop - - start, stop = f.fromstring(args_str, parsers[(i, start_i)]) + if v != [None]: + v = f.asm_ast_to_expr(v[0], symbol_pool) + if v is None: + v, start, stop = [None], None, None + parsers[(i, start_i)][p] = v, start, stop + start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) if start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1532,7 +1560,7 @@ class imm_noarg(object): return None return v - def fromstring(self, text, parser_result=None): + def fromstring(self, text, symbol_pool, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: @@ -1540,7 +1568,7 @@ class imm_noarg(object): e, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - if e is None: + if e == [None]: return None, None assert(isinstance(e, m2_expr.Expr)) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 8ea0f6b1..df419680 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -3,8 +3,8 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmblock as asmblock -from miasm2.core.cpu import gen_base_expr, ParseAst -from miasm2.core.cpu import instruction +from miasm2.core.cpu import instruction, base_expr +from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp declarator = {'byte': 8, 'word': 16, @@ -101,6 +101,16 @@ STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 +def asm_ast_to_expr_with_size(arg, symbol_pool, size): + if isinstance(arg, AstId): + return m2_expr.ExprId(arg.name, size) + if isinstance(arg, AstOp): + args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] + return m2_expr.ExprOp(arg.op, *args) + if isinstance(arg, AstInt): + return m2_expr.ExprInt(arg.value, size) + return None + def parse_txt(mnemo, attrib, txt, symbol_pool=None): """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where blocks is a list of asm_bloc and symbol_pool the associated AsmSymbolPool @@ -168,16 +178,12 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): expr_list = [] # parser - base_expr = gen_base_expr()[2] - my_var_parser = ParseAst(lambda x: m2_expr.ExprId(x, size), - lambda x: - m2_expr.ExprInt(x, size)) - base_expr.setParseAction(my_var_parser) for element in data_raw: element = element.strip() - element_expr = base_expr.parseString(element)[0] - expr_list.append(element_expr.canonize()) + element_parsed = base_expr.parseString(element)[0] + element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) raw_data.element_size = size @@ -216,7 +222,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, attrib) + instr = mnemo.fromstring(line, symbol_pool, attrib) # replace orphan AsmLabel with labels from symbol_pool replace_orphan_labels(instr, symbol_pool) diff --git a/test/arch/aarch64/arch.py b/test/arch/aarch64/arch.py index 8364fcf1..a6aa7ba5 100644 --- a/test/arch/aarch64/arch.py +++ b/test/arch/aarch64/arch.py @@ -1,6 +1,10 @@ import sys import time +from pdb import pm from miasm2.arch.aarch64.arch import * +from miasm2.core.asmblock import AsmSymbolPool + +symbol_pool = AsmSymbolPool() reg_tests_aarch64 = [ ("XXXXXXXX MOV W1, WZR", @@ -1810,7 +1814,7 @@ for s, l in reg_tests_aarch64[:]: print s print mn assert(str(mn) == s) - l = mn_aarch64.fromstring(s, 'l') + l = mn_aarch64.fromstring(s, symbol_pool, 'l') assert(str(l) == s) a = mn_aarch64.asm(l) print [x for x in a] diff --git a/test/arch/aarch64/unit/asm_test.py b/test/arch/aarch64/unit/asm_test.py index ddb8a08c..ca27ef9d 100644 --- a/test/arch/aarch64/unit/asm_test.py +++ b/test/arch/aarch64/unit/asm_test.py @@ -1,7 +1,6 @@ import sys import os -from miasm2.core.cpu import ParseAst from miasm2.arch.aarch64.arch import mn_aarch64, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index 7f3b321e..90d137d0 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -1,7 +1,10 @@ import time from miasm2.arch.arm.arch import * +from miasm2.core.asmblock import AsmSymbolPool from pdb import pm + +symbol_pool = AsmSymbolPool() if 0: a = bs('00') b = bs('01') @@ -267,7 +270,7 @@ for s, l in reg_tests_arm: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_arm.fromstring(s, 'l') + l = mn_arm.fromstring(s, symbol_pool, 'l') # print l assert(str(l) == s) a = mn_arm.asm(l) @@ -719,7 +722,7 @@ for s, l in reg_tests_armt: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_armt.fromstring(s, 'l') + l = mn_armt.fromstring(s, symbol_pool, 'l') # print l assert(str(l) == s) print 'Asm..', l diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index 1b14214e..d9e6aa76 100755 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -9,11 +9,13 @@ from miasm2.arch.arm.arch import mn_arm as mn from miasm2.arch.arm.sem import ir_arml as ir_arch from miasm2.arch.arm.regs import * from miasm2.expression.expression import * +from miasm2.core.asmblock import AsmSymbolPool from pdb import pm logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_arch().IRDst]) +symbol_pool = AsmSymbolPool() def M(addr): return ExprMem(ExprInt(addr, 16), 16) @@ -24,7 +26,7 @@ def compute(asm, inputstate={}, debug=False): sympool.update({k: ExprInt(v, k.size) for k, v in inputstate.iteritems()}) interm = ir_arch() symexec = SymbolicExecutionEngine(interm, sympool) - instr = mn.fromstring(asm, "l") + instr = mn.fromstring(asm, symbol_pool, "l") code = mn.asm(instr)[0] instr = mn.dis(code, "l") instr.offset = inputstate.get(PC, 0) diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index b28e2583..c6b68c0c 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -1,8 +1,11 @@ import time from pdb import pm +from miasm2.core.asmblock import AsmSymbolPool from miasm2.arch.mips32.arch import * +symbol_pool = AsmSymbolPool() + reg_tests_mips32 = [ ("004496D8 ADDU GP, GP, T9", "0399E021"), @@ -227,7 +230,7 @@ for s, l in reg_tests_mips32: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_mips32.fromstring(s, 'b') + l = mn_mips32.fromstring(s, symbol_pool, 'b') # print l assert(str(l) == s) a = mn_mips32.asm(l, 'b') diff --git a/test/arch/mips32/unit/asm_test.py b/test/arch/mips32/unit/asm_test.py index 9281f1b6..f03a32d7 100644 --- a/test/arch/mips32/unit/asm_test.py +++ b/test/arch/mips32/unit/asm_test.py @@ -1,8 +1,7 @@ import sys import os -from miasm2.core.cpu import ParseAst -from miasm2.arch.mips32.arch import mn_mips32, base_expr, variable +from miasm2.arch.mips32.arch import mn_mips32 from miasm2.core import parse_asm from miasm2.expression.expression import * from miasm2.core import asmblock diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py index b3dbac82..3df2becb 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -1,6 +1,9 @@ import time +from pdb import pm from miasm2.arch.msp430.arch import * +from miasm2.core.asmblock import AsmSymbolPool +symbol_pool = AsmSymbolPool() def h2i(s): return s.replace(' ', '').decode('hex') @@ -94,7 +97,7 @@ for s, l in reg_tests_msp: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_msp430.fromstring(s, None) + l = mn_msp430.fromstring(s, symbol_pool, None) # print l assert(str(l) == s) a = mn_msp430.asm(l) diff --git a/test/arch/sh4/arch.py b/test/arch/sh4/arch.py index 4d173add..574dcf49 100644 --- a/test/arch/sh4/arch.py +++ b/test/arch/sh4/arch.py @@ -1,6 +1,10 @@ import time +from pdb import pm from sys import stderr from miasm2.arch.sh4.arch import * +from miasm2.core.asmblock import AsmSymbolPool + +symbol_pool = AsmSymbolPool() def h2i(s): return s.replace(' ', '').decode('hex') @@ -396,7 +400,7 @@ for s, l in reg_tests_sh4: assert(str(mn) == s) # print hex(b) # print [str(x.get()) for x in mn.args] - l = mn_sh4.fromstring(s, None) + l = mn_sh4.fromstring(s, symbol_pool, None) # print l assert(str(l) == s) a = mn_sh4.asm(l) diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 7b834e2c..05b31815 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -1,19 +1,13 @@ import time +from pdb import pm import miasm2.expression.expression as m2_expr -from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, ParseAst, ast_int2expr, \ +from miasm2.arch.x86.arch import mn_x86, deref_mem_ad, \ base_expr, rmarg, print_size from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.core.bin_stream import bin_stream_str +from miasm2.core.asmblock import AsmSymbolPool -for s in ["[EAX]", - "[0x10]", - "[EBX + 0x10]", - "[EBX + ECX*0x10]", - "[EBX + ECX*0x10 + 0x1337]"]: - (e, a, b) = deref_mem_ad.scanString(s).next() - print 'expr', e[0] - -print '---' +symbol_pool = AsmSymbolPool() mylabel16 = m2_expr.ExprId('mylabel16', 16) mylabel32 = m2_expr.ExprId('mylabel32', 32) @@ -26,32 +20,6 @@ reg_and_id.update({'mylabel16': mylabel16, }) -def my_ast_id2expr(t): - r = reg_and_id.get(t, m2_expr.ExprId(t, size=32)) - return r - -my_var_parser = ParseAst(my_ast_id2expr, ast_int2expr) -base_expr.setParseAction(my_var_parser) - -for s in ['EAX', - "BYTE PTR [EAX]", - "WORD PTR [EAX]", - "DWORD PTR [ECX+0x1337]", - "QWORD PTR [RAX+4*RCX + 0x1337]", - "DWORD PTR [EAX+EBX]", - "QWORD PTR [RAX+RBX+0x55667788]", - "BYTE PTR CS:[EAX]", - "QWORD PTR [RAX+RBX+mylabel64]", - "BYTE PTR [RAX+RBX+mylabel64]", - "BYTE PTR [AX+BX+mylabel16]", - "BYTE PTR [mylabel32]", - ]: - print '*' * 80 - print s - (e, a, b) = rmarg.scanString(s).next() - print 'expr', e[0] - e[0].visit(print_size) - def h2i(s): return int(s.replace(' ', '').decode('hex')[::].encode('hex'), 16) @@ -3080,27 +3048,9 @@ reg_tests = [ ] - # mode = 64 - # l = mn_x86.dis('\x4D\x11\x7c\x18\x00', mode) - # print l - #""" - # mode = 64 - # l = mn_x86.fromstring("ADC DWORD PTR [RAX], 0x11223344", mode) - # print 'xx' - # t= time.time() - # import cProfile - # def f(): - # x = l.asm(mode) - # print x - # cProfile.run('f()') - # l.asm(mode) - # print time.time()-t -# reg_tests = reg_tests[-1:] - test_file = {16: open('regression_test16_ia32.bin', 'w'), 32: open('regression_test32_ia32.bin', 'w'), 64: open('regression_test64_ia32.bin', 'w')} - # 64: open('testmnemo', 'r+')} ts = time.time() for mode, s, l, in reg_tests: print "-" * 80 @@ -3115,7 +3065,7 @@ for mode, s, l, in reg_tests: # print hex(b) # print [str(x.get()) for x in mn.args] print 'fromstring', repr(s) - l = mn_x86.fromstring(s, mode) + l = mn_x86.fromstring(s, symbol_pool, mode) # print l print 'str args', [(str(x), x.size) for x in l.args] assert(str(l).strip(' ') == s) diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index eb3c15c0..b3b7e940 100755 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -14,11 +14,14 @@ from miasm2.arch.x86.regs import * from miasm2.expression.expression import * from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm, asmblock +from miasm2.core.asmblock import AsmSymbolPool logging.getLogger('cpuhelper').setLevel(logging.ERROR) EXCLUDE_REGS = set([ir_32().IRDst, ir_64().IRDst]) +symbol_pool = AsmSymbolPool() + m32 = 32 m64 = 64 @@ -35,7 +38,7 @@ def symb_exec(interm, inputstate, debug): if k not in EXCLUDE_REGS and regs_init.get(k, None) != v} def compute(ir, mode, asm, inputstate={}, debug=False): - instr = mn.fromstring(asm, mode) + instr = mn.fromstring(asm, symbol_pool, mode) code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(EIP, 0) diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index 8a6b215c..961967f9 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -1,7 +1,6 @@ import sys import os -from miasm2.core.cpu import ParseAst from miasm2.arch.x86.arch import mn_x86, base_expr, variable from miasm2.core import parse_asm from miasm2.expression.expression import * |