From 22777cc19e6e1f43fbbfb908e5d4d4cd7c76b391 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 27 Mar 2015 14:22:52 +0100 Subject: Asmbloc: updt asmbloc api --- example/asm/shellcode.py | 4 +--- example/asm/simple.py | 4 ++-- example/expression/asm_to_ir.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'example') diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 89914b6d..7bf76093 100644 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -88,9 +88,7 @@ graph = asmbloc.bloc2graph(blocs[0]) open("graph.txt", "w").write(graph) # Apply patches -resolved_b, patches = asmbloc.asm_resolve_final(machine.mn, - blocs[0], - symbol_pool) +patches = asmbloc.asm_resolve_final(machine.mn, blocs[0], symbol_pool) if args.encrypt: # Encrypt code ad_start = symbol_pool.getby_name_create(args.encrypt[0]).offset diff --git a/example/asm/simple.py b/example/asm/simple.py index 8664672d..1929961f 100644 --- a/example/asm/simple.py +++ b/example/asm/simple.py @@ -27,10 +27,10 @@ loop: symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) # Spread information and resolve instructions offset -resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool) +patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool) # Show resolved blocs -for bloc in resolved_b: +for bloc in blocs[0]: print bloc # Print offset -> bytes diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index cec32c06..ccb7202e 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -39,7 +39,7 @@ for b in blocs: print "symbols:" print symbol_pool -resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, blocs, symbol_pool) +patches = asmbloc.asm_resolve_final(mn_x86, blocs, symbol_pool) # Translate to IR ir_arch = ir_a_x86_32(symbol_pool) -- cgit 1.4.1 From f9c49e92dada2aa51ca594f435f962617796c116 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 27 Mar 2015 14:31:24 +0100 Subject: Clean: remove unecessary ast_parser modification --- example/asm/shellcode.py | 9 --------- example/expression/asm_to_ir.py | 7 ------- test/arch/x86/unit/asm_test.py | 12 ------------ 3 files changed, 28 deletions(-) (limited to 'example') diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 7bf76093..3b22e801 100644 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -58,15 +58,6 @@ else: virt = st output = st -# Fix the AST parser -def my_ast_int2expr(a): - return m2_expr.ExprInt_fromsize(size, a) - -def my_ast_id2expr(t): - return reg_and_id.get(t, m2_expr.ExprId(t, size=size)) - -my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) -base_expr.setParseAction(my_var_parser) # Get and parse the source code with open(args.source) as fstream: diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index ccb7202e..942e5e19 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -7,13 +7,6 @@ from miasm2.arch.x86.ira import ir_a_x86_32 from pdb import pm -def my_ast_int2expr(a): - return ExprInt32(a) - -my_var_parser = parse_ast(ast_id2expr, my_ast_int2expr) -base_expr.setParseAction(my_var_parser) - - # First, asm code blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' main: diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index b65ef876..c6381d9e 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -20,18 +20,6 @@ if filename and os.path.isfile(filename): reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) - -def my_ast_int2expr(a): - return ExprInt32(a) - - -def my_ast_id2expr(t): - return reg_and_id.get(t, ExprId(t, size=32)) - -my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) -base_expr.setParseAction(my_var_parser) - - class Asm_Test(object): def __init__(self): self.myjit = Machine("x86_32").jitter() -- cgit 1.4.1 From b148652ebd8e8b02aa9514cc3d21098c7002d694 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 27 Mar 2015 14:50:11 +0100 Subject: Parse_asm: generate asm_label on symbol parsing For an unknown symbol, instead of generating ExprId('toto'), it will generate ExprId(asm_label('toto')). As label is generated in the architecture, this label must be catched in the parse_asm module to be inserted in the current symbol_pool. --- example/expression/solve_condition_stp.py | 11 ++++++----- miasm2/arch/arm/arch.py | 3 ++- miasm2/arch/mips32/arch.py | 22 +++++++++++++++++++++- miasm2/arch/msp430/arch.py | 3 ++- miasm2/arch/sh4/arch.py | 12 +++++++++--- miasm2/arch/x86/arch.py | 14 +++++++++----- miasm2/core/parse_asm.py | 23 +++++++++++++++++++++++ 7 files changed, 72 insertions(+), 16 deletions(-) (limited to 'example') diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 659124d1..a25a7072 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -138,12 +138,13 @@ if __name__ == '__main__': def my_ast_int2expr(a): return ExprInt32(a) - def my_ast_id2expr(t): - if t in reg_and_id: - r = reg_and_id[t] + # Modifify parser to avoid label creation in PUSH argc + def my_ast_id2expr(string_parsed): + if string_parsed in reg_and_id: + return reg_and_id[string_parsed] else: - r = ExprId(t, size=32) - return r + return ExprId(string_parsed, size=32) + my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) base_expr.setParseAction(my_var_parser) diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 740fd1df..87af007a 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -9,6 +9,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.arm.regs as regs_module from miasm2.arch.arm.regs import * +from miasm2.core.asmbloc import asm_label # A1 encoding @@ -167,7 +168,7 @@ int_or_expr = base_expr def ast_id2expr(t): if not t in mn_arm.regs.all_regs_ids_byname: - r = ExprId(t) + r = ExprId(asm_label(t)) else: r = mn_arm.regs.all_regs_ids_byname[t] return r diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 50dc1c86..60b0f5d2 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -10,7 +10,7 @@ from miasm2.expression.expression import ExprMem, ExprInt, ExprInt32, ExprId from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu - +from miasm2.core.asmbloc import asm_label log = logging.getLogger("mips32dis") console_handler = logging.StreamHandler() @@ -48,6 +48,26 @@ deref_nooff = Group(LPARENTHESIS + gpregs.parser + \ deref = deref_off | deref_nooff +variable, operand, base_expr = cpu.gen_base_expr() + +int_or_expr = base_expr + + +def ast_id2expr(t): + if not t in mn_mips32.regs.all_regs_ids_byname: + r = ExprId(asm_label(t)) + else: + r = mn_mips32.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt32(a) + + +my_var_parser = cpu.parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + class additional_info: def __init__(self): self.except_on_instr = False diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 2f543843..6c622ce7 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -9,6 +9,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.msp430.regs as regs_module from miasm2.arch.msp430.regs import * +from miasm2.core.asmbloc import asm_label log = logging.getLogger("armdis") console_handler = logging.StreamHandler() @@ -73,7 +74,7 @@ PINC = Suppress("+") def ast_id2expr(t): if not t in mn_msp430.regs.all_regs_ids_byname: - r = ExprId(t, 16) + r = ExprId(asm_label(t), 16) else: r = mn_msp430.regs.all_regs_ids_byname[t] return r diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index a102a37b..7039016c 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -5,8 +5,9 @@ from pyparsing import * from miasm2.core.cpu import * from miasm2.expression.expression import * from collections import defaultdict +import miasm2.arch.sh4.regs as regs_module from miasm2.arch.sh4.regs import * - +from miasm2.core.asmbloc import asm_label jra = ExprId('jra') jrb = ExprId('jrb') @@ -35,8 +36,12 @@ def parse_pcandimmimm(t): t = t[0] return (t[0] & t[1]) + t[2] -def ast_id2expr(a): - return ExprId(a, 32) +def ast_id2expr(t): + if not t in mn_sh4.regs.all_regs_ids_byname: + r = ExprId(asm_label(t)) + else: + r = mn_sh4.regs.all_regs_ids_byname[t] + return r def ast_int2expr(a): return ExprInt32(a) @@ -465,6 +470,7 @@ class instruction_sh4(instruction): class mn_sh4(cls_mn): bintree = {} + regs = regs_module num = 0 all_mn = [] all_mn_mode = defaultdict(list) diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 238567ac..ef6a6fb9 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -9,6 +9,7 @@ from collections import defaultdict import miasm2.arch.x86.regs as regs_module from miasm2.arch.x86.regs import * from miasm2.ir.ir import * +from miasm2.core.asmbloc import asm_label log = logging.getLogger("x86_arch") console_handler = logging.StreamHandler() @@ -225,7 +226,7 @@ variable, operand, base_expr = gen_base_expr() def ast_id2expr(t): if not t in mn_x86.regs.all_regs_ids_byname: - r = ExprId(t) + r = ExprId(asm_label(t)) else: r = mn_x86.regs.all_regs_ids_byname[t] return r @@ -486,10 +487,13 @@ class instruction_x86(instruction): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return e = self.args[0] - if isinstance(e, ExprId) and not e.name in all_regs_ids_byname: - l = symbol_pool.getby_name_create(e.name) - s = ExprId(l, e.size) - self.args[0] = s + if isinstance(e, ExprId): + if isinstance(e.name, asm_label): + pass + elif not e.name in all_regs_ids_byname: + l = symbol_pool.getby_name_create(e.name) + s = ExprId(l, e.size) + self.args[0] = s elif isinstance(e, ExprInt): ad = e.arg + int(self.offset) + self.l l = symbol_pool.getby_offset_create(ad) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index b42bdbcc..6bec9651 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -30,6 +30,25 @@ def guess_next_new_label(symbol_pool, gen_label_index=0): return symbol_pool.add_label(name) i += 1 +def replace_expr_labels(e, symbol_pool, replace_id): + if not isinstance(e, m2_expr.ExprId): + return e + if not isinstance(e.name, asmbloc.asm_label): + return e + old_lbl = e.name + new_lbl = symbol_pool.getby_name_create(old_lbl.name) + replace_id[e] = m2_expr.ExprId(new_lbl, e.size) + return m2_expr.ExprId(new_lbl, e.size) + +def replace_orphan_labels(instr, symbol_pool): + for i, arg in enumerate(instr.args): + replace_id = {} + arg.visit(lambda e:replace_expr_labels(e, + symbol_pool, + replace_id)) + instr.args[i] = instr.args[i].replace_expr(replace_id) + + def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): if symbol_pool is None: @@ -147,6 +166,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): line = line[:line.find(';')] line = line.strip(' ').strip('\t') instr = mnemo.fromstring(line, attrib) + + # replace orphan asm_label with labels from symbol_pool + replace_orphan_labels(instr, symbol_pool) + if instr.dstflow(): instr.dstflow2label(symbol_pool) lines.append(instr) -- cgit 1.4.1 From 89680eb28d75e8313ab59dbd8c31930596138bad Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 27 Mar 2015 16:12:19 +0100 Subject: Asmbloc: updt api --- example/asm/shellcode.py | 10 ++++++++-- example/samples/armt.S | 2 +- miasm2/arch/arm/arch.py | 12 ++++++++++-- miasm2/arch/msp430/arch.py | 3 +-- miasm2/arch/x86/arch.py | 8 ++------ miasm2/core/cpu.py | 10 ++++++---- 6 files changed, 28 insertions(+), 17 deletions(-) (limited to 'example') diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 3b22e801..3f3aa877 100644 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -9,6 +9,7 @@ from miasm2.core.cpu import parse_ast from miasm2.core import parse_asm, asmbloc import miasm2.expression.expression as m2_expr from miasm2.analysis.machine import Machine +from miasm2.core.interval import interval parser = ArgumentParser("Multi-arch (32 bits) assembler") parser.add_argument('architecture', help="architecture: " + \ @@ -34,6 +35,7 @@ except ValueError: size = 32 reg_and_id = dict(machine.mn.regs.all_regs_ids_byname) base_expr = machine.base_expr +dst_interval = None # Output format if args.PE: @@ -50,7 +52,8 @@ if args.PE: addr_main = pe.rva2virt(s_text.addr) virt = pe.virt output = pe - + dst_interval = interval([(pe.rva2virt(s_text.addr), + pe.rva2virt(s_text.addr + s_text.size))]) else: st = StrPatchwork() @@ -79,7 +82,10 @@ graph = asmbloc.bloc2graph(blocs[0]) open("graph.txt", "w").write(graph) # Apply patches -patches = asmbloc.asm_resolve_final(machine.mn, blocs[0], symbol_pool) +patches = asmbloc.asm_resolve_final(machine.mn, + blocs[0], + symbol_pool, + dst_interval) if args.encrypt: # Encrypt code ad_start = symbol_pool.getby_name_create(args.encrypt[0]).offset diff --git a/example/samples/armt.S b/example/samples/armt.S index c50075a6..c833c961 100644 --- a/example/samples/armt.S +++ b/example/samples/armt.S @@ -15,7 +15,7 @@ main: PUSH {LR} SUB SP, 0x100 MOV R0, SP - ADD R1, PC, mystr-$+6 + ADD R1, PC, mystr-$ MOV R0, R0 EORS R2, R2 ADDS R2, R2, 0x4 diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 87af007a..4ecfbd97 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -485,6 +485,10 @@ class instruction_armt(instruction_arm): raise ValueError('strange offset! %r' % off) self.args[0] = ExprInt32(off) + def get_asm_offset(self, x): + # ADR XXX, PC, imm => PC is 4 aligned + imm + new_offset = ((self.offset+self.l)/4)*4 + return ExprInt_from(x, new_offset) class mn_arm(cls_mn): @@ -501,6 +505,7 @@ class mn_arm(cls_mn): sp = {'l':SP, 'b':SP} instruction = instruction_arm max_instruction_len = 4 + alignment = 4 @classmethod def getpc(cls, attrib = None): @@ -599,7 +604,8 @@ class mn_armt(cls_mn): pc = PC sp = SP instruction = instruction_armt - max_instruction_len = 8 + max_instruction_len = 4 + alignment = 4 @classmethod def getpc(cls, attrib = None): @@ -784,7 +790,9 @@ class arm_offs(arm_imm): return v << 2 def encodeval(self, v): - return v >> 2 + if v%4 == 0: + return v >> 2 + return False def decode(self, v): v = v & self.lmask diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 6c622ce7..07a11ae8 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -188,8 +188,7 @@ class instruction_msp430(instruction): # raise ValueError('dst must be int or label') log.warning('dynamic dst %r', e) return - # return ExprInt32(e.arg - (self.offset + self.l)) - self.args[0] = ExprInt_fromsize(16, e.arg - (self.offset + self.l)) + self.args[0] = ExprInt_fromsize(16, (e.arg - (self.offset + self.l))/2) def get_info(self, c): pass diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index ef6a6fb9..3b714f79 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -488,12 +488,8 @@ class instruction_x86(instruction): return e = self.args[0] if isinstance(e, ExprId): - if isinstance(e.name, asm_label): - pass - elif not e.name in all_regs_ids_byname: - l = symbol_pool.getby_name_create(e.name) - s = ExprId(l, e.size) - self.args[0] = s + if not isinstance(e.name, asm_label) and e not in all_regs_ids: + raise ValueError("ExprId must be a label or a register") elif isinstance(e, ExprInt): ad = e.arg + int(self.offset) + self.l l = symbol_pool.getby_offset_create(ad) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index bde95200..faba895a 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -947,14 +947,14 @@ class instruction(object): for x in ids: if isinstance(x.name, asmbloc.asm_label): name = x.name.name + # special symbol $ + if name == '$': + fixed_ids[x] = self.get_asm_offset(x) + continue if not name in symbols: raise ValueError('unresolved symbol! %r' % x) else: name = x.name - # special symbol - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue if not name in symbols: continue if symbols[name].offset is None: @@ -981,6 +981,8 @@ class cls_mn(object): __metaclass__ = metamn args_symb = [] instruction = instruction + # Block's offset alignement + alignment = 1 @classmethod def guess_mnemo(cls, bs, attrib, pre_dis_info, offset): -- cgit 1.4.1 From 53d82c13f7da6851196e69c67841af24bcf218b2 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Wed, 1 Apr 2015 15:58:29 +0200 Subject: Cpu: modify instructions' offset relative encoding The assembler will automatically use instruction len in offset computation In the following instruction: 0x10: EB 02 JMP 0x14 If we assemble this instruction, the requested instruction send to the assembler engine will be: JMP +0x4 And will be encoded to: EB 02 Previously, the assembly of: JMP +0x4 was: EB 04 --- example/samples/msp430.S | 5 ++++ miasm2/arch/arm/arch.py | 61 ++++++++++++++++++--------------------- miasm2/arch/mips32/arch.py | 8 ++++-- miasm2/arch/msp430/arch.py | 21 ++++++++++---- miasm2/arch/msp430/ira.py | 2 +- miasm2/arch/x86/arch.py | 72 ++++++++++++++++++++++++++++++++++++---------- miasm2/core/cpu.py | 15 ++++++---- test/arch/arm/arch.py | 18 ++++++------ test/arch/mips32/arch.py | 20 ++++++------- test/arch/msp430/arch.py | 4 +-- test/arch/x86/arch.py | 31 +++++++++++--------- 11 files changed, 159 insertions(+), 98 deletions(-) (limited to 'example') diff --git a/example/samples/msp430.S b/example/samples/msp430.S index 77f4b448..5f4beb91 100644 --- a/example/samples/msp430.S +++ b/example/samples/msp430.S @@ -1,8 +1,13 @@ main: mov.w 0x10, R10 mov.w 0x0, R11 + call func loop: add.w 1, R11 sub.w 1, R10 jnz loop mov.w @SP+, PC + +func: + add.w 1, R9 + mov.w @SP+, PC diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 539b4778..a70718d9 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -363,9 +363,9 @@ class instruction_arm(instruction): if not isinstance(e, ExprInt): return if self.name == 'BLX': - ad = e.arg + 8 + self.offset + ad = e.arg + self.offset else: - ad = e.arg + 8 + self.offset + ad = e.arg + self.offset l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) self.args[0] = s @@ -406,8 +406,7 @@ class instruction_arm(instruction): if not isinstance(e, ExprInt): log.debug('dyn dst %r', e) return - # Can't find the +4 reason in doc - off = e.arg - (self.offset + 4 + self.l) + off = e.arg - self.offset if int(off % 4): raise ValueError('strange offset! %r' % off) self.args[0] = ExprInt32(off) @@ -438,9 +437,9 @@ class instruction_armt(instruction_arm): if not isinstance(e, ExprInt): return if self.name == 'BLX': - ad = e.arg + 4 + (self.offset & 0xfffffffc) + ad = e.arg + (self.offset & 0xfffffffc) else: - ad = e.arg + 4 + self.offset + ad = e.arg + self.offset l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) if self.name in ["CBZ", "CBNZ"]: @@ -480,7 +479,7 @@ class instruction_armt(instruction_arm): # The first +2 is to compensate instruction len, but strangely, 32 bits # thumb2 instructions len is 2... For the second +2, didn't find it in # the doc. - off = e.arg - (self.offset + 2 + 2) + off = e.arg - self.offset if int(off % 2): raise ValueError('strange offset! %r' % off) self.args[0] = ExprInt32(off) @@ -787,12 +786,17 @@ class arm_offs(arm_imm): return ExprInt_fromsize(self.intsize, v) def decodeval(self, v): - return v << 2 + v <<= 2 + # Add pipeline offset + v += 8 + return v def encodeval(self, v): - if v%4 == 0: - return v >> 2 - return False + if v%4 != 0: + return False + # Remove pipeline offset + v -= 8 + return v >> 2 def decode(self, v): v = v & self.lmask @@ -1202,13 +1206,16 @@ class arm_offs_blx(arm_imm): v = v & self.lmask v = (v << 2) + (self.parent.lowb.value << 1) v = sign_ext(v, 26, 32) + # Add pipeline offset + v += 8 self.expr = ExprInt32(v) return True def encode(self): if not isinstance(self.expr, ExprInt): return False - v = self.expr.arg.arg + # Remove pipeline offset + v = int(self.expr.arg - 8) if v & 0x80000000: v &= (1 << 26) - 1 self.parent.lowb.value = (v >> 1) & 1 @@ -1635,28 +1642,17 @@ class arm_offsp(arm_offpc): class arm_offspc(arm_offs): def decodeval(self, v): - return v << 1 - - def encodeval(self, v): - return v >> 1 - - -class arm_offspchl(arm_offs): - - def decodeval(self, v): - if self.parent.hl.value == 0: - return v << 12 - else: - return v << 1 + v = v << 1 + # Add pipeline offset + v += 2 + 2 + return v def encodeval(self, v): - if v > (1 << 12): - self.parent.hl.value = 0 - v >>= 12 - else: - self.parent.hl.value = 1 - v >>= 1 - return v + # Remove pipeline offset + v -= 2 + 2 + if v % 2 == 0: + return v >> 1 + return False class arm_off8sppc(arm_imm): @@ -1907,7 +1903,6 @@ rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') offs8 = bs(l=8, cls=(arm_offspc,), fname="offs") offs11 = bs(l=11, cls=(arm_offspc,), fname="offs") -offs11hl = bs(l=11, cls=(arm_offspchl,), fname="offs") hl = bs(l=1, prio=default_prio + 1, fname='hl') off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off") diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 60b0f5d2..12f4ff8e 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -179,7 +179,7 @@ class instruction_mips32(cpu.instruction): raise ValueError('symbol not resolved %s' % self.l) if not isinstance(e, ExprInt): return - off = e.arg - (self.offset + self.l) + off = e.arg - self.offset print "diff", e, hex(self.offset) print hex(off) if int(off % 4): @@ -327,13 +327,15 @@ class mips32_soff_noarg(mips32_imm): v = v & self.lmask v <<= 2 v = cpu.sign_ext(v, 16+2, 32) - self.expr = ExprInt32(v) + # Add pipeline offset + self.expr = ExprInt32(v + 4) return True def encode(self): if not isinstance(self.expr, ExprInt): return False - v = self.expr.arg.arg + # Remove pipeline offset + v = int(self.expr.arg - 4) if v & 0x80000000: nv = v & ((1 << 16+2) - 1) assert( v == cpu.sign_ext(nv, 16+2, 32)) diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 07a11ae8..2cac7260 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -144,7 +144,7 @@ class instruction_msp430(instruction): if self.name == "call": ad = e.arg else: - ad = e.arg + int(self.offset) + self.l + ad = e.arg + int(self.offset) l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) @@ -188,7 +188,11 @@ class instruction_msp430(instruction): # raise ValueError('dst must be int or label') log.warning('dynamic dst %r', e) return - self.args[0] = ExprInt_fromsize(16, (e.arg - (self.offset + self.l))/2) + + # Call argument is an absolute offset + # Other offsets are relative to instruction offset + if self.name != "call": + self.args[0] = ExprInt_fromsize(16, e.arg - self.offset) def get_info(self, c): pass @@ -522,9 +526,16 @@ class msp430_offs(imm_noarg, m_arg): return ExprInt_fromsize(16, v) def decodeval(self, v): - return v << 1 + v <<= 1 + v += self.parent.l + return v def encodeval(self, v): + plen = self.parent.l + self.l + assert(plen % 8 == 0) + v -= plen / 8 + if v % 2 != 0: + return False return v >> 1 def decode(self, v): @@ -574,8 +585,8 @@ bs_f2_nobw = bs_name(l=3, name={'swpb': 1, 'sxt': 3, 'call': 5}) addop("f2_2", [bs('000100'), bs_f2_nobw, bs('0'), a_s, sreg, off_s]) - -offimm = bs(l=10, cls=(msp430_offs,), fname="offs") +# Offset must be decoded in last position to have final instruction len +offimm = bs(l=10, cls=(msp430_offs,), fname="offs", order=-1) bs_f2_jcc = bs_name(l=3, name={'jnz': 0, 'jz': 1, 'jnc': 2, 'jc': 3, 'jn': 4, 'jge': 5, 'jl': 6, 'jmp': 7}) diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py index ea8bdc2c..26a53a1e 100644 --- a/miasm2/arch/msp430/ira.py +++ b/miasm2/arch/msp430/ira.py @@ -65,7 +65,7 @@ class ir_a_msp430(ir_a_msp430_base): lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(pc_val) - irs.append([ExprAff(IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) nbloc = irbloc(new_lbl, irs) nbloc.lines = [l] self.blocs[new_lbl] = nbloc diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 3b714f79..f168d9cb 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -491,7 +491,7 @@ class instruction_x86(instruction): if not isinstance(e.name, asm_label) and e not in all_regs_ids: raise ValueError("ExprId must be a label or a register") elif isinstance(e, ExprInt): - ad = e.arg + int(self.offset) + self.l + ad = e.arg + int(self.offset) l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) self.args[0] = s @@ -558,7 +558,7 @@ class instruction_x86(instruction): return # return ExprInt32(e.arg - (self.offset + self.l)) self.args[0] = ExprInt_fromsize( - self.mode, e.arg - (self.offset + self.l)) + self.mode, e.arg - self.offset) def get_info(self, c): self.additional_info.g1.value = c.g1.value @@ -846,9 +846,9 @@ class mn_x86(cls_mn): def post_asm(self, v): return v - def encodefields(self, decoded): - v = super(mn_x86, self).encodefields(decoded) + def gen_prefix(self): + v = "" rex = 0x40 if self.g1.value is None: self.g1.value = 0 @@ -890,9 +890,15 @@ class mn_x86(cls_mn): if hasattr(self, 'no_xmm_pref'): return None v = "\x66" + v - return v + def encodefields(self, decoded): + v = super(mn_x86, self).encodefields(decoded) + prefix = self.gen_prefix() + if prefix is None: + return None + return prefix + v + def getnextflow(self, symbol_pool): raise NotImplementedError('not fully functional') @@ -2834,19 +2840,54 @@ class bs_rel_off(bs_cond_imm): # m_arg): # else: # self.l = 32 l = offsize(self.parent) + prefix = self.parent.gen_prefix() + parent_len = len(prefix) * 8 + self.parent.l + self.l + assert(parent_len % 8 == 0) - # l = self.parent.v_opmode()#self.parent.args[0].expr.size - # print 'imm enc', l, self.parent.rex_w.value + v = int(self.expr.arg - parent_len/8) + if prefix is None: + raise StopIteration + mask = ((1 << self.l) - 1) + if self.l > l: + raise StopIteration + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + yield True + + def decode(self, v): + v = swap_uint(self.l, v) + size = offsize(self.parent) + v = sign_ext(v, self.l, size) + v += self.parent.l + v = ExprInt_fromsize(size, v) + self.expr = v + return True + +class bs_s08(bs_rel_off): + parser = int_or_expr + + @classmethod + def flen(cls, mode, v): + return 8 + + def encode(self): + if not isinstance(self.expr, ExprInt): + raise StopIteration + arg0_expr = self.parent.args[0].expr + if self.l != 0: + l = self.l + else: + l = self.parent.v_opmode() + self.l = l + l = offsize(self.parent) v = int(self.expr.arg) mask = ((1 << self.l) - 1) - # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1< l: raise StopIteration if v != sign_ext(v & mask, self.l, l): raise StopIteration self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - # print hex(self.value) yield True def decode(self, v): @@ -2855,7 +2896,6 @@ class bs_rel_off(bs_cond_imm): # m_arg): v = sign_ext(v, self.l, size) v = ExprInt_fromsize(size, v) self.expr = v - # print self.expr, repr(self.expr) return True @@ -3002,7 +3042,6 @@ class bs_msegoff(m_arg): except StopIteration: return None, None e = v[0] - print "XXX", e if e is None: log.debug('cannot fromstring int %r', s) return None, None @@ -3075,6 +3114,7 @@ sib_base = bs(l=3, cls=(bs_cond_index,), fname = "sib_base") disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") +s08 = bs(l=8, cls=(bs_s08, )) u08 = bs(l=8, cls=(x86_08, m_arg)) u07 = bs(l=7, cls=(x86_08, m_arg)) @@ -3130,8 +3170,10 @@ d_ss = bs(l=0, cls=(bs_ss, ), fname='ss') d_fs = bs(l=0, cls=(bs_fs, ), fname='fs') d_gs = bs(l=0, cls=(bs_gs, ), fname='gs') -rel_off = bs(l=0, cls=(bs_rel_off,), fname="off") -rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off") +# Offset must be decoded in last position to have final instruction len +rel_off = bs(l=0, cls=(bs_rel_off,), fname="off", order=-1) +# Offset must be decoded in last position to have final instruction len +rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off", order=-1) moff = bs(l=0, cls=(bs_moff,), fname="off") msegoff = bs(l=16, cls=(bs_msegoff,), fname="mseg") movoff = bs(l=0, cls=(bs_movoff,), fname="off") @@ -3737,7 +3779,7 @@ addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08)) addop("push", [bs8(0xff), stk] + rmmod(d6)) addop("push", [bs("01010"), stk, reg]) -addop("push", [bs8(0x6a), rel_off08, stk]) +addop("push", [bs8(0x6a), s08, stk]) addop("push", [bs8(0x68), d_imm, stk]) addop("push", [bs8(0x0e), stk, d_cs]) addop("push", [bs8(0x16), stk, d_ss]) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index ed124462..efb511ce 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -1151,6 +1151,7 @@ class cls_mn(object): if not getok: continue + c.l = prefix_len + total_l / 8 for i in c.to_decode: f = c.fields_order[i] if f.is_present: @@ -1164,7 +1165,6 @@ class cls_mn(object): for a in c.args: a.expr = expr_simp(a.expr) - c.l = prefix_len + total_l / 8 c.b = cls.getbytes(bs, offset_o, c.l) c.offset = offset_o c = c.post_dis() @@ -1335,14 +1335,14 @@ class cls_mn(object): return o def value(self, mode): - todo = [(0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] + todo = [(0, 0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] result = [] done = [] cpt = 0 while todo: - index, to_decode = todo.pop() + index, cur_len, to_decode = todo.pop() # TEST XXX for i, f in to_decode: setattr(self, f.fname, f) @@ -1353,11 +1353,14 @@ class cls_mn(object): cpt += 1 can_encode = True for i, f in to_decode[index:]: + f.parent.l = cur_len ret = f.encode() if not ret: log.debug('cannot encode %r', f) can_encode = False break + if f.value is not None and f.l: + cur_len += f.l index += 1 if ret is True: continue @@ -1366,14 +1369,14 @@ class cls_mn(object): for i in ret: gcpt += 1 o = [] - if ((index, [xx[1].value for xx in to_decode]) in todo or - (index, [xx[1].value for xx in to_decode]) in done): + if ((index, cur_len, [xx[1].value for xx in to_decode]) in todo or + (index, cur_len, [xx[1].value for xx in to_decode]) in done): raise NotImplementedError('not fully functional') for p, f in to_decode: fnew = f.clone() o.append((p, fnew)) - todo.append((index, o)) + todo.append((index, cur_len, o)) can_encode = False break diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index 19b1236e..701c45af 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -66,13 +66,13 @@ reg_tests_arm = [ "002094e0"), ("0003EA9C MVN R7, R2", "0270e0e1"), - ("C00CD4DC BL 0x7C", + ("C00CD4DC BL 0x84", "1F0000EB"), - ("C00CF110 BL 0xFFFFFDEC", + ("C00CF110 BL 0xFFFFFDF4", "7BFFFFEB"), - ("000829b0 BLNE 0xFFF87110", + ("000829b0 BLNE 0xFFF87118", "441cfe1b"), ("C00EC608 TEQ R4, R5", @@ -205,7 +205,7 @@ reg_tests_arm = [ ("C00CFA40 BLX R12", "3CFF2FE1"), - ("C010DE1C BLX 0x1ECCEA", + ("C010DE1C BLX 0x1ECCF2", "3AB307FB"), ("00013028 MOV R9, 0x6E75", @@ -217,7 +217,7 @@ reg_tests_arm = [ ("0004A38C CLZ R3, R2", "123F6FE1"), - ("C0132564 BLX 0xFFFCF06C", + ("C0132564 BLX 0xFFFCF074", "1B3CFFFA"), ("C0297028 QADD R7, R6, R6", @@ -414,15 +414,15 @@ reg_tests_armt = [ ("000a1c16 STMIA R6!, {R0-R3}", "0fc6"), - ("0006af78 BEQ 0x6", + ("0006af78 BEQ 0xA", "03d0"), - ("000747b4 BCC 0xFFFFFFE6", + ("000747b4 BCC 0xFFFFFFEA", "f3d3"), # swi - ("0007479c B 0xE", + ("0007479c B 0x12", "07e0"), - ("0006b946 B 0xFFFFFFE4", + ("0006b946 B 0xFFFFFFE8", "f2e7"), ("C010163C BLX 0x1F916C", "F9F1B6E8"), diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index 899748ce..0fb12e1b 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -46,9 +46,9 @@ reg_tests_mips32 = [ "44097000"), ("00400324 MOV.D F0, F12", "46206006"), - ("00400334 BNE A0, ZERO, 0x28", + ("00400334 BNE A0, ZERO, 0x2C", "1480000A"), - ("00400360 B 0x338", + ("00400360 B 0x33C", "100000CE"), ("00400378 LW T9, 0xFFFF9C90(GP)", "8F999C90"), @@ -58,11 +58,11 @@ reg_tests_mips32 = [ "30420002"), ("00400364 ADD.D F0, F0, F14", "462E0000"), - ("004003A4 BEQ S0, V0, 0x120", + ("004003A4 BEQ S0, V0, 0x124", "12020048"), ("004003A8 SLTI V0, S0, 0x3", "2A020003"), - ("004005A4 BGEZ T3, 0x20", + ("004005A4 BGEZ T3, 0x24", "05610008"), ("00400428 LWC1 F0, 0x4344(V0)", "C4404344"), @@ -80,13 +80,13 @@ reg_tests_mips32 = [ "92228880"), ("004001C4 SB V0, 0xFFFF8880(S1)", "A2228880"), - ("00400274 BAL 0x4", + ("00400274 BAL 0x8", "04110001"), ("0040073C C.LT.D FCC0, F0, F12", "462C003C"), - ("00400744 BC1F FCC0, 0x20", + ("00400744 BC1F FCC0, 0x24", "45000008"), - ("00403A80 BC1T FCC0, 0xB4", + ("00403A80 BC1T FCC0, 0xB8", "4501002D"), ("00400764 MUL.D F12, F0, F0", "46200302"), @@ -104,11 +104,11 @@ reg_tests_mips32 = [ "00431004"), ("00400F60 SRAV V1, S3, V0", "00531807"), - ("00401040 BLTZ S6, 0x58", + ("00401040 BLTZ S6, 0x5C", "06C00016"), - ("00400D18 BLEZ V1, 0x7C", + ("00400D18 BLEZ V1, 0x80", "1860001F"), - ("00401200 BGTZ S4, 0x10", + ("00401200 BGTZ S4, 0x14", "1E800004"), ("004014A4 CVT.D.W F8, F0", "46800221"), diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py index f3e82955..613af385 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -72,9 +72,9 @@ reg_tests_msp = [ ("443a call 0x4B66", "b012664b"), - ("4442 jmp 0xFFFA", + ("4442 jmp 0xFFFC", "fd3f"), - ("4422 jnz 0xFFF2", + ("4422 jnz 0xFFF4", "f923"), ("xxxx mov.b @R13+, 0x0(R14)", diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 884d545b..9d4e464d 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -232,10 +232,10 @@ reg_tests = [ "0fba2842"), - (m32, "00000000 CALL 0x112233", - "e833221100"), - (m64, "00000000 CALL 0x112233", - "e833221100"), + (m32, "00000000 CALL 0x112235", + "e830221100"), + (m64, "00000000 CALL 0x112235", + "e830221100"), (m32, "00000000 CALL DWORD PTR [EAX]", "ff10"), (m64, "00000000 CALL QWORD PTR [RAX]", @@ -761,11 +761,11 @@ reg_tests = [ "48CF"), (m32, "00000000 JA 0x12", - "7712"), + "7710"), (m32, "00000000 JA 0xFFFFFFEE", - "77EE"), + "77EC"), (m64, "00000000 JA 0xFFFFFFFFFFFFFFEE", - "77EE"), + "77EC"), #(m32, "00000000 JA 0xFFEE", # "6677EE"), @@ -774,19 +774,19 @@ reg_tests = [ (m16, "00000000 JCXZ 0xFFEE", - "E3EE"), + "E3EC"), (m16, "00000000 JECXZ 0xFFEE", - "67E3EE"), + "67E3EB"), (m32, "00000000 JECXZ 0xFFFFFFEE", - "E3EE"), + "E3EC"), (m32, "00000000 JCXZ 0xFFFFFFEE", - "67E3EE"), + "67E3EB"), (m32, "00000000 JCXZ 0xFFEE", - "6667E3EE"), + "6667E3EA"), (m64, "00000000 JRCXZ 0xFFFFFFFFFFFFFFEE", - "E3EE"), + "E3EC"), (m64, "00000000 JECXZ 0xFFFFFFFFFFFFFFEE", - "67E3EE"), + "67E3EB"), (m32, "00000000 MOV BYTE PTR [EAX], AL", @@ -1161,6 +1161,9 @@ reg_tests = [ (m64, "00000000 PUSH 0x11223344", "6844332211"), + (m32, "00000000 PUSH 0xFFFFFF80", + "6a80"), + (m32, "00000000 PUSH CS", "0e"), (m32, "00000000 PUSH SS", -- cgit 1.4.1