diff options
| author | Camille Mougey <commial@gmail.com> | 2015-04-02 16:10:11 +0200 |
|---|---|---|
| committer | Camille Mougey <commial@gmail.com> | 2015-04-02 16:10:11 +0200 |
| commit | 55c00f729101259e2706a171a5bff4106bb7efdd (patch) | |
| tree | 6475c9e483c7d2115d26fc0c81a724c5ade93494 | |
| parent | c16ed5171a455535d2e4ec9eaccd50b5c3d1b440 (diff) | |
| parent | 945f985aba4d957241899e56c26211a88977eca0 (diff) | |
| download | miasm-55c00f729101259e2706a171a5bff4106bb7efdd.tar.gz miasm-55c00f729101259e2706a171a5bff4106bb7efdd.zip | |
Merge pull request #137 from serpilliere/clean_group_bloc
Clean asmbloc
| -rw-r--r-- | example/asm/shellcode.py | 21 | ||||
| -rw-r--r-- | example/asm/simple.py | 4 | ||||
| -rw-r--r-- | example/expression/asm_to_ir.py | 9 | ||||
| -rw-r--r-- | example/expression/solve_condition_stp.py | 11 | ||||
| -rw-r--r-- | example/samples/armt.S | 2 | ||||
| -rw-r--r-- | example/samples/msp430.S | 5 | ||||
| -rw-r--r-- | miasm2/arch/arm/arch.py | 72 | ||||
| -rw-r--r-- | miasm2/arch/mips32/arch.py | 30 | ||||
| -rw-r--r-- | miasm2/arch/msp430/arch.py | 25 | ||||
| -rw-r--r-- | miasm2/arch/msp430/ira.py | 2 | ||||
| -rw-r--r-- | miasm2/arch/sh4/arch.py | 12 | ||||
| -rw-r--r-- | miasm2/arch/x86/arch.py | 82 | ||||
| -rw-r--r-- | miasm2/arch/x86/regs.py | 7 | ||||
| -rw-r--r-- | miasm2/core/asmbloc.py | 787 | ||||
| -rw-r--r-- | miasm2/core/cpu.py | 29 | ||||
| -rw-r--r-- | miasm2/core/interval.py | 26 | ||||
| -rw-r--r-- | miasm2/core/parse_asm.py | 45 | ||||
| -rw-r--r-- | test/arch/arm/arch.py | 18 | ||||
| -rw-r--r-- | test/arch/mips32/arch.py | 20 | ||||
| -rw-r--r-- | test/arch/msp430/arch.py | 4 | ||||
| -rw-r--r-- | test/arch/x86/arch.py | 31 | ||||
| -rw-r--r-- | test/arch/x86/sem.py | 5 | ||||
| -rw-r--r-- | test/arch/x86/unit/asm_test.py | 15 | ||||
| -rw-r--r-- | test/core/interval.py | 1 |
24 files changed, 660 insertions, 603 deletions
diff --git a/example/asm/shellcode.py b/example/asm/shellcode.py index 89914b6d..3f3aa877 100644 --- a/example/asm/shellcode.py +++ b/example/asm/shellcode.py @@ -9,6 +9,7 @@ from miasm2.core.cpu import parse_ast from miasm2.core import parse_asm, asmbloc import miasm2.expression.expression as m2_expr from miasm2.analysis.machine import Machine +from miasm2.core.interval import interval parser = ArgumentParser("Multi-arch (32 bits) assembler") parser.add_argument('architecture', help="architecture: " + \ @@ -34,6 +35,7 @@ except ValueError: size = 32 reg_and_id = dict(machine.mn.regs.all_regs_ids_byname) base_expr = machine.base_expr +dst_interval = None # Output format if args.PE: @@ -50,7 +52,8 @@ if args.PE: addr_main = pe.rva2virt(s_text.addr) virt = pe.virt output = pe - + dst_interval = interval([(pe.rva2virt(s_text.addr), + pe.rva2virt(s_text.addr + s_text.size))]) else: st = StrPatchwork() @@ -58,15 +61,6 @@ else: virt = st output = st -# Fix the AST parser -def my_ast_int2expr(a): - return m2_expr.ExprInt_fromsize(size, a) - -def my_ast_id2expr(t): - return reg_and_id.get(t, m2_expr.ExprId(t, size=size)) - -my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) -base_expr.setParseAction(my_var_parser) # Get and parse the source code with open(args.source) as fstream: @@ -88,9 +82,10 @@ graph = asmbloc.bloc2graph(blocs[0]) open("graph.txt", "w").write(graph) # Apply patches -resolved_b, patches = asmbloc.asm_resolve_final(machine.mn, - blocs[0], - symbol_pool) +patches = asmbloc.asm_resolve_final(machine.mn, + blocs[0], + symbol_pool, + dst_interval) if args.encrypt: # Encrypt code ad_start = symbol_pool.getby_name_create(args.encrypt[0]).offset diff --git a/example/asm/simple.py b/example/asm/simple.py index 8664672d..1929961f 100644 --- a/example/asm/simple.py +++ b/example/asm/simple.py @@ -27,10 +27,10 @@ loop: symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) # Spread information and resolve instructions offset -resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool) +patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool) # Show resolved blocs -for bloc in resolved_b: +for bloc in blocs[0]: print bloc # Print offset -> bytes diff --git a/example/expression/asm_to_ir.py b/example/expression/asm_to_ir.py index cec32c06..942e5e19 100644 --- a/example/expression/asm_to_ir.py +++ b/example/expression/asm_to_ir.py @@ -7,13 +7,6 @@ from miasm2.arch.x86.ira import ir_a_x86_32 from pdb import pm -def my_ast_int2expr(a): - return ExprInt32(a) - -my_var_parser = parse_ast(ast_id2expr, my_ast_int2expr) -base_expr.setParseAction(my_var_parser) - - # First, asm code blocs, symbol_pool = parse_asm.parse_txt(mn_x86, 32, ''' main: @@ -39,7 +32,7 @@ for b in blocs: print "symbols:" print symbol_pool -resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, blocs, symbol_pool) +patches = asmbloc.asm_resolve_final(mn_x86, blocs, symbol_pool) # Translate to IR ir_arch = ir_a_x86_32(symbol_pool) diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 659124d1..a25a7072 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -138,12 +138,13 @@ if __name__ == '__main__': def my_ast_int2expr(a): return ExprInt32(a) - def my_ast_id2expr(t): - if t in reg_and_id: - r = reg_and_id[t] + # Modifify parser to avoid label creation in PUSH argc + def my_ast_id2expr(string_parsed): + if string_parsed in reg_and_id: + return reg_and_id[string_parsed] else: - r = ExprId(t, size=32) - return r + return ExprId(string_parsed, size=32) + my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) base_expr.setParseAction(my_var_parser) diff --git a/example/samples/armt.S b/example/samples/armt.S index c50075a6..c833c961 100644 --- a/example/samples/armt.S +++ b/example/samples/armt.S @@ -15,7 +15,7 @@ main: PUSH {LR} SUB SP, 0x100 MOV R0, SP - ADD R1, PC, mystr-$+6 + ADD R1, PC, mystr-$ MOV R0, R0 EORS R2, R2 ADDS R2, R2, 0x4 diff --git a/example/samples/msp430.S b/example/samples/msp430.S index 77f4b448..5f4beb91 100644 --- a/example/samples/msp430.S +++ b/example/samples/msp430.S @@ -1,8 +1,13 @@ main: mov.w 0x10, R10 mov.w 0x0, R11 + call func loop: add.w 1, R11 sub.w 1, R10 jnz loop mov.w @SP+, PC + +func: + add.w 1, R9 + mov.w @SP+, PC diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 740fd1df..a70718d9 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -9,6 +9,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.arm.regs as regs_module from miasm2.arch.arm.regs import * +from miasm2.core.asmbloc import asm_label # A1 encoding @@ -167,7 +168,7 @@ int_or_expr = base_expr def ast_id2expr(t): if not t in mn_arm.regs.all_regs_ids_byname: - r = ExprId(t) + r = ExprId(asm_label(t)) else: r = mn_arm.regs.all_regs_ids_byname[t] return r @@ -362,9 +363,9 @@ class instruction_arm(instruction): if not isinstance(e, ExprInt): return if self.name == 'BLX': - ad = e.arg + 8 + self.offset + ad = e.arg + self.offset else: - ad = e.arg + 8 + self.offset + ad = e.arg + self.offset l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) self.args[0] = s @@ -405,8 +406,7 @@ class instruction_arm(instruction): if not isinstance(e, ExprInt): log.debug('dyn dst %r', e) return - # Can't find the +4 reason in doc - off = e.arg - (self.offset + 4 + self.l) + off = e.arg - self.offset if int(off % 4): raise ValueError('strange offset! %r' % off) self.args[0] = ExprInt32(off) @@ -415,9 +415,9 @@ class instruction_arm(instruction): args = [a for a in self.args] return args - def get_asm_offset(self, x): + def get_asm_offset(self, expr): # LDR XXX, [PC, offset] => PC is self.offset+8 - return ExprInt_from(x, self.offset+8) + return ExprInt_from(expr, self.offset+8) class instruction_armt(instruction_arm): @@ -437,9 +437,9 @@ class instruction_armt(instruction_arm): if not isinstance(e, ExprInt): return if self.name == 'BLX': - ad = e.arg + 4 + (self.offset & 0xfffffffc) + ad = e.arg + (self.offset & 0xfffffffc) else: - ad = e.arg + 4 + self.offset + ad = e.arg + self.offset l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) if self.name in ["CBZ", "CBNZ"]: @@ -479,11 +479,15 @@ class instruction_armt(instruction_arm): # The first +2 is to compensate instruction len, but strangely, 32 bits # thumb2 instructions len is 2... For the second +2, didn't find it in # the doc. - off = e.arg - (self.offset + 2 + 2) + off = e.arg - self.offset if int(off % 2): raise ValueError('strange offset! %r' % off) self.args[0] = ExprInt32(off) + def get_asm_offset(self, expr): + # ADR XXX, PC, imm => PC is 4 aligned + imm + new_offset = ((self.offset+self.l)/4)*4 + return ExprInt_from(expr, new_offset) class mn_arm(cls_mn): @@ -500,6 +504,7 @@ class mn_arm(cls_mn): sp = {'l':SP, 'b':SP} instruction = instruction_arm max_instruction_len = 4 + alignment = 4 @classmethod def getpc(cls, attrib = None): @@ -598,7 +603,8 @@ class mn_armt(cls_mn): pc = PC sp = SP instruction = instruction_armt - max_instruction_len = 8 + max_instruction_len = 4 + alignment = 4 @classmethod def getpc(cls, attrib = None): @@ -780,9 +786,16 @@ class arm_offs(arm_imm): return ExprInt_fromsize(self.intsize, v) def decodeval(self, v): - return v << 2 + v <<= 2 + # Add pipeline offset + v += 8 + return v def encodeval(self, v): + if v%4 != 0: + return False + # Remove pipeline offset + v -= 8 return v >> 2 def decode(self, v): @@ -1193,13 +1206,16 @@ class arm_offs_blx(arm_imm): v = v & self.lmask v = (v << 2) + (self.parent.lowb.value << 1) v = sign_ext(v, 26, 32) + # Add pipeline offset + v += 8 self.expr = ExprInt32(v) return True def encode(self): if not isinstance(self.expr, ExprInt): return False - v = self.expr.arg.arg + # Remove pipeline offset + v = int(self.expr.arg - 8) if v & 0x80000000: v &= (1 << 26) - 1 self.parent.lowb.value = (v >> 1) & 1 @@ -1626,28 +1642,17 @@ class arm_offsp(arm_offpc): class arm_offspc(arm_offs): def decodeval(self, v): - return v << 1 - - def encodeval(self, v): - return v >> 1 - - -class arm_offspchl(arm_offs): - - def decodeval(self, v): - if self.parent.hl.value == 0: - return v << 12 - else: - return v << 1 + v = v << 1 + # Add pipeline offset + v += 2 + 2 + return v def encodeval(self, v): - if v > (1 << 12): - self.parent.hl.value = 0 - v >>= 12 - else: - self.parent.hl.value = 1 - v >>= 1 - return v + # Remove pipeline offset + v -= 2 + 2 + if v % 2 == 0: + return v >> 1 + return False class arm_off8sppc(arm_imm): @@ -1898,7 +1903,6 @@ rbl_wb = bs(l=3, cls=(armt_reg_wb,), fname='rb') offs8 = bs(l=8, cls=(arm_offspc,), fname="offs") offs11 = bs(l=11, cls=(arm_offspc,), fname="offs") -offs11hl = bs(l=11, cls=(arm_offspchl,), fname="offs") hl = bs(l=1, prio=default_prio + 1, fname='hl') off8sppc = bs(l=8, cls=(arm_off8sppc,), fname="off") diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 50dc1c86..12f4ff8e 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -10,7 +10,7 @@ from miasm2.expression.expression import ExprMem, ExprInt, ExprInt32, ExprId from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu - +from miasm2.core.asmbloc import asm_label log = logging.getLogger("mips32dis") console_handler = logging.StreamHandler() @@ -48,6 +48,26 @@ deref_nooff = Group(LPARENTHESIS + gpregs.parser + \ deref = deref_off | deref_nooff +variable, operand, base_expr = cpu.gen_base_expr() + +int_or_expr = base_expr + + +def ast_id2expr(t): + if not t in mn_mips32.regs.all_regs_ids_byname: + r = ExprId(asm_label(t)) + else: + r = mn_mips32.regs.all_regs_ids_byname[t] + return r + + +def ast_int2expr(a): + return ExprInt32(a) + + +my_var_parser = cpu.parse_ast(ast_id2expr, ast_int2expr) +base_expr.setParseAction(my_var_parser) + class additional_info: def __init__(self): self.except_on_instr = False @@ -159,7 +179,7 @@ class instruction_mips32(cpu.instruction): raise ValueError('symbol not resolved %s' % self.l) if not isinstance(e, ExprInt): return - off = e.arg - (self.offset + self.l) + off = e.arg - self.offset print "diff", e, hex(self.offset) print hex(off) if int(off % 4): @@ -307,13 +327,15 @@ class mips32_soff_noarg(mips32_imm): v = v & self.lmask v <<= 2 v = cpu.sign_ext(v, 16+2, 32) - self.expr = ExprInt32(v) + # Add pipeline offset + self.expr = ExprInt32(v + 4) return True def encode(self): if not isinstance(self.expr, ExprInt): return False - v = self.expr.arg.arg + # Remove pipeline offset + v = int(self.expr.arg - 4) if v & 0x80000000: nv = v & ((1 << 16+2) - 1) assert( v == cpu.sign_ext(nv, 16+2, 32)) diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 2f543843..2cac7260 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -9,6 +9,7 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import miasm2.arch.msp430.regs as regs_module from miasm2.arch.msp430.regs import * +from miasm2.core.asmbloc import asm_label log = logging.getLogger("armdis") console_handler = logging.StreamHandler() @@ -73,7 +74,7 @@ PINC = Suppress("+") def ast_id2expr(t): if not t in mn_msp430.regs.all_regs_ids_byname: - r = ExprId(t, 16) + r = ExprId(asm_label(t), 16) else: r = mn_msp430.regs.all_regs_ids_byname[t] return r @@ -143,7 +144,7 @@ class instruction_msp430(instruction): if self.name == "call": ad = e.arg else: - ad = e.arg + int(self.offset) + self.l + ad = e.arg + int(self.offset) l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) @@ -187,8 +188,11 @@ class instruction_msp430(instruction): # raise ValueError('dst must be int or label') log.warning('dynamic dst %r', e) return - # return ExprInt32(e.arg - (self.offset + self.l)) - self.args[0] = ExprInt_fromsize(16, e.arg - (self.offset + self.l)) + + # Call argument is an absolute offset + # Other offsets are relative to instruction offset + if self.name != "call": + self.args[0] = ExprInt_fromsize(16, e.arg - self.offset) def get_info(self, c): pass @@ -522,9 +526,16 @@ class msp430_offs(imm_noarg, m_arg): return ExprInt_fromsize(16, v) def decodeval(self, v): - return v << 1 + v <<= 1 + v += self.parent.l + return v def encodeval(self, v): + plen = self.parent.l + self.l + assert(plen % 8 == 0) + v -= plen / 8 + if v % 2 != 0: + return False return v >> 1 def decode(self, v): @@ -574,8 +585,8 @@ bs_f2_nobw = bs_name(l=3, name={'swpb': 1, 'sxt': 3, 'call': 5}) addop("f2_2", [bs('000100'), bs_f2_nobw, bs('0'), a_s, sreg, off_s]) - -offimm = bs(l=10, cls=(msp430_offs,), fname="offs") +# Offset must be decoded in last position to have final instruction len +offimm = bs(l=10, cls=(msp430_offs,), fname="offs", order=-1) bs_f2_jcc = bs_name(l=3, name={'jnz': 0, 'jz': 1, 'jnc': 2, 'jc': 3, 'jn': 4, 'jge': 5, 'jl': 6, 'jmp': 7}) diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py index ea8bdc2c..26a53a1e 100644 --- a/miasm2/arch/msp430/ira.py +++ b/miasm2/arch/msp430/ira.py @@ -65,7 +65,7 @@ class ir_a_msp430(ir_a_msp430_base): lbl = bloc.get_next() new_lbl = self.gen_label() irs = self.call_effects(pc_val) - irs.append([ExprAff(IRDst, ExprId(lbl, size=self.pc.size))]) + irs.append([ExprAff(self.IRDst, ExprId(lbl, size=self.pc.size))]) nbloc = irbloc(new_lbl, irs) nbloc.lines = [l] self.blocs[new_lbl] = nbloc diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index a102a37b..7039016c 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -5,8 +5,9 @@ from pyparsing import * from miasm2.core.cpu import * from miasm2.expression.expression import * from collections import defaultdict +import miasm2.arch.sh4.regs as regs_module from miasm2.arch.sh4.regs import * - +from miasm2.core.asmbloc import asm_label jra = ExprId('jra') jrb = ExprId('jrb') @@ -35,8 +36,12 @@ def parse_pcandimmimm(t): t = t[0] return (t[0] & t[1]) + t[2] -def ast_id2expr(a): - return ExprId(a, 32) +def ast_id2expr(t): + if not t in mn_sh4.regs.all_regs_ids_byname: + r = ExprId(asm_label(t)) + else: + r = mn_sh4.regs.all_regs_ids_byname[t] + return r def ast_int2expr(a): return ExprInt32(a) @@ -465,6 +470,7 @@ class instruction_sh4(instruction): class mn_sh4(cls_mn): bintree = {} + regs = regs_module num = 0 all_mn = [] all_mn_mode = defaultdict(list) diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 238567ac..f168d9cb 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -9,6 +9,7 @@ from collections import defaultdict import miasm2.arch.x86.regs as regs_module from miasm2.arch.x86.regs import * from miasm2.ir.ir import * +from miasm2.core.asmbloc import asm_label log = logging.getLogger("x86_arch") console_handler = logging.StreamHandler() @@ -225,7 +226,7 @@ variable, operand, base_expr = gen_base_expr() def ast_id2expr(t): if not t in mn_x86.regs.all_regs_ids_byname: - r = ExprId(t) + r = ExprId(asm_label(t)) else: r = mn_x86.regs.all_regs_ids_byname[t] return r @@ -486,12 +487,11 @@ class instruction_x86(instruction): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return e = self.args[0] - if isinstance(e, ExprId) and not e.name in all_regs_ids_byname: - l = symbol_pool.getby_name_create(e.name) - s = ExprId(l, e.size) - self.args[0] = s + if isinstance(e, ExprId): + if not isinstance(e.name, asm_label) and e not in all_regs_ids: + raise ValueError("ExprId must be a label or a register") elif isinstance(e, ExprInt): - ad = e.arg + int(self.offset) + self.l + ad = e.arg + int(self.offset) l = symbol_pool.getby_offset_create(ad) s = ExprId(l, e.size) self.args[0] = s @@ -558,7 +558,7 @@ class instruction_x86(instruction): return # return ExprInt32(e.arg - (self.offset + self.l)) self.args[0] = ExprInt_fromsize( - self.mode, e.arg - (self.offset + self.l)) + self.mode, e.arg - self.offset) def get_info(self, c): self.additional_info.g1.value = c.g1.value @@ -846,9 +846,9 @@ class mn_x86(cls_mn): def post_asm(self, v): return v - def encodefields(self, decoded): - v = super(mn_x86, self).encodefields(decoded) + def gen_prefix(self): + v = "" rex = 0x40 if self.g1.value is None: self.g1.value = 0 @@ -890,9 +890,15 @@ class mn_x86(cls_mn): if hasattr(self, 'no_xmm_pref'): return None v = "\x66" + v - return v + def encodefields(self, decoded): + v = super(mn_x86, self).encodefields(decoded) + prefix = self.gen_prefix() + if prefix is None: + return None + return prefix + v + def getnextflow(self, symbol_pool): raise NotImplementedError('not fully functional') @@ -2834,19 +2840,54 @@ class bs_rel_off(bs_cond_imm): # m_arg): # else: # self.l = 32 l = offsize(self.parent) + prefix = self.parent.gen_prefix() + parent_len = len(prefix) * 8 + self.parent.l + self.l + assert(parent_len % 8 == 0) - # l = self.parent.v_opmode()#self.parent.args[0].expr.size - # print 'imm enc', l, self.parent.rex_w.value + v = int(self.expr.arg - parent_len/8) + if prefix is None: + raise StopIteration + mask = ((1 << self.l) - 1) + if self.l > l: + raise StopIteration + if v != sign_ext(v & mask, self.l, l): + raise StopIteration + self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) + yield True + + def decode(self, v): + v = swap_uint(self.l, v) + size = offsize(self.parent) + v = sign_ext(v, self.l, size) + v += self.parent.l + v = ExprInt_fromsize(size, v) + self.expr = v + return True + +class bs_s08(bs_rel_off): + parser = int_or_expr + + @classmethod + def flen(cls, mode, v): + return 8 + + def encode(self): + if not isinstance(self.expr, ExprInt): + raise StopIteration + arg0_expr = self.parent.args[0].expr + if self.l != 0: + l = self.l + else: + l = self.parent.v_opmode() + self.l = l + l = offsize(self.parent) v = int(self.expr.arg) mask = ((1 << self.l) - 1) - # print 'ext', self.l, l, hex(v), hex(sign_ext(v & ((1<<self.l)-1), - # self.l, l)) if self.l > l: raise StopIteration if v != sign_ext(v & mask, self.l, l): raise StopIteration self.value = swap_uint(self.l, v & ((1 << self.l) - 1)) - # print hex(self.value) yield True def decode(self, v): @@ -2855,7 +2896,6 @@ class bs_rel_off(bs_cond_imm): # m_arg): v = sign_ext(v, self.l, size) v = ExprInt_fromsize(size, v) self.expr = v - # print self.expr, repr(self.expr) return True @@ -3002,7 +3042,6 @@ class bs_msegoff(m_arg): except StopIteration: return None, None e = v[0] - print "XXX", e if e is None: log.debug('cannot fromstring int %r', s) return None, None @@ -3075,6 +3114,7 @@ sib_base = bs(l=3, cls=(bs_cond_index,), fname = "sib_base") disp = bs(l=0, cls=(bs_cond_disp,), fname = "disp") +s08 = bs(l=8, cls=(bs_s08, )) u08 = bs(l=8, cls=(x86_08, m_arg)) u07 = bs(l=7, cls=(x86_08, m_arg)) @@ -3130,8 +3170,10 @@ d_ss = bs(l=0, cls=(bs_ss, ), fname='ss') d_fs = bs(l=0, cls=(bs_fs, ), fname='fs') d_gs = bs(l=0, cls=(bs_gs, ), fname='gs') -rel_off = bs(l=0, cls=(bs_rel_off,), fname="off") -rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off") +# Offset must be decoded in last position to have final instruction len +rel_off = bs(l=0, cls=(bs_rel_off,), fname="off", order=-1) +# Offset must be decoded in last position to have final instruction len +rel_off08 = bs(l=8, cls=(bs_rel_off08,), fname="off", order=-1) moff = bs(l=0, cls=(bs_moff,), fname="off") msegoff = bs(l=16, cls=(bs_msegoff,), fname="mseg") movoff = bs(l=0, cls=(bs_movoff,), fname="off") @@ -3737,7 +3779,7 @@ addop("prefetchnta", [bs8(0x0f), bs8(0x18)] + rmmod(d0, rm_arg_m08)) addop("push", [bs8(0xff), stk] + rmmod(d6)) addop("push", [bs("01010"), stk, reg]) -addop("push", [bs8(0x6a), rel_off08, stk]) +addop("push", [bs8(0x6a), s08, stk]) addop("push", [bs8(0x68), d_imm, stk]) addop("push", [bs8(0x0e), stk, d_cs]) addop("push", [bs8(0x16), stk, d_ss]) diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py index 1bf6969d..31f55483 100644 --- a/miasm2/arch/x86/regs.py +++ b/miasm2/arch/x86/regs.py @@ -388,6 +388,10 @@ all_regs_ids = [ float_stack_ptr, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + + exception_flags, ] + fltregs32_expr @@ -406,6 +410,9 @@ all_regs_ids_no_alias = [ dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7, float_stack_ptr, mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, + exception_flags, ] + fltregs32_expr diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py index 5e4fbe22..612a2e3b 100644 --- a/miasm2/core/asmbloc.py +++ b/miasm2/core/asmbloc.py @@ -10,19 +10,14 @@ from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck from miasm2.core.graph import DiGraph +from miasm2.core.interval import interval - -log_asmbloc = logging.getLogger("asmbloc") +log_asmbloc = logging.getLogger("asmblock") console_handler = logging.StreamHandler() console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) log_asmbloc.addHandler(console_handler) log_asmbloc.setLevel(logging.WARNING) - -def whoami(): - return inspect.stack()[2][3] - - def is_int(a): return isinstance(a, int) or isinstance(a, long) or \ isinstance(a, moduint) or isinstance(a, modint) @@ -50,7 +45,6 @@ class asm_label: self.offset = offset else: self.offset = int(offset) - self._hash = hash((self.name, self.offset)) def __str__(self): if isinstance(self.offset, (int, long)): @@ -65,16 +59,6 @@ class asm_label: rep += '>' return rep - def __hash__(self): - return self._hash - - def __eq__(self, a): - if isinstance(a, asm_label): - return self._hash == hash(a) - else: - return False - - class asm_raw: def __init__(self, raw=""): self.raw = raw @@ -82,7 +66,6 @@ class asm_raw: def __str__(self): return repr(self.raw) - class asm_constraint(object): c_to = "c_to" c_next = "c_next" @@ -91,20 +74,10 @@ class asm_constraint(object): def __init__(self, label=None, c_t=c_to): self.label = label self.c_t = c_t - self._hash = hash((self.label, self.c_t)) def __str__(self): return "%s:%s" % (str(self.c_t), str(self.label)) - def __hash__(self): - return self._hash - - def __eq__(self, a): - if isinstance(a, asm_constraint): - return self._hash == a._hash - else: - return False - class asm_constraint_next(asm_constraint): @@ -127,12 +100,13 @@ class asm_constraint_bad(asm_constraint): label, c_t=asm_constraint.c_bad) -class asm_bloc: +class asm_bloc(object): - def __init__(self, label=None): + def __init__(self, label=None, alignment=1): self.bto = set() self.lines = [] self.label = label + self.alignment = alignment def __str__(self): out = [] @@ -320,6 +294,8 @@ class asm_symbol_pool: """ if not label.name in self._name2label: raise ValueError('label %s not in symbol pool' % label) + if offset is not None and offset in self._offset2label: + raise ValueError('Conflict in label %s' % label) self._offset2label.pop(label.offset, None) label.offset = offset if is_int(label.offset): @@ -503,7 +479,6 @@ def split_bloc(mnemo, attrib, pool_bin, blocs, return blocs - def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[], split_dis=[], follow_call=False, dontdis_retcall=False, blocs_wd=None, lines_wd=None, blocs=None, @@ -612,26 +587,26 @@ def conservative_asm(mnemo, instr, symbols, conservative): return c, candidates return candidates[0], candidates -def fix_expr_val(e, symbols): +def fix_expr_val(expr, symbols): + """Resolve an expression @expr using @symbols""" def expr_calc(e): if isinstance(e, m2_expr.ExprId): s = symbols._name2label[e.name] e = m2_expr.ExprInt_from(e, s.offset) return e - e = e.visit(expr_calc) - e = expr_simp(e) - return e + result = expr.visit(expr_calc) + result = expr_simp(result) + if not isinstance(result, m2_expr.ExprInt): + raise RuntimeError('Cannot resolve symbol %s' % expr) + return result -def guess_blocs_size(mnemo, blocs): - """ - Asm and compute max bloc length - """ - for b in blocs: - log_asmbloc.debug('---') - blen = 0 - blen_max = 0 - for instr in b.lines: +def guess_blocks_size(mnemo, blocks): + """Asm and compute max block size""" + + for block in blocks: + size = 0 + for instr in block.lines: if isinstance(instr, asm_raw): # for special asm_raw, only extract len if isinstance(instr.raw, list): @@ -646,431 +621,383 @@ def guess_blocs_size(mnemo, blocs): else: raise NotImplementedError('asm raw') else: - l = mnemo.max_instruction_len + # Assemble the instruction to retrieve its len. + # If the instruction uses symbol it will fail + # In this case, the max_instruction_len is used + try: + candidates = mnemo.asm(instr) + l = len(candidates[-1]) + except: + l = mnemo.max_instruction_len data = None instr.data = data instr.l = l - blen += l + size += l + + block.size = size + block.max_size = size + log_asmbloc.info("size: %d max: %d", block.size, block.max_size) + +def fix_label_offset(symbol_pool, label, offset, modified): + """Fix the @label offset to @offset. If the @offset has changed, add @label + to @modified + @symbol_pool: current symbol_pool + """ + if label.offset == offset: + return + symbol_pool.set_offset(label, offset) + modified.add(label) + - b.blen = blen - # bloc with max rel values encoded - b.blen_max = blen + blen_max - log_asmbloc.info("blen: %d max: %d", b.blen, b.blen_max) +class BlockChain(object): + """Manage blocks linked with an asm_constraint_next""" + def __init__(self, symbol_pool, blocks): + self.symbol_pool = symbol_pool + self.blocks = blocks + self.place() -def group_blocs(blocs): + @property + def pinned(self): + """Return True iff at least one block is pinned""" + return self.pinned_block_idx is not None + + def _set_pinned_block_idx(self): + self.pinned_block_idx = None + for i, block in enumerate(self.blocks): + if is_int(block.label.offset): + if self.pinned_block_idx is not None: + raise ValueError("Multiples pinned block detected") + self.pinned_block_idx = i + + + def place(self): + """Compute BlockChain min_offset and max_offset using pinned block and + blocks' size + """ + self._set_pinned_block_idx() + self.max_size = 0 + for block in self.blocks: + self.max_size += block.max_size + block.alignment - 1 + + # Check if chain has one block pinned + if not self.pinned: + return + + + offset_base = self.blocks[self.pinned_block_idx].label.offset + assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) + + self.offset_min = offset_base + for block in self.blocks[:self.pinned_block_idx-1:-1]: + self.offset_min -= block.max_size + (block.alignment - block.max_size) % block.alignment + + self.offset_max = offset_base + for block in self.blocks[self.pinned_block_idx:]: + self.offset_max += block.max_size + (block.alignment - block.max_size) % block.alignment + + def merge(self, chain): + """Best effort merge two block chains + Return the list of resulting blockchains""" + self.blocks += chain.blocks + self.place() + return [self] + + def fix_blocks(self, modified_labels): + """Propagate a pinned to its blocks' neighbour + @modified_labels: store new pinned labels""" + + if not self.pinned: + raise ValueError('Trying to fix unpinned block') + + # Propagate offset to blocks before pinned block + pinned_block = self.blocks[self.pinned_block_idx] + offset = pinned_block.label.offset + if offset % pinned_block.alignment != 0: + raise RuntimeError('Bad alignment') + + for block in self.blocks[:self.pinned_block_idx-1:-1]: + new_offset = offset - block.size + new_offset = new_offset - new_offset % pinned_block.alignment + fix_label_offset(self.symbol_pool, + block.label, + new_offset, + modified_labels) + + # Propagate offset to blocks after pinned block + offset = pinned_block.label.offset + pinned_block.size + + last_block = pinned_block + for block in self.blocks[self.pinned_block_idx+1:]: + offset += (- offset) % last_block.alignment + fix_label_offset(self.symbol_pool, + block.label, + offset, + modified_labels) + offset += block.size + last_block = block + return modified_labels + + +class BlockChainWedge(object): + """Stand for wedges between blocks""" + + def __init__(self, symbol_pool, offset, size): + self.symbol_pool = symbol_pool + self.offset = offset + self.max_size = size + self.offset_min = offset + self.offset_max = offset + size + + def merge(self, chain): + """Best effort merge two block chains + Return the list of resulting blockchains""" + chain.blocks[0].label.offset = self.offset_max + chain.place() + return [self, chain] + + +def group_constrained_blocks(symbol_pool, blocks): """ - this function group asm blocs with next constraints + Return the BlockChains list built from grouped asm blocks linked by + asm_constraint_next + @blocks: a list of asm block """ - log_asmbloc.info('group_blocs') - # group adjacent blocs - rest = blocs[:] - groups_bloc = {} - d = dict([(x.label, x) for x in rest]) - log_asmbloc.debug([str(x.label) for x in rest]) - - while rest: - b = [rest.pop()] - # find recursive son - fini = False - while not fini: - fini = True - for c in b[-1].bto: - if c.c_t != asm_constraint.c_next: - continue - if c.label in d and d[c.label] in rest: - b.append(d[c.label]) - rest.remove(d[c.label]) - fini = False - break - # check if son in group: - found_in_group = False - for c in b[-1].bto: - if c.c_t != asm_constraint.c_next: - continue - if c.label in groups_bloc: - b += groups_bloc[c.label] - del groups_bloc[c.label] - groups_bloc[b[0].label] = b - found_in_group = True + log_asmbloc.info('group_constrained_blocks') + + # Group adjacent blocks + remaining_blocks = list(blocks) + known_block_chains = {} + lbl2block = {block.label:block for block in blocks} + + while remaining_blocks: + # Create a new block chain + block_list = [remaining_blocks.pop()] + + # Find sons in remainings blocks linked with a next constraint + while True: + # Get next block + next_label = block_list[-1].get_next() + if next_label is None or next_label not in lbl2block: break + next_block = lbl2block[next_label] - if not found_in_group: - groups_bloc[b[0].label] = b + # Add the block at the end of the current chain + if next_block not in remaining_blocks: + break + block_list.append(next_block) + remaining_blocks.remove(next_block) - # create max label range for bigbloc - for l in groups_bloc: - l.total_max_l = reduce(lambda x, y: x + y.blen_max, groups_bloc[l], 0) - log_asmbloc.debug(("offset totalmax l", l.offset, l.total_max_l)) - if is_int(l.offset): - hof = hex(int(l.offset)) - else: - hof = l.name - log_asmbloc.debug(("offset totalmax l", hof, l.total_max_l)) - return groups_bloc - - -def gen_free_space_intervals(f, max_offset=0xFFFFFFFF): - interval = {} - offset_label = dict([(x.offset_free, x) for x in f]) - offset_label_order = offset_label.keys() - offset_label_order.sort() - offset_label_order.append(max_offset) - offset_label_order.reverse() - - unfree_stop = 0L - while len(offset_label_order) > 1: - offset = offset_label_order.pop() - offset_end = offset + f[offset_label[offset]] - prev = 0 - if unfree_stop > offset_end: - space = 0 - else: - space = offset_label_order[-1] - offset_end - if space < 0: - space = 0 - interval[offset_label[offset]] = space - if offset_label_order[-1] in offset_label: - prev = offset_label[offset_label_order[-1]] - prev = f[prev] - - interval[offset_label[offset]] = space - - unfree_stop = max( - unfree_stop, offset_end, offset_label_order[-1] + prev) - return interval - - -def add_dont_erase(f, dont_erase=[]): - tmp_symbol_pool = asm_symbol_pool() - for a, b in dont_erase: - l = tmp_symbol_pool.add_label(a, a) - l.offset_free = a - f[l] = b - a - return - - -def gen_non_free_mapping(group_bloc, dont_erase=[]): - non_free_mapping = {} - # calculate free space for bloc placing - for g in group_bloc: - g.fixedblocs = False - # if a label in the group is fixed - diff_offset = 0 - for b in group_bloc[g]: - if not is_int(b.label.offset): - diff_offset += b.blen_max - continue - g.fixedblocs = True - g.offset_free = b.label.offset - diff_offset - break - if g.fixedblocs: - non_free_mapping[g] = g.total_max_l + # Check if son is in a known block group + if next_label is not None and next_label in known_block_chains: + block_list += known_block_chains[next_label] + del known_block_chains[next_label] - log_asmbloc.debug("non free bloc:") - log_asmbloc.debug(non_free_mapping) - add_dont_erase(non_free_mapping, dont_erase) - log_asmbloc.debug("non free more:") - log_asmbloc.debug(non_free_mapping) - return non_free_mapping + known_block_chains[block_list[0].label] = block_list + out_block_chains = [] + for label in known_block_chains: + chain = BlockChain(symbol_pool, known_block_chains[label]) + out_block_chains.append(chain) + return out_block_chains -class AsmBlockLink(object): - """Location contraint between blocks""" +def get_blockchains_address_interval(blockChains, dst_interval): + """Compute the interval used by the pinned @blockChains + Check if the placed chains are in the @dst_interval""" - def __init__(self, label): - self.label = label + allocated_interval = interval() + for chain in blockChains: + if not chain.pinned: + continue + chain_interval = interval([(chain.offset_min, chain.offset_max-1)]) + if chain_interval not in dst_interval: + raise ValueError('Chain placed out of destination interval') + allocated_interval += chain_interval + return allocated_interval - def resolve(self, parent_label, label2block): - """ - Resolve the @parent_label.offset_g - @parent_label: parent label - @label2block: dictionnary which links labels to blocks - """ - raise NotImplementedError("Abstract method") +def resolve_symbol(blockChains, symbol_pool, dst_interval=None): + """Place @blockChains in the @dst_interval""" -class AsmBlockLinkNext(AsmBlockLink): + log_asmbloc.info('resolve_symbol') + if dst_interval is None: + dst_interval = interval([(0, 0xFFFFFFFFFFFFFFFF)]) - def resolve(self, parent_label, label2block): - parent_label.offset_g = self.label.offset_g + label2block[self.label].blen + forbidden_interval = interval([(-1, 0xFFFFFFFFFFFFFFFF+1)]) - dst_interval + allocated_interval = get_blockchains_address_interval(blockChains, + dst_interval) + log_asmbloc.debug('allocated interval: %s', allocated_interval) -class AsmBlockLinkPrev(AsmBlockLink): + pinned_chains = [chain for chain in blockChains if chain.pinned] - def resolve(self, parent_label, label2block): - parent_label.offset_g = self.label.offset_g - label2block[parent_label].blen + # Add wedge in forbidden intervals + for start, stop in forbidden_interval.intervals: + wedge = BlockChainWedge(symbol_pool, offset=start, size=stop+1-start) + pinned_chains.append(wedge) -def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], - max_offset=0xFFFFFFFF): - """ - place all asmblocs - """ - log_asmbloc.info('resolve_symbol') - log_asmbloc.info(str(dont_erase)) - bloc_list = [] - unr_bloc = reduce(lambda x, y: x + group_bloc[y], group_bloc, []) - - non_free_mapping = gen_non_free_mapping(group_bloc, dont_erase) - free_interval = gen_free_space_intervals(non_free_mapping, max_offset) - log_asmbloc.debug(free_interval) - - # first big ones - g_tab = [(x.total_max_l, x) for x in group_bloc] - g_tab.sort() - g_tab.reverse() - g_tab = [x[1] for x in g_tab] - - # g_tab => label of grouped blov - # group_bloc => dict of grouped bloc labeled-key - - # first, near callee placing algo - for g in g_tab: - if g.fixedblocs: - continue - finish = False - for x in group_bloc: - if not x in free_interval.keys(): - continue - if free_interval[x] < g.total_max_l: - continue + # Try to place bigger blockChains first + pinned_chains.sort(key=lambda x:x.offset_min) + blockChains.sort(key=lambda x:-x.max_size) - for b in group_bloc[x]: - for c in b.bto: - if c.label == g: - tmp = free_interval[x] - g.total_max_l - log_asmbloc.debug( - "consumed %d rest: %d", g.total_max_l, int(tmp)) - free_interval[g] = tmp - del free_interval[x] - symbol_pool.set_offset( - g, AsmBlockLinkNext(group_bloc[x][-1].label)) - g.fixedblocs = True - finish = True - break - if finish: - break - if finish: - break + fixed_chains = list(pinned_chains) - # second, bigger in smaller algo - for g in g_tab: - if g.fixedblocs: + log_asmbloc.debug("place chains") + for chain in blockChains: + if chain.pinned: continue - # chose smaller free_interval first - k_tab = [(free_interval[x], x) for x in free_interval] - k_tab.sort() - k_tab = [x[1] for x in k_tab] - # choose free_interval - for k in k_tab: - if g.total_max_l > free_interval[k]: - continue - symbol_pool.set_offset( - g, AsmBlockLinkNext(group_bloc[k][-1].label)) - tmp = free_interval[k] - g.total_max_l - log_asmbloc.debug( - "consumed %d rest: %d", g.total_max_l, int(tmp)) - free_interval[g] = tmp - del free_interval[k] - - g.fixedblocs = True - break - - while unr_bloc: - # propagate know offset - resolving = False - i = 0 - while i < len(unr_bloc): - if unr_bloc[i].label.offset is None: - i += 1 - continue - resolving = True - log_asmbloc.info("bloc %s resolved", unr_bloc[i].label) - bloc_list.append(unr_bloc[i]) - g_found = None - for g in g_tab: - if unr_bloc[i] in group_bloc[g]: - if g_found is not None: - raise ValueError('blocin multiple group!!!') - g_found = g - my_group = group_bloc[g_found] - - index = my_group.index(unr_bloc[i]) - if index > 0 and my_group[index - 1] in unr_bloc: - symbol_pool.set_offset( - my_group[index - 1].label, - AsmBlockLinkPrev(unr_bloc[i].label)) - if index < len(my_group) - 1 and my_group[index + 1] in unr_bloc: - symbol_pool.set_offset( - my_group[index + 1].label, - AsmBlockLinkNext(unr_bloc[i].label)) - del unr_bloc[i] - - if not resolving: - log_asmbloc.warn("cannot resolve symbol! (no symbol fix found)") + fixed = False + for i in xrange(1, len(fixed_chains)): + prev_chain = fixed_chains[i-1] + next_chain = fixed_chains[i] + + if prev_chain.offset_max + chain.max_size < next_chain.offset_min: + new_chains = prev_chain.merge(chain) + fixed_chains[i-1:i] = new_chains + fixed = True + break + if not fixed: + raise RuntimeError('Cannot find enough space to place blocks') + + return [chain for chain in fixed_chains if isinstance(chain, BlockChain)] + +def filter_exprid_label(exprs): + """Extract labels from list of ExprId @exprs""" + return set(expr.name for expr in exprs if isinstance(expr.name, asm_label)) + +def get_block_labels(block): + """Extract labels used by @block""" + symbols = set() + for instr in block.lines: + if isinstance(instr, asm_raw): + if isinstance(instr.raw, list): + for expr in instr.raw: + symbols.update(m2_expr.get_expr_ids(expr)) else: + for arg in instr.args: + symbols.update(m2_expr.get_expr_ids(arg)) + labels = filter_exprid_label(symbols) + return labels + +def assemble_block(mnemo, block, symbol_pool, conservative=False): + """Assemble a @block using @symbol_pool + @conservative: (optional) use original bytes when possible + """ + offset_i = 0 + + for instr in block.lines: + if isinstance(instr, asm_raw): + if isinstance(instr.raw, list): + # Fix special asm_raw + data = "" + for expr in instr.raw: + expr_int = fix_expr_val(expr, symbol_pool) + data += pck[expr_int.size](expr_int.arg) + instr.data = data + + instr.offset = offset_i + offset_i += instr.l continue - for g in g_tab: - log_asmbloc.debug(g) - if g.fixedblocs: - log_asmbloc.debug("fixed") - else: - log_asmbloc.debug("not fixed") - raise ValueError('enable to fix bloc') - return bloc_list + # Assemble an instruction + saved_args = list(instr.args) + instr.offset = block.label.offset + offset_i + # Replace instruction's arguments by resolved ones + instr.args = instr.resolve_args_with_symbols(symbol_pool) -def calc_symbol_offset(symbol_pool, blocks): - """Resolve dependencies between @blocks""" + if instr.dstflow(): + instr.fixDstOffset() - # Labels resolved - pinned_labels = set() - # Link an unreferenced label to its reference label - linked_labels = {} - # Label -> block - label2block = dict((block.label, block) for block in blocks) + old_l = instr.l + cached_candidate, candidates = conservative_asm( + mnemo, instr, symbol_pool, conservative) - # Find pinned labels and labels to resolve - for label in symbol_pool.items: - if label.offset is None: - pass - elif is_int(label.offset): - pinned_labels.add(label) - elif isinstance(label.offset, AsmBlockLink): - # construct dependant blocs tree - linked_labels.setdefault(label.offset.label, set()).add(label) - else: - raise ValueError('Unknown offset type') - label.offset_g = label.offset - - # Resolve labels - while pinned_labels: - ref_label = pinned_labels.pop() - for unresolved_label in linked_labels.get(ref_label, []): - if ref_label.offset_g is None: - raise ValueError("unknown symbol: %s" % str(ref_label.name)) - unresolved_label.offset.resolve(unresolved_label, label2block) - pinned_labels.add(unresolved_label) - - -def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, - conservative=False): - log_asmbloc.info("asmbloc_final") - if symb_reloc_off is None: - symb_reloc_off = {} - fini = False - # asm with minimal instr len - # check if dst label are ok to this encoded form - # recompute if not - # TODO XXXX: implement todo list to remove n^high complexity! - while fini is not True: - - fini = True - my_symb_reloc_off = {} - - calc_symbol_offset(symbol_pool, blocs) - - symbols = asm_symbol_pool() - for s, v in symbol_pool._name2label.items(): - symbols.add_label(s, v.offset_g) - # test if bad encoded relative - for bloc in blocs: - - offset_i = 0 - my_symb_reloc_off[bloc.label] = [] - for instr in bloc.lines: - if isinstance(instr, asm_raw): - if isinstance(instr.raw, list): - # fix special asm_raw - data = "" - for x in instr.raw: - e = fix_expr_val(x, symbols) - data+= pck[e.size](e.arg) - instr.data = data - - offset_i += instr.l - continue - sav_a = instr.args[:] - instr.offset = bloc.label.offset_g + offset_i - args_e = instr.resolve_args_with_symbols(symbols) - for i, e in enumerate(args_e): - instr.args[i] = e - - if instr.dstflow(): - instr.fixDstOffset() - - symbol_reloc_off = [] - old_l = instr.l - c, candidates = conservative_asm( - mnemo, instr, symbol_reloc_off, conservative) - - for i, e in enumerate(sav_a): - instr.args[i] = e - - if len(c) != instr.l: - # good len, bad offset...XXX - bloc.blen = bloc.blen - old_l + len(c) - instr.data = c - instr.l = len(c) - fini = False - continue - found = False - for cpos, c in enumerate(candidates): - if len(c) == instr.l: - instr.data = c - instr.l = len(c) - - found = True - break - if not found: - raise ValueError('something wrong in instr.data') - - if cpos < len(symbol_reloc_off): - my_s = symbol_reloc_off[cpos] - else: - my_s = None + # Restore original arguments + instr.args = saved_args + + # We need to update the block size + block.size = block.size - old_l + len(cached_candidate) + instr.data = cached_candidate + instr.l = len(cached_candidate) + + offset_i += instr.l + + +def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): + """Resolve and assemble @blockChains using @symbol_pool until fixed point is + reached""" + + log_asmbloc.debug("asmbloc_final") + + # Init structures + lbl2block = {block.label:block for block in blocks} + blocks_using_label = {} + for block in blocks: + labels = get_block_labels(block) + for label in labels: + blocks_using_label.setdefault(label, set()).add(block) - if my_s is not None: - my_symb_reloc_off[bloc.label].append(offset_i + my_s) - offset_i += instr.l - assert len(instr.data) == instr.l - # we have fixed all relative values - # recompute good offsets - for label in symbol_pool.items: - symbol_pool.set_offset(label, label.offset_g) + block2chain = {} + for chain in blockChains: + for block in chain.blocks: + block2chain[block] = chain - for a, b in my_symb_reloc_off.items(): - symb_reloc_off[a] = b + # Init worklist + blocks_to_rework = set(blocks) + # Fix and re-assemble blocks until fixed point is reached + while True: -def asm_resolve_final(mnemo, blocs, symbol_pool, dont_erase=[], - max_offset=0xFFFFFFFF, symb_reloc_off=None): - if symb_reloc_off is None: - symb_reloc_off = {} - guess_blocs_size(mnemo, blocs) - bloc_g = group_blocs(blocs) + # Propagate pinned blocks into chains + modified_labels = set() + for chain in blockChains: + chain.fix_blocks(modified_labels) - resolved_b = resolve_symbol(bloc_g, symbol_pool, dont_erase=dont_erase, - max_offset=max_offset) + for label in modified_labels: + # Retrive block with modified reference + if label in lbl2block: + blocks_to_rework.add(lbl2block[label]) - asmbloc_final(mnemo, resolved_b, symbol_pool, symb_reloc_off) - written_bytes = {} + # Enqueue blocks referencing a modified label + if label not in blocks_using_label: + continue + for block in blocks_using_label[label]: + blocks_to_rework.add(block) + + # No more work + if not blocks_to_rework: + break + + while blocks_to_rework: + block = blocks_to_rework.pop() + assemble_block(mnemo, block, symbol_pool, conservative) + +def asm_resolve_final(mnemo, blocks, symbol_pool, dst_interval=None): + """Resolve and assemble @blocks using @symbol_pool into interval + @dst_interval""" + + guess_blocks_size(mnemo, blocks) + blockChains = group_constrained_blocks(symbol_pool, blocks) + resolved_blockChains = resolve_symbol(blockChains, symbol_pool, dst_interval) + + asmbloc_final(mnemo, blocks, resolved_blockChains, symbol_pool) patches = {} - for bloc in resolved_b: - offset = bloc.label.offset - for line in bloc.lines: - assert line.data is not None - patches[offset] = line.data - for cur_pos in xrange(line.l): - if offset + cur_pos in written_bytes: - raise ValueError( - "overlapping bytes in asssembly %X" % int(offset)) - written_bytes[offset + cur_pos] = 1 - line.offset = offset - offset += line.l - - return resolved_b, patches + output_interval = interval() + for block in blocks: + offset = block.label.offset + for instr in block.lines: + if not instr.data: + # Empty line + continue + assert len(instr.data) == instr.l + patches[offset] = instr.data + instruction_interval = interval([(offset, offset + instr.l-1)]) + if not (instruction_interval & output_interval).empty: + raise RuntimeError("overlapping bytes %X" % int(offset)) + instr.offset = offset + offset += instr.l + return patches def blist2graph(ab): """ diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index bde95200..efb511ce 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -932,8 +932,8 @@ class instruction(object): o += self.gen_args(args) return o - def get_asm_offset(self, x): - return m2_expr.ExprInt_from(x, self.offset) + def get_asm_offset(self, expr): + return m2_expr.ExprInt_from(expr, self.offset) def resolve_args_with_symbols(self, symbols=None): if symbols is None: @@ -947,14 +947,14 @@ class instruction(object): for x in ids: if isinstance(x.name, asmbloc.asm_label): name = x.name.name + # special symbol $ + if name == '$': + fixed_ids[x] = self.get_asm_offset(x) + continue if not name in symbols: raise ValueError('unresolved symbol! %r' % x) else: name = x.name - # special symbol - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue if not name in symbols: continue if symbols[name].offset is None: @@ -981,6 +981,8 @@ class cls_mn(object): __metaclass__ = metamn args_symb = [] instruction = instruction + # Block's offset alignement + alignment = 1 @classmethod def guess_mnemo(cls, bs, attrib, pre_dis_info, offset): @@ -1149,6 +1151,7 @@ class cls_mn(object): if not getok: continue + c.l = prefix_len + total_l / 8 for i in c.to_decode: f = c.fields_order[i] if f.is_present: @@ -1162,7 +1165,6 @@ class cls_mn(object): for a in c.args: a.expr = expr_simp(a.expr) - c.l = prefix_len + total_l / 8 c.b = cls.getbytes(bs, offset_o, c.l) c.offset = offset_o c = c.post_dis() @@ -1333,14 +1335,14 @@ class cls_mn(object): return o def value(self, mode): - todo = [(0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] + todo = [(0, 0, [(x, self.fields_order[x]) for x in self.to_decode[::-1]])] result = [] done = [] cpt = 0 while todo: - index, to_decode = todo.pop() + index, cur_len, to_decode = todo.pop() # TEST XXX for i, f in to_decode: setattr(self, f.fname, f) @@ -1351,11 +1353,14 @@ class cls_mn(object): cpt += 1 can_encode = True for i, f in to_decode[index:]: + f.parent.l = cur_len ret = f.encode() if not ret: log.debug('cannot encode %r', f) can_encode = False break + if f.value is not None and f.l: + cur_len += f.l index += 1 if ret is True: continue @@ -1364,14 +1369,14 @@ class cls_mn(object): for i in ret: gcpt += 1 o = [] - if ((index, [xx[1].value for xx in to_decode]) in todo or - (index, [xx[1].value for xx in to_decode]) in done): + if ((index, cur_len, [xx[1].value for xx in to_decode]) in todo or + (index, cur_len, [xx[1].value for xx in to_decode]) in done): raise NotImplementedError('not fully functional') for p, f in to_decode: fnew = f.clone() o.append((p, fnew)) - todo.append((index, o)) + todo.append((index, cur_len, o)) can_encode = False break diff --git a/miasm2/core/interval.py b/miasm2/core/interval.py index d76cbd15..66445674 100644 --- a/miasm2/core/interval.py +++ b/miasm2/core/interval.py @@ -100,20 +100,20 @@ class interval(object): o = "[]" return o - def __contains__(self, i): - if isinstance(i, interval): - for x in self.intervals: - is_out = True - for y in i.intervals: - if cmp_interval(x, y) in [INT_EQ, INT_B_IN_A]: - is_out = False + def __contains__(self, other): + if isinstance(other, interval): + for intervalB in other.intervals: + is_in = False + for intervalA in self.intervals: + if cmp_interval(intervalA, intervalB) in [INT_EQ, INT_B_IN_A]: + is_in = True break - if is_out: + if not is_in: return False return True else: - for x in self.intervals: - if x[0] <= i <= x[1]: + for intervalA in self.intervals: + if intervalA[0] <= other <= intervalA[1]: return True return False @@ -215,6 +215,12 @@ class interval(object): return None, None return self.intervals[0][0], self.intervals[-1][1] + + @property + def empty(self): + """Return True iff the interval is empty""" + return not self.intervals + def show(self, img_x=1350, img_y=20, dry_run=False): """ show image representing the interval diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index b42bdbcc..3a5751ac 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -19,6 +19,14 @@ size2pck = {8: 'B', 64: 'Q', } +class DirectiveAlign(object): + """Stand for alignment representation""" + + def __init__(self, alignment=1): + self.alignment = alignment + + def __str__(self): + return "Alignment %s" % self.alignment def guess_next_new_label(symbol_pool, gen_label_index=0): i = 0 @@ -30,6 +38,30 @@ def guess_next_new_label(symbol_pool, gen_label_index=0): return symbol_pool.add_label(name) i += 1 +def replace_expr_labels(expr, symbol_pool, replace_id): + """Create asm_label of the expression @expr in the @symbol_pool + Update @replace_id""" + + if not (isinstance(expr, m2_expr.ExprId) and + isinstance(expr.name, asmbloc.asm_label)): + return expr + + old_lbl = expr.name + new_lbl = symbol_pool.getby_name_create(old_lbl.name) + replace_id[expr] = m2_expr.ExprId(new_lbl, expr.size) + return replace_id[expr] + +def replace_orphan_labels(instr, symbol_pool): + """Link orphan labels used by @instr to the @symbol_pool""" + + for i, arg in enumerate(instr.args): + replace_id = {} + arg.visit(lambda e:replace_expr_labels(e, + symbol_pool, + replace_id)) + instr.args[i] = instr.args[i].replace_expr(replace_id) + + def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): if symbol_pool is None: @@ -126,6 +158,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): if directive == 'dontsplit': # custom command lines.append(asmbloc.asm_raw()) continue + if directive == "align": + align_value = int(line[r.end():]) + lines.append(DirectiveAlign(align_value)) + continue if directive in ['file', 'intel_syntax', 'globl', 'local', 'type', 'size', 'align', 'ident', 'section']: continue @@ -147,6 +183,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): line = line[:line.find(';')] line = line.strip(' ').strip('\t') instr = mnemo.fromstring(line, attrib) + + # replace orphan asm_label with labels from symbol_pool + replace_orphan_labels(instr, symbol_pool) + if instr.dstflow(): instr.dstflow2label(symbol_pool) lines.append(instr) @@ -172,7 +212,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): lines[i:i] = [l] else: l = lines[i] - b = asmbloc.asm_bloc(l) + b = asmbloc.asm_bloc(l, alignment=mnemo.alignment) b.bloc_num = bloc_num bloc_num += 1 blocs.append(b) @@ -195,6 +235,9 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None, gen_label_index=0): block_may_link = True b.addline(lines[i]) i += 1 + elif isinstance(lines[i], DirectiveAlign): + b.alignment = lines[i].alignment + i += 1 # asmbloc.asm_label elif isinstance(lines[i], asmbloc.asm_label): if block_may_link: diff --git a/test/arch/arm/arch.py b/test/arch/arm/arch.py index 19b1236e..701c45af 100644 --- a/test/arch/arm/arch.py +++ b/test/arch/arm/arch.py @@ -66,13 +66,13 @@ reg_tests_arm = [ "002094e0"), ("0003EA9C MVN R7, R2", "0270e0e1"), - ("C00CD4DC BL 0x7C", + ("C00CD4DC BL 0x84", "1F0000EB"), - ("C00CF110 BL 0xFFFFFDEC", + ("C00CF110 BL 0xFFFFFDF4", "7BFFFFEB"), - ("000829b0 BLNE 0xFFF87110", + ("000829b0 BLNE 0xFFF87118", "441cfe1b"), ("C00EC608 TEQ R4, R5", @@ -205,7 +205,7 @@ reg_tests_arm = [ ("C00CFA40 BLX R12", "3CFF2FE1"), - ("C010DE1C BLX 0x1ECCEA", + ("C010DE1C BLX 0x1ECCF2", "3AB307FB"), ("00013028 MOV R9, 0x6E75", @@ -217,7 +217,7 @@ reg_tests_arm = [ ("0004A38C CLZ R3, R2", "123F6FE1"), - ("C0132564 BLX 0xFFFCF06C", + ("C0132564 BLX 0xFFFCF074", "1B3CFFFA"), ("C0297028 QADD R7, R6, R6", @@ -414,15 +414,15 @@ reg_tests_armt = [ ("000a1c16 STMIA R6!, {R0-R3}", "0fc6"), - ("0006af78 BEQ 0x6", + ("0006af78 BEQ 0xA", "03d0"), - ("000747b4 BCC 0xFFFFFFE6", + ("000747b4 BCC 0xFFFFFFEA", "f3d3"), # swi - ("0007479c B 0xE", + ("0007479c B 0x12", "07e0"), - ("0006b946 B 0xFFFFFFE4", + ("0006b946 B 0xFFFFFFE8", "f2e7"), ("C010163C BLX 0x1F916C", "F9F1B6E8"), diff --git a/test/arch/mips32/arch.py b/test/arch/mips32/arch.py index 899748ce..0fb12e1b 100644 --- a/test/arch/mips32/arch.py +++ b/test/arch/mips32/arch.py @@ -46,9 +46,9 @@ reg_tests_mips32 = [ "44097000"), ("00400324 MOV.D F0, F12", "46206006"), - ("00400334 BNE A0, ZERO, 0x28", + ("00400334 BNE A0, ZERO, 0x2C", "1480000A"), - ("00400360 B 0x338", + ("00400360 B 0x33C", "100000CE"), ("00400378 LW T9, 0xFFFF9C90(GP)", "8F999C90"), @@ -58,11 +58,11 @@ reg_tests_mips32 = [ "30420002"), ("00400364 ADD.D F0, F0, F14", "462E0000"), - ("004003A4 BEQ S0, V0, 0x120", + ("004003A4 BEQ S0, V0, 0x124", "12020048"), ("004003A8 SLTI V0, S0, 0x3", "2A020003"), - ("004005A4 BGEZ T3, 0x20", + ("004005A4 BGEZ T3, 0x24", "05610008"), ("00400428 LWC1 F0, 0x4344(V0)", "C4404344"), @@ -80,13 +80,13 @@ reg_tests_mips32 = [ "92228880"), ("004001C4 SB V0, 0xFFFF8880(S1)", "A2228880"), - ("00400274 BAL 0x4", + ("00400274 BAL 0x8", "04110001"), ("0040073C C.LT.D FCC0, F0, F12", "462C003C"), - ("00400744 BC1F FCC0, 0x20", + ("00400744 BC1F FCC0, 0x24", "45000008"), - ("00403A80 BC1T FCC0, 0xB4", + ("00403A80 BC1T FCC0, 0xB8", "4501002D"), ("00400764 MUL.D F12, F0, F0", "46200302"), @@ -104,11 +104,11 @@ reg_tests_mips32 = [ "00431004"), ("00400F60 SRAV V1, S3, V0", "00531807"), - ("00401040 BLTZ S6, 0x58", + ("00401040 BLTZ S6, 0x5C", "06C00016"), - ("00400D18 BLEZ V1, 0x7C", + ("00400D18 BLEZ V1, 0x80", "1860001F"), - ("00401200 BGTZ S4, 0x10", + ("00401200 BGTZ S4, 0x14", "1E800004"), ("004014A4 CVT.D.W F8, F0", "46800221"), diff --git a/test/arch/msp430/arch.py b/test/arch/msp430/arch.py index f3e82955..613af385 100644 --- a/test/arch/msp430/arch.py +++ b/test/arch/msp430/arch.py @@ -72,9 +72,9 @@ reg_tests_msp = [ ("443a call 0x4B66", "b012664b"), - ("4442 jmp 0xFFFA", + ("4442 jmp 0xFFFC", "fd3f"), - ("4422 jnz 0xFFF2", + ("4422 jnz 0xFFF4", "f923"), ("xxxx mov.b @R13+, 0x0(R14)", diff --git a/test/arch/x86/arch.py b/test/arch/x86/arch.py index 884d545b..9d4e464d 100644 --- a/test/arch/x86/arch.py +++ b/test/arch/x86/arch.py @@ -232,10 +232,10 @@ reg_tests = [ "0fba2842"), - (m32, "00000000 CALL 0x112233", - "e833221100"), - (m64, "00000000 CALL 0x112233", - "e833221100"), + (m32, "00000000 CALL 0x112235", + "e830221100"), + (m64, "00000000 CALL 0x112235", + "e830221100"), (m32, "00000000 CALL DWORD PTR [EAX]", "ff10"), (m64, "00000000 CALL QWORD PTR [RAX]", @@ -761,11 +761,11 @@ reg_tests = [ "48CF"), (m32, "00000000 JA 0x12", - "7712"), + "7710"), (m32, "00000000 JA 0xFFFFFFEE", - "77EE"), + "77EC"), (m64, "00000000 JA 0xFFFFFFFFFFFFFFEE", - "77EE"), + "77EC"), #(m32, "00000000 JA 0xFFEE", # "6677EE"), @@ -774,19 +774,19 @@ reg_tests = [ (m16, "00000000 JCXZ 0xFFEE", - "E3EE"), + "E3EC"), (m16, "00000000 JECXZ 0xFFEE", - "67E3EE"), + "67E3EB"), (m32, "00000000 JECXZ 0xFFFFFFEE", - "E3EE"), + "E3EC"), (m32, "00000000 JCXZ 0xFFFFFFEE", - "67E3EE"), + "67E3EB"), (m32, "00000000 JCXZ 0xFFEE", - "6667E3EE"), + "6667E3EA"), (m64, "00000000 JRCXZ 0xFFFFFFFFFFFFFFEE", - "E3EE"), + "E3EC"), (m64, "00000000 JECXZ 0xFFFFFFFFFFFFFFEE", - "67E3EE"), + "67E3EB"), (m32, "00000000 MOV BYTE PTR [EAX], AL", @@ -1161,6 +1161,9 @@ reg_tests = [ (m64, "00000000 PUSH 0x11223344", "6844332211"), + (m32, "00000000 PUSH 0xFFFFFF80", + "6a80"), + (m32, "00000000 PUSH CS", "0e"), (m32, "00000000 PUSH SS", diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index d2c998c8..b80ab33d 100644 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -47,10 +47,9 @@ def compute(ir, mode, asm, inputstate={}, debug=False): def compute_txt(ir, mode, txt, inputstate={}, debug=False): blocs, symbol_pool = parse_asm.parse_txt(mn, mode, txt) symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) - resolved_b, patches = asmbloc.asm_resolve_final( - mn, blocs[0], symbol_pool) + patches = asmbloc.asm_resolve_final(mn, blocs[0], symbol_pool) interm = ir(symbol_pool) - for bbl in resolved_b: + for bbl in blocs[0]: interm.add_bloc(bbl) return symb_exec(interm, inputstate, debug) diff --git a/test/arch/x86/unit/asm_test.py b/test/arch/x86/unit/asm_test.py index f28c4d2f..c6381d9e 100644 --- a/test/arch/x86/unit/asm_test.py +++ b/test/arch/x86/unit/asm_test.py @@ -20,18 +20,6 @@ if filename and os.path.isfile(filename): reg_and_id = dict(mn_x86.regs.all_regs_ids_byname) - -def my_ast_int2expr(a): - return ExprInt32(a) - - -def my_ast_id2expr(t): - return reg_and_id.get(t, ExprId(t, size=32)) - -my_var_parser = parse_ast(my_ast_id2expr, my_ast_int2expr) -base_expr.setParseAction(my_var_parser) - - class Asm_Test(object): def __init__(self): self.myjit = Machine("x86_32").jitter() @@ -53,8 +41,7 @@ class Asm_Test(object): # fix shellcode addr symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) s = StrPatchwork() - resolved_b, patches = asmbloc.asm_resolve_final( - mn_x86, blocs[0], symbol_pool) + patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool) for offset, raw in patches.items(): s[offset] = raw diff --git a/test/core/interval.py b/test/core/interval.py index 34537d25..4572ac50 100644 --- a/test/core/interval.py +++ b/test/core/interval.py @@ -49,6 +49,7 @@ assert((i2 in i3) is False) assert((i3 in i2)) assert((i2 in i3) is False) +assert((i3 in i14)) assert(interval.cannon_list(i1.intervals) == i1.intervals) |