diff options
| author | serpilliere <serpilliere@users.noreply.github.com> | 2015-01-26 19:52:07 +0100 |
|---|---|---|
| committer | serpilliere <serpilliere@users.noreply.github.com> | 2015-01-26 19:52:07 +0100 |
| commit | a3900fe1a46b585771a576478ce47bc94e35537f (patch) | |
| tree | e06802c76e800e8c57aa8d1a903fd90eb440854b | |
| parent | 9ce67b459cc8946c81181ab22030786450b8b0c5 (diff) | |
| parent | cc48c135915bd71ba23bd895fe6f2d57e61a0995 (diff) | |
| download | miasm-a3900fe1a46b585771a576478ce47bc94e35537f.tar.gz miasm-a3900fe1a46b585771a576478ce47bc94e35537f.zip | |
Merge pull request #49 from commial/refactor-asmbloc
Refactor asmbloc
| -rw-r--r-- | example/asm/simple.py | 2 | ||||
| -rw-r--r-- | miasm2/core/asmbloc.py | 178 | ||||
| -rw-r--r-- | miasm2/jitter/jitcore.py | 3 | ||||
| -rw-r--r-- | test/arch/x86/sem.py | 2 |
4 files changed, 41 insertions, 144 deletions
diff --git a/example/asm/simple.py b/example/asm/simple.py index 1ae3ae05..8664672d 100644 --- a/example/asm/simple.py +++ b/example/asm/simple.py @@ -30,7 +30,7 @@ symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) resolved_b, patches = asmbloc.asm_resolve_final(mn_x86, blocs[0], symbol_pool) # Show resolved blocs -for bloc, dummy in resolved_b: +for bloc in resolved_b: print bloc # Print offset -> bytes diff --git a/miasm2/core/asmbloc.py b/miasm2/core/asmbloc.py index b80dd1e3..ee93cba7 100644 --- a/miasm2/core/asmbloc.py +++ b/miasm2/core/asmbloc.py @@ -2,15 +2,15 @@ #-*- coding:utf-8 -*- import logging +import inspect + + import miasm2.expression.expression as m2_expr from miasm2.expression.simplifications import expr_simp - from miasm2.expression.modint import moduint, modint -from miasm2.core.graph import DiGraph from miasm2.core.utils import Disasm_Exception, pck from miasm2.core.graph import DiGraph -import inspect log_asmbloc = logging.getLogger("asmbloc") console_handler = logging.StreamHandler() @@ -29,23 +29,18 @@ def is_int(a): def expr_is_label(e): - if isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label): - return True - return False + return isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label) def expr_is_int_or_label(e): - if isinstance(e, m2_expr.ExprInt): - return True - if isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label): - return True - return False + return isinstance(e, m2_expr.ExprInt) or \ + (isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label)) class asm_label: + "Stand for an assembly label" def __init__(self, name="", offset=None): - # print whoami() self.fixedblocs = False if is_int(name): name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) @@ -373,8 +368,7 @@ class asm_symbol_pool: def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool, dont_dis=[], split_dis=[ - ], follow_call=False, patch_instr_symb=True, - dontdis_retcall=False, lines_wd=None, + ], follow_call=False, dontdis_retcall=False, lines_wd=None, dis_bloc_callback=None, dont_dis_nulstart_bloc=False, attrib={}): # pool_bin.offset = offset @@ -476,8 +470,6 @@ def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool, def split_bloc(mnemo, attrib, pool_bin, blocs, symbol_pool, more_ref=None, dis_bloc_callback=None): - i = -1 - err = False if not more_ref: more_ref = [] @@ -507,7 +499,6 @@ def split_bloc(mnemo, attrib, pool_bin, blocs, log_asmbloc.debug("split bloc %x" % off) if new_b is None: log_asmbloc.error("cannot split %x!!" % off) - err = True break if dis_bloc_callback: offsets_to_dis = set( @@ -519,16 +510,11 @@ def split_bloc(mnemo, attrib, pool_bin, blocs, blocs.append(new_b) a, b = cb.get_range() - """ - if err: - break - """ return blocs def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[], - split_dis=[], follow_call=False, patch_instr_symb=True, - dontdis_retcall=False, + split_dis=[], follow_call=False, dontdis_retcall=False, blocs_wd=None, lines_wd=None, blocs=None, dis_bloc_callback=None, dont_dis_nulstart_bloc=False, attrib={}): @@ -565,8 +551,7 @@ def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[], l = symbol_pool.getby_offset_create(n) cur_bloc = asm_bloc(l) todo += dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool, - dont_dis, split_dis, follow_call, patch_instr_symb, - dontdis_retcall, + dont_dis, split_dis, follow_call, dontdis_retcall, dis_bloc_callback=dis_bloc_callback, lines_wd=lines_wd, dont_dis_nulstart_bloc=dont_dis_nulstart_bloc, @@ -647,7 +632,7 @@ def fix_expr_val(e, symbols): return e -def guess_blocs_size(mnemo, blocs, symbols): +def guess_blocs_size(mnemo, blocs): """ Asm and compute max bloc length """ @@ -778,7 +763,6 @@ def gen_non_free_mapping(group_bloc, dont_erase=[]): non_free_mapping = {} # calculate free space for bloc placing for g in group_bloc: - rest_len = 0 g.fixedblocs = False # if a label in the group is fixed diff_offset = 0 @@ -808,7 +792,6 @@ def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF log_asmbloc.info(str(dont_erase)) bloc_list = [] unr_bloc = reduce(lambda x, y: x + group_bloc[y], group_bloc, []) - ending_ad = [] non_free_mapping = gen_non_free_mapping(group_bloc, dont_erase) free_interval = gen_free_space_intervals(non_free_mapping, max_offset) @@ -885,7 +868,7 @@ def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF continue resolving = True log_asmbloc.info("bloc %s resolved" % unr_bloc[i].label) - bloc_list.append((unr_bloc[i], 0)) + bloc_list.append(unr_bloc[i]) g_found = None for g in g_tab: if unr_bloc[i] in group_bloc[g]: @@ -911,11 +894,11 @@ def resolve_symbol(group_bloc, symbol_pool, dont_erase=[], max_offset=0xFFFFFFFF continue for g in g_tab: - print g + log_asmbloc.debug(g) if g.fixedblocs: - print "fixed" + log_asmbloc.debug("fixed") else: - print "not fixed" + log_asmbloc.debug("not fixed") raise ValueError('enable to fix bloc') return bloc_list @@ -973,12 +956,11 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative = symbols.add_label(s, v.offset_g) # print symbols # test if bad encoded relative - for b, t in blocs: + for bloc in blocs: offset_i = 0 - blen = 0 - my_symb_reloc_off[b.label] = [] - for instr in b.lines: + my_symb_reloc_off[bloc.label] = [] + for instr in bloc.lines: if isinstance(instr, asm_raw): if isinstance(instr.raw, list): # fix special asm_raw @@ -991,7 +973,7 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative = offset_i += instr.l continue sav_a = instr.args[:] - instr.offset = b.label.offset_g + offset_i + instr.offset = bloc.label.offset_g + offset_i args_e = instr.resolve_args_with_symbols(symbols) for i, e in enumerate(args_e): instr.args[i] = e @@ -1010,17 +992,14 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative = if len(c) != instr.l: # good len, bad offset...XXX - b.blen = b.blen - old_l + len(c) + bloc.blen = bloc.blen - old_l + len(c) instr.data = c instr.l = len(c) fini = False continue found = False for cpos, c in enumerate(candidates): - # if len(c) == len(instr.data): if len(c) == instr.l: - # print 'UPDD', repr(instr.data), repr(c) - # b.blen = b.blen-old_l+len(c) instr.data = c instr.l = len(c) @@ -1035,9 +1014,8 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative = my_s = None if my_s is not None: - my_symb_reloc_off[b.label].append(offset_i + my_s) + my_symb_reloc_off[bloc.label].append(offset_i + my_s) offset_i += instr.l - blen += instr.l assert(len(instr.data) == instr.l) # we have fixed all relative values # recompute good offsets @@ -1051,11 +1029,10 @@ def asmbloc_final(mnemo, blocs, symbol_pool, symb_reloc_off=None, conservative = def asm_resolve_final(mnemo, blocs, symbol_pool, dont_erase=[], - max_offset=0xFFFFFFFF, - symb_reloc_off=None, constrain_pos=False): + max_offset=0xFFFFFFFF, symb_reloc_off=None): if symb_reloc_off is None: symb_reloc_off = {} - guess_blocs_size(mnemo, blocs, symbol_pool) + guess_blocs_size(mnemo, blocs) bloc_g = group_blocs(blocs) resolved_b = resolve_symbol(bloc_g, symbol_pool, dont_erase=dont_erase, @@ -1064,19 +1041,18 @@ def asm_resolve_final(mnemo, blocs, symbol_pool, dont_erase=[], asmbloc_final(mnemo, resolved_b, symbol_pool, symb_reloc_off) written_bytes = {} patches = {} - for b, t in resolved_b: - offset = b.label.offset - for i in b.lines: - assert(i.data is not None) - patches[offset] = i.data - for c in range(i.l): - if offset + c in written_bytes: + for bloc in resolved_b: + offset = bloc.label.offset + for line in bloc.lines: + assert(line.data is not None) + patches[offset] = line.data + for cur_pos in xrange(line.l): + if offset + cur_pos in written_bytes: raise ValueError( "overlapping bytes in asssembly %X" % int(offset)) - written_bytes[offset + c] = 1 - i.offset = offset - i.l = i.l - offset += i.l + written_bytes[offset + cur_pos] = 1 + line.offset = offset + offset += line.l return resolved_b, patches @@ -1220,7 +1196,7 @@ def bloc_find_path_next(blocs, blocby_label, a, b, path=None): if x.c_t != asm_constraint.c_next: continue if not x.label in blocby_label: - print 'XXX unknown label' + log_asmbloc.error('XXX unknown label') continue x = blocby_label[x.label] all_path += bloc_find_path_next(blocs, blocby_label, x, b, path + [a]) @@ -1230,87 +1206,13 @@ def bloc_find_path_next(blocs, blocby_label, a, b, path=None): return all_path -def bloc_merge(blocs, symbol_pool, dont_merge=[]): - i = -1 - """ - # TODO XXXX implement find all path for digraph - - g = blist2graph(blocs) - g.lbl2node = dict([(b.label, b) for b in blocs]) - - while i<len(blocs)-1: - i+=1 - b = blocs[i] - if b.label in dont_merge: - continue - - successors = [x for x in g.successors(b.label)] - predecessors = [x for x in g.predecessors(b.label)] - # if bloc doesn't self ref - if b.label in successors: - continue - # and bloc has only one parent - if len(predecessors) != 1: - continue - # may merge - bpl = predecessors[0] - - # and parent has only one son - p_s = [x for x in g.successors(bpl)] - if len(p_s)!=1: - continue - - bp = g.lbl2node[bpl] - # and parent has not a next constraint yet - found = False - for gpl in g.predecessors(bpl): - gp = g.lbl2node[gpl] - for x in gp.bto: - if x.c_t != asm_constraint.c_next: - continue - if x.label == bpl: - found = True - break - if found: - break - if found: - continue - if bp.lines: - l = bp.lines[-1] - #jmp opt; jcc opt - if l.is_subcall(): - continue - if l.breakflow() and l.dstflow(): - bp.lines.pop() - #merge - #sons = b.bto[:] - - # update parents - for s in b.bto: - if not isinstance(s.label, asm_label): continue - if s.label.name == None: - continue - if not s.label in g.lbl2node: - print "unknown parent XXX" - continue - bs = g.lbl2node[s.label] - for p in g.predecessors(bs.label): - if p == b.label: - bs.parents.discard(p) - bs.parents.add(bp.label) - bp.lines+=b.lines - bp.bto = b.bto - #symbol_pool.remove(b.label) - del(blocs[i]) - i = -1 - - return - """ +def bloc_merge(blocs, dont_merge=[]): blocby_label = {} for b in blocs: blocby_label[b.label] = b b.parents = find_parents(blocs, b.label) + i = -1 while i < len(blocs) - 1: i += 1 b = blocs[i] @@ -1354,7 +1256,7 @@ def bloc_merge(blocs, symbol_pool, dont_merge=[]): if s.label.name == None: continue if not s.label in blocby_label: - print "unknown parent XXX" + log_asmbloc.error("unknown parent XXX") continue bs = blocby_label[s.label] for p in list(bs.parents): @@ -1363,7 +1265,7 @@ def bloc_merge(blocs, symbol_pool, dont_merge=[]): bs.parents.add(bp.label) bp.lines += b.lines bp.bto = b.bto - # symbol_pool.remove(b.label) + del(blocs[i]) i = -1 @@ -1378,7 +1280,6 @@ class disasmEngine(object): self.dont_dis = [] self.split_dis = [] self.follow_call = False - self.patch_instr_symb = True self.dontdis_retcall = False self.lines_wd = None self.blocs_wd = None @@ -1388,14 +1289,12 @@ class disasmEngine(object): self.__dict__.update(kwargs) def dis_bloc(self, offset): - job_done = set() l = self.symbol_pool.getby_offset_create(offset) current_bloc = asm_bloc(l) dis_bloc(self.arch, self.bs, current_bloc, offset, self.job_done, self.symbol_pool, dont_dis=self.dont_dis, split_dis=self.split_dis, follow_call=self.follow_call, - patch_instr_symb=self.patch_instr_symb, dontdis_retcall=self.dontdis_retcall, lines_wd=self.lines_wd, dis_bloc_callback=self.dis_bloc_callback, @@ -1408,7 +1307,6 @@ class disasmEngine(object): self.symbol_pool, dont_dis=self.dont_dis, split_dis=self.split_dis, follow_call=self.follow_call, - patch_instr_symb=self.patch_instr_symb, dontdis_retcall=self.dontdis_retcall, blocs_wd=self.blocs_wd, lines_wd=self.lines_wd, diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index 5ebc615e..c025c956 100644 --- a/miasm2/jitter/jitcore.py +++ b/miasm2/jitter/jitcore.py @@ -116,8 +116,7 @@ class JitCore(object): try: asmbloc.dis_bloc(self.ir_arch.arch, self.bs, cur_bloc, addr, set(), self.ir_arch.symbol_pool, [], - follow_call=False, patch_instr_symb=True, - dontdis_retcall=False, + follow_call=False, dontdis_retcall=False, lines_wd=self.options["jit_maxline"], # max 10 asm lines attrib=self.ir_arch.attrib, diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index de2d14f3..d2c998c8 100644 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -51,7 +51,7 @@ def compute_txt(ir, mode, txt, inputstate={}, debug=False): mn, blocs[0], symbol_pool) interm = ir(symbol_pool) for bbl in resolved_b: - interm.add_bloc(bbl[0]) + interm.add_bloc(bbl) return symb_exec(interm, inputstate, debug) op_add = lambda a, b: a+b |