diff options
Diffstat (limited to 'miasm2')
47 files changed, 923 insertions, 673 deletions
diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index 5d9374da..16e573bb 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -1,8 +1,9 @@ import logging +import warnings from miasm2.core.bin_stream import bin_stream_str, bin_stream_elf, bin_stream_pe from miasm2.jitter.csts import PAGE_READ -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB log = logging.getLogger("binary") @@ -94,7 +95,7 @@ class Container(object): self._bin_stream = None self._entry_point = None self._arch = None - self._symbol_pool = AsmSymbolPool() + self._loc_db = LocationDB() # Launch parsing self.parse(*args, **kwargs) @@ -120,10 +121,15 @@ class Container(object): return self._arch @property - def symbol_pool(self): - "AsmSymbolPool instance preloaded with container symbols (if any)" - return self._symbol_pool + def loc_db(self): + "LocationDB instance preloaded with container symbols (if any)" + return self._loc_db + @property + def symbol_pool(self): + "[DEPRECATED API]" + warnings.warn("Deprecated API: use 'loc_db'") + return self.loc_db ## Format dependent classes class ContainerPE(Container): @@ -205,13 +211,13 @@ class ContainerELF(Container): if not name: continue try: - self._symbol_pool.add_location(name, offset) + self._loc_db.add_location(name, offset) except ValueError: # Two symbols points on the same offset log.warning("Same offset (%s) for %s and %s", (hex(offset), name, - self._symbol_pool.getby_offset(offset))) + self._loc_db.get_offset_location(offset))) continue diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index 0f4d168d..f5a2b043 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -2,7 +2,7 @@ from miasm2.expression.expression import ExprInt, ExprLoc, ExprAff from miasm2.core.graph import DiGraph -from miasm2.core.asmblock import AsmSymbolPool +from miasm2.core.locationdb import LocationDB from miasm2.expression.simplifications import expr_simp from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IRBlock, AssignBlock @@ -297,8 +297,8 @@ class DependencyResult(DependencyState): line_nb).assignblks # Eval the block - symbol_pool = AsmSymbolPool() - temp_loc = symbol_pool.getby_name_create("Temp") + loc_db = LocationDB() + temp_loc = loc_db.get_or_create_name_location("Temp") symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) @@ -322,10 +322,10 @@ class DependencyResultImplicit(DependencyResult): generated loc_keys """ out = [] - expected = self._ira.symbol_pool.canonize_to_exprloc(expected) + expected = self._ira.loc_db.canonize_to_exprloc(expected) expected_is_loc_key = expected.is_loc() for consval in possible_values(expr): - value = self._ira.symbol_pool.canonize_to_exprloc(consval.value) + value = self._ira.loc_db.canonize_to_exprloc(consval.value) if expected_is_loc_key and value != expected: continue if not expected_is_loc_key and value.is_loc_key(): diff --git a/miasm2/analysis/disasm_cb.py b/miasm2/analysis/disasm_cb.py index 0dc482ac..bb8223e8 100644 --- a/miasm2/analysis/disasm_cb.py +++ b/miasm2/analysis/disasm_cb.py @@ -2,8 +2,8 @@ from miasm2.expression.expression import ExprInt, ExprId, ExprMem, match_expr from miasm2.expression.simplifications import expr_simp -from miasm2.core.asmblock \ - import AsmSymbolPool, AsmConstraintNext, AsmConstraintTo +from miasm2.core.asmblock import AsmConstraintNext, AsmConstraintTo +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import upck32 @@ -21,10 +21,10 @@ def get_ira(mnemo, attrib): def arm_guess_subcall( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): ira = get_ira(mnemo, attrib) - sp = AsmSymbolPool() + sp = LocationDB() ir_arch = ira(sp) print '###' print cur_bloc @@ -49,7 +49,7 @@ def arm_guess_subcall( l = cur_bloc.lines[-1] if lr_val.arg != l.offset + l.l: continue - l = symbol_pool.getby_offset_create(int(lr_val)) + l = loc_db.get_or_create_offset_location(int(lr_val)) c = AsmConstraintNext(l) to_add.add(c) @@ -60,13 +60,13 @@ def arm_guess_subcall( def arm_guess_jump_table( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): ira = get_ira(mnemo, attrib) jra = ExprId('jra') jrb = ExprId('jrb') - sp = AsmSymbolPool() + sp = LocationDB() ir_arch = ira(sp) ir_arch.add_block(cur_bloc) @@ -111,7 +111,7 @@ def arm_guess_jump_table( for ad in addrs: offsets_to_dis.add(ad) - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) c = AsmConstraintTo(l) cur_bloc.addto(c) @@ -119,6 +119,6 @@ guess_funcs = [] def guess_multi_cb( - mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool): + mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db): for f in guess_funcs: - f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool) + f(mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, loc_db) diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 1fd177bb..87d11e0a 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -159,14 +159,14 @@ class DSEEngine(object): self.symb_concrete = None # Concrete SymbExec for path desambiguisation self.mdis = None # DisasmEngine - self.symbol_pool = self.ir_arch.symbol_pool + self.loc_db = self.ir_arch.loc_db def prepare(self): """Prepare the environment for attachment with a jitter""" # Disassembler self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), lines_wd=1, - symbol_pool=self.symbol_pool) + loc_db=self.loc_db) # Symbexec engine ## Prepare symbexec engines @@ -297,7 +297,7 @@ class DSEEngine(object): # Call callbacks associated to the current address cur_addr = self.jitter.pc if isinstance(cur_addr, LocKey): - lbl = self.ir_arch.symbol_pool.loc_key_to_label(cur_addr) + lbl = self.ir_arch.loc_db.loc_key_to_label(cur_addr) cur_addr = lbl.offset if cur_addr in self.handler: @@ -348,7 +348,7 @@ class DSEEngine(object): self.symb.run_block_at(cur_addr) if not (isinstance(next_addr_concrete, ExprLoc) and - self.ir_arch.symbol_pool.loc_key_to_offset( + self.ir_arch.loc_db.get_location_offset( next_addr_concrete.loc_key ) is None): # Not a lbl_gen, exit @@ -604,17 +604,17 @@ class DSEPathConstraint(DSEEngine): self.cur_solver.add(self.z3_trans.from_expr(cons)) def handle(self, cur_addr): - cur_addr = self.ir_arch.symbol_pool.canonize_to_exprloc(cur_addr) + cur_addr = self.ir_arch.loc_db.canonize_to_exprloc(cur_addr) symb_pc = self.eval_expr(self.ir_arch.IRDst) possibilities = possible_values(symb_pc) cur_path_constraint = set() # path_constraint for the concrete path if len(possibilities) == 1: dst = next(iter(possibilities)).value - dst = self.ir_arch.symbol_pool.canonize_to_exprloc(dst) + dst = self.ir_arch.loc_db.canonize_to_exprloc(dst) assert dst == cur_addr else: for possibility in possibilities: - target_addr = self.ir_arch.symbol_pool.canonize_to_exprloc( + target_addr = self.ir_arch.loc_db.canonize_to_exprloc( possibility.value ) path_constraint = set() # Set of ExprAff for the possible path diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index a57e585f..529621c4 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -263,7 +263,7 @@ conds_inv_expr, _, conds_inv_info = gen_regs(CONDS_INV, {}) class aarch64_arg(m_arg): - def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): if size_hint is None: size_hint = 64 if fixed_size is None: @@ -276,25 +276,25 @@ class aarch64_arg(m_arg): if isinstance(value.name, ExprId): fixed_size.add(value.name.size) return value.name - loc_key = symbol_pool.getby_name_create(value.name) + loc_key = loc_db.get_or_create_name_location(value.name) return ExprLoc(loc_key, size_hint) if isinstance(value, AstInt): assert size_hint is not None return ExprInt(value.value, size_hint) if isinstance(value, AstOp): if value.op == "segm": - segm = self.asm_ast_to_expr(value.args[0], symbol_pool) - ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + segm = self.asm_ast_to_expr(value.args[0], loc_db) + ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) return ExprOp('segm', segm, ptr) - args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] if len(fixed_size) == 0: # No fixed size pass elif len(fixed_size) == 1: # One fixed size, regen all size = list(fixed_size)[0] - args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] else: raise ValueError("Size conflict") @@ -310,13 +310,13 @@ class instruction_aarch64(instruction): super(instruction_aarch64, self).__init__(*args, **kargs) @staticmethod - def arg2str(expr, index=None, symbol_pool=None): + def arg2str(expr, index=None, loc_db=None): wb = False if expr.is_id() or expr.is_int(): return str(expr) elif expr.is_loc(): - if symbol_pool is not None: - return symbol_pool.str_loc_key(expr.loc_key) + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) else: return str(expr) elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: @@ -368,13 +368,13 @@ class instruction_aarch64(instruction): else: return 0 - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): index = self.mnemo_flow_to_dst_index(self.name) expr = self.args[index] if not expr.is_int(): return addr = expr.arg + self.offset - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) self.args[index] = m2_expr.ExprLoc(loc_key, expr.size) def breakflow(self): @@ -383,14 +383,14 @@ class instruction_aarch64(instruction): def is_subcall(self): return self.name in ["BLR", "BL"] - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): index = self.mnemo_flow_to_dst_index(self.name) return [self.args[index]] def splitflow(self): return self.name in BRCOND + ["BLR", "BL"] - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 64 def fixDstOffset(self): @@ -502,7 +502,7 @@ class mn_aarch64(cls_mn): else: raise NotImplementedError('bad attrib') - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 def reset_class(self): @@ -800,8 +800,8 @@ def set_imm_to_size(size, expr): class aarch64_imm_sf(imm_noarg): parser = base_expr - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(aarch64_imm_sf, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(aarch64_imm_sf, self).fromstring(text, loc_db, parser_result) if start is None: return start, stop size = self.parent.args[0].expr.size diff --git a/miasm2/arch/aarch64/ira.py b/miasm2/arch/aarch64/ira.py index 5a89e910..a895b549 100644 --- a/miasm2/arch/aarch64/ira.py +++ b/miasm2/arch/aarch64/ira.py @@ -6,22 +6,22 @@ from miasm2.arch.aarch64.sem import ir_aarch64l, ir_aarch64b class ir_a_aarch64l_base(ir_aarch64l, ira): - def __init__(self, symbol_pool=None): - ir_aarch64l.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_aarch64l.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 class ir_a_aarch64b_base(ir_aarch64b, ira): - def __init__(self, symbol_pool=None): - ir_aarch64b.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_aarch64b.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 class ir_a_aarch64l(ir_a_aarch64l_base): - def __init__(self, symbol_pool=None): - ir_a_aarch64l_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_aarch64l_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 def get_out_regs(self, _): @@ -45,6 +45,6 @@ class ir_a_aarch64l(ir_a_aarch64l_base): class ir_a_aarch64b(ir_a_aarch64b_base, ir_a_aarch64l): - def __init__(self, symbol_pool=None): - ir_a_aarch64b_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_aarch64b_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.X0 diff --git a/miasm2/arch/aarch64/jit.py b/miasm2/arch/aarch64/jit.py index b557a179..91c32c68 100644 --- a/miasm2/arch/aarch64/jit.py +++ b/miasm2/arch/aarch64/jit.py @@ -1,7 +1,7 @@ import logging from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core import asmblock +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck64, upck64 from miasm2.arch.aarch64.sem import ir_aarch64b, ir_aarch64l @@ -15,8 +15,7 @@ class jitter_aarch64l(Jitter): max_reg_arg = 8 def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - Jitter.__init__(self, ir_aarch64l(sp), *args, **kwargs) + Jitter.__init__(self, ir_aarch64l(LocationDB()), *args, **kwargs) self.vm.set_little_endian() def push_uint64_t(self, value): @@ -76,6 +75,5 @@ class jitter_aarch64l(Jitter): class jitter_aarch64b(jitter_aarch64l): def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() - Jitter.__init__(self, ir_aarch64b(sp), *args, **kwargs) + Jitter.__init__(self, ir_aarch64b(LocationDB()), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index c232e8dc..a17c0f14 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -861,8 +861,8 @@ class aarch64info: class ir_aarch64l(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_aarch64, "l", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_aarch64, "l", loc_db) self.pc = PC self.sp = SP self.IRDst = m2_expr.ExprId('IRDst', 64) @@ -945,8 +945,8 @@ class ir_aarch64l(IntermediateRepresentation): class ir_aarch64b(ir_aarch64l): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_aarch64, "b", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_aarch64, "b", loc_db) self.pc = PC self.sp = SP self.IRDst = m2_expr.ExprId('IRDst', 64) diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 624642cf..1810cd6a 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -343,13 +343,13 @@ class instruction_arm(instruction): super(instruction_arm, self).__init__(*args, **kargs) @staticmethod - def arg2str(expr, index=None, symbol_pool=None): + def arg2str(expr, index=None, loc_db=None): wb = False if expr.is_id() or expr.is_int(): return str(expr) elif expr.is_loc(): - if symbol_pool is not None: - return symbol_pool.str_loc_key(expr.loc_key) + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) else: return str(expr) if isinstance(expr, ExprOp) and expr.op in expr2shift_dct: @@ -422,7 +422,7 @@ class instruction_arm(instruction): def dstflow(self): return self.name in conditional_branch + unconditional_branch - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): expr = self.args[0] if not isinstance(expr, ExprInt): return @@ -430,7 +430,7 @@ class instruction_arm(instruction): addr = expr.arg + self.offset else: addr = expr.arg + self.offset - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): @@ -447,7 +447,7 @@ class instruction_arm(instruction): return True return self.additional_info.lnk - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0]] def splitflow(self): @@ -459,7 +459,7 @@ class instruction_arm(instruction): return False return self.breakflow() and self.additional_info.cond != 14 - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -494,7 +494,7 @@ class instruction_armt(instruction_arm): return True return self.name in conditional_branch + unconditional_branch - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): if self.name in ["CBZ", "CBNZ"]: expr = self.args[1] else: @@ -512,7 +512,7 @@ class instruction_armt(instruction_arm): else: addr = expr.arg + self.offset - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) dst = ExprLoc(loc_key, expr.size) if self.name in ["CBZ", "CBNZ"]: @@ -529,7 +529,7 @@ class instruction_armt(instruction_arm): return True return False - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if self.name in ['CBZ', 'CBNZ']: return [self.args[1]] return [self.args[0]] @@ -662,7 +662,7 @@ class mn_arm(cls_mn): raise NotImplementedError('bad attrib') - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 @@ -769,28 +769,28 @@ class mn_armt(cls_mn): args = [a.expr for a in self.args] return args - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 class arm_arg(m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - loc_key = symbol_pool.getby_name_create(arg.name) + loc_key = loc_db.get_or_create_name_location(arg.name) return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -2809,8 +2809,8 @@ class armt_aif(reg_noarg, arm_arg): return ret return self.value != 0 - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(armt_aif, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(armt_aif, self).fromstring(text, loc_db, parser_result) if self.expr.name == "X": return None, None return start, stop diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py index 8997fa2b..5e21778d 100644 --- a/miasm2/arch/arm/disasm.py +++ b/miasm2/arch/arm/disasm.py @@ -2,7 +2,7 @@ from miasm2.core.asmblock import AsmConstraint, disasmEngine from miasm2.arch.arm.arch import mn_arm, mn_armt -def cb_arm_fix_call(mn, cur_bloc, symbol_pool, offsets_to_dis, *args, **kwargs): +def cb_arm_fix_call(mn, cur_bloc, loc_db, offsets_to_dis, *args, **kwargs): """ for arm: MOV LR, PC @@ -24,7 +24,7 @@ def cb_arm_fix_call(mn, cur_bloc, symbol_pool, offsets_to_dis, *args, **kwargs): return if not l2.args[1] in values: return - loc_key_cst = symbol_pool.getby_offset_create(l1.offset + 4) + loc_key_cst = loc_db.get_or_create_offset_location(l1.offset + 4) cur_bloc.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(l1.offset + 4) diff --git a/miasm2/arch/arm/ira.py b/miasm2/arch/arm/ira.py index cfcb294c..0c84c919 100644 --- a/miasm2/arch/arm/ira.py +++ b/miasm2/arch/arm/ira.py @@ -5,20 +5,20 @@ from miasm2.arch.arm.sem import ir_arml, ir_armtl, ir_armb, ir_armtb class ir_a_arml_base(ir_arml, ira): - def __init__(self, symbol_pool=None): - ir_arml.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_arml.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_armb_base(ir_armb, ira): - def __init__(self, symbol_pool=None): - ir_armb.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_armb.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_arml(ir_a_arml_base): - def __init__(self, symbol_pool=None): - ir_a_arml_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_arml_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 def get_out_regs(self, _): @@ -41,17 +41,17 @@ class ir_a_arml(ir_a_arml_base): class ir_a_armb(ir_a_armb_base, ir_a_arml): - def __init__(self, symbol_pool=None): - ir_a_armb_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_armb_base.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_armtl(ir_armtl, ir_a_arml): - def __init__(self, symbol_pool=None): - ir_armtl.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_armtl.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 class ir_a_armtb(ir_a_armtl, ir_armtb, ir_a_armb): - def __init__(self, symbol_pool=None): - ir_armtb.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_armtb.__init__(self, loc_db) self.ret_reg = self.arch.regs.R0 diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index ef2e14ae..10a7c644 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -1,7 +1,7 @@ import logging from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core import asmblock +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck32, upck32 from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_inv, tab_cond from miasm2.jitter.codegen import CGen @@ -55,7 +55,7 @@ class jitter_arml(Jitter): C_Gen = arm_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_arml(sp), *args, **kwargs) self.vm.set_little_endian() @@ -115,7 +115,7 @@ class jitter_armb(jitter_arml): C_Gen = arm_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_armb(sp), *args, **kwargs) self.vm.set_big_endian() @@ -124,6 +124,6 @@ class jitter_armtl(jitter_arml): C_Gen = arm_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_armtl(sp), *args, **kwargs) self.vm.set_little_endian() diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index ccd56e8f..a3d12514 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -441,8 +441,8 @@ def sdiv(ir, instr, a, b, c=None): if c is None: b, c = a, b - loc_div = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) - loc_except = ExprId(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_except = ExprId(ir.loc_db.add_location(), ir.IRDst.size) loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) e.append(ExprAff(ir.IRDst, ExprCond(c, loc_div, loc_except))) @@ -474,8 +474,8 @@ def udiv(ir, instr, a, b, c=None): - loc_div = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) - loc_except = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_div = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_except = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) e.append(ExprAff(ir.IRDst, ExprCond(c, loc_div, loc_except))) @@ -1266,7 +1266,7 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): loc_next = ir.get_next_loc_key(instr) loc_next_expr = ExprLoc(loc_next, 32) - loc_do = ir.symbol_pool.gen_loc_key() + loc_do = ir.loc_db.add_location() loc_do_expr = ExprLoc(loc_do, 32) dst_cond = ExprCond(cond, loc_do_expr, loc_next_expr) @@ -1474,8 +1474,8 @@ class arminfo: class ir_arml(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_arm, "l", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_arm, "l", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) @@ -1556,7 +1556,7 @@ class ir_arml(IntermediateRepresentation): instr = block.lines[index] # Add conditionnal jump to current irblock - loc_do = self.symbol_pool.gen_loc_key() + loc_do = self.loc_db.add_location() loc_next = self.get_next_loc_key(instr) if hint: @@ -1630,8 +1630,8 @@ class ir_arml(IntermediateRepresentation): class ir_armb(ir_arml): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_arm, "b", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_arm, "b", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) @@ -1639,8 +1639,8 @@ class ir_armb(ir_arml): class ir_armtl(ir_arml): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_armt, "l", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_armt, "l", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) @@ -1665,8 +1665,8 @@ class ir_armtl(ir_arml): class ir_armtb(ir_armtl): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_armt, "b", symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_armt, "b", loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 32) diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 1502cde4..974644dc 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -60,12 +60,12 @@ class instruction_mips32(cpu.instruction): @staticmethod - def arg2str(expr, index=None, symbol_pool=None): + def arg2str(expr, index=None, loc_db=None): if expr.is_id() or expr.is_int(): return str(expr) elif expr.is_loc(): - if symbol_pool is not None: - return symbol_pool.str_loc_key(expr.loc_key) + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) else: return str(expr) assert(isinstance(expr, ExprMem)) @@ -93,11 +93,11 @@ class instruction_mips32(cpu.instruction): raise NotImplementedError("TODO %s"%self) return i - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): if self.name in ["J", 'JAL']: expr = self.args[0].arg addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) self.args[0] = ExprLoc(loc_key, expr.size) return @@ -107,7 +107,7 @@ class instruction_mips32(cpu.instruction): if not isinstance(expr, ExprInt): return addr = expr.arg + self.offset - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) self.args[ndx] = ExprLoc(loc_key, expr.size) def breakflow(self): @@ -122,7 +122,7 @@ class instruction_mips32(cpu.instruction): return True return False - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if self.name in br_0: return [self.args[0]] elif self.name in br_1: @@ -147,7 +147,7 @@ class instruction_mips32(cpu.instruction): return True return False - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -259,23 +259,23 @@ def mips32op(name, fields, args=None, alias=False): #type(name, (mn_mips32b,), dct) class mips32_arg(cpu.m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - loc_key = symbol_pool.getby_name_create(arg.name) + loc_key = loc_db.get_or_create_name_location(arg.name) return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index b6d92ee0..53c2c6b3 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -6,8 +6,8 @@ from miasm2.ir.analysis import ira from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b class ir_a_mips32l(ir_mips32l, ira): - def __init__(self, symbol_pool=None): - ir_mips32l.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_mips32l.__init__(self, loc_db) self.ret_reg = self.arch.regs.V0 def pre_add_instr(self, block, instr, assignments, ir_blocks_all, gen_pc_updt): @@ -28,7 +28,7 @@ class ir_a_mips32l(ir_mips32l, ira): new_irblocks.append(irb) continue if lr_val.is_loc(): - offset = self.symbol_pool.loc_key_to_offset(lr_val.loc_key) + offset = self.loc_db.get_location_offset(lr_val.loc_key) if offset is not None: lr_val = ExprInt(offset, 32) if not lr_val.is_int(): @@ -70,6 +70,6 @@ class ir_a_mips32l(ir_mips32l, ira): class ir_a_mips32b(ir_mips32b, ir_a_mips32l): - def __init__(self, symbol_pool=None): - ir_mips32b.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_mips32b.__init__(self, loc_db) self.ret_reg = self.arch.regs.V0 diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index c637fb13..a0df64d6 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -1,7 +1,7 @@ import logging from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core import asmblock +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck32, upck32 from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b from miasm2.jitter.codegen import CGen @@ -70,7 +70,7 @@ class mipsCGen(CGen): """ loc_key = self.get_block_post_label(block) - offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + offset = self.ir_arch.loc_db.get_location_offset(loc_key) out = (self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, m2_expr.ExprId('branch_dst_irdst', 32), @@ -85,7 +85,7 @@ class jitter_mips32l(Jitter): C_Gen = mipsCGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_mips32l(sp), *args, **kwargs) self.vm.set_little_endian() @@ -145,6 +145,6 @@ class jitter_mips32l(Jitter): class jitter_mips32b(jitter_mips32l): def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_mips32b(sp), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index fd4fa655..acf7370f 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -469,8 +469,8 @@ def get_mnemo_expr(ir, instr, *args): class ir_mips32l(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_mips32, 'l', symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_mips32, 'l', loc_db) self.pc = mn_mips32.getpc() self.sp = mn_mips32.getsp() self.IRDst = m2_expr.ExprId('IRDst', 32) @@ -490,14 +490,14 @@ class ir_mips32l(IntermediateRepresentation): return instr_ir, new_extra_ir def get_next_instr(self, instr): - return self.symbol_pool.getby_offset_create(instr.offset + 4) + return self.loc_db.get_or_create_offset_location(instr.offset + 4) def get_next_break_loc_key(self, instr): - return self.symbol_pool.getby_offset_create(instr.offset + 8) + return self.loc_db.get_or_create_offset_location(instr.offset + 8) class ir_mips32b(ir_mips32l): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_mips32, 'b', symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_mips32, 'b', loc_db) self.pc = mn_mips32.getpc() self.sp = mn_mips32.getsp() self.IRDst = m2_expr.ExprId('IRDst', 32) diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index 1842f577..ecf4cb13 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -59,7 +59,7 @@ sreg_p = (deref_pinc | deref_nooff | deref_off | base_expr).setParseAction(cb_ex class msp430_arg(m_arg): - def asm_ast_to_expr(self, value, symbol_pool): + def asm_ast_to_expr(self, value, loc_db): if isinstance(value, AstId): name = value.name if isinstance(name, Expr): @@ -69,17 +69,17 @@ class msp430_arg(m_arg): index = gpregs.str.index(name) reg = gpregs.expr[index] return reg - loc_key = symbol_pool.getby_name_create(value.name) + loc_key = loc_db.get_or_create_name_location(value.name) return ExprLoc(loc_key, 16) if isinstance(value, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in value.args] if None in args: return None return ExprOp(value.op, *args) if isinstance(value, AstInt): return ExprInt(value.value, 16) if isinstance(value, AstMem): - ptr = self.asm_ast_to_expr(value.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(value.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, value.size) @@ -102,14 +102,14 @@ class instruction_msp430(instruction): return self.name in ['call'] @staticmethod - def arg2str(expr, index=None, symbol_pool=None): + def arg2str(expr, index=None, loc_db=None): if isinstance(expr, ExprId): o = str(expr) elif isinstance(expr, ExprInt): o = str(expr) elif expr.is_loc(): - if symbol_pool is not None: - return symbol_pool.str_loc_key(expr.loc_key) + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) else: return str(expr) elif isinstance(expr, ExprOp) and expr.op == "autoinc": @@ -129,7 +129,7 @@ class instruction_msp430(instruction): return o - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): expr = self.args[0] if not isinstance(expr, ExprInt): return @@ -138,7 +138,7 @@ class instruction_msp430(instruction): else: addr = expr.arg + int(self.offset) - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): @@ -165,10 +165,10 @@ class instruction_msp430(instruction): def is_subcall(self): return self.name in ['call'] - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0]] - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 16 def fixDstOffset(self): @@ -289,7 +289,7 @@ class mn_msp430(cls_mn): def reset_class(self): super(mn_msp430, self).reset_class() - def getnextflow(self, symbol_pool): + def getnextflow(self, loc_db): raise NotImplementedError('not fully functional') diff --git a/miasm2/arch/msp430/ira.py b/miasm2/arch/msp430/ira.py index 0f88facc..2a850d82 100644 --- a/miasm2/arch/msp430/ira.py +++ b/miasm2/arch/msp430/ira.py @@ -6,15 +6,15 @@ from miasm2.arch.msp430.sem import ir_msp430 class ir_a_msp430_base(ir_msp430, ira): - def __init__(self, symbol_pool=None): - ir_msp430.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_msp430.__init__(self, loc_db) self.ret_reg = self.arch.regs.R15 class ir_a_msp430(ir_a_msp430_base): - def __init__(self, symbol_pool=None): - ir_a_msp430_base.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_a_msp430_base.__init__(self, loc_db) def get_out_regs(self, _): return set([self.ret_reg, self.sp]) diff --git a/miasm2/arch/msp430/jit.py b/miasm2/arch/msp430/jit.py index dcd7e91a..9fbbc639 100644 --- a/miasm2/arch/msp430/jit.py +++ b/miasm2/arch/msp430/jit.py @@ -1,5 +1,5 @@ from miasm2.jitter.jitload import Jitter -from miasm2.core import asmblock +from miasm2.core.locationdb import LocationDB from miasm2.core.utils import pck16, upck16 from miasm2.arch.msp430.sem import ir_msp430 @@ -14,7 +14,7 @@ log.setLevel(logging.CRITICAL) class jitter_msp430(Jitter): def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_msp430(sp), *args, **kwargs) self.vm.set_little_endian() diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index a3521fb5..191abe75 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -423,8 +423,8 @@ def ComposeExprAff(dst, src): class ir_msp430(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_msp430, None, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_msp430, None, loc_db) self.pc = PC self.sp = SP self.IRDst = ExprId('IRDst', 16) diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 5336ea21..c100cde3 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -34,23 +34,23 @@ deref = deref_reg | deref_reg_disp class ppc_arg(m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - loc_key = symbol_pool.getby_name_create(arg.name) + loc_key = loc_db.get_or_create_name_location(arg.name) return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -73,7 +73,7 @@ class instruction_ppc(instruction): super(instruction_ppc, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None, symbol_pool=None): + def arg2str(e, pos = None, loc_db=None): if isinstance(e, ExprId) or isinstance(e, ExprInt): return str(e) elif isinstance(e, ExprMem): @@ -109,7 +109,7 @@ class instruction_ppc(instruction): name[-3:] != 'CTR' and name[-4:] != 'CTRL') - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): name = self.name if name[-1] == '+' or name[-1] == '-': name = name[:-1] @@ -131,7 +131,7 @@ class instruction_ppc(instruction): ad = e.arg + self.offset else: ad = e.arg - loc_key = symbol_pool.getby_offset_create(ad) + loc_key = loc_db.get_or_create_offset_location(ad) s = ExprLoc(loc_key, e.size) self.args[address_index] = s @@ -144,7 +144,7 @@ class instruction_ppc(instruction): name = name[0:-1] return name[0] == 'B' and (name[-1] == 'L' or name[-2:-1] == 'LA') - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if 'LR' in self.name: return [ LR ] elif 'CTR' in self.name: @@ -163,7 +163,7 @@ class instruction_ppc(instruction): ret = ret or self.is_subcall() return ret - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -279,7 +279,7 @@ class mn_ppc(cls_mn): else: raise NotImplementedError("bad attrib") - def get_symbol_size(self, symbol, symbol_pool, mode): + def get_symbol_size(self, symbol, loc_db, mode): return 32 diff --git a/miasm2/arch/ppc/jit.py b/miasm2/arch/ppc/jit.py index e79faabd..14c203a9 100644 --- a/miasm2/arch/ppc/jit.py +++ b/miasm2/arch/ppc/jit.py @@ -1,5 +1,5 @@ from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core import asmblock +from miasm2.core.locationdb import LocationDB from miasm2.arch.ppc.sem import ir_ppc32b import struct @@ -15,7 +15,7 @@ class jitter_ppc32b(Jitter): max_reg_arg = 8 def __init__(self, *args, **kwargs): - super(jitter_ppc32b, self).__init__(ir_ppc32b(asmblock.AsmSymbolPool()), + super(jitter_ppc32b, self).__init__(ir_ppc32b(LocationDB()), *args, **kwargs) self.vm.set_big_endian() diff --git a/miasm2/arch/ppc/sem.py b/miasm2/arch/ppc/sem.py index 8ddb43ef..678ab041 100644 --- a/miasm2/arch/ppc/sem.py +++ b/miasm2/arch/ppc/sem.py @@ -606,8 +606,8 @@ def mn_do_store(ir, instr, arg1, arg2, arg3=None): ret.append(ExprAff(arg2, address)) if is_stwcx: - loc_do = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) - loc_dont = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_do = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) + loc_dont = ExprLoc(ir.loc_db.add_location(), ir.IRDst.size) loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) flags = [ ExprAff(CR0_LT, ExprInt(0,1)), ExprAff(CR0_GT, ExprInt(0,1)), @@ -842,8 +842,8 @@ sem_dir = { class ir_ppc32b(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - super(ir_ppc32b, self).__init__(mn_ppc, 'b', symbol_pool) + def __init__(self, loc_db=None): + super(ir_ppc32b, self).__init__(mn_ppc, 'b', loc_db) self.pc = mn_ppc.getpc() self.sp = mn_ppc.getsp() self.IRDst = expr.ExprId('IRDst', 32) @@ -916,9 +916,9 @@ class ir_ppc32b(IntermediateRepresentation): return instr_ir, extra_ir def get_next_instr(self, instr): - l = self.symbol_pool.getby_offset_create(instr.offset + 4) + l = self.loc_db.get_or_create_offset_location(instr.offset + 4) return l def get_next_break_loc_key(self, instr): - l = self.symbol_pool.getby_offset_create(instr.offset + 4) + l = self.loc_db.get_or_create_offset_location(instr.offset + 4) return l diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index 477edeaf..d5e9820e 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -96,23 +96,23 @@ dgbr_reg = (DEREF + LPARENT + reg_info_gbr.parser + COMMA + gpregs.parser + RPAR class sh4_arg(m_arg): - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): if isinstance(arg, AstId): if isinstance(arg.name, ExprId): return arg.name if arg.name in gpregs.str: return None - loc_key = symbol_pool.getby_name_create(arg.name) + loc_key = loc_db.get_or_create_name_location(arg.name) return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): - args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] + args = [self.asm_ast_to_expr(tmp, loc_db) for tmp in arg.args] if None in args: return None return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, 32) if isinstance(arg, AstMem): - ptr = self.asm_ast_to_expr(arg.ptr, symbol_pool) + ptr = self.asm_ast_to_expr(arg.ptr, loc_db) if ptr is None: return None return ExprMem(ptr, arg.size) @@ -165,8 +165,8 @@ class sh4_freg(sh4_reg): class sh4_dgpreg(sh4_arg): parser = dgpregs_base - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(sh4_dgpreg, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(sh4_dgpreg, self).fromstring(text, loc_db, parser_result) if start is None or self.expr == [None]: return start, stop self.expr = ExprMem(self.expr.arg, self.sz) @@ -191,8 +191,8 @@ class sh4_dgpreg(sh4_arg): class sh4_dgpregpinc(sh4_arg): parser = dgpregs_p - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(sh4_dgpregpinc, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(sh4_dgpregpinc, self).fromstring(text, loc_db, parser_result) if self.expr == [None]: return None, None if not isinstance(self.expr.arg, ExprOp): @@ -406,12 +406,12 @@ class instruction_sh4(instruction): return self.name.startswith('J') @staticmethod - def arg2str(expr, index=None, symbol_pool=None): + def arg2str(expr, index=None, loc_db=None): if isinstance(expr, ExprId) or isinstance(expr, ExprInt): return str(expr) elif expr.is_loc(): - if symbol_pool is not None: - return symbol_pool.str_loc_key(expr.loc_key) + if loc_db is not None: + return loc_db.pretty_str(expr.loc_key) else: return str(expr) assert(isinstance(expr, ExprMem)) @@ -435,7 +435,7 @@ class instruction_sh4(instruction): """ - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): e = self.args[0] if not isinstance(e, ExprInt): return @@ -443,7 +443,7 @@ class instruction_sh4(instruction): ad = e.arg+8+self.offset else: ad = e.arg+8+self.offset - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) s = ExprId(l, e.size) self.args[0] = s """ @@ -456,13 +456,13 @@ class instruction_sh4(instruction): def is_subcall(self): return self.name == 'JSR' - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0]] def splitflow(self): return self.name == 'JSR' - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return 32 def fixDstOffset(self): @@ -823,10 +823,10 @@ addop("bf", [bs('10001011'), s08imm]) return True def dstflow(self): return True - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): e = self.args[0].expr ad = e.arg*2+4+self.offset - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) s = ExprId(l, e.size) self.args[0].expr = s """ @@ -846,10 +846,10 @@ addop("bra", [bs('1010'), s12imm]) return True def dstflow(self): return True - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): e = self.args[0].expr ad = e.arg*2+4+self.offset - l = symbol_pool.getby_offset_create(ad) + l = loc_db.get_or_create_offset_location(ad) s = ExprId(l, e.size) self.args[0].expr = s """ diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index 2be64c0e..815eaee6 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -254,7 +254,7 @@ cl_or_imm |= base_expr class x86_arg(m_arg): - def asm_ast_to_expr(self, value, symbol_pool, size_hint=None, fixed_size=None): + def asm_ast_to_expr(self, value, loc_db, size_hint=None, fixed_size=None): if size_hint is None: size_hint = self.parent.v_opmode() if fixed_size is None: @@ -272,22 +272,22 @@ class x86_arg(m_arg): if value.name in ["FAR"]: return None - loc_key = symbol_pool.getby_name_create(value.name) + loc_key = loc_db.get_or_create_name_location(value.name) return ExprLoc(loc_key, size_hint) if isinstance(value, AstOp): # First pass to retreive fixed_size if value.op == "segm": - segm = self.asm_ast_to_expr(value.args[0], symbol_pool) - ptr = self.asm_ast_to_expr(value.args[1], symbol_pool, None, fixed_size) + segm = self.asm_ast_to_expr(value.args[0], loc_db) + ptr = self.asm_ast_to_expr(value.args[1], loc_db, None, fixed_size) return ExprOp('segm', segm, ptr) - args = [self.asm_ast_to_expr(arg, symbol_pool, None, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, None, fixed_size) for arg in value.args] if len(fixed_size) == 0: # No fixed size pass elif len(fixed_size) == 1: # One fixed size, regen all size = list(fixed_size)[0] - args = [self.asm_ast_to_expr(arg, symbol_pool, size, fixed_size) for arg in value.args] + args = [self.asm_ast_to_expr(arg, loc_db, size, fixed_size) for arg in value.args] else: raise ValueError("Size conflict") if None in args: @@ -299,7 +299,7 @@ class x86_arg(m_arg): return ExprInt(value.value, size_hint) if isinstance(value, AstMem): fixed_size.add(value.size) - ptr = self.asm_ast_to_expr(value.ptr, symbol_pool, None, set()) + ptr = self.asm_ast_to_expr(value.ptr, loc_db, None, set()) if ptr is None: return None return ExprMem(ptr, value.size) @@ -469,14 +469,14 @@ class instruction_x86(instruction): return True return self.name in ['CALL'] - def dstflow2label(self, symbol_pool): + def dstflow2label(self, loc_db): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return expr = self.args[0] if not expr.is_int(): return addr = expr.arg + int(self.offset) - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): @@ -511,14 +511,14 @@ class instruction_x86(instruction): def is_subcall(self): return self.name in ['CALL'] - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): if self.additional_info.g1.value & 6 and self.name in repeat_mn: addr = int(self.offset) - loc_key = symbol_pool.getby_offset_create(addr) + loc_key = loc_db.get_or_create_offset_location(addr) return [ExprLoc(loc_key, self.v_opmode())] return [self.args[0]] - def get_symbol_size(self, symbol, symbol_pool): + def get_symbol_size(self, symbol, loc_db): return self.mode def fixDstOffset(self): @@ -559,12 +559,12 @@ class instruction_x86(instruction): return args @staticmethod - def arg2str(expr, index=None, symbol_pool=None): + def arg2str(expr, index=None, loc_db=None): if expr.is_id() or expr.is_int(): o = str(expr) elif expr.is_loc(): - if symbol_pool is not None: - o = symbol_pool.str_loc_key(expr.loc_key) + if loc_db is not None: + o = loc_db.pretty_str(expr.loc_key) else: o = str(expr) elif ((isinstance(expr, ExprOp) and expr.op == 'far' and @@ -668,7 +668,7 @@ class mn_x86(cls_mn): return [(subcls, name, bases, dct, fields)] @classmethod - def fromstring(cls, text, symbol_pool, mode): + def fromstring(cls, text, loc_db, mode): pref = 0 prefix, new_s = get_prefix(text) if prefix == "LOCK": @@ -680,7 +680,7 @@ class mn_x86(cls_mn): elif prefix == "REPE": pref |= 4 text = new_s - c = super(mn_x86, cls).fromstring(text, symbol_pool, mode) + c = super(mn_x86, cls).fromstring(text, loc_db, mode) c.additional_info.g1.value = pref return c @@ -877,7 +877,7 @@ class mn_x86(cls_mn): return None return prefix + v - def getnextflow(self, symbol_pool): + def getnextflow(self, loc_db): raise NotImplementedError('not fully functional') def ir_pre_instruction(self): @@ -1920,8 +1920,8 @@ def modrm2expr(modrm, parent, w8, sx=0, xmm=0, mm=0, bnd=0): class x86_rm_arg(x86_arg): parser = rmarg - def fromstring(self, text, symbol_pool, parser_result=None): - start, stop = super(x86_rm_arg, self).fromstring(text, symbol_pool, parser_result) + def fromstring(self, text, loc_db, parser_result=None): + start, stop = super(x86_rm_arg, self).fromstring(text, loc_db, parser_result) p = self.parent if start is None: return None, None @@ -2056,9 +2056,9 @@ class x86_rm_arg(x86_arg): yield x class x86_rm_mem(x86_rm_arg): - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem, self).fromstring(text, symbol_pool, parser_result) + start, stop = super(x86_rm_mem, self).fromstring(text, loc_db, parser_result) if not isinstance(self.expr, ExprMem): return None, None return start, stop @@ -2066,9 +2066,9 @@ class x86_rm_mem(x86_rm_arg): class x86_rm_mem_far(x86_rm_arg): parser = mem_far - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): self.expr = None - start, stop = super(x86_rm_mem_far, self).fromstring(text, symbol_pool, parser_result) + start, stop = super(x86_rm_mem_far, self).fromstring(text, loc_db, parser_result) if not isinstance(self.expr, ExprMem): return None, None self.expr = ExprOp('far', self.expr) @@ -2438,7 +2438,7 @@ class x86_rm_reg_noarg(object): parser = gpreg - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if not hasattr(self.parent, 'sx') and hasattr(self.parent, "w8"): self.parent.w8.value = 1 if parser_result: @@ -2455,7 +2455,7 @@ class x86_rm_reg_noarg(object): result, start, stop = self.parser.scanString(text).next() except StopIteration: return None, None - expr = self.asm_ast_to_expr(result[0], symbol_pool) + expr = self.asm_ast_to_expr(result[0], loc_db) if expr is None: return None, None @@ -2742,7 +2742,7 @@ class bs_cond_imm(bs_cond_scale, x86_arg): parser = base_expr max_size = 32 - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: @@ -2869,7 +2869,7 @@ class bs_cond_imm64(bs_cond_imm): class bs_rel_off(bs_cond_imm): parser = base_expr - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: expr, start, stop = parser_result[self.parser] else: @@ -3011,7 +3011,7 @@ class bs_moff(bsi): class bs_movoff(x86_arg): parser = deref_mem - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: @@ -3078,7 +3078,7 @@ class bs_movoff(x86_arg): class bs_msegoff(x86_arg): parser = deref_ptr - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] if e is None: diff --git a/miasm2/arch/x86/ira.py b/miasm2/arch/x86/ira.py index d0bebfb6..be10213e 100644 --- a/miasm2/arch/x86/ira.py +++ b/miasm2/arch/x86/ira.py @@ -8,8 +8,8 @@ from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 class ir_a_x86_16(ir_x86_16, ira): - def __init__(self, symbol_pool=None): - ir_x86_16.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_x86_16.__init__(self, loc_db) self.ret_reg = self.arch.regs.AX def get_out_regs(self, _): @@ -17,8 +17,8 @@ class ir_a_x86_16(ir_x86_16, ira): class ir_a_x86_32(ir_x86_32, ir_a_x86_16): - def __init__(self, symbol_pool=None): - ir_x86_32.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_x86_32.__init__(self, loc_db) self.ret_reg = self.arch.regs.EAX def sizeof_char(self): @@ -39,8 +39,8 @@ class ir_a_x86_32(ir_x86_32, ir_a_x86_16): class ir_a_x86_64(ir_x86_64, ir_a_x86_16): - def __init__(self, symbol_pool=None): - ir_x86_64.__init__(self, symbol_pool) + def __init__(self, loc_db=None): + ir_x86_64.__init__(self, loc_db) self.ret_reg = self.arch.regs.RAX def call_effects(self, ad, instr): diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index 5485ed85..bf74051d 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -1,10 +1,10 @@ import logging from miasm2.jitter.jitload import Jitter, named_arguments -from miasm2.core import asmblock from miasm2.core.utils import pck16, pck32, pck64, upck16, upck32, upck64 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.jitter.codegen import CGen +from miasm2.core.locationdb import LocationDB from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_x86') @@ -18,7 +18,7 @@ class x86_32_CGen(CGen): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.arch.regs.RIP - self.translator = TranslatorC(self.ir_arch.symbol_pool) + self.translator = TranslatorC(self.ir_arch.loc_db) self.init_arch_C() def gen_post_code(self, attrib): @@ -39,7 +39,7 @@ class jitter_x86_16(Jitter): C_Gen = x86_32_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_x86_16(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False @@ -71,7 +71,7 @@ class jitter_x86_32(Jitter): C_Gen = x86_32_CGen def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_x86_32(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False @@ -187,7 +187,7 @@ class jitter_x86_64(Jitter): args_regs_stdcall = ['RCX', 'RDX', 'R8', 'R9'] def __init__(self, *args, **kwargs): - sp = asmblock.AsmSymbolPool() + sp = LocationDB() Jitter.__init__(self, ir_x86_64(sp), *args, **kwargs) self.vm.set_little_endian() self.ir_arch.do_stk_segm = False diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 5989a0b4..f3ca3a62 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -5066,8 +5066,8 @@ mnemo_func = {'mov': mov, class ir_x86_16(IntermediateRepresentation): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_x86, 16, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 16, loc_db) self.do_stk_segm = False self.do_ds_segm = False self.do_str_segm = False @@ -5214,8 +5214,8 @@ class ir_x86_16(IntermediateRepresentation): class ir_x86_32(ir_x86_16): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_x86, 32, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 32, loc_db) self.do_stk_segm = False self.do_ds_segm = False self.do_str_segm = False @@ -5228,8 +5228,8 @@ class ir_x86_32(ir_x86_16): class ir_x86_64(ir_x86_16): - def __init__(self, symbol_pool=None): - IntermediateRepresentation.__init__(self, mn_x86, 64, symbol_pool) + def __init__(self, loc_db=None): + IntermediateRepresentation.__init__(self, mn_x86, 64, loc_db) self.do_stk_segm = False self.do_ds_segm = False self.do_str_segm = False diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index 08ff25e9..c8af4056 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -12,6 +12,7 @@ from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck from miasm2.core.graph import DiGraph, DiGraphSimplifier, MatchGraphJoker from miasm2.core.interval import interval +from miasm2.core.locationdb import LocationDB log_asmblock = logging.getLogger("asmblock") @@ -35,7 +36,7 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) - def to_string(self, symbol_pool): + def to_string(self, loc_db): return str(self) @@ -67,13 +68,13 @@ class AsmConstraint(object): label = property(get_label, set_label) - def to_string(self, symbol_pool=None): - if symbol_pool is None: + def to_string(self, loc_db=None): + if loc_db is None: return "%s:%s" % (self.c_t, self.loc_key) else: return "%s:%s" % ( self.c_t, - symbol_pool.str_loc_key(self.loc_key) + loc_db.pretty_str(self.loc_key) ) def __str__(self): @@ -136,22 +137,22 @@ class AsmBlock(object): label = property(get_label) - def to_string(self, symbol_pool=None): + def to_string(self, loc_db=None): out = [] - if symbol_pool is None: + if loc_db is None: out.append(str(self.loc_key)) else: - out.append(symbol_pool.str_loc_key(self.loc_key)) + out.append(loc_db.pretty_str(self.loc_key)) for instr in self.lines: - out.append(instr.to_string(symbol_pool)) + out.append(instr.to_string(loc_db)) if self.bto: lbls = ["->"] for dst in self.bto: if dst is None: lbls.append("Unknown? ") else: - lbls.append(dst.to_string(symbol_pool) + " ") + lbls.append(dst.to_string(loc_db) + " ") lbls = '\t'.join(lbls) out.append(lbls) return '\n'.join(out) @@ -166,12 +167,12 @@ class AsmBlock(object): assert isinstance(self.bto, set) self.bto.add(c) - def split(self, symbol_pool, offset): - loc_key = symbol_pool.getby_offset_create(offset) + def split(self, loc_db, offset): + loc_key = loc_db.get_or_create_offset_location(offset) log_asmblock.debug('split at %x', offset) i = -1 offsets = [x.offset for x in self.lines] - offset = symbol_pool.loc_key_to_offset(loc_key) + offset = loc_db.get_location_offset(loc_key) if offset not in offsets: log_asmblock.warning( 'cannot split bloc at %X ' % offset + @@ -344,235 +345,17 @@ class asm_block_bad(AsmBlockBad): warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"') super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs) +class AsmSymbolPool(LocationDB): + """[DEPRECATED API] use 'LocationDB' instead""" -class AsmSymbolPool(object): - """ - Store symbols. - - A symbol links a name to an (optional) offset - - Rules and limitations: - - two different symbols cannot have the same offset - - two different symbols cannot have the same name - - symbols manipulation (comparison, creation ...) can only be done on - symbols generated by the same symbol pool - """ - - def __init__(self): - self._loc_keys = set() - - self._loc_key_to_offset = {} - self._loc_key_to_name = {} - - self._name_to_loc_key = {} - self._offset_to_loc_key = {} - - self._loc_key_num = 0 - - def loc_key_to_offset(self, loc_key): - """ - Return offset of @loc_key, None otherwise. - @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - return self._loc_key_to_offset.get(loc_key) - - def loc_key_to_name(self, loc_key): - """ - Return name of @loc_key. - @loc_key: LocKey instance - """ - assert isinstance(loc_key, LocKey) - return self._loc_key_to_name[loc_key] - - def add_location(self, name, offset=None): - """ - Create and add a location to the symbol_pool - @name: loc_key's name (never empty). If offset is None and name is int, - generate loc_key with generic name and name as offset - @offset: (optional) loc_key's offset - """ - - if is_int(name): - assert offset is None or offset == name - offset = name - name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) - if offset is not None: - offset = int(offset) - - assert name - - # Test for collisions - known_loc_key = self.getby_name(name) - if known_loc_key is not None: - known_offset = self.loc_key_to_offset(known_loc_key) - if known_offset != offset: - raise ValueError( - 'symbol %s with different offset %s %s' % ( - name, offset, known_offset - ) - ) - return known_loc_key - - elif self.getby_offset(offset) is not None: - raise ValueError( - 'offset %s with different names %s' % ( - offset, - name - ) - ) - - loc_key = LocKey(self._loc_key_num) - self._loc_key_num += 1 - - self._loc_keys.add(loc_key) - - if offset is not None: - assert offset not in self._offset_to_loc_key - self._offset_to_loc_key[offset] = loc_key - self._loc_key_to_offset[loc_key] = offset - - self._name_to_loc_key[name] = loc_key - self._loc_key_to_name[loc_key] = name - return loc_key - - def remove_loc_key(self, loc_key): - """ - Delete a @loc_key - """ - name = self._loc_key_to_name.pop(loc_key, None) - self._name_to_loc_key.pop(name, None) - - offset = self._loc_key_to_offset.pop(loc_key, None) - self._offset_to_loc_key.pop(offset, None) - - self._loc_keys.remove(loc_key) - - def del_loc_key_offset(self, loc_key): - """Unpin the @loc_key from its offset""" - offset = self._loc_keys_to_offset.pop(loc_key) - self._offset_to_loc_key.pop(offset, None) - - def getby_offset(self, offset): - """ - Retrieve loc_key using its @offset, None otherwise. - @offset: int - """ - return self._offset_to_loc_key.get(offset) - - def getby_name(self, name): - """ - Retrieve loc_key using its @name, None otherwise. - @name: str - """ - return self._name_to_loc_key.get(name) - - def getby_name_create(self, name): - """Get a loc_key from its @name, create it if it doesn't exist""" - loc_key = self.getby_name(name) - if loc_key is None: - loc_key = self.add_location(name) - return loc_key - - def getby_offset_create(self, offset): - """Get a loc_key from its @offset, create it if it doesn't exist""" - loc_key = self.getby_offset(offset) - if loc_key is None: - loc_key = self.add_location(offset) - return loc_key - - def rename_location(self, loc_key, newname): - """Rename the @loc_key name to @newname""" - if newname in self._name_to_loc_key: - raise ValueError('Symbol already known') - name = self._loc_key_to_name[loc_key] - assert name is not None - self._name_to_loc_key.pop(name) - self._loc_key_to_name[loc_key] = newname - - def set_offset(self, loc_key, offset): - """Pin the @loc_key to an @offset - Note that there is a special case when the offset is a list - it happens when offsets are recomputed in resolve_symbol* - """ - assert isinstance(loc_key, LocKey) - assert offset not in self._offset_to_loc_key - if loc_key not in self._loc_keys: - raise ValueError('Foreign loc_key %s' % loc_key) - - old_offset = self._loc_key_to_offset.pop(loc_key, None) - self._offset_to_loc_key.pop(old_offset, None) - - self._loc_key_to_offset[loc_key] = offset - self._offset_to_loc_key[offset] = loc_key - - @property - def loc_keys(self): - """Return all loc_keys""" - return self._loc_keys - - @property - def items(self): - """Return all loc_keys""" - warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') - return list(self._loc_keys) - - def __str__(self): - return "".join("%s\n" % loc_key for loc_key in self._loc_keys) - - def __getitem__(self, item): - warnings.warn('DEPRECATION WARNING: use "getby_name" or "getby_offset"') - if item in self._name_to_loc_key: - return self._name_to_loc_key[item] - if item in self._offset_to_loc_key: - return self._offset_to_loc_key[item] - raise KeyError('unknown symbol %r' % item) - - def __contains__(self, item): - warnings.warn('DEPRECATION WARNING: use "getby_name" or "getby_offset"') - return item in self._name_to_loc_key or item in self._offset_to_loc_key - - def merge(self, symbol_pool): - """Merge with another @symbol_pool""" - self._loc_keys.update(symbol_pool.loc_keys) - self._name_to_loc_key.update(symbol_pool._name_to_loc_key) - self._offset_to_loc_key.update(symbol_pool._offset_to_loc_key) - - def canonize_to_exprloc(self, expr): - """ - If expr is ExprInt, return ExprLoc with corresponding loc_key - Else, return expr - - @expr: Expr instance - """ - if expr.is_int(): - loc_key = self.getby_offset_create(int(expr)) - ret = ExprLoc(loc_key, expr.size) - return ret - return expr - - def gen_loc_key(self): - """Generate a new unpinned loc_key""" - loc_key = self.add_location("lbl_gen_%.8X" % (self._loc_key_num)) - return loc_key - - def str_loc_key(self, loc_key): - name = self.loc_key_to_name(loc_key) - offset = self.loc_key_to_offset(loc_key) - if name is None: - name = str(loc_key) - if offset is not None: - offset = hex(offset) - out = name - if offset is not None: - out = "%s:%s" % (out, offset) - return out - + def __init__(self, *args, **kwargs): + warnings.warn("Deprecated API, use 'LocationDB' instead") + super(AsmSymbolPool, self).__init__(*args, **kwargs) class asm_symbol_pool(AsmSymbolPool): def __init__(self): - warnings.warn('DEPRECATION WARNING: use "AsmSymbolPool" instead of "asm_symbol_pool"') + warnings.warn('DEPRECATION WARNING: use "LocationDB" instead of "asm_symbol_pool"') super(asm_symbol_pool, self).__init__() @@ -593,7 +376,7 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, symbol_pool=None, *args, **kwargs): + def __init__(self, loc_db=None, *args, **kwargs): super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} @@ -601,13 +384,13 @@ class AsmCFG(DiGraph): self._pendings = {} # Loc_Key2block built on the fly self._loc_key_to_block = {} - # symbol_pool - self.symbol_pool = symbol_pool + # loc_db + self.loc_db = loc_db def copy(self): """Copy the current graph instance""" - graph = self.__class__(self.symbol_pool) + graph = self.__class__(self.loc_db) return graph + self @@ -754,10 +537,10 @@ class AsmCFG(DiGraph): def node2lines(self, node): - if self.symbol_pool is None: + if self.loc_db is None: loc_key_name = str(node) else: - loc_key_name = self.symbol_pool.str_loc_key(node) + loc_key_name = self.loc_db.pretty_str(node) yield self.DotCellDescription(text=loc_key_name, attr={'align': 'center', 'colspan': 2, @@ -778,9 +561,9 @@ class AsmCFG(DiGraph): if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})] + self.DotCellDescription(text=line.to_string(self.loc_db), attr={})] else: - yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={}) + yield self.DotCellDescription(text=line.to_string(self.loc_db), attr={}) def node_attr(self, node): block = self._loc_key_to_block.get(node, None) @@ -979,13 +762,13 @@ class AsmCFG(DiGraph): block.max_size = size log_asmblock.info("size: %d max: %d", block.size, block.max_size) - def apply_splitting(self, symbol_pool, dis_block_callback=None, **kwargs): + def apply_splitting(self, loc_db, dis_block_callback=None, **kwargs): """Consider @self' bto destinations and split block in @self if one of these destinations jumps in the middle of this block. In order to work, they must be only one block in @self per loc_key in - @symbol_pool (which is true if @self come from the same disasmEngine). + @loc_db (which is true if @self come from the same disasmEngine). - @symbol_pool: AsmSymbolPool instance associated with @self'loc_keys + @loc_db: LocationDB instance associated with @self'loc_keys @dis_block_callback: (optional) if set, this callback will be called on new block destinations @kwargs: (optional) named arguments to pass to dis_block_callback @@ -994,7 +777,7 @@ class AsmCFG(DiGraph): # offset block_dst = [] for loc_key in self.pendings: - offset = symbol_pool.loc_key_to_offset(loc_key) + offset = loc_db.get_location_offset(loc_key) if offset is not None: block_dst.append(offset) @@ -1010,8 +793,9 @@ class AsmCFG(DiGraph): if not (off > range_start and off < range_stop): continue - # `cur_block` must be splitted at offset `off` - new_b = cur_block.split(symbol_pool, off) + # `cur_block` must be splitted at offset `off`from miasm2.core.locationdb import LocationDB + + new_b = cur_block.split(loc_db, off) log_asmblock.debug("Split block %x", off) if new_b is None: log_asmblock.error("Cannot split %x!!", off) @@ -1028,12 +812,12 @@ class AsmCFG(DiGraph): # The new block destinations may need to be disassembled if dis_block_callback: offsets_to_dis = set( - self.symbol_pool.loc_key_to_offset(constraint.loc_key) + self.loc_db.get_location_offset(constraint.loc_key) for constraint in new_b.bto ) dis_block_callback(cur_bloc=new_b, offsets_to_dis=offsets_to_dis, - symbol_pool=symbol_pool, **kwargs) + loc_db=loc_db, **kwargs) # Update structure rebuild_needed = True @@ -1135,8 +919,8 @@ def fix_expr_val(expr, symbols): # Example: # toto: # .dword label - loc_key = symbols.getby_name(e.name) - offset = symbols.loc_key_to_offset(loc_key) + loc_key = symbols.get_name_location(e.name) + offset = symbols.get_location_offset(loc_key) e = ExprInt(offset, e.size) return e result = expr.visit(expr_calc) @@ -1146,16 +930,16 @@ def fix_expr_val(expr, symbols): return result -def fix_loc_offset(symbol_pool, loc_key, offset, modified): +def fix_loc_offset(loc_db, loc_key, offset, modified): """ Fix the @loc_key offset to @offset. If the @offset has changed, add @loc_key to @modified - @symbol_pool: current symbol_pool + @loc_db: current loc_db """ - loc_offset = symbol_pool.loc_key_to_offset(loc_key) + loc_offset = loc_db.get_location_offset(loc_key) if loc_offset == offset: return - symbol_pool.set_offset(loc_key, offset) + loc_db.set_location_offset(loc_key, offset, force=True) modified.add(loc_key) @@ -1163,8 +947,8 @@ class BlockChain(object): """Manage blocks linked with an asm_constraint_next""" - def __init__(self, symbol_pool, blocks): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, blocks): + self.loc_db = loc_db self.blocks = blocks self.place() @@ -1177,7 +961,7 @@ class BlockChain(object): self.pinned_block_idx = None for i, block in enumerate(self.blocks): loc_key = block.loc_key - if self.symbol_pool.loc_key_to_offset(loc_key) is not None: + if self.loc_db.get_location_offset(loc_key) is not None: if self.pinned_block_idx is not None: raise ValueError("Multiples pinned block detected") self.pinned_block_idx = i @@ -1196,7 +980,7 @@ class BlockChain(object): return loc = self.blocks[self.pinned_block_idx].loc_key - offset_base = self.symbol_pool.loc_key_to_offset(loc) + offset_base = self.loc_db.get_location_offset(loc) assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) self.offset_min = offset_base @@ -1225,25 +1009,25 @@ class BlockChain(object): # Propagate offset to blocks before pinned block pinned_block = self.blocks[self.pinned_block_idx] - offset = self.symbol_pool.loc_key_to_offset(pinned_block.loc_key) + offset = self.loc_db.get_location_offset(pinned_block.loc_key) if offset % pinned_block.alignment != 0: raise RuntimeError('Bad alignment') for block in self.blocks[:self.pinned_block_idx - 1:-1]: new_offset = offset - block.size new_offset = new_offset - new_offset % pinned_block.alignment - fix_loc_offset(self.symbol_pool, + fix_loc_offset(self.loc_db, block.loc_key, new_offset, modified_loc_keys) # Propagate offset to blocks after pinned block - offset = self.symbol_pool.loc_key_to_offset(pinned_block.loc_key) + pinned_block.size + offset = self.loc_db.get_location_offset(pinned_block.loc_key) + pinned_block.size last_block = pinned_block for block in self.blocks[self.pinned_block_idx + 1:]: offset += (- offset) % last_block.alignment - fix_loc_offset(self.symbol_pool, + fix_loc_offset(self.loc_db, block.loc_key, offset, modified_loc_keys) @@ -1256,8 +1040,8 @@ class BlockChainWedge(object): """Stand for wedges between blocks""" - def __init__(self, symbol_pool, offset, size): - self.symbol_pool = symbol_pool + def __init__(self, loc_db, offset, size): + self.loc_db = loc_db self.offset = offset self.max_size = size self.offset_min = offset @@ -1266,12 +1050,12 @@ class BlockChainWedge(object): def merge(self, chain): """Best effort merge two block chains Return the list of resulting blockchains""" - self.symbol_pool.set_offset(chain.blocks[0].loc_key, self.offset_max) + self.loc_db.set_location_offset(chain.blocks[0].loc_key, self.offset_max) chain.place() return [self, chain] -def group_constrained_blocks(symbol_pool, asmcfg): +def group_constrained_blocks(loc_db, asmcfg): """ Return the BlockChains list built from grouped blocks in asmcfg linked by asm_constraint_next @@ -1310,7 +1094,7 @@ def group_constrained_blocks(symbol_pool, asmcfg): out_block_chains = [] for loc_key in known_block_chains: - chain = BlockChain(symbol_pool, known_block_chains[loc_key]) + chain = BlockChain(loc_db, known_block_chains[loc_key]) out_block_chains.append(chain) return out_block_chains @@ -1330,7 +1114,7 @@ def get_blockchains_address_interval(blockChains, dst_interval): return allocated_interval -def resolve_symbol(blockChains, symbol_pool, dst_interval=None): +def resolve_symbol(blockChains, loc_db, dst_interval=None): """Place @blockChains in the @dst_interval""" log_asmblock.info('resolve_symbol') @@ -1348,7 +1132,7 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): # Add wedge in forbidden intervals for start, stop in forbidden_interval.intervals: wedge = BlockChainWedge( - symbol_pool, offset=start, size=stop + 1 - start) + loc_db, offset=start, size=stop + 1 - start) pinned_chains.append(wedge) # Try to place bigger blockChains first @@ -1391,8 +1175,8 @@ def get_block_loc_keys(block): return symbols -def assemble_block(mnemo, block, symbol_pool, conservative=False): - """Assemble a @block using @symbol_pool +def assemble_block(mnemo, block, loc_db, conservative=False): + """Assemble a @block using @loc_db @conservative: (optional) use original bytes when possible """ offset_i = 0 @@ -1403,7 +1187,7 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Fix special AsmRaw data = "" for expr in instr.raw: - expr_int = fix_expr_val(expr, symbol_pool) + expr_int = fix_expr_val(expr, loc_db) data += pck[expr_int.size](expr_int.arg) instr.data = data @@ -1413,16 +1197,16 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Assemble an instruction saved_args = list(instr.args) - instr.offset = symbol_pool.loc_key_to_offset(block.loc_key) + offset_i + instr.offset = loc_db.get_location_offset(block.loc_key) + offset_i # Replace instruction's arguments by resolved ones - instr.args = instr.resolve_args_with_symbols(symbol_pool) + instr.args = instr.resolve_args_with_symbols(loc_db) if instr.dstflow(): instr.fixDstOffset() old_l = instr.l - cached_candidate, _ = conservative_asm(mnemo, instr, symbol_pool, + cached_candidate, _ = conservative_asm(mnemo, instr, loc_db, conservative) # Restore original arguments @@ -1436,8 +1220,8 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): offset_i += instr.l -def asmblock_final(mnemo, asmcfg, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmblock_final(mnemo, asmcfg, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" log_asmblock.debug("asmbloc_final") @@ -1484,36 +1268,36 @@ def asmblock_final(mnemo, asmcfg, blockChains, symbol_pool, conservative=False): while blocks_to_rework: block = blocks_to_rework.pop() - assemble_block(mnemo, block, symbol_pool, conservative) + assemble_block(mnemo, block, loc_db, conservative) -def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): - """Resolve and assemble @blockChains using @symbol_pool until fixed point is +def asmbloc_final(mnemo, blocks, blockChains, loc_db, conservative=False): + """Resolve and assemble @blockChains using @loc_db until fixed point is reached""" warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"') - asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative) + asmblock_final(mnemo, blocks, blockChains, loc_db, conservative) -def asm_resolve_final(mnemo, asmcfg, symbol_pool, dst_interval=None): - """Resolve and assemble @asmcfg using @symbol_pool into interval +def asm_resolve_final(mnemo, asmcfg, loc_db, dst_interval=None): + """Resolve and assemble @asmcfg using @loc_db into interval @dst_interval""" asmcfg.sanity_check() asmcfg.guess_blocks_size(mnemo) - blockChains = group_constrained_blocks(symbol_pool, asmcfg) + blockChains = group_constrained_blocks(loc_db, asmcfg) resolved_blockChains = resolve_symbol( blockChains, - symbol_pool, + loc_db, dst_interval ) - asmblock_final(mnemo, asmcfg, resolved_blockChains, symbol_pool) + asmblock_final(mnemo, asmcfg, resolved_blockChains, loc_db) patches = {} output_interval = interval() for block in asmcfg.blocks: - offset = symbol_pool.loc_key_to_offset(block.loc_key) + offset = loc_db.get_location_offset(block.loc_key) for instr in block.lines: if not instr.data: # Empty line @@ -1552,7 +1336,7 @@ class disasmEngine(object): - blocs_wd: maximum number of distinct disassembled block + callback(arch, attrib, pool_bin, cur_bloc, offsets_to_dis, - symbol_pool) + loc_db) - dis_block_callback: callback after each new disassembled block """ @@ -1566,7 +1350,7 @@ class disasmEngine(object): self.arch = arch self.attrib = attrib self.bin_stream = bin_stream - self.symbol_pool = AsmSymbolPool() + self.loc_db = LocationDB() # Setup options self.dont_dis = [] @@ -1598,6 +1382,10 @@ class disasmEngine(object): warnings.warn("""DEPRECATION WARNING: "dis_bloc_callback" use dis_block_callback.""") self.dis_block_callback = function + @property + def symbol_pool(self): + warnings.warn("""DEPRECATION WARNING: use 'loc_db'""") + return self.loc_db # Deprecated job_done = property(get_job_done, set_job_done) @@ -1616,7 +1404,7 @@ class disasmEngine(object): delayslot_count = self.arch.delayslot offsets_to_dis = set() add_next_offset = False - loc_key = self.symbol_pool.getby_offset_create(offset) + loc_key = self.loc_db.get_or_create_offset_location(offset) cur_block = AsmBlock(loc_key) log_asmblock.debug("dis at %X", int(offset)) while not in_delayslot or delayslot_count > 0: @@ -1631,12 +1419,12 @@ class disasmEngine(object): else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break if lines_cpt > 0 and offset in self.split_dis: - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) break @@ -1647,7 +1435,7 @@ class disasmEngine(object): break if offset in job_done: - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break @@ -1674,7 +1462,7 @@ class disasmEngine(object): else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - loc_key_cst = self.symbol_pool.getby_offset_create(off_i) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break @@ -1687,7 +1475,7 @@ class disasmEngine(object): else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - loc_key_cst = self.symbol_pool.getby_offset_create(off_i) + loc_key_cst = self.loc_db.get_or_create_offset_location(off_i) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) break @@ -1710,14 +1498,14 @@ class disasmEngine(object): if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True if instr.dstflow(): - instr.dstflow2label(self.symbol_pool) - destinations = instr.getdstflow(self.symbol_pool) + instr.dstflow2label(self.loc_db) + destinations = instr.getdstflow(self.loc_db) known_dsts = [] for dst in destinations: if not dst.is_loc(): continue loc_key = dst.loc_key - loc_key_offset = self.symbol_pool.loc_key_to_offset(loc_key) + loc_key_offset = self.loc_db.get_location_offset(loc_key) known_dsts.append(loc_key) if loc_key_offset in self.dont_dis_retcall_funcs: add_next_offset = False @@ -1729,11 +1517,11 @@ class disasmEngine(object): delayslot_count = instr.delayslot for c in cur_block.bto: - loc_key_offset = self.symbol_pool.loc_key_to_offset(c.loc_key) + loc_key_offset = self.loc_db.get_location_offset(c.loc_key) offsets_to_dis.add(loc_key_offset) if add_next_offset: - loc_key_cst = self.symbol_pool.getby_offset_create(offset) + loc_key_cst = self.loc_db.get_or_create_offset_location(offset) cur_block.add_cst(loc_key_cst, AsmConstraint.c_next) offsets_to_dis.add(offset) @@ -1744,7 +1532,9 @@ class disasmEngine(object): self.dis_block_callback(mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream, cur_bloc=cur_block, offsets_to_dis=offsets_to_dis, - symbol_pool=self.symbol_pool) + loc_db=self.loc_db, + # Deprecated API + symbol_pool=self.loc_db) return cur_block, offsets_to_dis def dis_block(self, offset): @@ -1774,7 +1564,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG(self.symbol_pool) + blocks = AsmCFG(self.loc_db) todo = [offset] bloc_cpt = 0 @@ -1792,7 +1582,7 @@ class disasmEngine(object): todo += nexts blocks.add_block(cur_block) - blocks.apply_splitting(self.symbol_pool, + blocks.apply_splitting(self.loc_db, dis_block_callback=self.dis_block_callback, mn=self.arch, attrib=self.attrib, pool_bin=self.bin_stream) diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index 80f81aff..dc6fc392 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -671,7 +671,7 @@ class bs_swapargs(bs_divert): class m_arg(object): - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -681,11 +681,11 @@ class m_arg(object): except StopIteration: return None, None arg = v[0] - expr = self.asm_ast_to_expr(arg, symbol_pool) + expr = self.asm_ast_to_expr(arg, loc_db) self.expr = expr return start, stop - def asm_ast_to_expr(self, arg, symbol_pool): + def asm_ast_to_expr(self, arg, loc_db): raise NotImplementedError("Virtual") @@ -708,7 +708,7 @@ class reg_noarg(object): reg_info = None parser = None - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] self.expr = e @@ -718,7 +718,7 @@ class reg_noarg(object): except StopIteration: return None, None arg = v[0] - expr = self.parses_to_expr(arg, symbol_pool) + expr = self.parses_to_expr(arg, loc_db) self.expr = expr return start, stop @@ -995,13 +995,13 @@ class instruction(object): def __str__(self): return self.to_string() - def to_string(self, symbol_pool=None): + def to_string(self, loc_db=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, i, symbol_pool) + x = self.arg2str(arg, i, loc_db) args.append(x) o += self.gen_args(args) return o @@ -1022,18 +1022,18 @@ class instruction(object): fixed_expr = {} for exprloc in loc_keys: loc_key = exprloc.loc_key - name = symbols.loc_key_to_name(loc_key) + names = symbols.get_location_names(loc_key) # special symbols - if name == '$': + if '$' in names: fixed_expr[exprloc] = self.get_asm_offset(exprloc) continue - if name == '_': + if '_' in names: fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue - if symbols.getby_name(name) is None: + if not names: raise ValueError('Unresolved symbol: %r' % exprloc) - offset = symbols.loc_key_to_offset(loc_key) + offset = symbols.get_location_offset(loc_key) if offset is None: raise ValueError( 'The offset of loc_key "%s" cannot be determined' % name @@ -1280,7 +1280,7 @@ class cls_mn(object): return out[0] @classmethod - def fromstring(cls, text, symbol_pool, mode = None): + def fromstring(cls, text, loc_db, mode = None): global total_scans name = re.search('(\S+)', text).groups() if not name: @@ -1320,11 +1320,11 @@ class cls_mn(object): if start != 0: v, start, stop = [None], None, None if v != [None]: - v = f.asm_ast_to_expr(v[0], symbol_pool) + v = f.asm_ast_to_expr(v[0], loc_db) if v is None: v, start, stop = [None], None, None parsers[(i, start_i)][p] = v, start, stop - start, stop = f.fromstring(args_str, symbol_pool, parsers[(i, start_i)]) + start, stop = f.fromstring(args_str, loc_db, parsers[(i, start_i)]) if start != 0: log.debug("cannot fromstring %r", args_str) cannot_parse = True @@ -1529,12 +1529,12 @@ class cls_mn(object): def parse_prefix(self, v): return 0 - def set_dst_symbol(self, symbol_pool): - dst = self.getdstflow(symbol_pool) + def set_dst_symbol(self, loc_db): + dst = self.getdstflow(loc_db) args = [] for d in dst: if isinstance(d, m2_expr.ExprInt): - l = symbol_pool.getby_offset_create(int(d)) + l = loc_db.get_or_create_offset_location(int(d)) a = m2_expr.ExprId(l.name, d.size) else: @@ -1542,7 +1542,7 @@ class cls_mn(object): args.append(a) self.args_symb = args - def getdstflow(self, symbol_pool): + def getdstflow(self, loc_db): return [self.args[0].expr] @@ -1563,7 +1563,7 @@ class imm_noarg(object): return None return v - def fromstring(self, text, symbol_pool, parser_result=None): + def fromstring(self, text, loc_db, parser_result=None): if parser_result: e, start, stop = parser_result[self.parser] else: diff --git a/miasm2/core/locationdb.py b/miasm2/core/locationdb.py new file mode 100644 index 00000000..39c1c99a --- /dev/null +++ b/miasm2/core/locationdb.py @@ -0,0 +1,453 @@ +import warnings + +from miasm2.expression.expression import LocKey, ExprLoc +from miasm2.expression.modint import moduint, modint + + +def is_int(a): + return isinstance(a, (int, long, moduint, modint)) + + +class LocationDB(object): + """ + LocationDB is a "database" of information associated to location. + + An entry in a LocationDB is uniquely identified with a LocKey. + Additionnal information which can be associated with a LocKey are: + - an offset (uniq per LocationDB) + - several names (each are uniqs per LocationDB) + + As a schema: + loc_key 1 <-> 0..1 offset + 1 <-> 0..n name + + >>> loc_db = LocationDB() + # Add a location with no additionnal information + >>> loc_key1 = loc_db.add_location() + # Add a location with an offset + >>> loc_key2 = loc_db.add_location(offset=0x1234) + # Add a location with several names + >>> loc_key3 = loc_db.add_location(name="first_name") + >>> loc_db.add_location_name(loc_key3, "second_name") + # Associate an offset to an existing location + >>> loc_db.set_location_offset(loc_key3, 0x5678) + # Remove a name from an existing location + >>> loc_db.remove_location_name(loc_key3, "second_name") + + # Get back offset + >>> loc_db.get_location_offset(loc_key1) + None + >>> loc_db.get_location_offset(loc_key2) + 0x1234 + + # Display a location + >>> loc_db.pretty_str(loc_key1) + loc_key_1 + >>> loc_db.pretty_str(loc_key2) + loc_1234 + >>> loc_db.pretty_str(loc_key3) + first_name + """ + + def __init__(self): + # Known LocKeys + self._loc_keys = set() + + # Association tables + self._loc_key_to_offset = {} + self._loc_key_to_names = {} + self._name_to_loc_key = {} + self._offset_to_loc_key = {} + + # Counter for new LocKey generation + self._loc_key_num = 0 + + def get_location_offset(self, loc_key): + """ + Return the offset of @loc_key if any, None otherwise. + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + return self._loc_key_to_offset.get(loc_key) + + def get_location_names(self, loc_key): + """ + Return the frozenset of names associated to @loc_key + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + return frozenset(self._loc_key_to_names.get(loc_key, set())) + + def get_name_location(self, name): + """ + Return the LocKey of @name if any, None otherwise. + @name: target name + """ + return self._name_to_loc_key.get(name) + + def get_or_create_name_location(self, name): + """ + Return the LocKey of @name if any, create one otherwise. + @name: target name + """ + loc_key = self._name_to_loc_key.get(name) + if loc_key is not None: + return loc_key + return self.add_location(name=name) + + def get_offset_location(self, offset): + """ + Return the LocKey of @offset if any, None otherwise. + @name: target offset + """ + return self._offset_to_loc_key.get(offset) + + def get_or_create_offset_location(self, offset): + """ + Return the LocKey of @offset if any, create one otherwise. + @offset: target offset + """ + loc_key = self._offset_to_loc_key.get(offset) + if loc_key is not None: + return loc_key + return self.add_location(offset=offset) + + def add_location_name(self, loc_key, name): + """Associate a name @name to a given @loc_key + @name: str instance + @loc_key: LocKey instance + """ + assert loc_key in self._loc_keys + already_existing_loc = self._name_to_loc_key.get(name) + if already_existing_loc is not None and already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (name, already_existing_loc)) + self._loc_key_to_names.setdefault(loc_key, set()).add(name) + self._name_to_loc_key[name] = loc_key + + def remove_location_name(self, loc_key, name): + """Disassociate a name @name from a given @loc_key + Fail if @name is not already associated to @loc_key + @name: str instance + @loc_key: LocKey instance + """ + assert loc_key in self._loc_keys + already_existing_loc = self._name_to_loc_key.get(name) + if already_existing_loc is None: + raise KeyError("%r is not already associated" % name) + if already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (name, already_existing_loc)) + del self._name_to_loc_key[name] + self._loc_key_to_names[loc_key].remove(name) + + def set_location_offset(self, loc_key, offset, force=False): + """Associate the offset @offset to an LocKey @loc_key + + If @force is set, override silently. Otherwise, if an offset is already + associated to @loc_key, an error will be raised + """ + assert loc_key in self._loc_keys + already_existing_loc = self.get_offset_location(offset) + if already_existing_loc is not None and already_existing_loc != loc_key: + raise KeyError("%r is already associated to a different loc_key " + "(%r)" % (offset, already_existing_loc)) + already_existing_off = self._loc_key_to_offset.get(loc_key) + if (already_existing_off is not None and + already_existing_off != offset): + if not force: + raise ValueError( + "%r already has an offset (0x%x). Use 'force=True'" + " for silent overriding" % ( + loc_key, already_existing_off + )) + else: + self.unset_location_offset(loc_key) + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + def unset_location_offset(self, loc_key): + """Disassociate LocKey @loc_key's offset + + Fail if there is already no offset associate with it + @loc_key: LocKey + """ + assert loc_key in self._loc_keys + already_existing_off = self._loc_key_to_offset.get(loc_key) + if already_existing_off is None: + raise ValueError("%r already has no offset" % (loc_key)) + del self._offset_to_loc_key[already_existing_off] + del self._loc_key_to_offset[loc_key] + + def consistency_check(self): + """Ensure internal structures are consistent with each others""" + assert set(self._loc_key_to_names).issubset(self._loc_keys) + assert set(self._loc_key_to_offset).issubset(self._loc_keys) + assert self._loc_key_to_offset == {v: k for k, v in self._offset_to_loc_key.iteritems()} + assert reduce( + lambda x, y:x.union(y), + self._loc_key_to_names.itervalues(), + set(), + ) == set(self._name_to_loc_key) + for name, loc_key in self._name_to_loc_key.iteritems(): + assert name in self._loc_key_to_names[loc_key] + + def add_location(self, name=None, offset=None, strict=True): + """Add a new location in the locationDB. Returns the corresponding LocKey. + If @name is set, also associate a name to this new location. + If @offset is set, also associate an offset to this new location. + + Strict mode (set by @strict, default): + If a location with @offset or @name already exists, an error will be + raised. + Otherwise: + If a location with @offset or @name already exists, the corresponding + LocKey will be returned. + """ + + # Deprecation handling + if is_int(name): + assert offset is None or offset == name + warnings.warn("Deprecated API: use 'add_location(offset=)' instead." + " An additionnal 'name=' can be provided to also " + "associate a name (there is no more default name)") + offset = name + name = None + + # Argument cleaning + offset_loc_key = None + if offset is not None: + offset = int(offset) + offset_loc_key = self.get_offset_location(offset) + + # Test for collisions + name_loc_key = None + if name is not None: + name_loc_key = self.get_name_location(name) + + if strict: + if name_loc_key is not None: + raise ValueError("An entry for %r already exists (%r), and " + "strict mode is enabled" % ( + name, name_loc_key + )) + if offset_loc_key is not None: + raise ValueError("An entry for 0x%x already exists (%r), and " + "strict mode is enabled" % ( + offset, offset_loc_key + )) + else: + # Non-strict mode + if name_loc_key is not None: + known_offset = self.get_offset_location(name_loc_key) + if known_offset != offset: + raise ValueError( + "Location with name '%s' already have an offset: 0x%x " + "(!= 0x%x)" % (name, offset, known_offset) + ) + # Name already known, same offset -> nothing to do + return name_loc_key + + elif offset_loc_key is not None: + if name is not None: + # This is an error. Check for already known name are checked above + raise ValueError( + "Location with offset 0x%x already exists." + "To add a name to this location, use the dedicated API" + "'add_location_name(%r, %r)'" % ( + offset_loc_key, + name + )) + # Offset already known, no name specified + return offset_loc_key + + # No collision, this is a brand new location + loc_key = LocKey(self._loc_key_num) + self._loc_key_num += 1 + self._loc_keys.add(loc_key) + + if offset is not None: + assert offset not in self._offset_to_loc_key + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + if name is not None: + self._name_to_loc_key[name] = loc_key + self._loc_key_to_names[loc_key] = set([name]) + + return loc_key + + def remove_location(self, loc_key): + """ + Delete the location corresponding to @loc_key + @loc_key: LocKey instance + """ + assert isinstance(loc_key, LocKey) + if loc_key not in self._loc_keys: + raise KeyError("Unknown loc_key %r" % loc_key) + names = self._loc_key_to_names.pop(loc_key, []) + for name in names: + del self._name_to_loc_key[name] + offset = self._loc_key_to_offset.pop(loc_key, None) + self._offset_to_loc_key.pop(offset, None) + self._loc_keys.remove(loc_key) + + def pretty_str(self, loc_key): + """Return a human readable version of @loc_key, according to information + available in this LocationDB instance""" + names = self.get_location_names(loc_key) + if names: + return ",".join(names) + offset = self.get_location_offset(loc_key) + if offset is not None: + return "loc_%x" % offset + return str(loc_key) + + @property + def loc_keys(self): + """Return all loc_keys""" + return self._loc_keys + + @property + def names(self): + """Return all known names""" + return self._name_to_loc_key.keys() + + @property + def offsets(self): + """Return all known offsets""" + return self._offset_to_loc_key.keys() + + def __str__(self): + out = [] + for loc_key in self._loc_keys: + names = self.get_location_names(loc_key) + offset = self.get_location_offset(loc_key) + out.append("%s: %s - %s" % ( + loc_key, + "0x%x" % offset if offset is not None else None, + ",".join(names) + )) + return "\n".join(out) + + def merge(self, location_db): + """Merge with another LocationDB @location_db + + WARNING: old reference to @location_db information (such as LocKeys) + must be retrieved from the updated version of this instance. The + dedicated "get_*" APIs may be used for this task + """ + # A simple merge is not doable here, because LocKey will certainly + # collides + + for foreign_loc_key in location_db.loc_keys: + foreign_names = location_db.get_location_names(foreign_loc_key) + foreign_offset = location_db.get_location_offset(foreign_loc_key) + if foreign_names: + init_name = list(foreign_names)[0] + else: + init_name = None + loc_key = self.add_location(offset=foreign_offset, name=init_name, + strict=False) + cur_names = self.get_location_names(loc_key) + for name in foreign_names: + if name not in cur_names and name != init_name: + self.add_location_name(loc_key, name=name) + + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr + + @expr: Expr instance + """ + if expr.is_int(): + loc_key = self.get_or_create_offset_location(int(expr)) + ret = ExprLoc(loc_key, expr.size) + return ret + return expr + + # Deprecated APIs + @property + def items(self): + """Return all loc_keys""" + warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') + return list(self._loc_keys) + + def __getitem__(self, item): + warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' + '"get_offset_location"') + if item in self._name_to_loc_key: + return self._name_to_loc_key[item] + if item in self._offset_to_loc_key: + return self._offset_to_loc_key[item] + raise KeyError('unknown symbol %r' % item) + + def __contains__(self, item): + warnings.warn('DEPRECATION WARNING: use "get_name_location" or ' + '"get_offset_location", or ".offsets" or ".names"') + return item in self._name_to_loc_key or item in self._offset_to_loc_key + + def loc_key_to_name(self, loc_key): + """[DEPRECATED API], see 'get_location_names'""" + warnings.warn("Deprecated API: use 'get_location_names'") + return sorted(self.get_location_names(loc_key))[0] + + def loc_key_to_offset(self, loc_key): + """[DEPRECATED API], see 'get_location_offset'""" + warnings.warn("Deprecated API: use 'get_location_offset'") + return self.get_location_offset(loc_key) + + def remove_loc_key(self, loc_key): + """[DEPRECATED API], see 'remove_location'""" + warnings.warn("Deprecated API: use 'remove_location'") + self.remove_location(loc_key) + + def del_loc_key_offset(self, loc_key): + """[DEPRECATED API], see 'unset_location_offset'""" + warnings.warn("Deprecated API: use 'unset_location_offset'") + self.unset_location_offset(loc_key) + + def getby_offset(self, offset): + """[DEPRECATED API], see 'get_offset_location'""" + warnings.warn("Deprecated API: use 'get_offset_location'") + return self.get_offset_location(offset) + + def getby_name(self, name): + """[DEPRECATED API], see 'get_name_location'""" + warnings.warn("Deprecated API: use 'get_name_location'") + return self.get_name_location(name) + + def getby_offset_create(self, offset): + """[DEPRECATED API], see 'get_or_create_offset_location'""" + warnings.warn("Deprecated API: use 'get_or_create_offset_location'") + return self.get_or_create_offset_location(offset) + + def getby_name_create(self, name): + """[DEPRECATED API], see 'get_or_create_name_location'""" + warnings.warn("Deprecated API: use 'get_or_create_name_location'") + return self.get_or_create_name_location(name) + + def rename_location(self, loc_key, newname): + """[DEPRECATED API], see 'add_name_location' and 'remove_location_name' + """ + warnings.warn("Deprecated API: use 'add_location_name' and " + "'remove_location_name'") + for name in self.get_location_names(loc_key): + self.remove_location_name(loc_key, name) + self.add_location_name(loc_key, name) + + def set_offset(self, loc_key, offset): + """[DEPRECATED API], see 'set_location_offset'""" + warnings.warn("Deprecated API: use 'set_location_offset'") + self.set_location_offset(loc_key, offset, force=True) + + def gen_loc_key(self): + """[DEPRECATED API], see 'add_location'""" + warnings.warn("Deprecated API: use 'add_location'") + return self.add_location() + + def str_loc_key(self, loc_key): + """[DEPRECATED API], see 'pretty_str'""" + warnings.warn("Deprecated API: use 'pretty_str'") + return self.pretty_str(loc_key) diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index 3b97cbb6..7efa17d0 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -60,16 +60,16 @@ class DirectiveDontSplit(Directive): pass -def guess_next_new_label(symbol_pool): +def guess_next_new_label(loc_db): """Generate a new label - @symbol_pool: the AsmSymbolPool instance""" + @loc_db: the LocationDB instance""" i = 0 gen_name = "loc_%.8X" while True: name = gen_name % i - label = symbol_pool.getby_name(name) + label = loc_db.get_name_location(name) if label is None: - return symbol_pool.add_location(name) + return loc_db.add_location(name) i += 1 @@ -77,30 +77,30 @@ STATE_NO_BLOC = 0 STATE_IN_BLOC = 1 -def asm_ast_to_expr_with_size(arg, symbol_pool, size): +def asm_ast_to_expr_with_size(arg, loc_db, size): if isinstance(arg, AstId): return ExprId(arg.name, size) if isinstance(arg, AstOp): - args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] + args = [asm_ast_to_expr_with_size(tmp, loc_db, size) for tmp in arg.args] return ExprOp(arg.op, *args) if isinstance(arg, AstInt): return ExprInt(arg.value, size) return None -def parse_txt(mnemo, attrib, txt, symbol_pool=None): - """Parse an assembly listing. Returns a couple (asmcfg, symbol_pool), where - asmcfg is an AsmCfg instance and symbol_pool the associated AsmSymbolPool +def parse_txt(mnemo, attrib, txt, loc_db=None): + """Parse an assembly listing. Returns a couple (asmcfg, loc_db), where + asmcfg is an AsmCfg instance and loc_db the associated LocationDB @mnemo: architecture used @attrib: architecture attribute @txt: assembly listing - @symbol_pool: (optional) the AsmSymbolPool instance used to handle labels + @loc_db: (optional) the LocationDB instance used to handle labels of the listing """ - if symbol_pool is None: - symbol_pool = asmblock.AsmSymbolPool() + if loc_db is None: + loc_db = asmblock.LocationDB() C_NEXT = asmblock.AsmConstraint.c_next C_TO = asmblock.AsmConstraint.c_to @@ -121,7 +121,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue # directive @@ -158,7 +158,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): for element in data_raw: element = element.strip() element_parsed = base_expr.parseString(element)[0] - element_expr = asm_ast_to_expr_with_size(element_parsed, symbol_pool, size) + element_expr = asm_ast_to_expr_with_size(element_parsed, loc_db, size) expr_list.append(element_expr) raw_data = asmblock.AsmRaw(expr_list) @@ -190,7 +190,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): match_re = LABEL_RE.match(line) if match_re: label_name = match_re.group(1) - label = symbol_pool.getby_name_create(label_name) + label = loc_db.get_or_create_name_location(label_name) lines.append(label) continue @@ -198,10 +198,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if ';' in line: line = line[:line.find(';')] line = line.strip(' ').strip('\t') - instr = mnemo.fromstring(line, symbol_pool, attrib) + instr = mnemo.fromstring(line, loc_db, attrib) if instr.dstflow(): - instr.dstflow2label(symbol_pool) + instr.dstflow2label(loc_db) lines.append(instr) asmblock.log_asmblock.info("___pre asm oki___") @@ -210,7 +210,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): cur_block = None state = STATE_NO_BLOC i = 0 - asmcfg = asmblock.AsmCFG(symbol_pool) + asmcfg = asmblock.AsmCFG(loc_db) block_to_nlink = None delayslot = 0 while i < len(lines): @@ -232,7 +232,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): elif not isinstance(line, LocKey): # First line must be a label. If it's not the case, generate # it. - loc = guess_next_new_label(symbol_pool) + loc = guess_next_new_label(loc_db) cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment) else: cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment) @@ -281,7 +281,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): if delayslot: raise RuntimeError("Cannot have breakflow in delayslot") if line.dstflow(): - for dst in line.getdstflow(symbol_pool): + for dst in line.getdstflow(loc_db): if not isinstance(dst, ExprId): continue if dst in mnemo.regs.all_regs_ids: @@ -302,4 +302,4 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): # Log block asmblock.log_asmblock.info(block) - return asmcfg, symbol_pool + return asmcfg, loc_db diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 530685db..ab1af953 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -146,12 +146,12 @@ class SemBuilder(object): loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" out = ast.parse(loc_end).body out += ast.parse(loc_end_expr).body - loc_if = "loc_if = ir.symbol_pool.gen_loc_key()" + loc_if = "loc_if = ir.loc_db.add_location()" loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" out += ast.parse(loc_if).body out += ast.parse(loc_if_expr).body if loc_else: - loc_else = "loc_else = ir.symbol_pool.gen_loc_key()" + loc_else = "loc_else = ir.loc_db.add_location()" loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" out += ast.parse(loc_else).body out += ast.parse(loc_else_expr).body diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index b8266bf7..8e63e6a2 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -169,7 +169,7 @@ class LocKey(object): return "<%s %d>" % (self.__class__.__name__, self._key) def __str__(self): - return "loc_%d" % self.key + return "loc_key_%d" % self.key # IR definitions @@ -644,7 +644,7 @@ class ExprLoc(Expr): return Expr.get_object(cls, (loc_key, size)) def __str__(self): - return "loc_%d" % self._loc_key.key + return str(self._loc_key) def get_r(self, mem_read=False, cst_read=False): return set() diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 6084e67d..8ee35ed5 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,8 +23,8 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmSymbolPool, AsmBlock, \ - AsmConstraint, AsmBlockBad +from miasm2.core.asmblock import AsmBlock, AsmConstraint, AsmBlockBad +from miasm2.core.locationdb import LocationDB from miasm2.core.graph import DiGraph class AssignBlock(object): @@ -357,7 +357,7 @@ class IRBlock(object): def __str__(self): out = [] - out.append('loc_key_%s' % self.loc_key.key) + out.append(str(self.loc_key)) for assignblk in self: for dst, src in assignblk.iteritems(): out.append('\t%s = %s' % (dst, src)) @@ -402,28 +402,37 @@ class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, blocks, symbol_pool=None, *args, **kwargs): + def __init__(self, blocks, loc_db=None, *args, **kwargs): """Instanciate a DiGraphIR @blocks: IR blocks """ - self.symbol_pool = symbol_pool + self.loc_db = loc_db self._blocks = blocks super(DiGraphIR, self).__init__(*args, **kwargs) def _expr_loc_to_symb(self, expr): if not expr.is_loc(): return expr - if self.symbol_pool is None: + if self.loc_db is None: name = str(expr) else: - name = self.symbol_pool.loc_key_to_name(expr.loc_key) + names = self.loc_db.get_location_names(expr.loc_key) + if not names: + name = self.loc_db.pretty_str(expr.loc_key) + else: + # Use only one name for readability + name = sorted(names)[0] return m2_expr.ExprId(name, expr.size) def node2lines(self, node): - if self.symbol_pool is None: + if self.loc_db is None: node_name = str(node) else: - node_name = self.symbol_pool.loc_key_to_name(node) + names = self.loc_db.get_location_names(node) + if not names: + node_name = self.loc_db.pretty_str(node) + else: + node_name = "".join("%s:\n" % name for name in names) yield self.DotCellDescription( text="%s" % node_name, attr={ @@ -481,10 +490,10 @@ class IntermediateRepresentation(object): Allow native assembly to intermediate representation traduction """ - def __init__(self, arch, attrib, symbol_pool=None): - if symbol_pool is None: - symbol_pool = AsmSymbolPool() - self.symbol_pool = symbol_pool + def __init__(self, arch, attrib, loc_db=None): + if loc_db is None: + loc_db = LocationDB() + self.loc_db = loc_db self.blocks = {} self.pc = arch.getpc(attrib) self.sp = arch.getsp(attrib) @@ -498,6 +507,11 @@ class IntermediateRepresentation(object): warnings.warn('DEPRECATION WARNING: use ".blocks" instead of ".blocs"') return self.blocks + @property + def symbol_pool(self): + warnings.warn('DEPRECATION WARNING: use ".loc_db" instead of ".symbol_pool"') + return self.loc_db + def get_ir(self, instr): raise NotImplementedError("Abstract Method") @@ -525,7 +539,7 @@ class IntermediateRepresentation(object): except (ValueError, TypeError): return None - return self.symbol_pool.getby_offset_create(addr) + return self.loc_db.get_or_create_offset_location(addr) def get_block(self, addr): """Returns the irbloc associated to an ExprId/ExprInt/loc_key/int @@ -546,7 +560,7 @@ class IntermediateRepresentation(object): def add_instr(self, line, loc_key=None, gen_pc_updt=False): if loc_key is None: - loc_key = self.symbol_pool.gen_loc_key() + loc_key = self.loc_db.add_location() block = AsmBlock(loc_key) block.lines = [line] self.add_block(block, gen_pc_updt) @@ -682,9 +696,9 @@ class IntermediateRepresentation(object): if block.lines: line = block.lines[-1] if line.offset is not None: - loc_key = self.symbol_pool.getby_offset_create(line.offset + line.l) + loc_key = self.loc_db.get_or_create_offset_location(line.offset + line.l) if loc_key is None: - loc_key = self.symbol_pool.gen_loc_key() + loc_key = self.loc_db.add_location() block.add_cst(loc_key, AsmConstraint.c_next) else: loc_key = next_loc_key @@ -719,18 +733,18 @@ class IntermediateRepresentation(object): def get_loc_key_for_instr(self, instr): """Returns the loc_key associated to an instruction @instr: current instruction""" - return self.symbol_pool.getby_offset_create(instr.offset) + return self.loc_db.get_or_create_offset_location(instr.offset) def gen_loc_key_and_expr(self, size): """ Return a loc_key and it's corresponding ExprLoc @size: size of expression """ - loc_key = self.symbol_pool.gen_loc_key() + loc_key = self.loc_db.add_location() return loc_key, m2_expr.ExprLoc(loc_key, size) def get_next_loc_key(self, instr): - loc_key = self.symbol_pool.getby_offset_create(instr.offset + instr.l) + loc_key = self.loc_db.get_or_create_offset_location(instr.offset + instr.l) return loc_key def simplify(self, simplifier): @@ -814,13 +828,13 @@ class IntermediateRepresentation(object): """ Gen irbloc digraph """ - self._graph = DiGraphIR(self.blocks, self.symbol_pool) + self._graph = DiGraphIR(self.blocks, self.loc_db) for lbl, block in self.blocks.iteritems(): assert isinstance(lbl, m2_expr.LocKey) self._graph.add_node(lbl) for dst in self.dst_trackback(block): if dst.is_int(): - dst_lbl = self.symbol_pool.getby_offset_create(int(dst)) + dst_lbl = self.loc_db.get_or_create_offset_location(int(dst)) dst = m2_expr.ExprLoc(dst_lbl.loc_key, self.pc.size) if dst.is_loc(): self._graph.add_edge(lbl, dst.loc_key) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index c75bd9e8..288a46e4 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -19,7 +19,7 @@ def get_block(ir_arch, mdis, addr): """Get IRBlock at address @addr""" lbl = ir_arch.get_loc_key(addr) if not lbl in ir_arch.blocks: - offset = mdis.symbol_pool.loc_key_to_offset(lbl) + offset = mdis.loc_db.get_location_offset(lbl) block = mdis.dis_block(offset) ir_arch.add_block(block) irblock = ir_arch.get_block(lbl) @@ -892,7 +892,7 @@ class SymbolicExecutionEngine(object): def eval_exprloc(self, expr, **kwargs): """[DEV]: Evaluate an ExprLoc using the current state""" - offset = self.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) if offset is not None: ret = ExprInt(offset, expr.size) else: diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 64d428b4..5fe12996 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -128,7 +128,7 @@ class SymbExecTopNoMem(SymbolicExecutionEngine): return ret def eval_exprloc(self, expr, **kwargs): - offset = self.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) if offset is not None: ret = ExprInt(offset, expr.size) else: diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index b7821e85..cafec7c8 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,13 +18,13 @@ class TranslatorC(Translator): '>>>': 'rot_right', } - def __init__(self, symbol_pool=None, **kwargs): + def __init__(self, loc_db=None, **kwargs): """Instance a C translator - @symbol_pool: AsmSymbolPool instance + @loc_db: LocationDB instance """ super(TranslatorC, self).__init__(**kwargs) # symbol pool - self.symbol_pool = symbol_pool + self.loc_db = loc_db def _size2mask(self, size): """Return a C string corresponding to the size2mask operation, with support for @@ -52,10 +52,10 @@ class TranslatorC(Translator): def from_ExprLoc(self, expr): loc_key = expr.loc_key - if self.symbol_pool is None: + if self.loc_db is None: return str(loc_key) - offset = self.symbol_pool.loc_key_to_offset(loc_key) + offset = self.loc_db.get_location_offset(loc_key) if offset is None: return str(loc_key) diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index f5d633e0..1a513bfb 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -119,7 +119,7 @@ class TranslatorSMT2(Translator): # Implemented language __LANG__ = "smt2" - def __init__(self, endianness="<", symbol_pool=None, **kwargs): + def __init__(self, endianness="<", loc_db=None, **kwargs): """Instance a SMT2 translator @endianness: (optional) memory endianness """ @@ -129,7 +129,7 @@ class TranslatorSMT2(Translator): # map of translated bit vectors self._bitvectors = dict() # symbol pool - self.symbol_pool = symbol_pool + self.loc_db = loc_db def from_ExprInt(self, expr): return bit_vec_val(expr.arg.arg, expr.size) @@ -141,20 +141,13 @@ class TranslatorSMT2(Translator): def from_ExprLoc(self, expr): loc_key = expr.loc_key - if self.symbol_pool is None: + if self.loc_db is None or self.loc_db.get_location_offset(loc_key) is None: if str(loc_key) not in self._bitvectors: self._bitvectors[str(loc_key)] = expr.size return str(loc_key) - offset = self.symbol_pool.loc_key_to_offset(loc_key) - name = self.symbol_pool.loc_key_to_name(loc_key) - - if offset is None: - return bit_vec_val(str(offset), expr.size) - name = "|{}|".format(str(name)) - if name not in self._bitvectors: - self._bitvectors[name] = expr.size - return name + offset = self.loc_db.get_location_offset(loc_key) + return bit_vec_val(str(offset), expr.size) def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index d01b73fa..887c68d0 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -115,7 +115,7 @@ class TranslatorZ3(Translator): # Operations translation trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - def __init__(self, endianness="<", symbol_pool=None, **kwargs): + def __init__(self, endianness="<", loc_db=None, **kwargs): """Instance a Z3 translator @endianness: (optional) memory endianness """ @@ -125,8 +125,7 @@ class TranslatorZ3(Translator): super(TranslatorZ3, self).__init__(**kwargs) self._mem = Z3Mem(endianness) - # symbol pool - self.symbol_pool = symbol_pool + self.loc_db = loc_db def from_ExprInt(self, expr): return z3.BitVecVal(expr.arg.arg, expr.size) @@ -135,16 +134,13 @@ class TranslatorZ3(Translator): return z3.BitVec(str(expr), expr.size) def from_ExprLoc(self, expr): - if self.symbol_pool is None: - # No symbol_pool, fallback to default name + if self.loc_db is None: + # No loc_db, fallback to default name return z3.BitVec(str(expr), expr.size) loc_key = expr.loc_key - offset = self.symbol_pool.loc_key_to_offset(loc_key) - name = self.symbol_pool.loc_key_to_name(loc_key) + offset = self.loc_db.get_location_offset(loc_key) if offset is not None: return z3.BitVecVal(offset, expr.size) - if name is not None: - return z3.BitVec(name, expr.size) # fallback to default name return z3.BitVec(str(loc_key), expr.size) diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index c9e98d93..10140fd2 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -9,7 +9,7 @@ from miasm2.ir.ir import IRBlock, AssignBlock from miasm2.ir.translators.C import TranslatorC from miasm2.core.asmblock import AsmBlockBad -TRANSLATOR_NO_SYMBOL = TranslatorC(symbol_pool=None) +TRANSLATOR_NO_SYMBOL = TranslatorC(loc_db=None) SIZE_TO_MASK = {size: TRANSLATOR_NO_SYMBOL.from_expr(ExprInt(0, size).mask) for size in (1, 2, 3, 7, 8, 16, 32, 64, 128)} @@ -102,7 +102,7 @@ class CGen(object): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.pc - self.translator = TranslatorC(self.ir_arch.symbol_pool) + self.translator = TranslatorC(self.ir_arch.loc_db) self.init_arch_C() def init_arch_C(self): @@ -143,7 +143,7 @@ class CGen(object): new_assignblk = dict(assignblk) if self.ir_arch.IRDst not in assignblk: offset = instr.offset + instr.l - loc_key = self.ir_arch.symbol_pool.getby_offset_create(offset) + loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) dst = ExprLoc(loc_key, self.ir_arch.IRDst.size) new_assignblk[self.ir_arch.IRDst] = dst irs = [AssignBlock(new_assignblk, instr)] @@ -290,12 +290,12 @@ class CGen(object): "((%s)?(%s):(%s))" % (cond, src1b, src2b)) if isinstance(expr, ExprInt): offset = int(expr) - loc_key = self.ir_arch.symbol_pool.getby_offset_create(offset) + loc_key = self.ir_arch.loc_db.get_or_create_offset_location(offset) self.add_label_index(dst2index, loc_key) return ("%s" % dst2index[loc_key], hex(offset)) if expr.is_loc(): loc_key = expr.loc_key - offset = self.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + offset = self.ir_arch.loc_db.get_location_offset(expr.loc_key) if offset is not None: self.add_label_index(dst2index, loc_key) return ("%s" % dst2index[loc_key], hex(offset)) @@ -339,7 +339,7 @@ class CGen(object): out.append( 'printf("%.8X %s\\n");' % ( instr_attrib.instr.offset, - instr_attrib.instr.to_string(self.ir_arch.symbol_pool) + instr_attrib.instr.to_string(self.ir_arch.loc_db) ) ) return out @@ -367,7 +367,7 @@ class CGen(object): return out assert isinstance(dst, LocKey) - offset = self.ir_arch.symbol_pool.loc_key_to_offset(dst) + offset = self.ir_arch.loc_db.get_location_offset(dst) if offset is None: # Generate goto for local labels return ['goto %s;' % dst] @@ -518,7 +518,7 @@ class CGen(object): last_instr = block.lines[-1] offset = last_instr.offset + last_instr.l - return self.ir_arch.symbol_pool.getby_offset_create(offset) + return self.ir_arch.loc_db.get_or_create_offset_location(offset) def gen_init(self, block): """ @@ -528,7 +528,7 @@ class CGen(object): instr_offsets = [line.offset for line in block.lines] post_label = self.get_block_post_label(block) - post_offset = self.ir_arch.symbol_pool.loc_key_to_offset(post_label) + post_offset = self.ir_arch.loc_db.get_location_offset(post_label) instr_offsets.append(post_offset) lbl_start = block.loc_key return (self.CODE_INIT % lbl_start).split("\n"), instr_offsets @@ -564,7 +564,7 @@ class CGen(object): """ loc_key = self.get_block_post_label(block) - offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + offset = self.ir_arch.loc_db.get_location_offset(loc_key) dst = self.dst_to_c(offset) code = self.CODE_RETURN_NO_EXCEPTION % (loc_key, self.C_PC, dst, dst) return code.split('\n') diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index b636782d..a26d4c9f 100644 --- a/miasm2/jitter/jitcore.py +++ b/miasm2/jitter/jitcore.py @@ -63,7 +63,7 @@ class JitCore(object): self.mdis = disasmEngine( ir_arch.arch, ir_arch.attrib, bin_stream, lines_wd=self.options["jit_maxline"], - symbol_pool=ir_arch.symbol_pool, + loc_db=ir_arch.loc_db, follow_call=False, dontdis_retcall=False, split_dis=self.split_dis, @@ -101,7 +101,7 @@ class JitCore(object): cur_block.ad_max = cur_block.lines[-1].offset + cur_block.lines[-1].l else: # 1 byte block for unknown mnemonic - offset = ir_arch.symbol_pool.loc_key_to_offset(cur_block.loc_key) + offset = ir_arch.loc_db.get_location_offset(cur_block.loc_key) cur_block.ad_min = offset cur_block.ad_max = offset+1 @@ -138,7 +138,7 @@ class JitCore(object): # Get the block if isinstance(addr, LocKey): - addr = self.ir_arch.symbol_pool.loc_key_to_offset(addr) + addr = self.ir_arch.loc_db.get_location_offset(addr) if addr is None: raise RuntimeError("Unknown offset for LocKey") @@ -151,7 +151,7 @@ class JitCore(object): return cur_block # Logging if self.log_newbloc: - print cur_block.to_string(self.mdis.symbol_pool) + print cur_block.to_string(self.mdis.loc_db) # Update label -> block self.loc_key_to_block[cur_block.loc_key] = cur_block @@ -253,13 +253,13 @@ class JitCore(object): try: for irblock in block.blocks: # Remove offset -> jitted block link - offset = self.ir_arch.symbol_pool.loc_key_to_offset(irblock.loc_key) + offset = self.ir_arch.loc_db.get_location_offset(irblock.loc_key) if offset in self.offset_to_jitted_func: del(self.offset_to_jitted_func[offset]) except AttributeError: # The block has never been translated in IR - offset = self.ir_arch.symbol_pool.loc_key_to_offset(block.loc_key) + offset = self.ir_arch.loc_db.get_location_offset(block.loc_key) if offset in self.offset_to_jitted_func: del(self.offset_to_jitted_func[offset]) @@ -293,7 +293,7 @@ class JitCore(object): @block: asmblock """ block_raw = "".join(line.b for line in block.lines) - offset = self.ir_arch.symbol_pool.loc_key_to_offset(block.loc_key) + offset = self.ir_arch.loc_db.get_location_offset(block.loc_key) block_hash = md5("%X_%s_%s_%s_%s" % (offset, self.arch_name, self.log_mn, diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py index cd92bab1..dbaa2a08 100644 --- a/miasm2/jitter/jitcore_gcc.py +++ b/miasm2/jitter/jitcore_gcc.py @@ -28,7 +28,7 @@ class JitCore_Gcc(JitCore_Cc_Base): lib = ctypes.cdll.LoadLibrary(fname_so) func = getattr(lib, self.FUNCNAME) addr = ctypes.cast(func, ctypes.c_void_p).value - offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) + offset = self.ir_arch.loc_db.get_location_offset(label) self.offset_to_jitted_func[offset] = addr self.states[offset] = lib diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py index bc921569..ea4f20ec 100644 --- a/miasm2/jitter/jitcore_llvm.py +++ b/miasm2/jitter/jitcore_llvm.py @@ -118,5 +118,5 @@ class JitCore_LLVM(jitcore.JitCore): # Store a pointer on the function jitted code loc_key = block.loc_key - offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + offset = self.ir_arch.loc_db.get_location_offset(loc_key) self.offset_to_jitted_func[offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index 45b418b5..fa751a68 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -127,7 +127,7 @@ class JitCore_Python(jitcore.JitCore): raise NotImplementedError("Type not handled: %s" % ad) # Associate myfunc with current loc_key - offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + offset = self.ir_arch.loc_db.get_location_offset(loc_key) assert offset is not None self.offset_to_jitted_func[offset] = myfunc diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index 16b08cf1..d63351cc 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -625,7 +625,7 @@ class LLVMFunction(): return ret if expr.is_loc(): - offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(expr.loc_key) ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) self.update_cache(expr, ret) return ret @@ -1099,13 +1099,13 @@ class LLVMFunction(): self.main_stream = False if isinstance(dst, ExprInt): - loc_key = self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)) + loc_key = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(int(dst)) dst = ExprLoc(loc_key, dst.size) if isinstance(dst, ExprLoc): loc_key = dst.loc_key bbl = self.get_basic_block_by_loc_key(loc_key) - offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) if bbl is not None: # "local" jump, inside this function if offset is None: @@ -1234,7 +1234,7 @@ class LLVMFunction(): ExprId("status", 32)) self.affect(t_size(m2_csts.EXCEPT_UNK_MNEMO), m2_exception_flag) - offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(asmblock.loc_key) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(asmblock.loc_key) self.set_ret(LLVMType.IntType(64)(offset)) def gen_finalize(self, asmblock, codegen): @@ -1280,7 +1280,7 @@ class LLVMFunction(): # Else Block builder.position_at_end(else_block) PC = self.llvm_context.PC - next_label_offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(next_label) + next_label_offset = self.llvm_context.ir_arch.loc_db.get_location_offset(next_label) to_ret = LLVMType.IntType(PC.size)(next_label_offset) self.affect(to_ret, PC) self.set_ret(to_ret) @@ -1317,7 +1317,7 @@ class LLVMFunction(): # Create basic blocks (for label branchs) entry_bbl, builder = self.entry_bbl, self.builder for instr in asmblock.lines: - lbl = self.llvm_context.ir_arch.symbol_pool.getby_offset_create(instr.offset) + lbl = self.llvm_context.ir_arch.loc_db.get_or_create_offset_location(instr.offset) self.append_basic_block(lbl) # TODO: merge duplicate code with CGen @@ -1333,7 +1333,7 @@ class LLVMFunction(): default_value=eltype(0)) self.local_vars_pointers[element.name] = ptr loc_key = codegen.get_block_post_label(asmblock) - offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + offset = self.llvm_context.ir_arch.loc_db.get_location_offset(loc_key) instr_offsets.append(offset) self.append_basic_block(loc_key) |