diff options
Diffstat (limited to 'miasm2')
39 files changed, 1074 insertions, 643 deletions
diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index f7949c88..49368508 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -1,8 +1,7 @@ """Provide dependency graph""" -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprInt, ExprLoc, ExprAff from miasm2.core.graph import DiGraph -from miasm2.core.asmblock import AsmLabel, expr_is_int_or_label, expr_is_label from miasm2.expression.simplifications import expr_simp from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IRBlock, AssignBlock @@ -61,7 +60,7 @@ class DependencyNode(object): def __str__(self): """Returns a string representation of DependencyNode""" return "<%s %s %s %s>" % (self.__class__.__name__, - self.label.name, self.element, + self.label, self.element, self.line_nb) def __repr__(self): @@ -297,9 +296,10 @@ class DependencyResult(DependencyState): line_nb).assignblks # Eval the block - temp_label = AsmLabel("Temp") + symbol_pool = AsmSymbolPool() + temp_label = symbol_pool.getby_name_create("Temp") symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) - symb_exec.eval_updt_irblock(IRBlock(temp_label, assignblks), step=step) + symb_exec.eval_updt_irblock(IRBlock(temp_label.loc_key, assignblks), step=step) # Return only inputs values (others could be wrongs) return {element: symb_exec.symbols[element] @@ -314,30 +314,31 @@ class DependencyResultImplicit(DependencyResult): # Z3 Solver instance _solver = None - unsat_expr = m2_expr.ExprAff(m2_expr.ExprInt(0, 1), - m2_expr.ExprInt(1, 1)) + unsat_expr = ExprAff(ExprInt(0, 1), ExprInt(1, 1)) def _gen_path_constraints(self, translator, expr, expected): """Generate path constraint from @expr. Handle special case with generated labels """ out = [] - expected_is_label = expr_is_label(expected) + expected = self._ira.symbol_pool.canonize_to_exprloc(expected) + expected_is_label = expected.is_label() for consval in possible_values(expr): - if (expected_is_label and - consval.value != expected): + value = self._ira.symbol_pool.canonize_to_exprloc(consval.value) + if expected_is_label and value != expected: continue - if (not expected_is_label and - expr_is_label(consval.value)): + if not expected_is_label and value.is_label(): continue conds = z3.And(*[translator.from_expr(cond.to_constraint()) for cond in consval.constraints]) - if expected != consval.value: - conds = z3.And(conds, - translator.from_expr( - m2_expr.ExprAff(consval.value, - expected))) + if expected != value: + conds = z3.And( + conds, + translator.from_expr( + ExprAff(value, + expected)) + ) out.append(conds) if out: @@ -373,10 +374,8 @@ class DependencyResultImplicit(DependencyResult): # Add constraint if hist_nb < history_size: next_label = history[hist_nb] - expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, - size)) - solver.add( - self._gen_path_constraints(translator, dst, expected)) + expected = symb_exec.eval_expr(ExprLoc(next_label, size)) + solver.add(self._gen_path_constraints(translator, dst, expected)) # Save the solver self._solver = solver @@ -491,11 +490,11 @@ class DependencyGraph(object): @follow: set of nodes to follow @nofollow: set of nodes not to follow """ - if isinstance(expr, m2_expr.ExprId): + if expr.is_id(): follow.add(expr) - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): nofollow.add(expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): follow.add(expr) return expr @@ -508,7 +507,7 @@ class DependencyGraph(object): @follow_mem: force the visit of memory sub expressions @follow_call: force the visit of call sub expressions """ - if not follow_mem and isinstance(expr, m2_expr.ExprMem): + if not follow_mem and expr.is_mem(): nofollow.add(expr) return False if not follow_call and expr.is_function_call(): @@ -534,8 +533,9 @@ class DependencyGraph(object): """Do not follow labels""" follow = set() for expr in exprs: - if not expr_is_int_or_label(expr): - follow.add(expr) + if expr.is_int() or expr.is_label(): + continue + follow.add(expr) return follow, set() diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 427a8bd0..66caffc9 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -56,15 +56,14 @@ except ImportError: z3 = None from miasm2.expression.expression import ExprMem, ExprInt, ExprCompose, \ - ExprAff, ExprId + ExprAff, ExprId, ExprLoc, LocKey from miasm2.core.bin_stream import bin_stream_vm -from miasm2.core.asmblock import expr_is_label from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals - +from miasm2.core.asmblock import AsmBlockBad DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -72,7 +71,7 @@ class DriftException(Exception): """Raised when the emulation drift from the reference engine""" def __init__(self, info): - super(Exception, self).__init__() + super(DriftException, self).__init__() self.info = info def __str__(self): @@ -161,11 +160,14 @@ class DSEEngine(object): self.symb_concrete = None # Concrete SymbExec for path desambiguisation self.mdis = None # DisasmEngine + self.symbol_pool = self.ir_arch.symbol_pool + def prepare(self): """Prepare the environment for attachment with a jitter""" # Disassembler self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), - lines_wd=1) + lines_wd=1, + symbol_pool=self.symbol_pool) # Symbexec engine ## Prepare symbexec engines @@ -215,7 +217,7 @@ class DSEEngine(object): self.prepare() def handle(self, cur_addr): - """Handle destination + r"""Handle destination @cur_addr: Expr of the next address in concrete execution /!\ cur_addr may be a lbl_gen @@ -295,6 +297,9 @@ class DSEEngine(object): # Call callbacks associated to the current address cur_addr = self.jitter.pc + if isinstance(cur_addr, LocKey): + lbl = self.ir_arch.symbol_pool.loc_key_to_label(cur_addr) + cur_addr = lbl.offset if cur_addr in self.handler: self.handler[cur_addr](self) @@ -321,7 +326,8 @@ class DSEEngine(object): ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) + if not isinstance(asm_block, AsmBlockBad): + self.ir_arch.add_block(asm_block) self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) # Emulate the current instruction @@ -329,7 +335,7 @@ class DSEEngine(object): # Is the symbolic execution going (potentially) to jump on a lbl_gen? if len(self.ir_arch.blocks) == 1: - next_addr = self.symb.run_at(cur_addr) + self.symb.run_at(cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -339,18 +345,25 @@ class DSEEngine(object): # Update the concrete execution self._update_state_from_concrete_symb(self.symb_concrete) while True: + next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) self.symb.run_block_at(cur_addr) - if not(expr_is_label(next_addr_concrete) and - next_addr_concrete.name.offset is None): + if not (isinstance(next_addr_concrete, ExprLoc) and + self.ir_arch.symbol_pool.loc_key_to_offset( + next_addr_concrete + ) is None): # Not a lbl_gen, exit break + if self.symb.ir_arch.get_block(cur_addr) is None: + break + # Call handle with lbl_gen state self.handle(next_addr_concrete) cur_addr = next_addr_concrete + # At this stage, symbolic engine is one instruction after the concrete # engine @@ -428,7 +441,7 @@ class DSEEngine(object): symbexec.symbols[reg] = value def update_state_from_concrete(self, cpu=True, mem=False): - """Update the symbolic state with concrete values from the concrete + r"""Update the symbolic state with concrete values from the concrete engine @cpu: (optional) if set, update registers' value @@ -596,13 +609,19 @@ class DSEPathConstraint(DSEEngine): self.cur_solver.add(self.z3_trans.from_expr(cons)) def handle(self, cur_addr): + cur_addr = self.ir_arch.symbol_pool.canonize_to_exprloc(cur_addr) symb_pc = self.eval_expr(self.ir_arch.IRDst) possibilities = possible_values(symb_pc) cur_path_constraint = set() # path_constraint for the concrete path if len(possibilities) == 1: - assert next(iter(possibilities)).value == cur_addr + dst = next(iter(possibilities)).value + dst = self.ir_arch.symbol_pool.canonize_to_exprloc(dst) + assert dst == cur_addr else: for possibility in possibilities: + target_addr = self.ir_arch.symbol_pool.canonize_to_exprloc( + possibility.value + ) path_constraint = set() # Set of ExprAff for the possible path # Get constraint associated to the possible path @@ -642,11 +661,11 @@ class DSEPathConstraint(DSEEngine): "address 0x%x" % address) path_constraint.add(ExprAff(expr_mem, value)) - if possibility.value == cur_addr: + if target_addr == cur_addr: # Add path constraint cur_path_constraint = path_constraint - elif self.produce_solution(possibility.value): + elif self.produce_solution(target_addr): # Looking for a new solution self.cur_solver.push() for cons in path_constraint: @@ -657,8 +676,7 @@ class DSEPathConstraint(DSEEngine): result = self.cur_solver.check() if result == z3.sat: model = self.cur_solver.model() - self.handle_solution(model, possibility.value) + self.handle_solution(model, target_addr) self.cur_solver.pop() self.handle_correct_destination(cur_addr, cur_path_constraint) - diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index 94be74fd..8d4ab052 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -278,7 +278,7 @@ class aarch64_arg(m_arg): fixed_size.add(value.name.size) return value.name label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + return ExprLoc(label.loc_key, size_hint) if isinstance(value, AstInt): assert size_hint is not None return ExprInt(value.value, size_hint) @@ -311,44 +311,49 @@ class instruction_aarch64(instruction): super(instruction_aarch64, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos=None): + def arg2str(expr, index=None, symbol_pool=None): wb = False - if isinstance(e, m2_expr.ExprId) or isinstance(e, m2_expr.ExprInt): - return str(e) - elif isinstance(e, m2_expr.ExprOp) and e.op in shift_expr: - op_str = shift_str[shift_expr.index(e.op)] - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "slice_at": - return "%s LSL %s" % (e.args[0], e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op in extend_lst: - op_str = e.op - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "postinc": - if e.args[1].arg != 0: - return "[%s], %s" % (e.args[0], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc_wb": - if e.args[1].arg != 0: - return "[%s, %s]!" % (e.args[0], e.args[1]) + return str(expr) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: + op_str = shift_str[shift_expr.index(expr.op)] + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "slice_at": + return "%s LSL %s" % (expr.args[0], expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in extend_lst: + op_str = expr.op + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "postinc": + if expr.args[1].arg != 0: + return "[%s], %s" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc": - if len(e.args) == 1: - return "[%s]" % (e.args[0]) - elif not isinstance(e.args[1], m2_expr.ExprInt) or e.args[1].arg != 0: - return "[%s, %s]" % (e.args[0], e.args[1]) + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc_wb": + if expr.args[1].arg != 0: + return "[%s, %s]!" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == 'segm': - arg = e.args[1] + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc": + if len(expr.args) == 1: + return "[%s]" % (expr.args[0]) + elif not isinstance(expr.args[1], m2_expr.ExprInt) or expr.args[1].arg != 0: + return "[%s, %s]" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == 'segm': + arg = expr.args[1] if isinstance(arg, m2_expr.ExprId): arg = str(arg) elif arg.op == 'LSL' and arg.args[1].arg == 0: arg = str(arg.args[0]) else: arg = "%s %s %s" % (arg.args[0], arg.op, arg.args[1]) - return '[%s, %s]' % (e.args[0], arg) + return '[%s, %s]' % (expr.args[0], arg) else: raise NotImplementedError("bad op") @@ -366,13 +371,12 @@ class instruction_aarch64(instruction): def dstflow2label(self, symbol_pool): index = self.mnemo_flow_to_dst_index(self.name) - e = self.args[index] - if not isinstance(e, m2_expr.ExprInt): + expr = self.args[index] + if not expr.is_int(): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = m2_expr.ExprId(l, e.size) - self.args[index] = s + addr = expr.arg + self.offset + label = symbol_pool.getby_offset_create(addr) + self.args[index] = m2_expr.ExprLoc(label.loc_key, expr.size) def breakflow(self): return self.name in BRCOND + ["BR", "BLR", "RET", "ERET", "DRPS", "B", "BL"] diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index 88b0d0a7..1a213b35 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -593,14 +593,14 @@ def udiv(arg1, arg2, arg3): @sbuild.parse def cbz(arg1, arg2): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if arg1 else arg2 + dst = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) if arg1 else arg2 PC = dst ir.IRDst = dst @sbuild.parse def cbnz(arg1, arg2): - dst = arg2 if arg1 else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg2 if arg1 else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -625,14 +625,14 @@ def tbnz(arg1, arg2, arg3): @sbuild.parse def b_ne(arg1): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if zf else arg1 + dst = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) if zf else arg1 PC = dst ir.IRDst = dst @sbuild.parse def b_eq(arg1): - dst = arg1 if zf else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if zf else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -640,7 +640,7 @@ def b_eq(arg1): @sbuild.parse def b_ge(arg1): cond = cond2expr['GE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -648,7 +648,7 @@ def b_ge(arg1): @sbuild.parse def b_gt(arg1): cond = cond2expr['GT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -656,7 +656,7 @@ def b_gt(arg1): @sbuild.parse def b_cc(arg1): cond = cond2expr['CC'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -664,7 +664,7 @@ def b_cc(arg1): @sbuild.parse def b_cs(arg1): cond = cond2expr['CS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -672,7 +672,7 @@ def b_cs(arg1): @sbuild.parse def b_hi(arg1): cond = cond2expr['HI'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -680,7 +680,7 @@ def b_hi(arg1): @sbuild.parse def b_le(arg1): cond = cond2expr['LE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -688,7 +688,7 @@ def b_le(arg1): @sbuild.parse def b_ls(arg1): cond = cond2expr['LS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -696,7 +696,7 @@ def b_ls(arg1): @sbuild.parse def b_lt(arg1): cond = cond2expr['LT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -732,7 +732,7 @@ def br(arg1): def blr(arg1): PC = arg1 ir.IRDst = arg1 - LR = m2_expr.ExprId(ir.get_next_label(instr), 64) + LR = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) @sbuild.parse def nop(): diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 35574a84..204bf1b0 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -343,62 +343,67 @@ class instruction_arm(instruction): super(instruction_arm, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(expr, index=None, symbol_pool=None): wb = False - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - if isinstance(e, ExprOp) and e.op in expr2shift_dct: - if len(e.args) == 1: - return '%s %s' % (e.args[0], expr2shift_dct[e.op]) - elif len(e.args) == 2: - return '%s %s %s' % (e.args[0], expr2shift_dct[e.op], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + if isinstance(expr, ExprOp) and expr.op in expr2shift_dct: + if len(expr.args) == 1: + return '%s %s' % (expr.args[0], expr2shift_dct[expr.op]) + elif len(expr.args) == 2: + return '%s %s %s' % (expr.args[0], expr2shift_dct[expr.op], expr.args[1]) else: raise NotImplementedError('zarb arg2str') sb = False - if isinstance(e, ExprOp) and e.op == "sbit": + if isinstance(expr, ExprOp) and expr.op == "sbit": sb = True - e = e.args[0] - if isinstance(e, ExprOp) and e.op == "reglist": - o = [gpregs.expr.index(x) for x in e.args] + expr = expr.args[0] + if isinstance(expr, ExprOp) and expr.op == "reglist": + o = [gpregs.expr.index(x) for x in expr.args] out = reglist2str(o) if sb: out += "^" return out - if isinstance(e, ExprOp) and e.op == 'wback': + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] - if isinstance(e, ExprId): - out = str(e) + expr = expr.args[0] + if isinstance(expr, ExprId): + out = str(expr) if wb: out += "!" return out - if not isinstance(e, ExprMem): - return str(e) + if not isinstance(expr, ExprMem): + return str(expr) - e = e.arg - if isinstance(e, ExprOp) and e.op == 'wback': + expr = expr.arg + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] + expr = expr.args[0] - if isinstance(e, ExprId): - r, s = e, None - elif len(e.args) == 1 and isinstance(e.args[0], ExprId): - r, s = e.args[0], None - elif isinstance(e.args[0], ExprId): - r, s = e.args[0], e.args[1] + if isinstance(expr, ExprId): + r, s = expr, None + elif len(expr.args) == 1 and isinstance(expr.args[0], ExprId): + r, s = expr.args[0], None + elif isinstance(expr.args[0], ExprId): + r, s = expr.args[0], expr.args[1] else: - r, s = e.args[0].args + r, s = expr.args[0].args if isinstance(s, ExprOp) and s.op in expr2shift_dct: s = ' '.join([str(x) for x in s.args[0], expr2shift_dct[s.op], s.args[1]]) - if isinstance(e, ExprOp) and e.op == 'postinc': + if isinstance(expr, ExprOp) and expr.op == 'postinc': o = '[%s]' % r if s and not (isinstance(s, ExprInt) and s.arg == 0): o += ', %s' % s @@ -418,16 +423,15 @@ class instruction_arm(instruction): return self.name in conditional_branch + unconditional_branch def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + self.offset + addr = expr.arg + self.offset else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + addr = expr.arg + self.offset + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -492,27 +496,29 @@ class instruction_armt(instruction_arm): def dstflow2label(self, symbol_pool): if self.name in ["CBZ", "CBNZ"]: - e = self.args[1] + expr = self.args[1] else: - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + (self.offset & 0xfffffffc) + addr = expr.arg + (self.offset & 0xfffffffc) elif self.name == 'BL': - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('BP'): - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('CB'): - ad = e.arg + self.offset + self.l + 2 + addr = expr.arg + self.offset + self.l + 2 else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + addr = expr.arg + self.offset + + label = symbol_pool.getby_offset_create(addr) + dst = ExprLoc(label.loc_key, expr.size) + if self.name in ["CBZ", "CBNZ"]: - self.args[1] = s + self.args[1] = dst else: - self.args[0] = s + self.args[0] = dst def breakflow(self): if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: @@ -775,7 +781,7 @@ class arm_arg(m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index 1a37b7f1..b92e2c32 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -7,6 +7,7 @@ from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_i from miasm2.jitter.codegen import CGen from miasm2.expression.expression import ExprId, ExprAff, ExprCond from miasm2.ir.ir import IRBlock, AssignBlock +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_arm') hnd = logging.StreamHandler() @@ -17,11 +18,6 @@ log.setLevel(logging.CRITICAL) class arm_CGen(CGen): - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = self.ir_arch.arch.regs.PC - self.init_arch_C() - def block2assignblks(self, block): """ diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 2cf2e5a4..0b67dd2a 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -450,7 +450,7 @@ def sdiv(ir, instr, a, b, c=None): do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + blk_except = IRBlock(lbl_except.name.loc_key, [AssignBlock(do_except, instr)]) @@ -462,7 +462,7 @@ def sdiv(ir, instr, a, b, c=None): do_div.append(ExprAff(ir.IRDst, r)) do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + blk_div = IRBlock(lbl_div.name.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -483,7 +483,7 @@ def udiv(ir, instr, a, b, c=None): do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + blk_except = IRBlock(lbl_except.name.loc_key, [AssignBlock(do_except, instr)]) r = ExprOp("udiv", b, c) @@ -494,7 +494,7 @@ def udiv(ir, instr, a, b, c=None): do_div.append(ExprAff(ir.IRDst, r)) do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + blk_div = IRBlock(lbl_div.name.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -932,19 +932,20 @@ def pop(ir, instr, a): def cbz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next, b))) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 32) + e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next_expr, b))) return e, [] def cbnz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, b, lbl_next))) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 32) + e.append(ir.IRDst, ExprCond(a, b, lbl_next_expr)) return e, [] - def uxtb(ir, instr, a, b): e = [] r = b[:8].zeroExtend(32) @@ -1264,10 +1265,14 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): raise ValueError('unknown condition %r' % cond) cond = tab_cond[cond] - lbl_next = ExprId(ir.get_next_label(instr), 32) - lbl_do = ExprId(ir.gen_label(), 32) - dst_cond = ExprCond(cond, lbl_do, lbl_next) + + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 32) + lbl_do = ir.gen_label() + lbl_do_expr = ExprLoc(lbl_do.loc_key, 32) + + dst_cond = ExprCond(cond, lbl_do_expr, lbl_next_expr) assert(isinstance(instr_ir, list)) has_irdst = False @@ -1276,8 +1281,8 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): has_irdst = True break if not has_irdst: - instr_ir.append(ExprAff(ir.IRDst, lbl_next)) - e_do = IRBlock(lbl_do.name, [AssignBlock(instr_ir, instr)]) + instr_ir.append(ExprAff(ir.IRDst, lbl_next_expr)) + e_do = IRBlock(lbl_do.loc_key, [AssignBlock(instr_ir, instr)]) e = [ExprAff(ir.IRDst, dst_cond)] return e, [e_do] + extra_ir @@ -1543,7 +1548,7 @@ class ir_arml(IntermediateRepresentation): dst = ExprAff(self.IRDst, ExprId(label_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(label.loc_key, assignments) ir_blocks_all.append([irblock]) label = label_next @@ -1564,7 +1569,7 @@ class ir_arml(IntermediateRepresentation): dst = ExprAff(self.IRDst, ExprCond(local_cond, ExprId(label_do, 32), ExprId(label_next, 32))) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(label.loc_key, assignments) irblocks.append(irblock) @@ -1578,7 +1583,7 @@ class ir_arml(IntermediateRepresentation): dst = ExprAff(self.IRDst, ExprId(label_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(label.loc_key, assignments) irblocks.append(irblock) label = label_next assignments = [] @@ -1594,7 +1599,8 @@ class ir_arml(IntermediateRepresentation): it_hints = None it_cond = None - label = None + label = block.label + assignments = [] ir_blocks_all = [] index = -1 while index + 1 < len(block.lines): @@ -1613,11 +1619,11 @@ class ir_arml(IntermediateRepresentation): split = self.add_instr_to_irblock(block, instr, assignments, ir_blocks_all, gen_pc_updt) if split: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) label = None assignments = [] if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) for irblock in new_ir_blocks_all: diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 15c59cf0..d1b0a8eb 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -5,7 +5,7 @@ from collections import defaultdict from pyparsing import Literal, Group, Optional -from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp +from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp, ExprLoc from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu @@ -60,11 +60,16 @@ class instruction_mips32(cpu.instruction): @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None, symbol_pool=None): + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') @@ -90,21 +95,20 @@ class instruction_mips32(cpu.instruction): def dstflow2label(self, symbol_pool): if self.name in ["J", 'JAL']: - e = self.args[0].arg - ad = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + e - l = symbol_pool.getby_offset_create(ad) - self.args[0] = ExprId(l, e.size) + expr = self.args[0].arg + addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) return ndx = self.get_dst_num() - e = self.args[ndx] + expr = self.args[ndx] - if not isinstance(e, ExprInt): + if not isinstance(expr, ExprInt): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[ndx] = s + addr = expr.arg + self.offset + label = symbol_pool.getby_offset_create(addr) + self.args[ndx] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name == 'BREAK': @@ -262,7 +266,7 @@ class mips32_arg(cpu.m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -403,9 +407,9 @@ class mips32_dreg_imm(mips32_arg): return True @staticmethod - def arg2str(e): - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None): + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index 7aefad32..b17ddbd2 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -4,7 +4,6 @@ from miasm2.expression.expression import ExprAff, ExprInt, ExprId from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b -from miasm2.core.asmblock import expr_is_int_or_label, expr_is_label class ir_a_mips32l(ir_mips32l, ira): def __init__(self, symbol_pool=None): @@ -28,14 +27,15 @@ class ir_a_mips32l(ir_mips32l, ira): if pc_val is None or lr_val is None: new_irblocks.append(irb) continue - if not expr_is_int_or_label(lr_val): - new_irblocks.append(irb) + if lr_val.is_label(): + label = self.symbol_pool.loc_key_to_label(lr_valloc_key) + if label.offset is not None: + lr_val = ExprInt(label.offset, 32) + if not lr_val.is_int(): continue - if expr_is_label(lr_val): - lr_val = ExprInt(lr_val.name.offset, 32) instr = block.lines[-2] - if lr_val.arg != instr.offset + 8: + if int(lr_val) != instr.offset + 8: raise ValueError("Wrong arg") # CALL diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index 16d88067..b3cfecbc 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -57,8 +57,8 @@ class mipsCGen(CGen): self.ir_arch.pc] assignments[self.delay_slot_set] = m2_expr.ExprInt(1, 32) # Replace IRDst with next instruction - assignments[self.ir_arch.IRDst] = m2_expr.ExprId( - self.ir_arch.get_next_instr(assignblock.instr), 32) + dst = self.ir_arch.get_next_instr(assignblock.instr) + assignments[self.ir_arch.IRDst] = m2_expr.ExprLoc(dst.loc_key, 32) irs.append(AssignBlock(assignments, assignblock.instr)) irblocks[blk_idx] = IRBlock(irblock.label, irs) diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index 99c81a33..bb0f812d 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -35,7 +35,7 @@ def jal(arg1): "Jumps to the calculated address @arg1 and stores the return address in $RA" PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_label(instr).loc_key, RA.size) @sbuild.parse def jalr(arg1, arg2): @@ -43,13 +43,13 @@ def jalr(arg1, arg2): address in another register @arg2""" PC = arg1 ir.IRDst = arg1 - arg2 = ExprId(ir.get_next_break_label(instr), 32) + arg2 = ExprLoc(ir.get_next_break_label(instr).loc_key, arg2.size) @sbuild.parse def bal(arg1): PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_label(instr).loc_key, RA.size) @sbuild.parse def l_b(arg1): @@ -76,7 +76,7 @@ def lb(arg1, arg2): @sbuild.parse def beq(arg1, arg2, arg3): "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1 - arg2 else arg3 + dst = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if arg1 - arg2 else arg3 PC = dst ir.IRDst = dst @@ -84,7 +84,7 @@ def beq(arg1, arg2, arg3): def bgez(arg1, arg2): """Branches on @arg2 if the quantities of register @arg1 is greater than or equal to zero""" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1.msb() else arg2 + dst = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if arg1.msb() else arg2 PC = dst ir.IRDst = dst @@ -92,7 +92,7 @@ def bgez(arg1, arg2): def bne(arg1, arg2, arg3): """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT equal""" - dst = arg3 if arg1 - arg2 else ExprId(ir.get_next_break_label(instr), 32) + dst = arg3 if arg1 - arg2 else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst ir.IRDst = dst @@ -230,7 +230,7 @@ def seh(arg1, arg2): @sbuild.parse def bltz(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than zero""" - dst_o = arg2 if arg1.msb() else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1.msb() else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -238,7 +238,7 @@ def bltz(arg1, arg2): def blez(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than or equal to zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = arg2 if cond else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if cond else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -246,7 +246,7 @@ def blez(arg1, arg2): def bgtz(arg1, arg2): """Branches on @arg2 if the register @arg1 is greater than zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = ExprId(ir.get_next_break_label(instr), 32) if cond else arg2 + dst_o = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if cond else arg2 PC = dst_o ir.IRDst = dst_o @@ -346,13 +346,13 @@ def c_le_d(arg1, arg2, arg3): @sbuild.parse def bc1t(arg1, arg2): - dst_o = arg2 if arg1 else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1 else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @sbuild.parse def bc1f(arg1, arg2): - dst_o = ExprId(ir.get_next_break_label(instr), 32) if arg1 else arg2 + dst_o = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if arg1 else arg2 PC = dst_o ir.IRDst = dst_o @@ -423,7 +423,7 @@ def teq(ir, instr, arg1, arg2): do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) - blk_except = IRBlock(lbl_except, [AssignBlock(do_except, instr)]) + blk_except = IRBlock(lbl_except.index, [AssignBlock(do_except, instr)]) cond = arg1 - arg2 diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index e4d03edb..3248a4bc 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -70,7 +70,7 @@ class msp430_arg(m_arg): reg = gpregs.expr[index] return reg label = symbol_pool.getby_name_create(value.name) - return ExprId(label, 16) + return ExprLoc(label.loc_key, 16) if isinstance(value, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] if None in args: @@ -102,40 +102,44 @@ class instruction_msp430(instruction): return self.name in ['call'] @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId): - o = str(e) - elif isinstance(e, ExprInt): - o = str(e) - elif isinstance(e, ExprOp) and e.op == "autoinc": - o = "@%s+" % str(e.args[0]) - elif isinstance(e, ExprMem): - if isinstance(e.arg, ExprId): - if pos == 0: - o = "@%s" % e.arg + def arg2str(expr, index=None, symbol_pool=None): + if isinstance(expr, ExprId): + o = str(expr) + elif isinstance(expr, ExprInt): + o = str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + elif isinstance(expr, ExprOp) and expr.op == "autoinc": + o = "@%s+" % str(expr.args[0]) + elif isinstance(expr, ExprMem): + if isinstance(expr.arg, ExprId): + if index == 0: + o = "@%s" % expr.arg else: - o = "0x0(%s)" % e.arg - elif isinstance(e.arg, ExprInt): - o = "@%s" % e.arg - elif isinstance(e.arg, ExprOp): - o = "%s(%s)" % (e.arg.args[1], e.arg.args[0]) + o = "0x0(%s)" % expr.arg + elif isinstance(expr.arg, ExprInt): + o = "@%s" % expr.arg + elif isinstance(expr.arg, ExprOp): + o = "%s(%s)" % (expr.arg.args[1], expr.arg.args[0]) else: - raise NotImplementedError('unknown instance e = %s' % type(e)) + raise NotImplementedError('unknown instance expr = %s' % type(expr)) return o def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == "call": - ad = e.arg + addr = expr.arg else: - ad = e.arg + int(self.offset) + addr = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index dd24abb1..42f6474e 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -238,8 +238,11 @@ def push_w(ir, instr, a): def call(ir, instr, a): e, a, dummy = mng_autoinc(a, None, 16) - n = ExprId(ir.get_next_label(instr), 16) - e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), n)) + + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) + + e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), lbl_next_expr)) e.append(ExprAff(SP, SP - ExprInt(2, 16))) e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) @@ -272,50 +275,56 @@ def cmp_b(ir, instr, a, b): def jz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, a, n))) + e.append(ExprAff(PC, ExprCond(zf, a, lbl_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, a, lbl_next_expr))) return e, [] def jnz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, n, a))) + e.append(ExprAff(PC, ExprCond(zf, lbl_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, lbl_next_expr, a))) return e, [] def jl(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, n))) + e.append(ExprAff(PC, ExprCond(nf ^ of, a, lbl_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, lbl_next_expr))) return e, [] def jc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, a, n))) + e.append(ExprAff(PC, ExprCond(cf, a, lbl_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, a, lbl_next_expr))) return e, [] def jnc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, n, a))) + e.append(ExprAff(PC, ExprCond(cf, lbl_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, lbl_next_expr, a))) return e, [] def jge(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, n, a))) + e.append(ExprAff(PC, ExprCond(nf ^ of, lbl_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, lbl_next_expr, a))) return e, [] diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 945824a0..429fd22d 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -42,7 +42,7 @@ class ppc_arg(m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -74,7 +74,7 @@ class instruction_ppc(instruction): super(instruction_ppc, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(e, pos = None, symbol_pool=None): if isinstance(e, ExprId) or isinstance(e, ExprInt): return str(e) elif isinstance(e, ExprMem): @@ -132,8 +132,8 @@ class instruction_ppc(instruction): ad = e.arg + self.offset else: ad = e.arg - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + label = symbol_pool.getby_offset_create(ad) + s = ExprLoc(label.loc_key, e.size) self.args[address_index] = s def breakflow(self): diff --git a/miasm2/arch/ppc/sem.py b/miasm2/arch/ppc/sem.py index 741ae24b..775e24d3 100644 --- a/miasm2/arch/ppc/sem.py +++ b/miasm2/arch/ppc/sem.py @@ -690,7 +690,8 @@ def mn_b(ir, instr, arg1, arg2 = None): def mn_bl(ir, instr, arg1, arg2 = None): if arg2 is not None: arg1 = arg2 - return [ ExprAff(LR, ExprId(ir.get_next_instr(instr), 32)), + dst = ir.get_next_instr(instr) + return [ ExprAff(LR, ExprLoc(dst.loc_key, 32)), ExprAff(PC, arg1), ExprAff(ir.IRDst, arg1) ], [] @@ -726,13 +727,15 @@ def mn_do_cond_branch(ir, instr, dest): condition = condition & cond_cond else: condition = cond_cond + dst = ir.get_next_instr(instr) dest_expr = ExprCond(condition, dest, - ExprId(ir.get_next_instr(instr), 32)) + ExprLoc(dst.loc_key, 32)) else: dest_expr = dest if instr.name[-1] == 'L' or instr.name[-2:-1] == 'LA': - ret.append(ExprAff(LR, ExprId(ir.get_next_instr(instr), 32))) + dst = ir.get_next_instr(instr) + ret.append(ExprAff(LR, ExprLoc(dst.loc_key, 32))) ret.append(ExprAff(PC, dest_expr)) ret.append(ExprAff(ir.IRDst, dest_expr)) diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index 14f46265..dd25cb90 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -103,7 +103,7 @@ class sh4_arg(m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -406,24 +406,29 @@ class instruction_sh4(instruction): return self.name.startswith('J') @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - e = e.arg - - if isinstance(e, ExprOp): - if e.op == "predec": - s = '-%s' % e.args[0] - elif e.op == "postinc": - s = '%s+' % e.args[0] + def arg2str(expr, index=None, symbol_pool=None): + if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + expr = expr.arg + + if isinstance(expr, ExprOp): + if expr.op == "predec": + s = '-%s' % expr.args[0] + elif expr.op == "postinc": + s = '%s+' % expr.args[0] else: s = ','.join([str(x).replace('(', '').replace(')', '') - for x in e.args]) + for x in expr.args]) s = "(%s)"%s s = "@%s" % s - elif isinstance(e, ExprId): - s = "@%s" % e + elif isinstance(expr, ExprId): + s = "@%s" % expr else: raise NotImplementedError('zarb arg2str') return s diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index fc3a5882..4a044d6a 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -274,7 +274,7 @@ class x86_arg(m_arg): return None label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + return ExprLoc(label.loc_key, size_hint) if isinstance(value, AstOp): # First pass to retreive fixed_size if value.op == "segm": @@ -474,16 +474,11 @@ class instruction_x86(instruction): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return expr = self.args[0] - if isinstance(expr, ExprId): - if not isinstance(expr.name, AsmLabel) and expr not in all_regs_ids: - raise ValueError("ExprId must be a label or a register") - elif isinstance(expr, ExprInt): - ad = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, expr.size) - self.args[0] = s - else: + if not expr.is_int(): return + addr = expr.arg + int(self.offset) + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -519,10 +514,9 @@ class instruction_x86(instruction): def getdstflow(self, symbol_pool): if self.additional_info.g1.value & 6 and self.name in repeat_mn: - ad = int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, self.v_opmode()) - return [s] + addr = int(self.offset) + label = symbol_pool.getby_offset_create(addr) + return [ExprLoc(label.loc_key, self.v_opmode())] return [self.args[0]] def get_symbol_size(self, symbol, symbol_pool): @@ -566,9 +560,14 @@ class instruction_x86(instruction): return args @staticmethod - def arg2str(expr, pos=None): - if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + def arg2str(expr, index=None, symbol_pool=None): + if expr.is_id() or expr.is_int(): o = str(expr) + elif expr.is_label(): + if symbol_pool is not None: + o = str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + o = str(expr) elif ((isinstance(expr, ExprOp) and expr.op == 'far' and isinstance(expr.args[0], ExprMem)) or isinstance(expr, ExprMem)): diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index 50501060..a12a66f5 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -5,6 +5,7 @@ from miasm2.core import asmblock from miasm2.core.utils import pck16, pck32, pck64, upck16, upck32, upck64 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.jitter.codegen import CGen +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_x86') hnd = logging.StreamHandler() @@ -17,6 +18,7 @@ class x86_32_CGen(CGen): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.arch.regs.RIP + self.translator = TranslatorC(self.ir_arch.symbol_pool) self.init_arch_C() def gen_post_code(self, attrib): diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 276b796f..d524af86 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -240,11 +240,13 @@ def gen_jcc(ir, instr, cond, dst, jmp_if): e = [] meip = mRIP[ir.IRDst.size] - next_lbl = m2_expr.ExprId(ir.get_next_label(instr), dst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, dst.size) + if jmp_if: - dstA, dstB = dst, next_lbl + dstA, dstB = dst, lbl_next_expr else: - dstA, dstB = next_lbl, dst + dstA, dstB = lbl_next_expr, dst mn_dst = m2_expr.ExprCond(cond, dstA.zeroExtend(ir.IRDst.size), dstB.zeroExtend(ir.IRDst.size)) @@ -260,17 +262,18 @@ def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = lbl_do_expr, lbl_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = lbl_skip_expr, lbl_do_expr e = [] e_do, extra_irs = [m2_expr.ExprAff(arg1, arg2)], [] - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])] def gen_cmov(ir, instr, cond, dst, src, mov_if): @@ -280,17 +283,18 @@ def gen_cmov(ir, instr, cond, dst, src, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = lbl_do_expr, lbl_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = lbl_skip_expr, lbl_do_expr e = [] e_do, extra_irs = mov(ir, instr, dst, src) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])] def mov(_, instr, dst, src): @@ -504,12 +508,14 @@ def _rotate_tpl(ir, instr, dst, src, op, left=False): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, lbl_do_expr, lbl_skip_expr))) + return (e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])]) def l_rol(ir, instr, dst, src): @@ -551,12 +557,14 @@ def rotate_with_carry_tpl(ir, instr, op, dst, src): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, lbl_do_expr, lbl_skip_expr))) + return (e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])]) def rcl(ir, instr, dst, src): return rotate_with_carry_tpl(ir, instr, '<<<', dst, src) @@ -638,12 +646,13 @@ def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False, return [], [] e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) - e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, - lbl_skip))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) + e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, lbl_do_expr, + lbl_skip_expr))) + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])] def sar(ir, instr, dst, src): @@ -973,9 +982,9 @@ def bswap(_, instr, dst): def cmps(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) src1 = mRSI[instr.mode][:instr.v_admode()] src2 = mRDI[instr.mode][:instr.v_admode()] @@ -999,24 +1008,24 @@ def cmps(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src1, src1 + offset)) e0.append(m2_expr.ExprAff(src2, src2 + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src1, src1 - offset)) e1.append(m2_expr.ExprAff(src2, src2 - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] def scas(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) src = mRDI[instr.mode][:instr.v_admode()] @@ -1036,16 +1045,16 @@ def scas(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] @@ -1185,7 +1194,7 @@ def call(ir, instr, dst): meip = mRIP[ir.IRDst.size] opmode, admode = s, instr.v_admode() myesp = mRSP[instr.mode][:opmode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) if isinstance(dst, m2_expr.ExprOp): if dst.op == "segm": @@ -1229,8 +1238,6 @@ def call(ir, instr, dst): e.append(m2_expr.ExprAff(ir.ExprMem(c, size=s), n)) e.append(m2_expr.ExprAff(meip, dst.zeroExtend(ir.IRDst.size))) e.append(m2_expr.ExprAff(ir.IRDst, dst.zeroExtend(ir.IRDst.size))) - # if not expr_is_int_or_label(dst): - # dst = meip return e, [] @@ -1432,7 +1439,7 @@ def loop(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) c = myecx - m2_expr.ExprInt(1, myecx.size) dst_o = m2_expr.ExprCond(c, dst.zeroExtend(ir.IRDst.size), @@ -1449,7 +1456,7 @@ def loopne(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), @@ -1471,7 +1478,7 @@ def loope(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), m2_expr.ExprInt(0, 1)) @@ -1508,24 +1515,25 @@ def div(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_div, lbl_div_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_div = IRBlock(lbl_div.loc_key, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_except = IRBlock(lbl_except.loc_key, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, lbl_div_expr, lbl_except_expr))) return e, [blk_div, blk_except] @@ -1554,24 +1562,25 @@ def idiv(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_div, lbl_div_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_div = IRBlock(lbl_div.loc_key, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_except = IRBlock(lbl_except.loc_key, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, lbl_div_expr, lbl_except_expr))) return e, [blk_div, blk_except] @@ -1713,9 +1722,9 @@ def cqo(_, instr): def stos(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) addr_o = mRDI[instr.mode][:instr.v_admode()] addr = addr_o @@ -1732,25 +1741,25 @@ def stos(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e = [] e.append(m2_expr.ExprAff(ir.ExprMem(addr, size), b)) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] def lods(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) e = [] addr_o = mRSI[instr.mode][:instr.v_admode()] @@ -1768,13 +1777,13 @@ def lods(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e = [] if instr.mode == 64 and b.size == 32: @@ -1784,14 +1793,14 @@ def lods(ir, instr, size): e.append(m2_expr.ExprAff(b, ir.ExprMem(addr, size))) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] def movs(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) dst = mRDI[instr.mode][:instr.v_admode()] src = mRSI[instr.mode][:instr.v_admode()] @@ -1815,17 +1824,17 @@ def movs(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) e0.append(m2_expr.ExprAff(dst, dst + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) e1.append(m2_expr.ExprAff(dst, dst - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] @@ -2876,14 +2885,15 @@ def bsr_bsf(ir, instr, dst, src, op_func): ZF = 0 DEST = @op_func(SRC) """ - lbl_src_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_src_not_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_src_null, lbl_src_null_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_src_not_null, lbl_src_not_null_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) - aff_dst = m2_expr.ExprAff(ir.IRDst, lbl_next) + aff_dst = m2_expr.ExprAff(ir.IRDst, lbl_next_expr) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(src, - lbl_src_not_null, - lbl_src_null))] + lbl_src_not_null_expr, + lbl_src_null_expr))] e_src_null = [] e_src_null.append(m2_expr.ExprAff(zf, m2_expr.ExprInt(1, zf.size))) # XXX destination is undefined @@ -2894,8 +2904,8 @@ def bsr_bsf(ir, instr, dst, src, op_func): e_src_not_null.append(m2_expr.ExprAff(dst, op_func(src))) e_src_not_null.append(aff_dst) - return e, [IRBlock(lbl_src_null.name, [AssignBlock(e_src_null, instr)]), - IRBlock(lbl_src_not_null.name, [AssignBlock(e_src_not_null, instr)])] + return e, [IRBlock(lbl_src_null.loc_key, [AssignBlock(e_src_null, instr)]), + IRBlock(lbl_src_not_null.loc_key, [AssignBlock(e_src_not_null, instr)])] def bsf(ir, instr, dst, src): @@ -3925,9 +3935,10 @@ def pshufd(_, instr, dst, src, imm): def ps_rl_ll(ir, instr, dst, src, op, size): - lbl_zero = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_zero, lbl_zero_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) if src.size == 8: count = src.zeroExtend(dst.size) @@ -3940,8 +3951,8 @@ def ps_rl_ll(ir, instr, dst, src, op, size): test = expr_simp(count & m2_expr.ExprInt( ((1 << dst.size) - 1) ^ mask, dst.size)) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(test, - lbl_zero, - lbl_do))] + lbl_zero_expr, + lbl_do_expr))] slices = [] for i in xrange(0, dst.size, size): @@ -3954,12 +3965,12 @@ def ps_rl_ll(ir, instr, dst, src, op, size): return [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size))], [] e_zero = [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size)), - m2_expr.ExprAff(ir.IRDst, lbl_next)] + m2_expr.ExprAff(ir.IRDst, lbl_next_expr)] e_do = [] e.append(m2_expr.ExprAff(dst[0:dst.size], m2_expr.ExprCompose(*slices))) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]), - IRBlock(lbl_zero.name, [AssignBlock(e_zero, instr)])] + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)]), + IRBlock(lbl_zero.loc_key, [AssignBlock(e_zero, instr)])] def psrlw(ir, instr, dst, src): @@ -4483,7 +4494,7 @@ def maskmovq(ir, instr, src, mask): m2_expr.ExprCond(bit, write_label, next_check_label)) - blks.append(IRBlock(cur_label.name, [AssignBlock([check], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([check], instr)])) # Build write blocks dst_addr = mRDI[instr.mode] @@ -4497,7 +4508,7 @@ def maskmovq(ir, instr, src, mask): write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8), src[start: start + 8]) jump = m2_expr.ExprAff(ir.IRDst, next_check_label) - blks.append(IRBlock(cur_label.name, [AssignBlock([write_mem, jump], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([write_mem, jump], instr)])) # If mask is null, bypass all e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(mask, @@ -5145,12 +5156,14 @@ class ir_x86_16(IntermediateRepresentation): c_cond = cond_dec | (zf ^ m2_expr.ExprInt(1, 1)) # gen while - lbl_do = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_end = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_skip = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - lbl_next = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - - fix_next_lbl = {lbl_next: lbl_end} + lbl_do, lbl_do_expr = self.gen_label_and_expr(self.IRDst.size) + lbl_end, lbl_end_expr = self.gen_label_and_expr(self.IRDst.size) + lbl_skip = self.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, self.IRDst.size) + lbl_next = self.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, self.IRDst.size) + + fix_next_lbl = {lbl_next_expr: lbl_end_expr} new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_lbl)) for irblock in extra_ir] @@ -5159,14 +5172,14 @@ class ir_x86_16(IntermediateRepresentation): c_reg - m2_expr.ExprInt(1, c_reg.size))) cond_bloc.append(m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_cond, - lbl_skip, - lbl_do))) - cond_bloc = IRBlock(lbl_end.name, [AssignBlock(cond_bloc, instr)]) + lbl_skip_expr, + lbl_do_expr))) + cond_bloc = IRBlock(lbl_end.loc_key, [AssignBlock(cond_bloc, instr)]) e_do = instr_ir - c = IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]) - e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, lbl_do, - lbl_skip))] + c = IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)]) + e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, lbl_do_expr, + lbl_skip_expr))] return e_n, [cond_bloc, c] + new_extra_ir def expr_fix_regs_for_mode(self, e, mode=64): diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index f763d85f..9764590a 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,11 +1,13 @@ #-*- coding:utf-8 -*- import logging -import inspect import warnings from collections import namedtuple -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \ + get_expr_labels +from miasm2.core.asmblock import AsmSymbolPool +from miasm2.expression.expression import LocKey from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck @@ -25,20 +27,13 @@ def is_int(a): isinstance(a, moduint) or isinstance(a, modint) -def expr_is_label(e): - return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel) - - -def expr_is_int_or_label(e): - return isinstance(e, m2_expr.ExprInt) or \ - (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)) - - class AsmLabel(object): "Stand for an assembly label" - def __init__(self, name="", offset=None): + def __init__(self, loc_key, name="", offset=None): + assert isinstance(loc_key, LocKey) + self.loc_key = loc_key self.fixedblocs = False if is_int(name): name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) @@ -77,6 +72,9 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) + def to_string(self, symbol_pool): + return str(self) + class asm_raw(AsmRaw): @@ -363,6 +361,11 @@ class AsmSymbolPool(object): self._name2label = {} self._offset2label = {} self._label_num = 0 + self._loc_key_to_label = {} + + def loc_key_to_label(self, label_index): + assert isinstance(label_index, LocKey) + return self._loc_key_to_label.get(label_index.key, None) def add_label(self, name, offset=None): """ @@ -370,7 +373,7 @@ class AsmSymbolPool(object): @name: label's name @offset: (optional) label's offset """ - label = AsmLabel(name, offset) + label = AsmLabel(LocKey(self._label_num), name, offset) # Test for collisions if (label.offset in self._offset2label and @@ -383,6 +386,9 @@ class AsmSymbolPool(object): (label, self._name2label[label.name])) self._labels.add(label) + self._label_num += 1 + self._loc_key_to_label[label.loc_key.key] = label + if label.offset is not None: self._offset2label[label.offset] = label if label.name != "": @@ -480,6 +486,19 @@ class AsmSymbolPool(object): self._name2label.update(symbol_pool._name2label) self._offset2label.update(symbol_pool._offset2label) + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr + + @expr: Expr instance + """ + if expr.is_int(): + label = self.getby_offset_create(int(expr)) + ret = ExprLoc(label.loc_key, expr.size) + return ret + return expr + def gen_label(self): """Generate a new unpinned label""" label = self.add_label("lbl_gen_%.8X" % (self._label_num)) @@ -511,7 +530,9 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, *args, **kwargs): + def __init__(self, symbol_pool=None, *args, **kwargs): + if symbol_pool is None: + raise DeprecationWarning("AsmCFG needs a non empty symbol_pool") super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} @@ -519,6 +540,15 @@ class AsmCFG(DiGraph): self._pendings = {} # Label2block built on the fly self._label2block = {} + # symbol_pool + self.symbol_pool = symbol_pool + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.symbol_pool) + return graph + self + # Compatibility with old list API def append(self, *args, **kwargs): @@ -639,9 +669,9 @@ class AsmCFG(DiGraph): if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=str(line), attr={})] + self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})] else: - yield self.DotCellDescription(text=str(line), attr={}) + yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={}) def node_attr(self, node): if isinstance(node, AsmBlockBad): @@ -784,7 +814,7 @@ class AsmCFG(DiGraph): if len(pred_next) > 1: raise RuntimeError("Too many next constraints for bloc %r" "(%s)" % (block.label, - map(lambda x: x.label, pred_next))) + [x.label for x in pred_next])) def guess_blocks_size(self, mnemo): """Asm and compute max block size @@ -971,13 +1001,13 @@ def conservative_asm(mnemo, instr, symbols, conservative): def fix_expr_val(expr, symbols): """Resolve an expression @expr using @symbols""" def expr_calc(e): - if isinstance(e, m2_expr.ExprId): + if isinstance(e, ExprId): s = symbols._name2label[e.name] - e = m2_expr.ExprInt(s.offset, e.size) + e = ExprInt(s.offset, e.size) return e result = expr.visit(expr_calc) result = expr_simp(result) - if not isinstance(result, m2_expr.ExprInt): + if not isinstance(result, ExprInt): raise RuntimeError('Cannot resolve symbol %s' % expr) return result @@ -1222,12 +1252,11 @@ def get_block_labels(block): if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): for expr in instr.raw: - symbols.update(m2_expr.get_expr_ids(expr)) + symbols.update(get_expr_labels(expr)) else: for arg in instr.args: - symbols.update(m2_expr.get_expr_ids(arg)) - labels = filter_exprid_label(symbols) - return labels + symbols.update(get_expr_labels(arg)) + return symbols def assemble_block(mnemo, block, symbol_pool, conservative=False): @@ -1285,7 +1314,8 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): lbl2block = {block.label: block for block in blocks} blocks_using_label = {} for block in blocks: - labels = get_block_labels(block) + exprlocs = get_block_labels(block) + labels = set(symbol_pool.loc_key_to_label(expr.loc_key) for expr in exprlocs) for label in labels: blocks_using_label.setdefault(label, set()).add(block) @@ -1544,21 +1574,19 @@ class disasmEngine(object): # test split if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True - pass if instr.dstflow(): instr.dstflow2label(self.symbol_pool) - dst = instr.getdstflow(self.symbol_pool) - dstn = [] - for d in dst: - if isinstance(d, m2_expr.ExprId) and \ - isinstance(d.name, AsmLabel): - dstn.append(d.name) - if d.name.offset in self.dont_dis_retcall_funcs: - add_next_offset = False - dst = dstn + destinations = instr.getdstflow(self.symbol_pool) + known_dsts = [] + for dst in destinations: + if not dst.is_label(): + continue + label = self.symbol_pool.loc_key_to_label(dst.loc_key) + known_dsts.append(label) + if label.offset in self.dont_dis_retcall_funcs: + add_next_offset = False if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update( - [AsmConstraint(x, AsmConstraint.c_to) for x in dst]) + cur_block.bto.update([AsmConstraint(label, AsmConstraint.c_to) for label in known_dsts]) # get in delayslot mode in_delayslot = True @@ -1608,7 +1636,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG() + blocks = AsmCFG(self.symbol_pool) todo = [offset] bloc_cpt = 0 diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index d75b99cf..8ea96e22 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -8,13 +8,12 @@ from collections import defaultdict import pyparsing import miasm2.expression.expression as m2_expr -from miasm2.core import asmblock from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -985,18 +984,24 @@ class instruction(object): self.mode = mode self.args = args self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None def gen_args(self, args): out = ', '.join([str(x) for x in args]) return out def __str__(self): + return self.to_string() + + def to_string(self, symbol_pool=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, pos = i) + x = self.arg2str(arg, i, symbol_pool) args.append(x) o += self.gen_args(args) return o @@ -1011,40 +1016,38 @@ class instruction(object): if symbols is None: symbols = {} args_out = [] - for a in self.args: - e = a + for expr in self.args: # try to resolve symbols using symbols (0 for default value) - ids = m2_expr.get_expr_ids(e) - fixed_ids = {} - for x in ids: - if isinstance(x.name, asmblock.AsmLabel): - name = x.name.name - # special symbol $ - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue - if name == '_': - fixed_ids[x] = self.get_asm_next_offset(x) - continue - if not name in symbols: - raise ValueError('unresolved symbol! %r' % x) - else: - name = x.name - if not name in symbols: + labels = m2_expr.get_expr_labels(expr) + fixed_expr = {} + for exprloc in labels: + label = symbols.loc_key_to_label(exprloc.loc_key) + name = label.name + # special symbols + if name == '$': + fixed_expr[exprloc] = self.get_asm_offset(exprloc) + continue + if name == '_': + fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue + if not name in symbols: + raise ValueError('Unresolved symbol: %r' % exprloc) + if symbols[name].offset is None: raise ValueError('The offset of label "%s" cannot be ' 'determined' % name) else: - size = x.size + # Fix symbol with its offset + size = exprloc.size if size is None: - default_size = self.get_symbol_size(x, symbols) + default_size = self.get_symbol_size(exprloc, symbols) size = default_size value = m2_expr.ExprInt(symbols[name].offset, size) - fixed_ids[x] = value - e = e.replace_expr(fixed_ids) - e = expr_simp(e) - args_out.append(e) + fixed_expr[exprloc] = value + + expr = expr.replace_expr(fixed_expr) + expr = expr_simp(expr) + args_out.append(expr) return args_out def get_info(self, c): diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index df419680..07155fd9 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -4,7 +4,7 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmblock as asmblock from miasm2.core.cpu import instruction, base_expr -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstInt, AstId, AstOp declarator = {'byte': 8, 'word': 16, @@ -237,7 +237,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): cur_block = None state = STATE_NO_BLOC i = 0 - blocks = asmblock.AsmCFG() + blocks = asmblock.AsmCFG(symbol_pool) block_to_nlink = None delayslot = 0 while i < len(lines): diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 8d6d3e07..68b4439f 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -142,11 +142,19 @@ class SemBuilder(object): def _create_labels(lbl_else=False): """Return the AST standing for label creations @lbl_else (optional): if set, create a label 'lbl_else'""" - lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)" + lbl_end = "lbl_end = ir.get_next_label(instr)" + lbl_end_expr = "lbl_end_expr = ExprLoc(lbl_end.loc_key, ir.IRDst.size)" out = ast.parse(lbl_end).body - out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body + out += ast.parse(lbl_end_expr).body + lbl_if = "lbl_if = ir.gen_label()" + lbl_if_expr = "lbl_if_expr = ExprLoc(lbl_if.loc_key, ir.IRDst.size)" + out += ast.parse(lbl_if).body + out += ast.parse(lbl_if_expr).body if lbl_else: - out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body + lbl_else = "lbl_else = ir.gen_label()" + lbl_else_expr = "lbl_else_expr = ExprLoc(lbl_else.loc_key, ir.IRDst.size)" + out += ast.parse(lbl_else).body + out += ast.parse(lbl_else_expr).body return out def _parse_body(self, body, argument_names): @@ -200,9 +208,9 @@ class SemBuilder(object): cond = statement.test real_body += self._create_labels(lbl_else=True) - lbl_end = ast.Name(id='lbl_end', ctx=ast.Load()) - lbl_if = ast.Name(id='lbl_if', ctx=ast.Load()) - lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \ + lbl_end = ast.Name(id='lbl_end_expr', ctx=ast.Load()) + lbl_if = ast.Name(id='lbl_if_expr', ctx=ast.Load()) + lbl_else = ast.Name(id='lbl_else_expr', ctx=ast.Load()) \ if statement.orelse else lbl_end dst = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), @@ -261,9 +269,11 @@ class SemBuilder(object): ## Replace the block with a call to 'IRBlock' - lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name, - ctx=ast.Load()), - attr='name', ctx=ast.Load()) + lbl_if_name = value= ast.Attribute( + value=ast.Name(id=lbl_name, ctx=ast.Load()), + attr="loc_key", + ctx=ast.Load() + ) assignblks = ast.List(elts=[assignblk], ctx=ast.Load()) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 54cd5a2d..e0651d7f 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -19,6 +19,7 @@ # IR components are : # - ExprInt # - ExprId +# - ExprLoc # - ExprAff # - ExprCond # - ExprMem @@ -48,12 +49,13 @@ TOK_POS_STRICT = "Spos" # Hashing constants EXPRINT = 1 EXPRID = 2 -EXPRAFF = 3 -EXPRCOND = 4 -EXPRMEM = 5 -EXPROP = 6 -EXPRSLICE = 7 -EXPRCOMPOSE = 8 +EXPRLOC = 3 +EXPRAFF = 4 +EXPRCOND = 5 +EXPRMEM = 6 +EXPROP = 7 +EXPRSLICE = 8 +EXPRCOMPOSE = 9 priorities_list = [ @@ -115,6 +117,8 @@ class DiGraphExpr(DiGraph): return node.op elif isinstance(node, ExprId): return node.name + elif isinstance(node, ExprLoc): + return "label_%r" % node.label elif isinstance(node, ExprMem): return "@%d" % node.size elif isinstance(node, ExprCompose): @@ -141,6 +145,29 @@ class DiGraphExpr(DiGraph): return "" + +class LocKey(object): + def __init__(self, key): + self._key = key + + key = property(lambda self: self._key) + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ is not other.__class__: + return False + return self.key == other.key + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "<%s %d>" % (self.__class__.__name__, self._key) + # IR definitions class Expr(object): @@ -383,6 +410,9 @@ class Expr(object): def is_id(self, name=None): return False + def is_label(self, label=None): + return False + def is_aff(self): return False @@ -532,6 +562,7 @@ class ExprId(Expr): if size is None: warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') size = 32 + assert isinstance(name, str) super(ExprId, self).__init__(size) self._name = name @@ -584,6 +615,68 @@ class ExprId(Expr): return True +class ExprLoc(Expr): + + """An ExprLoc represent a Label in Miasm IR. + """ + + __slots__ = Expr.__slots__ + ["_loc_key"] + + def __init__(self, loc_key, size): + """Create an identifier + @loc_key: int, label loc_key + @size: int, identifier's size + """ + assert isinstance(loc_key, LocKey) + super(ExprLoc, self).__init__(size) + self._loc_key = loc_key + + loc_key= property(lambda self: self._loc_key) + + def __reduce__(self): + state = self._loc_key, self._size + return self.__class__, state + + def __new__(cls, loc_key, size): + return Expr.get_object(cls, (loc_key, size)) + + def __str__(self): + return "label_%d" % self._loc_key.key + + def get_r(self, mem_read=False, cst_read=False): + return set() + + def get_w(self): + return set() + + def _exprhash(self): + return hash((EXPRLOC, self._loc_key, self._size)) + + def _exprrepr(self): + return "%s(%r, %d)" % (self.__class__.__name__, self._loc_key, self._size) + + def __contains__(self, expr): + return self == expr + + @visit_chk + def visit(self, callback, test_visit=None): + return self + + def copy(self): + return ExprLoc(self._loc_key, self._size) + + def depth(self): + return 1 + + def graph_recursive(self, graph): + graph.add_node(self) + + def is_label(self, loc_key=None): + if loc_key is not None and self._loc_key != loc_key: + return False + return True + + class ExprAff(Expr): """An ExprAff represent an affection from an Expression to another one. @@ -1226,10 +1319,11 @@ class ExprCompose(Expr): # Expression order for comparaison EXPR_ORDER_DICT = {ExprId: 1, - ExprCond: 2, - ExprMem: 3, - ExprOp: 4, - ExprSlice: 5, + ExprLoc: 2, + ExprCond: 3, + ExprMem: 4, + ExprOp: 5, + ExprSlice: 6, ExprCompose: 7, ExprInt: 8, } @@ -1289,6 +1383,11 @@ def compare_exprs(expr1, expr2): if ret: return ret return cmp(expr1.size, expr2.size) + elif cls1 == ExprLoc: + ret = cmp(expr1.loc_key, expr2.loc_key) + if ret: + return ret + return cmp(expr1.size, expr2.size) elif cls1 == ExprAff: raise NotImplementedError( "Comparaison from an ExprAff not yet implemented") @@ -1379,11 +1478,19 @@ def ExprInt_from(expr, i): def get_expr_ids_visit(expr, ids): """Visitor to retrieve ExprId in @expr @expr: Expr""" - if isinstance(expr, ExprId): + if expr.is_id(): ids.add(expr) return expr +def get_expr_labels_visit(expr, labels): + """Visitor to retrieve ExprLoc in @expr + @expr: Expr""" + if expr.is_label(): + labels.add(expr) + return expr + + def get_expr_ids(expr): """Retrieve ExprId in @expr @expr: Expr""" @@ -1392,6 +1499,14 @@ def get_expr_ids(expr): return ids +def get_expr_labels(expr): + """Retrieve ExprLoc in @expr + @expr: Expr""" + ids = set() + expr.visit(lambda x: get_expr_labels_visit(x, ids)) + return ids + + def test_set(expr, pattern, tks, result): """Test if v can correspond to e. If so, update the context in result. Otherwise, return False @@ -1431,6 +1546,9 @@ def match_expr(expr, pattern, tks, result=None): elif expr.is_id(): return test_set(expr, pattern, tks, result) + elif expr.is_label(): + return test_set(expr, pattern, tks, result) + elif expr.is_op(): # expr need to be the same operation than pattern diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 722d169d..2fe5e26d 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -268,6 +268,9 @@ class Variables_Identifier(object): elif isinstance(expr, m2_expr.ExprId): pass + elif isinstance(expr, m2_expr.ExprLoc): + pass + elif isinstance(expr, m2_expr.ExprMem): self.find_variables_rec(expr.arg) @@ -552,7 +555,8 @@ def possible_values(expr): # Terminal expression if (isinstance(expr, m2_expr.ExprInt) or - isinstance(expr, m2_expr.ExprId)): + isinstance(expr, m2_expr.ExprId) or + isinstance(expr, m2_expr.ExprLoc)): consvals.add(ConstrainedValue(frozenset(), expr)) # Unary expression elif isinstance(expr, m2_expr.ExprSlice): diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py index b3f3af1c..cbfd58d0 100644 --- a/miasm2/expression/parser.py +++ b/miasm2/expression/parser.py @@ -1,6 +1,6 @@ import pyparsing -from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAff +from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprSlice, \ + ExprMem, ExprCond, ExprCompose, ExprOp, ExprAff, LocKey integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: int(t[0])) @@ -16,6 +16,7 @@ str_int = str_int_pos | str_int_neg STR_EXPRINT = pyparsing.Suppress("ExprInt") STR_EXPRID = pyparsing.Suppress("ExprId") +STR_EXPRLOC = pyparsing.Suppress("ExprLoc") STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") STR_EXPRMEM = pyparsing.Suppress("ExprMem") STR_EXPRCOND = pyparsing.Suppress("ExprCond") @@ -23,11 +24,17 @@ STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") STR_EXPROP = pyparsing.Suppress("ExprOp") STR_EXPRAFF = pyparsing.Suppress("ExprAff") +LOCKEY = pyparsing.Suppress("LocKey") + STR_COMMA = pyparsing.Suppress(",") LPARENTHESIS = pyparsing.Suppress("(") RPARENTHESIS = pyparsing.Suppress(")") +T_INF = pyparsing.Suppress("<") +T_SUP = pyparsing.Suppress(">") + + string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') @@ -36,26 +43,33 @@ string = string_quote | string_dquote expr = pyparsing.Forward() -expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) -expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) -expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) -expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS) - -expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \ +expr_int = STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_id = STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS +expr_loc = STR_EXPRLOC + LPARENTHESIS + T_INF + LOCKEY + str_int + T_SUP + STR_COMMA + str_int + RPARENTHESIS +expr_slice = STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_mem = STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS +expr_cond = STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS +expr_compose = STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_op = STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_aff = STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS + +expr << (expr_int | expr_id | expr_loc | expr_slice | expr_mem | expr_cond | \ expr_compose | expr_op | expr_aff) -expr_int.setParseAction(lambda t: ExprInt(*t[0])) -expr_id.setParseAction(lambda t: ExprId(*t[0])) -expr_slice.setParseAction(lambda t: ExprSlice(*t[0])) -expr_mem.setParseAction(lambda t: ExprMem(*t[0])) -expr_cond.setParseAction(lambda t: ExprCond(*t[0])) -expr_compose.setParseAction(lambda t: ExprCompose(*t[0])) -expr_op.setParseAction(lambda t: ExprOp(*t[0])) -expr_aff.setParseAction(lambda t: ExprAff(*t[0])) +def parse_loc_key(t): + assert len(t) == 2 + loc_key, size = LocKey(t[0]), t[1] + return ExprLoc(loc_key, size) + +expr_int.setParseAction(lambda t: ExprInt(*t)) +expr_id.setParseAction(lambda t: ExprId(*t)) +expr_loc.setParseAction(parse_loc_key) +expr_slice.setParseAction(lambda t: ExprSlice(*t)) +expr_mem.setParseAction(lambda t: ExprMem(*t)) +expr_cond.setParseAction(lambda t: ExprCond(*t)) +expr_compose.setParseAction(lambda t: ExprCompose(*t)) +expr_op.setParseAction(lambda t: ExprOp(*t)) +expr_aff.setParseAction(lambda t: ExprAff(*t)) def str_to_expr(str_in): diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 1c6895e0..0ea3a836 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,11 +23,10 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \ - AsmBlock +from miasm2.core.asmblock import AsmSymbolPool, AsmLabel, AsmBlock, \ + AsmConstraint, AsmBlockBad from miasm2.core.graph import DiGraph - class AssignBlock(object): """Represent parallel IR assignment, such as: EAX = EBX @@ -272,7 +271,8 @@ class IRBlock(object): @assignblks: list of AssignBlock """ - assert isinstance(label, AsmLabel) + #assert isinstance(label, AsmLabel) + assert isinstance(label, m2_expr.LocKey) self.label = label for assignblk in assignblks: assert isinstance(assignblk, AssignBlock) @@ -351,7 +351,7 @@ class IRBlock(object): def __str__(self): out = [] - out.append('%s' % self.label) + out.append('label_%s' % self.label.key) for assignblk in self: for dst, src in assignblk.iteritems(): out.append('\t%s = %s' % (dst, src)) @@ -396,24 +396,44 @@ class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, blocks, *args, **kwargs): + def __init__(self, blocks, symbol_pool=None, *args, **kwargs): """Instanciate a DiGraphIR @blocks: IR blocks """ + self.symbol_pool = symbol_pool self._blocks = blocks super(DiGraphIR, self).__init__(*args, **kwargs) + def expr_loc_to_symb(self, expr): + if not expr.is_label(): + return expr + if self.symbol_pool is None: + name = str(expr) + else: + name = self.symbol_pool.loc_key_to_label(expr.loc_key).name + return m2_expr.ExprId(name, expr.size) + def node2lines(self, node): - yield self.DotCellDescription(text=str(node.name), - attr={'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey'}) + if self.symbol_pool is None: + node_name = str(node) + else: + node_name = self.symbol_pool.loc_key_to_label(node) + yield self.DotCellDescription( + text="%s" % node_name, + attr={ + 'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey', + } + ) if node not in self._blocks: yield [self.DotCellDescription(text="NOT PRESENT", attr={})] raise StopIteration for i, assignblk in enumerate(self._blocks[node]): for dst, src in assignblk.iteritems(): - line = "%s = %s" % (dst, src) + new_src = src.visit(self.expr_loc_to_symb) + new_dst = dst.visit(self.expr_loc_to_symb) + line = "%s = %s" % (new_dst, new_src) if self._dot_offset: yield [self.DotCellDescription(text="%-4d" % i, attr={}), self.DotCellDescription(text=line, attr={})] @@ -427,11 +447,10 @@ class DiGraphIR(DiGraph): src_irdst = self._blocks[src].dst edge_color = "blue" if isinstance(src_irdst, m2_expr.ExprCond): - if (expr_is_label(src_irdst.src1) and - src_irdst.src1.name == dst): + src1, src2 = src_irdst.src1, src_irdst.src2 + if src1.is_label(dst): edge_color = "limegreen" - elif (expr_is_label(src_irdst.src2) and - src_irdst.src2.name == dst): + elif src2.is_label(dst): edge_color = "red" return {"color": edge_color} @@ -490,11 +509,16 @@ class IntermediateRepresentation(object): """Transforms an ExprId/ExprInt/label/int into a label @addr: an ExprId/ExprInt/label/int""" - if (isinstance(addr, m2_expr.ExprId) and - isinstance(addr.name, AsmLabel)): - addr = addr.name if isinstance(addr, AsmLabel): return addr + if isinstance(addr, m2_expr.LocKey): + return self.symbol_pool.loc_key_to_label(addr) + elif isinstance(addr, m2_expr.ExprLoc): + label = self.symbol_pool.loc_key_to_label(addr.loc_key) + if label is None: + return None + else: + return label try: addr = int(addr) @@ -508,7 +532,9 @@ class IntermediateRepresentation(object): @addr: an ExprId/ExprInt/label/int""" label = self.get_label(addr) - return self.blocks.get(label, None) + if label is None: + return None + return self.blocks.get(label.loc_key, None) def get_bloc(self, addr): """ @@ -519,9 +545,11 @@ class IntermediateRepresentation(object): return self.get_block(addr) def add_instr(self, line, addr=0, gen_pc_updt=False): - block = AsmBlock(self.gen_label()) + lbl = self.gen_label() + block = AsmBlock(lbl) block.lines = [line] self.add_block(block, gen_pc_updt) + return lbl def getby_offset(self, offset): out = set() @@ -593,8 +621,12 @@ class IntermediateRepresentation(object): @gen_pc_updt: insert PC update effects between instructions """ - label = None + label = block.label ir_blocks_all = [] + + assert not isinstance(block, AsmBlockBad) + + assignments = [] for instr in block.lines: if label is None: assignments = [] @@ -602,11 +634,11 @@ class IntermediateRepresentation(object): split = self.add_instr_to_irblock(block, instr, assignments, ir_blocks_all, gen_pc_updt) if split: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) label = None assignments = [] if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) for irblock in new_ir_blocks_all: @@ -645,12 +677,22 @@ class IntermediateRepresentation(object): continue next_lbl = block.get_next() if next_lbl is None: - dst = m2_expr.ExprId(self.get_next_label(block.lines[-1]), - self.pc.size) + lbl = None + if block.lines: + line = block.lines[-1] + if line.offset is not None: + lbl = self.symbol_pool.getby_offset_create(line.offset + line.l) + if lbl is None: + lbl = self.symbol_pool.gen_label() + block.add_cst(lbl, AsmConstraint.c_next, self.symbol_pool) else: - dst = m2_expr.ExprId(next_lbl, - self.pc.size) - assignblk = AssignBlock({self.IRDst: dst}, irblock[-1].instr) + lbl = next_lbl + dst = m2_expr.ExprLoc(lbl.loc_key, self.pc.size) + if irblock.assignblks: + instr = irblock.assignblks[-1].instr + else: + instr = None + assignblk = AssignBlock({self.IRDst: dst}, instr) ir_blocks[index] = IRBlock(irblock.label, list(irblock.assignblks) + [assignblk]) def post_add_block(self, block, ir_blocks): @@ -676,7 +718,6 @@ class IntermediateRepresentation(object): def get_instr_label(self, instr): """Returns the label associated to an instruction @instr: current instruction""" - return self.symbol_pool.getby_offset_create(instr.offset) def gen_label(self): @@ -684,6 +725,14 @@ class IntermediateRepresentation(object): label = self.symbol_pool.gen_label() return label + def gen_label_and_expr(self, size): + """ + Return a label and it's corresponding ExprLoc + @size: size of expression + """ + label = self.gen_label() + return label, m2_expr.ExprLoc(label.loc_key, size) + def get_next_label(self, instr): label = self.symbol_pool.getby_offset_create(instr.offset + instr.l) return label @@ -728,14 +777,14 @@ class IntermediateRepresentation(object): out = set() while todo: dst = todo.pop() - if expr_is_label(dst): + if dst.is_label(): done.add(dst) - elif isinstance(dst, (m2_expr.ExprMem, m2_expr.ExprInt)): + elif dst.is_mem() or dst.is_int(): done.add(dst) - elif isinstance(dst, m2_expr.ExprCond): + elif dst.is_cond(): todo.add(dst.src1) todo.add(dst.src2) - elif isinstance(dst, m2_expr.ExprId): + elif dst.is_id(): out.add(dst) else: done.add(dst) @@ -769,15 +818,16 @@ class IntermediateRepresentation(object): """ Gen irbloc digraph """ - self._graph = DiGraphIR(self.blocks) + self._graph = DiGraphIR(self.blocks, self.symbol_pool) for lbl, block in self.blocks.iteritems(): + assert isinstance(lbl, m2_expr.LocKey) self._graph.add_node(lbl) for dst in self.dst_trackback(block): if dst.is_int(): dst_lbl = self.symbol_pool.getby_offset_create(int(dst)) - dst = m2_expr.ExprId(dst_lbl, self.pc.size) - if expr_is_label(dst): - self._graph.add_edge(lbl, dst.name) + dst = m2_expr.ExprLoc(dst_lbl.loc_key, self.pc.size) + if dst.is_label(): + self._graph.add_edge(lbl, dst.loc_key) @property def graph(self): @@ -816,9 +866,9 @@ class IntermediateRepresentation(object): assert set(assignblk.keys()) == set([self.IRDst]) if len(self.graph.successors(block.label)) != 1: continue - if not expr_is_label(assignblk[self.IRDst]): + if not assignblk[self.IRDst].is_label(): continue - dst = assignblk[self.IRDst].name + dst = assignblk[self.IRDst].loc_key if dst == block.label: # Infinite loop block continue @@ -828,7 +878,7 @@ class IntermediateRepresentation(object): modified = False for label in jmp_blocks: block = self.blocks[label] - dst_label = block.dst.name + dst_loc_key = block.dst parents = self.graph.predecessors(block.label) for lbl in parents: parent = self.blocks.get(lbl, None) @@ -836,7 +886,7 @@ class IntermediateRepresentation(object): continue dst = parent.dst if dst.is_id(block.label): - dst = m2_expr.ExprId(dst_label, dst.size) + dst = m2_expr.ExprLoc(dst_loc_key, dst.size) self.graph.discard_edge(lbl, block.label) self.graph.discard_edge(block.label, dst_label) @@ -846,13 +896,13 @@ class IntermediateRepresentation(object): elif dst.is_cond(): src1, src2 = dst.src1, dst.src2 if src1.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2) + dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_label, dst.size), dst.src2) self.graph.discard_edge(lbl, block.label) self.graph.discard_edge(block.label, dst_label) self.graph.add_uniq_edge(lbl, dst_label) modified = True if src2.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size)) + dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_label, dst.size)) self.graph.discard_edge(lbl, block.label) self.graph.discard_edge(block.label, dst_label) self.graph.add_uniq_edge(lbl, dst_label) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 4070f261..d78298a3 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -2,8 +2,8 @@ import warnings import logging from collections import MutableMapping -from miasm2.expression.expression import ExprOp, ExprId, ExprInt, ExprMem, \ - ExprCompose, ExprSlice, ExprCond, ExprAff +from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCompose, ExprSlice, ExprCond, ExprAff from miasm2.expression.simplifications import expr_simp from miasm2.core import asmblock from miasm2.ir.ir import AssignBlock @@ -812,6 +812,7 @@ class SymbolicExecutionEngine(object): self.expr_to_visitor = { ExprInt: self.eval_exprint, ExprId: self.eval_exprid, + ExprLoc: self.eval_exprloc, ExprMem: self.eval_exprmem, ExprSlice: self.eval_exprslice, ExprCond: self.eval_exprcond, @@ -885,10 +886,16 @@ class SymbolicExecutionEngine(object): def eval_exprid(self, expr, **kwargs): """[DEV]: Evaluate an ExprId using the current state""" - if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: - ret = ExprInt(expr.name.offset, expr.size) + ret = self.symbols.read(expr) + return ret + + def eval_exprloc(self, expr, **kwargs): + """[DEV]: Evaluate an ExprLoc using the current state""" + label = self.ir_arch.symbol_pool.loc_key_to_label(expr.loc_key) + if label.offset is not None: + ret = ExprInt(label.offset, expr.size) else: - ret = self.symbols.read(expr) + ret = expr return ret def eval_exprmem(self, expr, **kwargs): @@ -1040,7 +1047,17 @@ class SymbolicExecutionEngine(object): self.dump(mems=False) self.dump(ids=False) print '_' * 80 - return self.eval_expr(self.ir_arch.IRDst) + dst = self.eval_expr(self.ir_arch.IRDst) + + # Best effort to resolve destination as ExprLoc + if dst.is_label(): + ret = dst + elif dst.is_int(): + label = self.ir_arch.symbol_pool.getby_offset_create(int(dst)) + ret = ExprLoc(label.loc_key, dst.size) + else: + ret = dst + return ret def run_block_at(self, addr, step=False): """ diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 226f26f1..a8e3a254 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,6 +18,14 @@ class TranslatorC(Translator): '>>>': 'rot_right', } + def __init__(self, symbol_pool=None, **kwargs): + """Instance a C translator + @symbol_pool: AsmSymbolPool instance + """ + super(TranslatorC, self).__init__(**kwargs) + # symbol pool + self.symbol_pool = symbol_pool + def _size2mask(self, size): """Return a C string corresponding to the size2mask operation, with support for @size <= 128""" @@ -44,16 +52,31 @@ class TranslatorC(Translator): ) return "0x%x" % expr.arg.arg + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.symbol_pool is None: + return str(loc_key) + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + + if offset is None: + return name + return "0x%x" % offset + def from_ExprAff(self, expr): - return "%s = %s" % tuple(map(self.from_expr, (expr.dst, expr.src))) + new_dst = self.from_expr(expr.dst) + new_src = self.from_expr(expr.src) + return "%s = %s" % (new_dst, new_src) def from_ExprCond(self, expr): - return "(%s?%s:%s)" % tuple(map(self.from_expr, - (expr.cond, expr.src1, expr.src2))) + new_cond = self.from_expr(expr.cond) + new_src1 = self.from_expr(expr.src1) + new_src2 = self.from_expr(expr.src2) + return "(%s?%s:%s)" % (new_cond, new_src1, new_src2) def from_ExprMem(self, expr): - return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, - self.from_expr(expr.arg)) + new_ptr = self.from_expr(expr.arg) + return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, new_ptr) def from_ExprOp(self, expr): if len(expr.args) == 1: @@ -63,9 +86,11 @@ class TranslatorC(Translator): self._size2mask(expr.args[0].size), ) elif expr.op in ['cntleadzeros', 'cnttrailzeros']: - return "%s(0x%x, %s)" % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0])) + return "%s(0x%x, %s)" % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]) + ) elif expr.op == '!': return "(~ %s)&%s" % ( self.from_expr(expr.args[0]), @@ -78,7 +103,10 @@ class TranslatorC(Translator): expr.op.startswith("fxam_c") or expr.op in ["-", "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", "fchs"]): - return "%s(%s)" % (expr.op, self.from_expr(expr.args[0])) + return "%s(%s)" % ( + expr.op, + self.from_expr(expr.args[0]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -91,10 +119,12 @@ class TranslatorC(Translator): self._size2mask(expr.args[1].size), ) elif expr.op in self.dct_shift: - return 'SHIFT_%s(%d, %s, %s)' % (self.dct_shift[expr.op].upper(), - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return 'SHIFT_%s(%d, %s, %s)' % ( + self.dct_shift[expr.op].upper(), + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.is_associative() or expr.op in ["%", "/"]: oper = ['(%s&%s)' % ( self.from_expr(arg), @@ -105,9 +135,11 @@ class TranslatorC(Translator): return "((%s)&%s)" % (oper, self._size2mask(expr.args[0].size)) elif expr.op in ['-']: return '(((%s&%s) %s (%s&%s))&%s)' % ( - self.from_expr(expr.args[0]), self._size2mask(expr.args[0].size), + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), str(expr.op), - self.from_expr(expr.args[1]), self._size2mask(expr.args[1].size), + self.from_expr(expr.args[1]), + self._size2mask(expr.args[1].size), self._size2mask(expr.args[0].size) ) elif expr.op in self.dct_rot: @@ -125,21 +157,29 @@ class TranslatorC(Translator): elif (expr.op.startswith("fcom") or expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", "fprem", "fprem_lsb", "fyl2x", "fpatan"]): - return "fpu_%s(%s, %s)" % (expr.op, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "fpu_%s(%s, %s)" % ( + expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op == "segm": return "segm2addr(jitcpu, %s, %s)" % ( - self.from_expr(expr.args[0]), self.from_expr(expr.args[1])) + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ['udiv', 'umod', 'idiv', 'imod']: - return '%s%d(%s, %s)' % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return '%s%d(%s, %s)' % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ["bcdadd", "bcdadd_cf"]: - return "%s_%d(%s, %s)" % (expr.op, expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "%s_%d(%s, %s)" % ( + expr.op, expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -159,9 +199,11 @@ class TranslatorC(Translator): def from_ExprSlice(self, expr): # XXX check mask for 64 bit & 32 bit compat - return "((%s>>%d) &%s)" % (self.from_expr(expr.arg), - expr.start, - self._size2mask(expr.stop - expr.start)) + return "((%s>>%d) &%s)" % ( + self.from_expr(expr.arg), + expr.start, + self._size2mask(expr.stop - expr.start) + ) def from_ExprCompose(self, expr): out = [] @@ -178,10 +220,12 @@ class TranslatorC(Translator): dst_cast = "uint%d_t" % size for index, arg in expr.iter_args(): - out.append("(((%s)(%s & %s)) << %d)" % (dst_cast, - self.from_expr(arg), - self._size2mask(arg.size), - index)) + out.append("(((%s)(%s & %s)) << %d)" % ( + dst_cast, + self.from_expr(arg), + self._size2mask(arg.size), + index) + ) out = ' | '.join(out) return '(' + out + ')' diff --git a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py index d7369e9e..e05f5e4d 100644 --- a/miasm2/ir/translators/python.py +++ b/miasm2/ir/translators/python.py @@ -20,6 +20,9 @@ class TranslatorPython(Translator): def from_ExprId(self, expr): return str(expr) + def from_ExprLoc(self, expr): + return str(expr) + def from_ExprMem(self, expr): return "memory(%s, 0x%x)" % (self.from_expr(expr.arg), expr.size / 8) diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index 18bcb9bd..6a6fec16 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -120,7 +120,7 @@ class TranslatorSMT2(Translator): # Implemented language __LANG__ = "smt2" - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", symbol_pool=None, **kwargs): """Instance a SMT2 translator @endianness: (optional) memory endianness """ @@ -129,6 +129,8 @@ class TranslatorSMT2(Translator): self._mem = SMT2Mem(endianness) # map of translated bit vectors self._bitvectors = dict() + # symbol pool + self.symbol_pool = symbol_pool def from_ExprInt(self, expr): return bit_vec_val(expr.arg.arg, expr.size) @@ -148,6 +150,23 @@ class TranslatorSMT2(Translator): self._bitvectors[str(expr)] = expr.size return str(expr) + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.symbol_pool is None: + if str(loc_key) not in self._bitvectors: + self._bitvectors[str(loc_key)] = expr.size + return str(loc_key) + + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + + if offset is None: + return bit_vec_val(str(offset), expr.size) + name = "|{}|".format(str(name)) + if name not in self._bitvectors: + self._bitvectors[name] = expr.size + return name + def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) # size to read from memory diff --git a/miasm2/ir/translators/translator.py b/miasm2/ir/translators/translator.py index e3641843..557fdabe 100644 --- a/miasm2/ir/translators/translator.py +++ b/miasm2/ir/translators/translator.py @@ -53,6 +53,12 @@ class Translator(object): """ raise NotImplementedError("Abstract method") + def from_ExprLoc(self, expr): + """Translate an ExprLoc + @expr: ExprLoc to translate + """ + raise NotImplementedError("Abstract method") + def from_ExprCompose(self, expr): """Translate an ExprCompose @expr: ExprCompose to translate @@ -100,6 +106,7 @@ class Translator(object): # Handle Expr type handlers = {m2_expr.ExprInt: self.from_ExprInt, m2_expr.ExprId: self.from_ExprId, + m2_expr.ExprLoc: self.from_ExprLoc, m2_expr.ExprCompose: self.from_ExprCompose, m2_expr.ExprSlice: self.from_ExprSlice, m2_expr.ExprOp: self.from_ExprOp, diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index 536daff1..544dd26f 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -116,7 +116,7 @@ class TranslatorZ3(Translator): # Operations translation trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", symbol_pool=None, **kwargs): """Instance a Z3 translator @endianness: (optional) memory endianness """ @@ -126,6 +126,8 @@ class TranslatorZ3(Translator): super(TranslatorZ3, self).__init__(**kwargs) self._mem = Z3Mem(endianness) + # symbol pool + self.symbol_pool = symbol_pool def from_ExprInt(self, expr): return z3.BitVecVal(expr.arg.arg, expr.size) @@ -136,6 +138,18 @@ class TranslatorZ3(Translator): else: return z3.BitVec(str(expr), expr.size) + def from_ExprLoc(self, expr): + if self.symbol_pool is None: + # No symbol_pool, fallback to default name + return z3.BitVec(str(expr), expr.size) + label = self.symbol_pool.loc_key_to_label(expr.loc_key) + if label is None: + # No symbol_pool, fallback to default name + return z3.BitVec(str(expr), expr.size) + elif label.offset is None: + return z3.BitVec(label.name, expr.size) + return z3.BitVecVal(label.offset, expr.size) + def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) return self._mem.get(addr, expr.size) diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index b10a9257..519664e3 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -4,16 +4,17 @@ Module to generate C code for a given native @block import miasm2.expression.expression as m2_expr from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.translators import Translator -from miasm2.core.asmblock import expr_is_label, AsmBlockBad, AsmLabel -# Miasm to C translator -TRANSLATOR = Translator.to_language("C") +from miasm2.ir.translators.C import TranslatorC +from miasm2.core.asmblock import AsmBlockBad -SIZE_TO_MASK = {size: TRANSLATOR.from_expr(m2_expr.ExprInt(0, size).mask) +TRANSLATOR_NO_SYMBOL = TranslatorC(symbol_pool=None) + +SIZE_TO_MASK = {size: TRANSLATOR_NO_SYMBOL.from_expr(ExprInt(0, size).mask) for size in (1, 2, 3, 7, 8, 16, 32, 64, 128)} + class Attributes(object): """ @@ -100,6 +101,7 @@ class CGen(object): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.pc + self.translator = TranslatorC(self.ir_arch.symbol_pool) self.init_arch_C() def init_arch_C(self): @@ -128,7 +130,7 @@ class CGen(object): def id_to_c(self, expr): """Translate Expr @expr into corresponding C code""" - return TRANSLATOR.from_expr(self.patch_c_id(expr)) + return self.translator.from_expr(self.patch_c_id(expr)) def add_label_index(self, dst2index, lbl): """Insert @lbl to the dictionnary @dst2index with a uniq value @@ -149,7 +151,7 @@ class CGen(object): dst = m2_expr.ExprInt(offset, self.ir_arch.IRDst.size) new_assignblk[self.ir_arch.IRDst] = dst irs = [AssignBlock(new_assignblk, instr)] - return IRBlock(self.ir_arch.get_instr_label(instr), irs) + return IRBlock(self.ir_arch.get_instr_label(instr).loc_key, irs) def block2assignblks(self, block): """ @@ -292,8 +294,8 @@ class CGen(object): offset = int(expr) self.add_label_index(dst2index, offset) return ("%s" % dst2index[offset], hex(offset)) - if expr_is_label(expr): - label = expr.name + if expr.is_label(): + label = self.ir_arch.symbol_pool.loc_key_to_label(expr.loc_key) if label.offset != None: offset = label.offset self.add_label_index(dst2index, offset) @@ -577,12 +579,13 @@ class CGen(object): for index, irblock in enumerate(irblocks): new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) - if new_irblock.label.offset is None: + label = self.ir_arch.symbol_pool.loc_key_to_label(new_irblock.label) + if label.offset is None: out.append("%-40s // %.16X %s" % - (str(new_irblock.label.name) + ":", instr.offset, instr)) + (str(label.name) + ":", instr.offset, instr)) else: out.append("%-40s // %.16X %s" % - (self.label_to_jitlabel(new_irblock.label) + ":", instr.offset, instr)) + (self.label_to_jitlabel(label) + ":", instr.offset, instr)) if index == 0: out += self.gen_pre_code(instr_attrib) out += self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, new_irblock) diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py index 452b6d84..a8d30f46 100644 --- a/miasm2/jitter/jitcore_llvm.py +++ b/miasm2/jitter/jitcore_llvm.py @@ -78,12 +78,13 @@ class JitCore_LLVM(jitcore.JitCore): """Add a block to JiT and JiT it. @block: the block to add """ + block_hash = self.hash_block(block) fname_out = os.path.join(self.tempdir, "%s.bc" % block_hash) if not os.access(fname_out, os.R_OK): # Build a function in the context - func = LLVMFunction(self.context, LLVMFunction.canonize_label_name(block.label)) + func = LLVMFunction(self.context, block.label) # Set log level func.log_regs = self.log_regs @@ -114,7 +115,7 @@ class JitCore_LLVM(jitcore.JitCore): else: # The cache file exists: function can be loaded from cache - ptr = self.context.get_ptr_from_cache(fname_out, LLVMFunction.canonize_label_name(block.label)) + ptr = self.context.get_ptr_from_cache(fname_out, block.label) # Store a pointer on the function jitted code self.lbl2jitbloc[block.label.offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index 799848ab..af9f09e6 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -48,7 +48,7 @@ class JitCore_Python(jitcore.JitCore): vmmngr = cpu.vmmngr # Keep current location in irblocks - cur_label = label + cur_label = label.loc_key # Required to detect new instructions offsets_jitted = set() @@ -57,6 +57,7 @@ class JitCore_Python(jitcore.JitCore): exec_engine = self.symbexec expr_simp = exec_engine.expr_simp + known_loc_keys = set(irb.label for irb in irblocks) # For each irbloc inside irblocks while True: @@ -64,6 +65,7 @@ class JitCore_Python(jitcore.JitCore): for irb in irblocks: if irb.label == cur_label: break + else: raise RuntimeError("Irblocks must end with returning an " "ExprInt instance") @@ -75,7 +77,7 @@ class JitCore_Python(jitcore.JitCore): for assignblk in irb: instr = assignblk.instr # For each new instruction (in assembly) - if instr.offset not in offsets_jitted: + if instr is not None and instr.offset not in offsets_jitted: # Test exceptions vmmngr.check_invalid_code_blocs() vmmngr.check_memory_breakpoint() @@ -120,8 +122,10 @@ class JitCore_Python(jitcore.JitCore): # Manage resulting address if isinstance(ad, m2_expr.ExprInt): return ad.arg.arg - elif isinstance(ad, m2_expr.ExprId): - cur_label = ad.name + elif isinstance(ad, m2_expr.ExprLoc): + cur_label = ad.loc_key + if cur_label not in known_loc_keys: + return cur_label else: raise NotImplementedError("Type not handled: %s" % ad) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index db486b4f..28200997 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -313,7 +313,6 @@ class jitter(object): Check exceptions before breakpoints.""" self.pc = pc - # Callback called before exec if self.exec_cb is not None: res = self.exec_cb(self) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index dd3afed6..3f7d0c6d 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -69,6 +69,26 @@ class LLVMContext(): self.target_machine = target.create_target_machine() self.init_exec_engine() + + def canonize_label_name(self, label): + """Canonize @label names to a common form. + @label: str or asmlabel instance""" + if isinstance(label, str): + return label + if isinstance(label, m2_expr.Expr) and expr.is_label(): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label.index) + if isinstance(label, (int, long)): + fds + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label) + + if isinstance(label, m2_asmblock.AsmLabel): + if label.offset is None: + return "label_%s" % label.name + else: + return "label_%X" % label.offset + else: + raise ValueError("label must either be str or asmlabel") + def optimise_level(self, level=2): """Set the optimisation level to @level from 0 to 2 0: non-optimized @@ -324,6 +344,7 @@ class LLVMContext_JIT(LLVMContext): def get_ptr_from_cache(self, file_name, func_name): "Load @file_name and return a pointer on the jitter @func_name" # We use an empty module to avoid loosing time on function building + func_name = self.canonize_label_name(func_name) empty_module = llvm.parse_assembly("") empty_module.fname_out = file_name @@ -379,6 +400,7 @@ class LLVMFunction(): def __init__(self, llvm_context, name="fc", new_module=True): "Create a new function with name @name" + name = self.canonize_label_name(name) self.llvm_context = llvm_context if new_module: self.llvm_context.new_module() @@ -483,14 +505,16 @@ class LLVMFunction(): var_casted = var self.builder.ret(var_casted) - @staticmethod - def canonize_label_name(label): + def canonize_label_name(self, label): """Canonize @label names to a common form. @label: str or asmlabel instance""" if isinstance(label, str): return label - if m2_asmblock.expr_is_label(label): - label = label.name + if isinstance(label, m2_expr.Expr) and expr.is_label(): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label.index) + if isinstance(label, m2_expr.LocKey): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label) + if isinstance(label, m2_asmblock.AsmLabel): if label.offset is None: return "label_%s" % label.name @@ -629,15 +653,15 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret + if expr.is_label(): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(expr.loc_key) + offset = label.offset + ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) + self.update_cache(expr, ret) + return ret + if isinstance(expr, m2_expr.ExprId): name = expr.name - if not isinstance(name, str): - # Resolve label - offset = name.offset - ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) - self.update_cache(expr, ret) - return ret - try: # If expr.name is already known (args) return self.local_vars[name] @@ -1078,7 +1102,7 @@ class LLVMFunction(): index = dst2case.get(value, i) to_eval = to_eval.replace_expr({value: m2_expr.ExprInt(index, value.size)}) dst2case[value] = index - if m2_asmblock.expr_is_int_or_label(value): + if value.is_int() or value.is_label(): case2dst[i] = value else: case2dst[i] = self.add_ir(value) @@ -1105,12 +1129,13 @@ class LLVMFunction(): self.main_stream = False if isinstance(dst, m2_expr.ExprInt): - dst = m2_expr.ExprId(self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)), - dst.size) + label = self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)) + dst = m2_expr.ExprLoc(label.loc_key, dst.size) - if m2_asmblock.expr_is_label(dst): - bbl = self.get_basic_bloc_by_label(dst) - offset = dst.name.offset + if isinstance(dst, m2_expr.ExprLoc): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(dst.loc_key) + bbl = self.get_basic_bloc_by_label(label) + offset = label.offset if bbl is not None: # "local" jump, inside this function if offset is None: diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py index 2fe4cd3f..ea6c2c98 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -162,8 +162,7 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs): new_size = pe.SHList[i + 1].addr - section.addr section.size = new_size section.rawsize = new_size - section.data = strpatchwork.StrPatchwork( - section.data[:new_size]) + section.data = section.data[:new_size] section.offset = section.addr # Last section alignement |