diff options
| author | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2017-12-11 14:26:23 +0100 |
|---|---|---|
| committer | Fabrice Desclaux <fabrice.desclaux@cea.fr> | 2018-06-08 17:35:05 +0200 |
| commit | a2637cdf0b40df074865d23a7fd71f082ad7f40a (patch) | |
| tree | f6c958ca8481e6e29760078e5d1bdc2d2b64da53 | |
| parent | dadfaabc3fff5edb9bf4ef7e7e8c4cfc4baccb94 (diff) | |
| download | focaccia-miasm-a2637cdf0b40df074865d23a7fd71f082ad7f40a.tar.gz focaccia-miasm-a2637cdf0b40df074865d23a7fd71f082ad7f40a.zip | |
Expr: Add new word ExprLoc
This word represents a location in the binary. Thus, the hack of ExprId containing an AsmLabel ends here.
53 files changed, 1259 insertions, 806 deletions
diff --git a/example/disasm/callback.py b/example/disasm/callback.py index a9bef20b..6b7b2b81 100644 --- a/example/disasm/callback.py +++ b/example/disasm/callback.py @@ -1,5 +1,5 @@ from miasm2.core.bin_stream import bin_stream_str -from miasm2.core.asmblock import AsmLabel, AsmConstraint, expr_is_label +from miasm2.core.asmblock import AsmLabel, AsmConstraint from miasm2.arch.x86.disasm import dis_x86_32, cb_x86_funcs @@ -23,10 +23,12 @@ def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): return ## The destination must be a label dst = last_instr.args[0] - if not expr_is_label(dst): + if not dst.is_label(): return + + label = symbol_pool.loc_key_to_label(dst.loc_key) ## The destination must be the next instruction - if dst.name.offset != last_instr.offset + last_instr.l: + if label.offset != last_instr.offset + last_instr.l: return # Update instruction instance @@ -34,7 +36,7 @@ def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs): # Update next blocks to process in the disassembly engine cur_bloc.bto.clear() - cur_bloc.add_cst(dst.name.offset, AsmConstraint.c_next, symbol_pool) + cur_bloc.add_cst(label.offset, AsmConstraint.c_next, symbol_pool) # Prepare a tiny shellcode diff --git a/example/disasm/full.py b/example/disasm/full.py index 84c856e1..e693a687 100644 --- a/example/disasm/full.py +++ b/example/disasm/full.py @@ -155,7 +155,7 @@ while not finish and todo: # Generate dotty graph -all_blocks = AsmCFG() +all_blocks = AsmCFG(mdis.symbol_pool) for blocks in all_funcs_blocks.values(): all_blocks += blocks diff --git a/example/expression/graph_dataflow.py b/example/expression/graph_dataflow.py index 26fdd2ec..dd7e37a1 100644 --- a/example/expression/graph_dataflow.py +++ b/example/expression/graph_dataflow.py @@ -24,7 +24,7 @@ def node_x_2_id(n, x): def get_node_name(label, i, n): - n_name = (label.name, i, n) + n_name = (label, i, n) return n_name @@ -93,9 +93,11 @@ def gen_block_data_flow_graph(ir_arch, ad, block_flow_cb): dead_simp(ir_arch) + irblock_0 = None for irblock in ir_arch.blocks.values(): - if irblock.label.offset == ad: + label = ir_arch.symbol_pool.loc_key_to_label(irblock.label) + if label.offset == ad: irblock_0 = irblock break assert(irblock_0 is not None) @@ -144,8 +146,6 @@ for block in blocks: ir_arch.add_block(block) for irblock in ir_arch.blocks.values(): print irblock - if irblock.label.offset != 0: - continue if args.symb: diff --git a/example/expression/solve_condition_stp.py b/example/expression/solve_condition_stp.py index 201d9f26..76dff96c 100644 --- a/example/expression/solve_condition_stp.py +++ b/example/expression/solve_condition_stp.py @@ -5,9 +5,8 @@ from pdb import pm from miasm2.analysis.machine import Machine from miasm2.expression.expression import ExprInt, ExprCond, ExprId, \ - get_expr_ids, ExprAff + get_expr_ids, ExprAff, ExprLoc from miasm2.core.bin_stream import bin_stream_str -from miasm2.core import asmblock from miasm2.ir.symbexec import SymbolicExecutionEngine, get_block from miasm2.expression.simplifications import expr_simp from miasm2.core import parse_asm @@ -55,8 +54,8 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): cond_group_b = {addr.cond: ExprInt(1, addr.cond.size)} addr_a = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_a), {})) addr_b = expr_simp(symbexec.eval_expr(addr.replace_expr(cond_group_b), {})) - if not (addr_a.is_int() or asmblock.expr_is_label(addr_a) and - addr_b.is_int() or asmblock.expr_is_label(addr_b)): + if not (addr_a.is_int() or addr_a.is_label() and + addr_b.is_int() or addr_b.is_label()): print str(addr_a), str(addr_b) raise ValueError("Unsupported condition") if isinstance(addr_a, ExprInt): @@ -68,11 +67,11 @@ def emul_symb(ir_arch, mdis, states_todo, states_done): elif addr == ret_addr: print 'Return address reached' continue - elif isinstance(addr, ExprInt): + elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) - elif asmblock.expr_is_label(addr): - addr = addr.name + elif addr.is_label(): + addr = ir_arch.symbol_pool.loc_key_to_label(addr.loc_key) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") @@ -95,6 +94,7 @@ if __name__ == '__main__': symbexec = SymbolicExecutionEngine(ir_arch, symbols_init) blocks, symbol_pool = parse_asm.parse_txt(machine.mn, 32, ''' + init: PUSH argv PUSH argc PUSH ret_addr @@ -105,22 +105,20 @@ if __name__ == '__main__': argc_lbl = symbol_pool.getby_name('argc') argv_lbl = symbol_pool.getby_name('argv') ret_addr_lbl = symbol_pool.getby_name('ret_addr') + init_lbl = symbol_pool.getby_name('init') - argc = ExprId(argc_lbl, 32) - argv = ExprId(argv_lbl, 32) - ret_addr = ExprId(ret_addr_lbl, 32) + argc = ExprLoc(argc_lbl.loc_key, 32) + argv = ExprLoc(argv_lbl.loc_key, 32) + ret_addr = ExprLoc(ret_addr_lbl.loc_key, 32) - b = list(blocks)[0] - print b + block = list(blocks)[0] + print block # add fake address and len to parsed instructions - for i, line in enumerate(b.lines): - line.offset, line.l = i, 1 - ir_arch.add_block(b) - irb = get_block(ir_arch, mdis, 0) + ir_arch.add_block(block) + irb = ir_arch.blocks[init_lbl.loc_key] symbexec.eval_updt_irblock(irb) symbexec.dump(ids=False) - # reset ir_arch blocks ir_arch.blocks = {} diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index 6dfa1f7d..fad793ff 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -6,7 +6,7 @@ import idc import idautils from miasm2.core.bin_stream_ida import bin_stream_ida -from miasm2.core.asmblock import expr_is_label, AsmLabel, is_int +from miasm2.core.asmblock import AsmLabel, is_int from miasm2.expression.simplifications import expr_simp from miasm2.analysis.data_flow import dead_simp from miasm2.ir.ir import AssignBlock, IRBlock @@ -74,13 +74,11 @@ class GraphMiasmIR(idaapi.GraphViewer): continue all_dst = self.ir_arch.dst_trackback(irblock) for dst in all_dst: - if not expr_is_label(dst): + if not dst.is_label(): continue - - dst = dst.name - if not dst in self.ir_arch.blocks: + if not dst.loc_key in self.ir_arch.blocks: continue - dst_block = self.ir_arch.blocks[dst] + dst_block = self.ir_arch.blocks[dst.loc_key] node1 = addr_id[irblock] node2 = addr_id[dst_block] self.AddEdge(node1, node2) diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index f7949c88..49368508 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -1,8 +1,7 @@ """Provide dependency graph""" -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprInt, ExprLoc, ExprAff from miasm2.core.graph import DiGraph -from miasm2.core.asmblock import AsmLabel, expr_is_int_or_label, expr_is_label from miasm2.expression.simplifications import expr_simp from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IRBlock, AssignBlock @@ -61,7 +60,7 @@ class DependencyNode(object): def __str__(self): """Returns a string representation of DependencyNode""" return "<%s %s %s %s>" % (self.__class__.__name__, - self.label.name, self.element, + self.label, self.element, self.line_nb) def __repr__(self): @@ -297,9 +296,10 @@ class DependencyResult(DependencyState): line_nb).assignblks # Eval the block - temp_label = AsmLabel("Temp") + symbol_pool = AsmSymbolPool() + temp_label = symbol_pool.getby_name_create("Temp") symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) - symb_exec.eval_updt_irblock(IRBlock(temp_label, assignblks), step=step) + symb_exec.eval_updt_irblock(IRBlock(temp_label.loc_key, assignblks), step=step) # Return only inputs values (others could be wrongs) return {element: symb_exec.symbols[element] @@ -314,30 +314,31 @@ class DependencyResultImplicit(DependencyResult): # Z3 Solver instance _solver = None - unsat_expr = m2_expr.ExprAff(m2_expr.ExprInt(0, 1), - m2_expr.ExprInt(1, 1)) + unsat_expr = ExprAff(ExprInt(0, 1), ExprInt(1, 1)) def _gen_path_constraints(self, translator, expr, expected): """Generate path constraint from @expr. Handle special case with generated labels """ out = [] - expected_is_label = expr_is_label(expected) + expected = self._ira.symbol_pool.canonize_to_exprloc(expected) + expected_is_label = expected.is_label() for consval in possible_values(expr): - if (expected_is_label and - consval.value != expected): + value = self._ira.symbol_pool.canonize_to_exprloc(consval.value) + if expected_is_label and value != expected: continue - if (not expected_is_label and - expr_is_label(consval.value)): + if not expected_is_label and value.is_label(): continue conds = z3.And(*[translator.from_expr(cond.to_constraint()) for cond in consval.constraints]) - if expected != consval.value: - conds = z3.And(conds, - translator.from_expr( - m2_expr.ExprAff(consval.value, - expected))) + if expected != value: + conds = z3.And( + conds, + translator.from_expr( + ExprAff(value, + expected)) + ) out.append(conds) if out: @@ -373,10 +374,8 @@ class DependencyResultImplicit(DependencyResult): # Add constraint if hist_nb < history_size: next_label = history[hist_nb] - expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, - size)) - solver.add( - self._gen_path_constraints(translator, dst, expected)) + expected = symb_exec.eval_expr(ExprLoc(next_label, size)) + solver.add(self._gen_path_constraints(translator, dst, expected)) # Save the solver self._solver = solver @@ -491,11 +490,11 @@ class DependencyGraph(object): @follow: set of nodes to follow @nofollow: set of nodes not to follow """ - if isinstance(expr, m2_expr.ExprId): + if expr.is_id(): follow.add(expr) - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): nofollow.add(expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): follow.add(expr) return expr @@ -508,7 +507,7 @@ class DependencyGraph(object): @follow_mem: force the visit of memory sub expressions @follow_call: force the visit of call sub expressions """ - if not follow_mem and isinstance(expr, m2_expr.ExprMem): + if not follow_mem and expr.is_mem(): nofollow.add(expr) return False if not follow_call and expr.is_function_call(): @@ -534,8 +533,9 @@ class DependencyGraph(object): """Do not follow labels""" follow = set() for expr in exprs: - if not expr_is_int_or_label(expr): - follow.add(expr) + if expr.is_int() or expr.is_label(): + continue + follow.add(expr) return follow, set() diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 427a8bd0..66caffc9 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -56,15 +56,14 @@ except ImportError: z3 = None from miasm2.expression.expression import ExprMem, ExprInt, ExprCompose, \ - ExprAff, ExprId + ExprAff, ExprId, ExprLoc, LocKey from miasm2.core.bin_stream import bin_stream_vm -from miasm2.core.asmblock import expr_is_label from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals - +from miasm2.core.asmblock import AsmBlockBad DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -72,7 +71,7 @@ class DriftException(Exception): """Raised when the emulation drift from the reference engine""" def __init__(self, info): - super(Exception, self).__init__() + super(DriftException, self).__init__() self.info = info def __str__(self): @@ -161,11 +160,14 @@ class DSEEngine(object): self.symb_concrete = None # Concrete SymbExec for path desambiguisation self.mdis = None # DisasmEngine + self.symbol_pool = self.ir_arch.symbol_pool + def prepare(self): """Prepare the environment for attachment with a jitter""" # Disassembler self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), - lines_wd=1) + lines_wd=1, + symbol_pool=self.symbol_pool) # Symbexec engine ## Prepare symbexec engines @@ -215,7 +217,7 @@ class DSEEngine(object): self.prepare() def handle(self, cur_addr): - """Handle destination + r"""Handle destination @cur_addr: Expr of the next address in concrete execution /!\ cur_addr may be a lbl_gen @@ -295,6 +297,9 @@ class DSEEngine(object): # Call callbacks associated to the current address cur_addr = self.jitter.pc + if isinstance(cur_addr, LocKey): + lbl = self.ir_arch.symbol_pool.loc_key_to_label(cur_addr) + cur_addr = lbl.offset if cur_addr in self.handler: self.handler[cur_addr](self) @@ -321,7 +326,8 @@ class DSEEngine(object): ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) + if not isinstance(asm_block, AsmBlockBad): + self.ir_arch.add_block(asm_block) self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) # Emulate the current instruction @@ -329,7 +335,7 @@ class DSEEngine(object): # Is the symbolic execution going (potentially) to jump on a lbl_gen? if len(self.ir_arch.blocks) == 1: - next_addr = self.symb.run_at(cur_addr) + self.symb.run_at(cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -339,18 +345,25 @@ class DSEEngine(object): # Update the concrete execution self._update_state_from_concrete_symb(self.symb_concrete) while True: + next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) self.symb.run_block_at(cur_addr) - if not(expr_is_label(next_addr_concrete) and - next_addr_concrete.name.offset is None): + if not (isinstance(next_addr_concrete, ExprLoc) and + self.ir_arch.symbol_pool.loc_key_to_offset( + next_addr_concrete + ) is None): # Not a lbl_gen, exit break + if self.symb.ir_arch.get_block(cur_addr) is None: + break + # Call handle with lbl_gen state self.handle(next_addr_concrete) cur_addr = next_addr_concrete + # At this stage, symbolic engine is one instruction after the concrete # engine @@ -428,7 +441,7 @@ class DSEEngine(object): symbexec.symbols[reg] = value def update_state_from_concrete(self, cpu=True, mem=False): - """Update the symbolic state with concrete values from the concrete + r"""Update the symbolic state with concrete values from the concrete engine @cpu: (optional) if set, update registers' value @@ -596,13 +609,19 @@ class DSEPathConstraint(DSEEngine): self.cur_solver.add(self.z3_trans.from_expr(cons)) def handle(self, cur_addr): + cur_addr = self.ir_arch.symbol_pool.canonize_to_exprloc(cur_addr) symb_pc = self.eval_expr(self.ir_arch.IRDst) possibilities = possible_values(symb_pc) cur_path_constraint = set() # path_constraint for the concrete path if len(possibilities) == 1: - assert next(iter(possibilities)).value == cur_addr + dst = next(iter(possibilities)).value + dst = self.ir_arch.symbol_pool.canonize_to_exprloc(dst) + assert dst == cur_addr else: for possibility in possibilities: + target_addr = self.ir_arch.symbol_pool.canonize_to_exprloc( + possibility.value + ) path_constraint = set() # Set of ExprAff for the possible path # Get constraint associated to the possible path @@ -642,11 +661,11 @@ class DSEPathConstraint(DSEEngine): "address 0x%x" % address) path_constraint.add(ExprAff(expr_mem, value)) - if possibility.value == cur_addr: + if target_addr == cur_addr: # Add path constraint cur_path_constraint = path_constraint - elif self.produce_solution(possibility.value): + elif self.produce_solution(target_addr): # Looking for a new solution self.cur_solver.push() for cons in path_constraint: @@ -657,8 +676,7 @@ class DSEPathConstraint(DSEEngine): result = self.cur_solver.check() if result == z3.sat: model = self.cur_solver.model() - self.handle_solution(model, possibility.value) + self.handle_solution(model, target_addr) self.cur_solver.pop() self.handle_correct_destination(cur_addr, cur_path_constraint) - diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index 94be74fd..8d4ab052 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -278,7 +278,7 @@ class aarch64_arg(m_arg): fixed_size.add(value.name.size) return value.name label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + return ExprLoc(label.loc_key, size_hint) if isinstance(value, AstInt): assert size_hint is not None return ExprInt(value.value, size_hint) @@ -311,44 +311,49 @@ class instruction_aarch64(instruction): super(instruction_aarch64, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos=None): + def arg2str(expr, index=None, symbol_pool=None): wb = False - if isinstance(e, m2_expr.ExprId) or isinstance(e, m2_expr.ExprInt): - return str(e) - elif isinstance(e, m2_expr.ExprOp) and e.op in shift_expr: - op_str = shift_str[shift_expr.index(e.op)] - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "slice_at": - return "%s LSL %s" % (e.args[0], e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op in extend_lst: - op_str = e.op - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "postinc": - if e.args[1].arg != 0: - return "[%s], %s" % (e.args[0], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc_wb": - if e.args[1].arg != 0: - return "[%s, %s]!" % (e.args[0], e.args[1]) + return str(expr) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: + op_str = shift_str[shift_expr.index(expr.op)] + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "slice_at": + return "%s LSL %s" % (expr.args[0], expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in extend_lst: + op_str = expr.op + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "postinc": + if expr.args[1].arg != 0: + return "[%s], %s" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc": - if len(e.args) == 1: - return "[%s]" % (e.args[0]) - elif not isinstance(e.args[1], m2_expr.ExprInt) or e.args[1].arg != 0: - return "[%s, %s]" % (e.args[0], e.args[1]) + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc_wb": + if expr.args[1].arg != 0: + return "[%s, %s]!" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == 'segm': - arg = e.args[1] + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc": + if len(expr.args) == 1: + return "[%s]" % (expr.args[0]) + elif not isinstance(expr.args[1], m2_expr.ExprInt) or expr.args[1].arg != 0: + return "[%s, %s]" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == 'segm': + arg = expr.args[1] if isinstance(arg, m2_expr.ExprId): arg = str(arg) elif arg.op == 'LSL' and arg.args[1].arg == 0: arg = str(arg.args[0]) else: arg = "%s %s %s" % (arg.args[0], arg.op, arg.args[1]) - return '[%s, %s]' % (e.args[0], arg) + return '[%s, %s]' % (expr.args[0], arg) else: raise NotImplementedError("bad op") @@ -366,13 +371,12 @@ class instruction_aarch64(instruction): def dstflow2label(self, symbol_pool): index = self.mnemo_flow_to_dst_index(self.name) - e = self.args[index] - if not isinstance(e, m2_expr.ExprInt): + expr = self.args[index] + if not expr.is_int(): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = m2_expr.ExprId(l, e.size) - self.args[index] = s + addr = expr.arg + self.offset + label = symbol_pool.getby_offset_create(addr) + self.args[index] = m2_expr.ExprLoc(label.loc_key, expr.size) def breakflow(self): return self.name in BRCOND + ["BR", "BLR", "RET", "ERET", "DRPS", "B", "BL"] diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index 88b0d0a7..1a213b35 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -593,14 +593,14 @@ def udiv(arg1, arg2, arg3): @sbuild.parse def cbz(arg1, arg2): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if arg1 else arg2 + dst = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) if arg1 else arg2 PC = dst ir.IRDst = dst @sbuild.parse def cbnz(arg1, arg2): - dst = arg2 if arg1 else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg2 if arg1 else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -625,14 +625,14 @@ def tbnz(arg1, arg2, arg3): @sbuild.parse def b_ne(arg1): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if zf else arg1 + dst = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) if zf else arg1 PC = dst ir.IRDst = dst @sbuild.parse def b_eq(arg1): - dst = arg1 if zf else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if zf else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -640,7 +640,7 @@ def b_eq(arg1): @sbuild.parse def b_ge(arg1): cond = cond2expr['GE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -648,7 +648,7 @@ def b_ge(arg1): @sbuild.parse def b_gt(arg1): cond = cond2expr['GT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -656,7 +656,7 @@ def b_gt(arg1): @sbuild.parse def b_cc(arg1): cond = cond2expr['CC'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -664,7 +664,7 @@ def b_cc(arg1): @sbuild.parse def b_cs(arg1): cond = cond2expr['CS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -672,7 +672,7 @@ def b_cs(arg1): @sbuild.parse def b_hi(arg1): cond = cond2expr['HI'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -680,7 +680,7 @@ def b_hi(arg1): @sbuild.parse def b_le(arg1): cond = cond2expr['LE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -688,7 +688,7 @@ def b_le(arg1): @sbuild.parse def b_ls(arg1): cond = cond2expr['LS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -696,7 +696,7 @@ def b_ls(arg1): @sbuild.parse def b_lt(arg1): cond = cond2expr['LT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) PC = dst ir.IRDst = dst @@ -732,7 +732,7 @@ def br(arg1): def blr(arg1): PC = arg1 ir.IRDst = arg1 - LR = m2_expr.ExprId(ir.get_next_label(instr), 64) + LR = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, 64) @sbuild.parse def nop(): diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 35574a84..204bf1b0 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -343,62 +343,67 @@ class instruction_arm(instruction): super(instruction_arm, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(expr, index=None, symbol_pool=None): wb = False - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - if isinstance(e, ExprOp) and e.op in expr2shift_dct: - if len(e.args) == 1: - return '%s %s' % (e.args[0], expr2shift_dct[e.op]) - elif len(e.args) == 2: - return '%s %s %s' % (e.args[0], expr2shift_dct[e.op], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + if isinstance(expr, ExprOp) and expr.op in expr2shift_dct: + if len(expr.args) == 1: + return '%s %s' % (expr.args[0], expr2shift_dct[expr.op]) + elif len(expr.args) == 2: + return '%s %s %s' % (expr.args[0], expr2shift_dct[expr.op], expr.args[1]) else: raise NotImplementedError('zarb arg2str') sb = False - if isinstance(e, ExprOp) and e.op == "sbit": + if isinstance(expr, ExprOp) and expr.op == "sbit": sb = True - e = e.args[0] - if isinstance(e, ExprOp) and e.op == "reglist": - o = [gpregs.expr.index(x) for x in e.args] + expr = expr.args[0] + if isinstance(expr, ExprOp) and expr.op == "reglist": + o = [gpregs.expr.index(x) for x in expr.args] out = reglist2str(o) if sb: out += "^" return out - if isinstance(e, ExprOp) and e.op == 'wback': + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] - if isinstance(e, ExprId): - out = str(e) + expr = expr.args[0] + if isinstance(expr, ExprId): + out = str(expr) if wb: out += "!" return out - if not isinstance(e, ExprMem): - return str(e) + if not isinstance(expr, ExprMem): + return str(expr) - e = e.arg - if isinstance(e, ExprOp) and e.op == 'wback': + expr = expr.arg + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] + expr = expr.args[0] - if isinstance(e, ExprId): - r, s = e, None - elif len(e.args) == 1 and isinstance(e.args[0], ExprId): - r, s = e.args[0], None - elif isinstance(e.args[0], ExprId): - r, s = e.args[0], e.args[1] + if isinstance(expr, ExprId): + r, s = expr, None + elif len(expr.args) == 1 and isinstance(expr.args[0], ExprId): + r, s = expr.args[0], None + elif isinstance(expr.args[0], ExprId): + r, s = expr.args[0], expr.args[1] else: - r, s = e.args[0].args + r, s = expr.args[0].args if isinstance(s, ExprOp) and s.op in expr2shift_dct: s = ' '.join([str(x) for x in s.args[0], expr2shift_dct[s.op], s.args[1]]) - if isinstance(e, ExprOp) and e.op == 'postinc': + if isinstance(expr, ExprOp) and expr.op == 'postinc': o = '[%s]' % r if s and not (isinstance(s, ExprInt) and s.arg == 0): o += ', %s' % s @@ -418,16 +423,15 @@ class instruction_arm(instruction): return self.name in conditional_branch + unconditional_branch def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + self.offset + addr = expr.arg + self.offset else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + addr = expr.arg + self.offset + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -492,27 +496,29 @@ class instruction_armt(instruction_arm): def dstflow2label(self, symbol_pool): if self.name in ["CBZ", "CBNZ"]: - e = self.args[1] + expr = self.args[1] else: - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + (self.offset & 0xfffffffc) + addr = expr.arg + (self.offset & 0xfffffffc) elif self.name == 'BL': - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('BP'): - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('CB'): - ad = e.arg + self.offset + self.l + 2 + addr = expr.arg + self.offset + self.l + 2 else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + addr = expr.arg + self.offset + + label = symbol_pool.getby_offset_create(addr) + dst = ExprLoc(label.loc_key, expr.size) + if self.name in ["CBZ", "CBNZ"]: - self.args[1] = s + self.args[1] = dst else: - self.args[0] = s + self.args[0] = dst def breakflow(self): if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: @@ -775,7 +781,7 @@ class arm_arg(m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index 1a37b7f1..b92e2c32 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -7,6 +7,7 @@ from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_i from miasm2.jitter.codegen import CGen from miasm2.expression.expression import ExprId, ExprAff, ExprCond from miasm2.ir.ir import IRBlock, AssignBlock +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_arm') hnd = logging.StreamHandler() @@ -17,11 +18,6 @@ log.setLevel(logging.CRITICAL) class arm_CGen(CGen): - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = self.ir_arch.arch.regs.PC - self.init_arch_C() - def block2assignblks(self, block): """ diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 2cf2e5a4..0b67dd2a 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -450,7 +450,7 @@ def sdiv(ir, instr, a, b, c=None): do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + blk_except = IRBlock(lbl_except.name.loc_key, [AssignBlock(do_except, instr)]) @@ -462,7 +462,7 @@ def sdiv(ir, instr, a, b, c=None): do_div.append(ExprAff(ir.IRDst, r)) do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + blk_div = IRBlock(lbl_div.name.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -483,7 +483,7 @@ def udiv(ir, instr, a, b, c=None): do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + blk_except = IRBlock(lbl_except.name.loc_key, [AssignBlock(do_except, instr)]) r = ExprOp("udiv", b, c) @@ -494,7 +494,7 @@ def udiv(ir, instr, a, b, c=None): do_div.append(ExprAff(ir.IRDst, r)) do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + blk_div = IRBlock(lbl_div.name.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -932,19 +932,20 @@ def pop(ir, instr, a): def cbz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next, b))) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 32) + e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next_expr, b))) return e, [] def cbnz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, b, lbl_next))) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 32) + e.append(ir.IRDst, ExprCond(a, b, lbl_next_expr)) return e, [] - def uxtb(ir, instr, a, b): e = [] r = b[:8].zeroExtend(32) @@ -1264,10 +1265,14 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): raise ValueError('unknown condition %r' % cond) cond = tab_cond[cond] - lbl_next = ExprId(ir.get_next_label(instr), 32) - lbl_do = ExprId(ir.gen_label(), 32) - dst_cond = ExprCond(cond, lbl_do, lbl_next) + + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 32) + lbl_do = ir.gen_label() + lbl_do_expr = ExprLoc(lbl_do.loc_key, 32) + + dst_cond = ExprCond(cond, lbl_do_expr, lbl_next_expr) assert(isinstance(instr_ir, list)) has_irdst = False @@ -1276,8 +1281,8 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): has_irdst = True break if not has_irdst: - instr_ir.append(ExprAff(ir.IRDst, lbl_next)) - e_do = IRBlock(lbl_do.name, [AssignBlock(instr_ir, instr)]) + instr_ir.append(ExprAff(ir.IRDst, lbl_next_expr)) + e_do = IRBlock(lbl_do.loc_key, [AssignBlock(instr_ir, instr)]) e = [ExprAff(ir.IRDst, dst_cond)] return e, [e_do] + extra_ir @@ -1543,7 +1548,7 @@ class ir_arml(IntermediateRepresentation): dst = ExprAff(self.IRDst, ExprId(label_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(label.loc_key, assignments) ir_blocks_all.append([irblock]) label = label_next @@ -1564,7 +1569,7 @@ class ir_arml(IntermediateRepresentation): dst = ExprAff(self.IRDst, ExprCond(local_cond, ExprId(label_do, 32), ExprId(label_next, 32))) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(label.loc_key, assignments) irblocks.append(irblock) @@ -1578,7 +1583,7 @@ class ir_arml(IntermediateRepresentation): dst = ExprAff(self.IRDst, ExprId(label_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(label.loc_key, assignments) irblocks.append(irblock) label = label_next assignments = [] @@ -1594,7 +1599,8 @@ class ir_arml(IntermediateRepresentation): it_hints = None it_cond = None - label = None + label = block.label + assignments = [] ir_blocks_all = [] index = -1 while index + 1 < len(block.lines): @@ -1613,11 +1619,11 @@ class ir_arml(IntermediateRepresentation): split = self.add_instr_to_irblock(block, instr, assignments, ir_blocks_all, gen_pc_updt) if split: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) label = None assignments = [] if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) for irblock in new_ir_blocks_all: diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 15c59cf0..d1b0a8eb 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -5,7 +5,7 @@ from collections import defaultdict from pyparsing import Literal, Group, Optional -from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp +from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp, ExprLoc from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu @@ -60,11 +60,16 @@ class instruction_mips32(cpu.instruction): @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None, symbol_pool=None): + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') @@ -90,21 +95,20 @@ class instruction_mips32(cpu.instruction): def dstflow2label(self, symbol_pool): if self.name in ["J", 'JAL']: - e = self.args[0].arg - ad = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + e - l = symbol_pool.getby_offset_create(ad) - self.args[0] = ExprId(l, e.size) + expr = self.args[0].arg + addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) return ndx = self.get_dst_num() - e = self.args[ndx] + expr = self.args[ndx] - if not isinstance(e, ExprInt): + if not isinstance(expr, ExprInt): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[ndx] = s + addr = expr.arg + self.offset + label = symbol_pool.getby_offset_create(addr) + self.args[ndx] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name == 'BREAK': @@ -262,7 +266,7 @@ class mips32_arg(cpu.m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -403,9 +407,9 @@ class mips32_dreg_imm(mips32_arg): return True @staticmethod - def arg2str(e): - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None): + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index 7aefad32..b17ddbd2 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -4,7 +4,6 @@ from miasm2.expression.expression import ExprAff, ExprInt, ExprId from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b -from miasm2.core.asmblock import expr_is_int_or_label, expr_is_label class ir_a_mips32l(ir_mips32l, ira): def __init__(self, symbol_pool=None): @@ -28,14 +27,15 @@ class ir_a_mips32l(ir_mips32l, ira): if pc_val is None or lr_val is None: new_irblocks.append(irb) continue - if not expr_is_int_or_label(lr_val): - new_irblocks.append(irb) + if lr_val.is_label(): + label = self.symbol_pool.loc_key_to_label(lr_valloc_key) + if label.offset is not None: + lr_val = ExprInt(label.offset, 32) + if not lr_val.is_int(): continue - if expr_is_label(lr_val): - lr_val = ExprInt(lr_val.name.offset, 32) instr = block.lines[-2] - if lr_val.arg != instr.offset + 8: + if int(lr_val) != instr.offset + 8: raise ValueError("Wrong arg") # CALL diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index 16d88067..b3cfecbc 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -57,8 +57,8 @@ class mipsCGen(CGen): self.ir_arch.pc] assignments[self.delay_slot_set] = m2_expr.ExprInt(1, 32) # Replace IRDst with next instruction - assignments[self.ir_arch.IRDst] = m2_expr.ExprId( - self.ir_arch.get_next_instr(assignblock.instr), 32) + dst = self.ir_arch.get_next_instr(assignblock.instr) + assignments[self.ir_arch.IRDst] = m2_expr.ExprLoc(dst.loc_key, 32) irs.append(AssignBlock(assignments, assignblock.instr)) irblocks[blk_idx] = IRBlock(irblock.label, irs) diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index 99c81a33..bb0f812d 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -35,7 +35,7 @@ def jal(arg1): "Jumps to the calculated address @arg1 and stores the return address in $RA" PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_label(instr).loc_key, RA.size) @sbuild.parse def jalr(arg1, arg2): @@ -43,13 +43,13 @@ def jalr(arg1, arg2): address in another register @arg2""" PC = arg1 ir.IRDst = arg1 - arg2 = ExprId(ir.get_next_break_label(instr), 32) + arg2 = ExprLoc(ir.get_next_break_label(instr).loc_key, arg2.size) @sbuild.parse def bal(arg1): PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_label(instr).loc_key, RA.size) @sbuild.parse def l_b(arg1): @@ -76,7 +76,7 @@ def lb(arg1, arg2): @sbuild.parse def beq(arg1, arg2, arg3): "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1 - arg2 else arg3 + dst = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if arg1 - arg2 else arg3 PC = dst ir.IRDst = dst @@ -84,7 +84,7 @@ def beq(arg1, arg2, arg3): def bgez(arg1, arg2): """Branches on @arg2 if the quantities of register @arg1 is greater than or equal to zero""" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1.msb() else arg2 + dst = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if arg1.msb() else arg2 PC = dst ir.IRDst = dst @@ -92,7 +92,7 @@ def bgez(arg1, arg2): def bne(arg1, arg2, arg3): """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT equal""" - dst = arg3 if arg1 - arg2 else ExprId(ir.get_next_break_label(instr), 32) + dst = arg3 if arg1 - arg2 else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst ir.IRDst = dst @@ -230,7 +230,7 @@ def seh(arg1, arg2): @sbuild.parse def bltz(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than zero""" - dst_o = arg2 if arg1.msb() else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1.msb() else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -238,7 +238,7 @@ def bltz(arg1, arg2): def blez(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than or equal to zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = arg2 if cond else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if cond else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -246,7 +246,7 @@ def blez(arg1, arg2): def bgtz(arg1, arg2): """Branches on @arg2 if the register @arg1 is greater than zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = ExprId(ir.get_next_break_label(instr), 32) if cond else arg2 + dst_o = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if cond else arg2 PC = dst_o ir.IRDst = dst_o @@ -346,13 +346,13 @@ def c_le_d(arg1, arg2, arg3): @sbuild.parse def bc1t(arg1, arg2): - dst_o = arg2 if arg1 else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1 else ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @sbuild.parse def bc1f(arg1, arg2): - dst_o = ExprId(ir.get_next_break_label(instr), 32) if arg1 else arg2 + dst_o = ExprLoc(ir.get_next_break_label(instr).loc_key, ir.IRDst.size) if arg1 else arg2 PC = dst_o ir.IRDst = dst_o @@ -423,7 +423,7 @@ def teq(ir, instr, arg1, arg2): do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) - blk_except = IRBlock(lbl_except, [AssignBlock(do_except, instr)]) + blk_except = IRBlock(lbl_except.index, [AssignBlock(do_except, instr)]) cond = arg1 - arg2 diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index e4d03edb..3248a4bc 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -70,7 +70,7 @@ class msp430_arg(m_arg): reg = gpregs.expr[index] return reg label = symbol_pool.getby_name_create(value.name) - return ExprId(label, 16) + return ExprLoc(label.loc_key, 16) if isinstance(value, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] if None in args: @@ -102,40 +102,44 @@ class instruction_msp430(instruction): return self.name in ['call'] @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId): - o = str(e) - elif isinstance(e, ExprInt): - o = str(e) - elif isinstance(e, ExprOp) and e.op == "autoinc": - o = "@%s+" % str(e.args[0]) - elif isinstance(e, ExprMem): - if isinstance(e.arg, ExprId): - if pos == 0: - o = "@%s" % e.arg + def arg2str(expr, index=None, symbol_pool=None): + if isinstance(expr, ExprId): + o = str(expr) + elif isinstance(expr, ExprInt): + o = str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + elif isinstance(expr, ExprOp) and expr.op == "autoinc": + o = "@%s+" % str(expr.args[0]) + elif isinstance(expr, ExprMem): + if isinstance(expr.arg, ExprId): + if index == 0: + o = "@%s" % expr.arg else: - o = "0x0(%s)" % e.arg - elif isinstance(e.arg, ExprInt): - o = "@%s" % e.arg - elif isinstance(e.arg, ExprOp): - o = "%s(%s)" % (e.arg.args[1], e.arg.args[0]) + o = "0x0(%s)" % expr.arg + elif isinstance(expr.arg, ExprInt): + o = "@%s" % expr.arg + elif isinstance(expr.arg, ExprOp): + o = "%s(%s)" % (expr.arg.args[1], expr.arg.args[0]) else: - raise NotImplementedError('unknown instance e = %s' % type(e)) + raise NotImplementedError('unknown instance expr = %s' % type(expr)) return o def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == "call": - ad = e.arg + addr = expr.arg else: - ad = e.arg + int(self.offset) + addr = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index dd24abb1..42f6474e 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -238,8 +238,11 @@ def push_w(ir, instr, a): def call(ir, instr, a): e, a, dummy = mng_autoinc(a, None, 16) - n = ExprId(ir.get_next_label(instr), 16) - e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), n)) + + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) + + e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), lbl_next_expr)) e.append(ExprAff(SP, SP - ExprInt(2, 16))) e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) @@ -272,50 +275,56 @@ def cmp_b(ir, instr, a, b): def jz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, a, n))) + e.append(ExprAff(PC, ExprCond(zf, a, lbl_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, a, lbl_next_expr))) return e, [] def jnz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, n, a))) + e.append(ExprAff(PC, ExprCond(zf, lbl_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, lbl_next_expr, a))) return e, [] def jl(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, n))) + e.append(ExprAff(PC, ExprCond(nf ^ of, a, lbl_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, lbl_next_expr))) return e, [] def jc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, a, n))) + e.append(ExprAff(PC, ExprCond(cf, a, lbl_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, a, lbl_next_expr))) return e, [] def jnc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, n, a))) + e.append(ExprAff(PC, ExprCond(cf, lbl_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, lbl_next_expr, a))) return e, [] def jge(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = ExprLoc(lbl_next.loc_key, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, n, a))) + e.append(ExprAff(PC, ExprCond(nf ^ of, lbl_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, lbl_next_expr, a))) return e, [] diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 945824a0..429fd22d 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -42,7 +42,7 @@ class ppc_arg(m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -74,7 +74,7 @@ class instruction_ppc(instruction): super(instruction_ppc, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(e, pos = None, symbol_pool=None): if isinstance(e, ExprId) or isinstance(e, ExprInt): return str(e) elif isinstance(e, ExprMem): @@ -132,8 +132,8 @@ class instruction_ppc(instruction): ad = e.arg + self.offset else: ad = e.arg - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + label = symbol_pool.getby_offset_create(ad) + s = ExprLoc(label.loc_key, e.size) self.args[address_index] = s def breakflow(self): diff --git a/miasm2/arch/ppc/sem.py b/miasm2/arch/ppc/sem.py index 741ae24b..775e24d3 100644 --- a/miasm2/arch/ppc/sem.py +++ b/miasm2/arch/ppc/sem.py @@ -690,7 +690,8 @@ def mn_b(ir, instr, arg1, arg2 = None): def mn_bl(ir, instr, arg1, arg2 = None): if arg2 is not None: arg1 = arg2 - return [ ExprAff(LR, ExprId(ir.get_next_instr(instr), 32)), + dst = ir.get_next_instr(instr) + return [ ExprAff(LR, ExprLoc(dst.loc_key, 32)), ExprAff(PC, arg1), ExprAff(ir.IRDst, arg1) ], [] @@ -726,13 +727,15 @@ def mn_do_cond_branch(ir, instr, dest): condition = condition & cond_cond else: condition = cond_cond + dst = ir.get_next_instr(instr) dest_expr = ExprCond(condition, dest, - ExprId(ir.get_next_instr(instr), 32)) + ExprLoc(dst.loc_key, 32)) else: dest_expr = dest if instr.name[-1] == 'L' or instr.name[-2:-1] == 'LA': - ret.append(ExprAff(LR, ExprId(ir.get_next_instr(instr), 32))) + dst = ir.get_next_instr(instr) + ret.append(ExprAff(LR, ExprLoc(dst.loc_key, 32))) ret.append(ExprAff(PC, dest_expr)) ret.append(ExprAff(ir.IRDst, dest_expr)) diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index 14f46265..dd25cb90 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -103,7 +103,7 @@ class sh4_arg(m_arg): if arg.name in gpregs.str: return None label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + return ExprLoc(label.loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -406,24 +406,29 @@ class instruction_sh4(instruction): return self.name.startswith('J') @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - e = e.arg - - if isinstance(e, ExprOp): - if e.op == "predec": - s = '-%s' % e.args[0] - elif e.op == "postinc": - s = '%s+' % e.args[0] + def arg2str(expr, index=None, symbol_pool=None): + if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + return str(expr) + elif expr.is_label(): + if symbol_pool is not None: + return str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + expr = expr.arg + + if isinstance(expr, ExprOp): + if expr.op == "predec": + s = '-%s' % expr.args[0] + elif expr.op == "postinc": + s = '%s+' % expr.args[0] else: s = ','.join([str(x).replace('(', '').replace(')', '') - for x in e.args]) + for x in expr.args]) s = "(%s)"%s s = "@%s" % s - elif isinstance(e, ExprId): - s = "@%s" % e + elif isinstance(expr, ExprId): + s = "@%s" % expr else: raise NotImplementedError('zarb arg2str') return s diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index fc3a5882..4a044d6a 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -274,7 +274,7 @@ class x86_arg(m_arg): return None label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + return ExprLoc(label.loc_key, size_hint) if isinstance(value, AstOp): # First pass to retreive fixed_size if value.op == "segm": @@ -474,16 +474,11 @@ class instruction_x86(instruction): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return expr = self.args[0] - if isinstance(expr, ExprId): - if not isinstance(expr.name, AsmLabel) and expr not in all_regs_ids: - raise ValueError("ExprId must be a label or a register") - elif isinstance(expr, ExprInt): - ad = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, expr.size) - self.args[0] = s - else: + if not expr.is_int(): return + addr = expr.arg + int(self.offset) + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -519,10 +514,9 @@ class instruction_x86(instruction): def getdstflow(self, symbol_pool): if self.additional_info.g1.value & 6 and self.name in repeat_mn: - ad = int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, self.v_opmode()) - return [s] + addr = int(self.offset) + label = symbol_pool.getby_offset_create(addr) + return [ExprLoc(label.loc_key, self.v_opmode())] return [self.args[0]] def get_symbol_size(self, symbol, symbol_pool): @@ -566,9 +560,14 @@ class instruction_x86(instruction): return args @staticmethod - def arg2str(expr, pos=None): - if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + def arg2str(expr, index=None, symbol_pool=None): + if expr.is_id() or expr.is_int(): o = str(expr) + elif expr.is_label(): + if symbol_pool is not None: + o = str(symbol_pool.loc_key_to_label(expr.loc_key)) + else: + o = str(expr) elif ((isinstance(expr, ExprOp) and expr.op == 'far' and isinstance(expr.args[0], ExprMem)) or isinstance(expr, ExprMem)): diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index 50501060..a12a66f5 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -5,6 +5,7 @@ from miasm2.core import asmblock from miasm2.core.utils import pck16, pck32, pck64, upck16, upck32, upck64 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.jitter.codegen import CGen +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_x86') hnd = logging.StreamHandler() @@ -17,6 +18,7 @@ class x86_32_CGen(CGen): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.arch.regs.RIP + self.translator = TranslatorC(self.ir_arch.symbol_pool) self.init_arch_C() def gen_post_code(self, attrib): diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 276b796f..d524af86 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -240,11 +240,13 @@ def gen_jcc(ir, instr, cond, dst, jmp_if): e = [] meip = mRIP[ir.IRDst.size] - next_lbl = m2_expr.ExprId(ir.get_next_label(instr), dst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, dst.size) + if jmp_if: - dstA, dstB = dst, next_lbl + dstA, dstB = dst, lbl_next_expr else: - dstA, dstB = next_lbl, dst + dstA, dstB = lbl_next_expr, dst mn_dst = m2_expr.ExprCond(cond, dstA.zeroExtend(ir.IRDst.size), dstB.zeroExtend(ir.IRDst.size)) @@ -260,17 +262,18 @@ def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = lbl_do_expr, lbl_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = lbl_skip_expr, lbl_do_expr e = [] e_do, extra_irs = [m2_expr.ExprAff(arg1, arg2)], [] - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])] def gen_cmov(ir, instr, cond, dst, src, mov_if): @@ -280,17 +283,18 @@ def gen_cmov(ir, instr, cond, dst, src, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = lbl_do_expr, lbl_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = lbl_skip_expr, lbl_do_expr e = [] e_do, extra_irs = mov(ir, instr, dst, src) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])] def mov(_, instr, dst, src): @@ -504,12 +508,14 @@ def _rotate_tpl(ir, instr, dst, src, op, left=False): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, lbl_do_expr, lbl_skip_expr))) + return (e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])]) def l_rol(ir, instr, dst, src): @@ -551,12 +557,14 @@ def rotate_with_carry_tpl(ir, instr, op, dst, src): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, lbl_do_expr, lbl_skip_expr))) + return (e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])]) def rcl(ir, instr, dst, src): return rotate_with_carry_tpl(ir, instr, '<<<', dst, src) @@ -638,12 +646,13 @@ def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False, return [], [] e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) - e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, - lbl_skip))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_skip = ir.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, ir.IRDst.size) + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip_expr)) + e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, lbl_do_expr, + lbl_skip_expr))) + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)])] def sar(ir, instr, dst, src): @@ -973,9 +982,9 @@ def bswap(_, instr, dst): def cmps(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) src1 = mRSI[instr.mode][:instr.v_admode()] src2 = mRDI[instr.mode][:instr.v_admode()] @@ -999,24 +1008,24 @@ def cmps(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src1, src1 + offset)) e0.append(m2_expr.ExprAff(src2, src2 + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src1, src1 - offset)) e1.append(m2_expr.ExprAff(src2, src2 - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] def scas(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) src = mRDI[instr.mode][:instr.v_admode()] @@ -1036,16 +1045,16 @@ def scas(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] @@ -1185,7 +1194,7 @@ def call(ir, instr, dst): meip = mRIP[ir.IRDst.size] opmode, admode = s, instr.v_admode() myesp = mRSP[instr.mode][:opmode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) if isinstance(dst, m2_expr.ExprOp): if dst.op == "segm": @@ -1229,8 +1238,6 @@ def call(ir, instr, dst): e.append(m2_expr.ExprAff(ir.ExprMem(c, size=s), n)) e.append(m2_expr.ExprAff(meip, dst.zeroExtend(ir.IRDst.size))) e.append(m2_expr.ExprAff(ir.IRDst, dst.zeroExtend(ir.IRDst.size))) - # if not expr_is_int_or_label(dst): - # dst = meip return e, [] @@ -1432,7 +1439,7 @@ def loop(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) c = myecx - m2_expr.ExprInt(1, myecx.size) dst_o = m2_expr.ExprCond(c, dst.zeroExtend(ir.IRDst.size), @@ -1449,7 +1456,7 @@ def loopne(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), @@ -1471,7 +1478,7 @@ def loope(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), m2_expr.ExprInt(0, 1)) @@ -1508,24 +1515,25 @@ def div(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_div, lbl_div_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_div = IRBlock(lbl_div.loc_key, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_except = IRBlock(lbl_except.loc_key, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, lbl_div_expr, lbl_except_expr))) return e, [blk_div, blk_except] @@ -1554,24 +1562,25 @@ def idiv(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_div, lbl_div_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_div = IRBlock(lbl_div.loc_key, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + blk_except = IRBlock(lbl_except.loc_key, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, lbl_div_expr, lbl_except_expr))) return e, [blk_div, blk_except] @@ -1713,9 +1722,9 @@ def cqo(_, instr): def stos(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) addr_o = mRDI[instr.mode][:instr.v_admode()] addr = addr_o @@ -1732,25 +1741,25 @@ def stos(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e = [] e.append(m2_expr.ExprAff(ir.ExprMem(addr, size), b)) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] def lods(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) e = [] addr_o = mRSI[instr.mode][:instr.v_admode()] @@ -1768,13 +1777,13 @@ def lods(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e = [] if instr.mode == 64 and b.size == 32: @@ -1784,14 +1793,14 @@ def lods(ir, instr, size): e.append(m2_expr.ExprAff(b, ir.ExprMem(addr, size))) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] def movs(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_df_0, lbl_df_0_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_df_1, lbl_df_1_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next_expr = m2_expr.ExprLoc(ir.get_next_label(instr).loc_key, ir.IRDst.size) dst = mRDI[instr.mode][:instr.v_admode()] src = mRSI[instr.mode][:instr.v_admode()] @@ -1815,17 +1824,17 @@ def movs(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) e0.append(m2_expr.ExprAff(dst, dst + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e0 = IRBlock(lbl_df_0.loc_key, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) e1.append(m2_expr.ExprAff(dst, dst - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + e1 = IRBlock(lbl_df_1.loc_key, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, lbl_df_1_expr, lbl_df_0_expr))) return e, [e0, e1] @@ -2876,14 +2885,15 @@ def bsr_bsf(ir, instr, dst, src, op_func): ZF = 0 DEST = @op_func(SRC) """ - lbl_src_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_src_not_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_src_null, lbl_src_null_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_src_not_null, lbl_src_not_null_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) - aff_dst = m2_expr.ExprAff(ir.IRDst, lbl_next) + aff_dst = m2_expr.ExprAff(ir.IRDst, lbl_next_expr) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(src, - lbl_src_not_null, - lbl_src_null))] + lbl_src_not_null_expr, + lbl_src_null_expr))] e_src_null = [] e_src_null.append(m2_expr.ExprAff(zf, m2_expr.ExprInt(1, zf.size))) # XXX destination is undefined @@ -2894,8 +2904,8 @@ def bsr_bsf(ir, instr, dst, src, op_func): e_src_not_null.append(m2_expr.ExprAff(dst, op_func(src))) e_src_not_null.append(aff_dst) - return e, [IRBlock(lbl_src_null.name, [AssignBlock(e_src_null, instr)]), - IRBlock(lbl_src_not_null.name, [AssignBlock(e_src_not_null, instr)])] + return e, [IRBlock(lbl_src_null.loc_key, [AssignBlock(e_src_null, instr)]), + IRBlock(lbl_src_not_null.loc_key, [AssignBlock(e_src_not_null, instr)])] def bsf(ir, instr, dst, src): @@ -3925,9 +3935,10 @@ def pshufd(_, instr, dst, src, imm): def ps_rl_ll(ir, instr, dst, src, op, size): - lbl_zero = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + lbl_zero, lbl_zero_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_do, lbl_do_expr = ir.gen_label_and_expr(ir.IRDst.size) + lbl_next = ir.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, ir.IRDst.size) if src.size == 8: count = src.zeroExtend(dst.size) @@ -3940,8 +3951,8 @@ def ps_rl_ll(ir, instr, dst, src, op, size): test = expr_simp(count & m2_expr.ExprInt( ((1 << dst.size) - 1) ^ mask, dst.size)) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(test, - lbl_zero, - lbl_do))] + lbl_zero_expr, + lbl_do_expr))] slices = [] for i in xrange(0, dst.size, size): @@ -3954,12 +3965,12 @@ def ps_rl_ll(ir, instr, dst, src, op, size): return [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size))], [] e_zero = [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size)), - m2_expr.ExprAff(ir.IRDst, lbl_next)] + m2_expr.ExprAff(ir.IRDst, lbl_next_expr)] e_do = [] e.append(m2_expr.ExprAff(dst[0:dst.size], m2_expr.ExprCompose(*slices))) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]), - IRBlock(lbl_zero.name, [AssignBlock(e_zero, instr)])] + e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) + return e, [IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)]), + IRBlock(lbl_zero.loc_key, [AssignBlock(e_zero, instr)])] def psrlw(ir, instr, dst, src): @@ -4483,7 +4494,7 @@ def maskmovq(ir, instr, src, mask): m2_expr.ExprCond(bit, write_label, next_check_label)) - blks.append(IRBlock(cur_label.name, [AssignBlock([check], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([check], instr)])) # Build write blocks dst_addr = mRDI[instr.mode] @@ -4497,7 +4508,7 @@ def maskmovq(ir, instr, src, mask): write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8), src[start: start + 8]) jump = m2_expr.ExprAff(ir.IRDst, next_check_label) - blks.append(IRBlock(cur_label.name, [AssignBlock([write_mem, jump], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([write_mem, jump], instr)])) # If mask is null, bypass all e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(mask, @@ -5145,12 +5156,14 @@ class ir_x86_16(IntermediateRepresentation): c_cond = cond_dec | (zf ^ m2_expr.ExprInt(1, 1)) # gen while - lbl_do = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_end = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_skip = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - lbl_next = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - - fix_next_lbl = {lbl_next: lbl_end} + lbl_do, lbl_do_expr = self.gen_label_and_expr(self.IRDst.size) + lbl_end, lbl_end_expr = self.gen_label_and_expr(self.IRDst.size) + lbl_skip = self.get_next_label(instr) + lbl_skip_expr = m2_expr.ExprLoc(lbl_skip.loc_key, self.IRDst.size) + lbl_next = self.get_next_label(instr) + lbl_next_expr = m2_expr.ExprLoc(lbl_next.loc_key, self.IRDst.size) + + fix_next_lbl = {lbl_next_expr: lbl_end_expr} new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_lbl)) for irblock in extra_ir] @@ -5159,14 +5172,14 @@ class ir_x86_16(IntermediateRepresentation): c_reg - m2_expr.ExprInt(1, c_reg.size))) cond_bloc.append(m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_cond, - lbl_skip, - lbl_do))) - cond_bloc = IRBlock(lbl_end.name, [AssignBlock(cond_bloc, instr)]) + lbl_skip_expr, + lbl_do_expr))) + cond_bloc = IRBlock(lbl_end.loc_key, [AssignBlock(cond_bloc, instr)]) e_do = instr_ir - c = IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]) - e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, lbl_do, - lbl_skip))] + c = IRBlock(lbl_do.loc_key, [AssignBlock(e_do, instr)]) + e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, lbl_do_expr, + lbl_skip_expr))] return e_n, [cond_bloc, c] + new_extra_ir def expr_fix_regs_for_mode(self, e, mode=64): diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index f763d85f..9764590a 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,11 +1,13 @@ #-*- coding:utf-8 -*- import logging -import inspect import warnings from collections import namedtuple -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \ + get_expr_labels +from miasm2.core.asmblock import AsmSymbolPool +from miasm2.expression.expression import LocKey from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck @@ -25,20 +27,13 @@ def is_int(a): isinstance(a, moduint) or isinstance(a, modint) -def expr_is_label(e): - return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel) - - -def expr_is_int_or_label(e): - return isinstance(e, m2_expr.ExprInt) or \ - (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)) - - class AsmLabel(object): "Stand for an assembly label" - def __init__(self, name="", offset=None): + def __init__(self, loc_key, name="", offset=None): + assert isinstance(loc_key, LocKey) + self.loc_key = loc_key self.fixedblocs = False if is_int(name): name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) @@ -77,6 +72,9 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) + def to_string(self, symbol_pool): + return str(self) + class asm_raw(AsmRaw): @@ -363,6 +361,11 @@ class AsmSymbolPool(object): self._name2label = {} self._offset2label = {} self._label_num = 0 + self._loc_key_to_label = {} + + def loc_key_to_label(self, label_index): + assert isinstance(label_index, LocKey) + return self._loc_key_to_label.get(label_index.key, None) def add_label(self, name, offset=None): """ @@ -370,7 +373,7 @@ class AsmSymbolPool(object): @name: label's name @offset: (optional) label's offset """ - label = AsmLabel(name, offset) + label = AsmLabel(LocKey(self._label_num), name, offset) # Test for collisions if (label.offset in self._offset2label and @@ -383,6 +386,9 @@ class AsmSymbolPool(object): (label, self._name2label[label.name])) self._labels.add(label) + self._label_num += 1 + self._loc_key_to_label[label.loc_key.key] = label + if label.offset is not None: self._offset2label[label.offset] = label if label.name != "": @@ -480,6 +486,19 @@ class AsmSymbolPool(object): self._name2label.update(symbol_pool._name2label) self._offset2label.update(symbol_pool._offset2label) + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr + + @expr: Expr instance + """ + if expr.is_int(): + label = self.getby_offset_create(int(expr)) + ret = ExprLoc(label.loc_key, expr.size) + return ret + return expr + def gen_label(self): """Generate a new unpinned label""" label = self.add_label("lbl_gen_%.8X" % (self._label_num)) @@ -511,7 +530,9 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, *args, **kwargs): + def __init__(self, symbol_pool=None, *args, **kwargs): + if symbol_pool is None: + raise DeprecationWarning("AsmCFG needs a non empty symbol_pool") super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} @@ -519,6 +540,15 @@ class AsmCFG(DiGraph): self._pendings = {} # Label2block built on the fly self._label2block = {} + # symbol_pool + self.symbol_pool = symbol_pool + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.symbol_pool) + return graph + self + # Compatibility with old list API def append(self, *args, **kwargs): @@ -639,9 +669,9 @@ class AsmCFG(DiGraph): if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=str(line), attr={})] + self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})] else: - yield self.DotCellDescription(text=str(line), attr={}) + yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={}) def node_attr(self, node): if isinstance(node, AsmBlockBad): @@ -784,7 +814,7 @@ class AsmCFG(DiGraph): if len(pred_next) > 1: raise RuntimeError("Too many next constraints for bloc %r" "(%s)" % (block.label, - map(lambda x: x.label, pred_next))) + [x.label for x in pred_next])) def guess_blocks_size(self, mnemo): """Asm and compute max block size @@ -971,13 +1001,13 @@ def conservative_asm(mnemo, instr, symbols, conservative): def fix_expr_val(expr, symbols): """Resolve an expression @expr using @symbols""" def expr_calc(e): - if isinstance(e, m2_expr.ExprId): + if isinstance(e, ExprId): s = symbols._name2label[e.name] - e = m2_expr.ExprInt(s.offset, e.size) + e = ExprInt(s.offset, e.size) return e result = expr.visit(expr_calc) result = expr_simp(result) - if not isinstance(result, m2_expr.ExprInt): + if not isinstance(result, ExprInt): raise RuntimeError('Cannot resolve symbol %s' % expr) return result @@ -1222,12 +1252,11 @@ def get_block_labels(block): if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): for expr in instr.raw: - symbols.update(m2_expr.get_expr_ids(expr)) + symbols.update(get_expr_labels(expr)) else: for arg in instr.args: - symbols.update(m2_expr.get_expr_ids(arg)) - labels = filter_exprid_label(symbols) - return labels + symbols.update(get_expr_labels(arg)) + return symbols def assemble_block(mnemo, block, symbol_pool, conservative=False): @@ -1285,7 +1314,8 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): lbl2block = {block.label: block for block in blocks} blocks_using_label = {} for block in blocks: - labels = get_block_labels(block) + exprlocs = get_block_labels(block) + labels = set(symbol_pool.loc_key_to_label(expr.loc_key) for expr in exprlocs) for label in labels: blocks_using_label.setdefault(label, set()).add(block) @@ -1544,21 +1574,19 @@ class disasmEngine(object): # test split if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True - pass if instr.dstflow(): instr.dstflow2label(self.symbol_pool) - dst = instr.getdstflow(self.symbol_pool) - dstn = [] - for d in dst: - if isinstance(d, m2_expr.ExprId) and \ - isinstance(d.name, AsmLabel): - dstn.append(d.name) - if d.name.offset in self.dont_dis_retcall_funcs: - add_next_offset = False - dst = dstn + destinations = instr.getdstflow(self.symbol_pool) + known_dsts = [] + for dst in destinations: + if not dst.is_label(): + continue + label = self.symbol_pool.loc_key_to_label(dst.loc_key) + known_dsts.append(label) + if label.offset in self.dont_dis_retcall_funcs: + add_next_offset = False if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update( - [AsmConstraint(x, AsmConstraint.c_to) for x in dst]) + cur_block.bto.update([AsmConstraint(label, AsmConstraint.c_to) for label in known_dsts]) # get in delayslot mode in_delayslot = True @@ -1608,7 +1636,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG() + blocks = AsmCFG(self.symbol_pool) todo = [offset] bloc_cpt = 0 diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index d75b99cf..8ea96e22 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -8,13 +8,12 @@ from collections import defaultdict import pyparsing import miasm2.expression.expression as m2_expr -from miasm2.core import asmblock from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -985,18 +984,24 @@ class instruction(object): self.mode = mode self.args = args self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None def gen_args(self, args): out = ', '.join([str(x) for x in args]) return out def __str__(self): + return self.to_string() + + def to_string(self, symbol_pool=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, pos = i) + x = self.arg2str(arg, i, symbol_pool) args.append(x) o += self.gen_args(args) return o @@ -1011,40 +1016,38 @@ class instruction(object): if symbols is None: symbols = {} args_out = [] - for a in self.args: - e = a + for expr in self.args: # try to resolve symbols using symbols (0 for default value) - ids = m2_expr.get_expr_ids(e) - fixed_ids = {} - for x in ids: - if isinstance(x.name, asmblock.AsmLabel): - name = x.name.name - # special symbol $ - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue - if name == '_': - fixed_ids[x] = self.get_asm_next_offset(x) - continue - if not name in symbols: - raise ValueError('unresolved symbol! %r' % x) - else: - name = x.name - if not name in symbols: + labels = m2_expr.get_expr_labels(expr) + fixed_expr = {} + for exprloc in labels: + label = symbols.loc_key_to_label(exprloc.loc_key) + name = label.name + # special symbols + if name == '$': + fixed_expr[exprloc] = self.get_asm_offset(exprloc) + continue + if name == '_': + fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue + if not name in symbols: + raise ValueError('Unresolved symbol: %r' % exprloc) + if symbols[name].offset is None: raise ValueError('The offset of label "%s" cannot be ' 'determined' % name) else: - size = x.size + # Fix symbol with its offset + size = exprloc.size if size is None: - default_size = self.get_symbol_size(x, symbols) + default_size = self.get_symbol_size(exprloc, symbols) size = default_size value = m2_expr.ExprInt(symbols[name].offset, size) - fixed_ids[x] = value - e = e.replace_expr(fixed_ids) - e = expr_simp(e) - args_out.append(e) + fixed_expr[exprloc] = value + + expr = expr.replace_expr(fixed_expr) + expr = expr_simp(expr) + args_out.append(expr) return args_out def get_info(self, c): diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index df419680..07155fd9 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -4,7 +4,7 @@ import re import miasm2.expression.expression as m2_expr import miasm2.core.asmblock as asmblock from miasm2.core.cpu import instruction, base_expr -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstInt, AstId, AstOp declarator = {'byte': 8, 'word': 16, @@ -237,7 +237,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): cur_block = None state = STATE_NO_BLOC i = 0 - blocks = asmblock.AsmCFG() + blocks = asmblock.AsmCFG(symbol_pool) block_to_nlink = None delayslot = 0 while i < len(lines): diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 8d6d3e07..68b4439f 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -142,11 +142,19 @@ class SemBuilder(object): def _create_labels(lbl_else=False): """Return the AST standing for label creations @lbl_else (optional): if set, create a label 'lbl_else'""" - lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)" + lbl_end = "lbl_end = ir.get_next_label(instr)" + lbl_end_expr = "lbl_end_expr = ExprLoc(lbl_end.loc_key, ir.IRDst.size)" out = ast.parse(lbl_end).body - out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body + out += ast.parse(lbl_end_expr).body + lbl_if = "lbl_if = ir.gen_label()" + lbl_if_expr = "lbl_if_expr = ExprLoc(lbl_if.loc_key, ir.IRDst.size)" + out += ast.parse(lbl_if).body + out += ast.parse(lbl_if_expr).body if lbl_else: - out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body + lbl_else = "lbl_else = ir.gen_label()" + lbl_else_expr = "lbl_else_expr = ExprLoc(lbl_else.loc_key, ir.IRDst.size)" + out += ast.parse(lbl_else).body + out += ast.parse(lbl_else_expr).body return out def _parse_body(self, body, argument_names): @@ -200,9 +208,9 @@ class SemBuilder(object): cond = statement.test real_body += self._create_labels(lbl_else=True) - lbl_end = ast.Name(id='lbl_end', ctx=ast.Load()) - lbl_if = ast.Name(id='lbl_if', ctx=ast.Load()) - lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \ + lbl_end = ast.Name(id='lbl_end_expr', ctx=ast.Load()) + lbl_if = ast.Name(id='lbl_if_expr', ctx=ast.Load()) + lbl_else = ast.Name(id='lbl_else_expr', ctx=ast.Load()) \ if statement.orelse else lbl_end dst = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), @@ -261,9 +269,11 @@ class SemBuilder(object): ## Replace the block with a call to 'IRBlock' - lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name, - ctx=ast.Load()), - attr='name', ctx=ast.Load()) + lbl_if_name = value= ast.Attribute( + value=ast.Name(id=lbl_name, ctx=ast.Load()), + attr="loc_key", + ctx=ast.Load() + ) assignblks = ast.List(elts=[assignblk], ctx=ast.Load()) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 54cd5a2d..e0651d7f 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -19,6 +19,7 @@ # IR components are : # - ExprInt # - ExprId +# - ExprLoc # - ExprAff # - ExprCond # - ExprMem @@ -48,12 +49,13 @@ TOK_POS_STRICT = "Spos" # Hashing constants EXPRINT = 1 EXPRID = 2 -EXPRAFF = 3 -EXPRCOND = 4 -EXPRMEM = 5 -EXPROP = 6 -EXPRSLICE = 7 -EXPRCOMPOSE = 8 +EXPRLOC = 3 +EXPRAFF = 4 +EXPRCOND = 5 +EXPRMEM = 6 +EXPROP = 7 +EXPRSLICE = 8 +EXPRCOMPOSE = 9 priorities_list = [ @@ -115,6 +117,8 @@ class DiGraphExpr(DiGraph): return node.op elif isinstance(node, ExprId): return node.name + elif isinstance(node, ExprLoc): + return "label_%r" % node.label elif isinstance(node, ExprMem): return "@%d" % node.size elif isinstance(node, ExprCompose): @@ -141,6 +145,29 @@ class DiGraphExpr(DiGraph): return "" + +class LocKey(object): + def __init__(self, key): + self._key = key + + key = property(lambda self: self._key) + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ is not other.__class__: + return False + return self.key == other.key + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "<%s %d>" % (self.__class__.__name__, self._key) + # IR definitions class Expr(object): @@ -383,6 +410,9 @@ class Expr(object): def is_id(self, name=None): return False + def is_label(self, label=None): + return False + def is_aff(self): return False @@ -532,6 +562,7 @@ class ExprId(Expr): if size is None: warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') size = 32 + assert isinstance(name, str) super(ExprId, self).__init__(size) self._name = name @@ -584,6 +615,68 @@ class ExprId(Expr): return True +class ExprLoc(Expr): + + """An ExprLoc represent a Label in Miasm IR. + """ + + __slots__ = Expr.__slots__ + ["_loc_key"] + + def __init__(self, loc_key, size): + """Create an identifier + @loc_key: int, label loc_key + @size: int, identifier's size + """ + assert isinstance(loc_key, LocKey) + super(ExprLoc, self).__init__(size) + self._loc_key = loc_key + + loc_key= property(lambda self: self._loc_key) + + def __reduce__(self): + state = self._loc_key, self._size + return self.__class__, state + + def __new__(cls, loc_key, size): + return Expr.get_object(cls, (loc_key, size)) + + def __str__(self): + return "label_%d" % self._loc_key.key + + def get_r(self, mem_read=False, cst_read=False): + return set() + + def get_w(self): + return set() + + def _exprhash(self): + return hash((EXPRLOC, self._loc_key, self._size)) + + def _exprrepr(self): + return "%s(%r, %d)" % (self.__class__.__name__, self._loc_key, self._size) + + def __contains__(self, expr): + return self == expr + + @visit_chk + def visit(self, callback, test_visit=None): + return self + + def copy(self): + return ExprLoc(self._loc_key, self._size) + + def depth(self): + return 1 + + def graph_recursive(self, graph): + graph.add_node(self) + + def is_label(self, loc_key=None): + if loc_key is not None and self._loc_key != loc_key: + return False + return True + + class ExprAff(Expr): """An ExprAff represent an affection from an Expression to another one. @@ -1226,10 +1319,11 @@ class ExprCompose(Expr): # Expression order for comparaison EXPR_ORDER_DICT = {ExprId: 1, - ExprCond: 2, - ExprMem: 3, - ExprOp: 4, - ExprSlice: 5, + ExprLoc: 2, + ExprCond: 3, + ExprMem: 4, + ExprOp: 5, + ExprSlice: 6, ExprCompose: 7, ExprInt: 8, } @@ -1289,6 +1383,11 @@ def compare_exprs(expr1, expr2): if ret: return ret return cmp(expr1.size, expr2.size) + elif cls1 == ExprLoc: + ret = cmp(expr1.loc_key, expr2.loc_key) + if ret: + return ret + return cmp(expr1.size, expr2.size) elif cls1 == ExprAff: raise NotImplementedError( "Comparaison from an ExprAff not yet implemented") @@ -1379,11 +1478,19 @@ def ExprInt_from(expr, i): def get_expr_ids_visit(expr, ids): """Visitor to retrieve ExprId in @expr @expr: Expr""" - if isinstance(expr, ExprId): + if expr.is_id(): ids.add(expr) return expr +def get_expr_labels_visit(expr, labels): + """Visitor to retrieve ExprLoc in @expr + @expr: Expr""" + if expr.is_label(): + labels.add(expr) + return expr + + def get_expr_ids(expr): """Retrieve ExprId in @expr @expr: Expr""" @@ -1392,6 +1499,14 @@ def get_expr_ids(expr): return ids +def get_expr_labels(expr): + """Retrieve ExprLoc in @expr + @expr: Expr""" + ids = set() + expr.visit(lambda x: get_expr_labels_visit(x, ids)) + return ids + + def test_set(expr, pattern, tks, result): """Test if v can correspond to e. If so, update the context in result. Otherwise, return False @@ -1431,6 +1546,9 @@ def match_expr(expr, pattern, tks, result=None): elif expr.is_id(): return test_set(expr, pattern, tks, result) + elif expr.is_label(): + return test_set(expr, pattern, tks, result) + elif expr.is_op(): # expr need to be the same operation than pattern diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 722d169d..2fe5e26d 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -268,6 +268,9 @@ class Variables_Identifier(object): elif isinstance(expr, m2_expr.ExprId): pass + elif isinstance(expr, m2_expr.ExprLoc): + pass + elif isinstance(expr, m2_expr.ExprMem): self.find_variables_rec(expr.arg) @@ -552,7 +555,8 @@ def possible_values(expr): # Terminal expression if (isinstance(expr, m2_expr.ExprInt) or - isinstance(expr, m2_expr.ExprId)): + isinstance(expr, m2_expr.ExprId) or + isinstance(expr, m2_expr.ExprLoc)): consvals.add(ConstrainedValue(frozenset(), expr)) # Unary expression elif isinstance(expr, m2_expr.ExprSlice): diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py index b3f3af1c..cbfd58d0 100644 --- a/miasm2/expression/parser.py +++ b/miasm2/expression/parser.py @@ -1,6 +1,6 @@ import pyparsing -from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAff +from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprSlice, \ + ExprMem, ExprCond, ExprCompose, ExprOp, ExprAff, LocKey integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: int(t[0])) @@ -16,6 +16,7 @@ str_int = str_int_pos | str_int_neg STR_EXPRINT = pyparsing.Suppress("ExprInt") STR_EXPRID = pyparsing.Suppress("ExprId") +STR_EXPRLOC = pyparsing.Suppress("ExprLoc") STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") STR_EXPRMEM = pyparsing.Suppress("ExprMem") STR_EXPRCOND = pyparsing.Suppress("ExprCond") @@ -23,11 +24,17 @@ STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") STR_EXPROP = pyparsing.Suppress("ExprOp") STR_EXPRAFF = pyparsing.Suppress("ExprAff") +LOCKEY = pyparsing.Suppress("LocKey") + STR_COMMA = pyparsing.Suppress(",") LPARENTHESIS = pyparsing.Suppress("(") RPARENTHESIS = pyparsing.Suppress(")") +T_INF = pyparsing.Suppress("<") +T_SUP = pyparsing.Suppress(">") + + string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') @@ -36,26 +43,33 @@ string = string_quote | string_dquote expr = pyparsing.Forward() -expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) -expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) -expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) -expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS) - -expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \ +expr_int = STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_id = STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS +expr_loc = STR_EXPRLOC + LPARENTHESIS + T_INF + LOCKEY + str_int + T_SUP + STR_COMMA + str_int + RPARENTHESIS +expr_slice = STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_mem = STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS +expr_cond = STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS +expr_compose = STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_op = STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_aff = STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS + +expr << (expr_int | expr_id | expr_loc | expr_slice | expr_mem | expr_cond | \ expr_compose | expr_op | expr_aff) -expr_int.setParseAction(lambda t: ExprInt(*t[0])) -expr_id.setParseAction(lambda t: ExprId(*t[0])) -expr_slice.setParseAction(lambda t: ExprSlice(*t[0])) -expr_mem.setParseAction(lambda t: ExprMem(*t[0])) -expr_cond.setParseAction(lambda t: ExprCond(*t[0])) -expr_compose.setParseAction(lambda t: ExprCompose(*t[0])) -expr_op.setParseAction(lambda t: ExprOp(*t[0])) -expr_aff.setParseAction(lambda t: ExprAff(*t[0])) +def parse_loc_key(t): + assert len(t) == 2 + loc_key, size = LocKey(t[0]), t[1] + return ExprLoc(loc_key, size) + +expr_int.setParseAction(lambda t: ExprInt(*t)) +expr_id.setParseAction(lambda t: ExprId(*t)) +expr_loc.setParseAction(parse_loc_key) +expr_slice.setParseAction(lambda t: ExprSlice(*t)) +expr_mem.setParseAction(lambda t: ExprMem(*t)) +expr_cond.setParseAction(lambda t: ExprCond(*t)) +expr_compose.setParseAction(lambda t: ExprCompose(*t)) +expr_op.setParseAction(lambda t: ExprOp(*t)) +expr_aff.setParseAction(lambda t: ExprAff(*t)) def str_to_expr(str_in): diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 1c6895e0..0ea3a836 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,11 +23,10 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \ - AsmBlock +from miasm2.core.asmblock import AsmSymbolPool, AsmLabel, AsmBlock, \ + AsmConstraint, AsmBlockBad from miasm2.core.graph import DiGraph - class AssignBlock(object): """Represent parallel IR assignment, such as: EAX = EBX @@ -272,7 +271,8 @@ class IRBlock(object): @assignblks: list of AssignBlock """ - assert isinstance(label, AsmLabel) + #assert isinstance(label, AsmLabel) + assert isinstance(label, m2_expr.LocKey) self.label = label for assignblk in assignblks: assert isinstance(assignblk, AssignBlock) @@ -351,7 +351,7 @@ class IRBlock(object): def __str__(self): out = [] - out.append('%s' % self.label) + out.append('label_%s' % self.label.key) for assignblk in self: for dst, src in assignblk.iteritems(): out.append('\t%s = %s' % (dst, src)) @@ -396,24 +396,44 @@ class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, blocks, *args, **kwargs): + def __init__(self, blocks, symbol_pool=None, *args, **kwargs): """Instanciate a DiGraphIR @blocks: IR blocks """ + self.symbol_pool = symbol_pool self._blocks = blocks super(DiGraphIR, self).__init__(*args, **kwargs) + def expr_loc_to_symb(self, expr): + if not expr.is_label(): + return expr + if self.symbol_pool is None: + name = str(expr) + else: + name = self.symbol_pool.loc_key_to_label(expr.loc_key).name + return m2_expr.ExprId(name, expr.size) + def node2lines(self, node): - yield self.DotCellDescription(text=str(node.name), - attr={'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey'}) + if self.symbol_pool is None: + node_name = str(node) + else: + node_name = self.symbol_pool.loc_key_to_label(node) + yield self.DotCellDescription( + text="%s" % node_name, + attr={ + 'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey', + } + ) if node not in self._blocks: yield [self.DotCellDescription(text="NOT PRESENT", attr={})] raise StopIteration for i, assignblk in enumerate(self._blocks[node]): for dst, src in assignblk.iteritems(): - line = "%s = %s" % (dst, src) + new_src = src.visit(self.expr_loc_to_symb) + new_dst = dst.visit(self.expr_loc_to_symb) + line = "%s = %s" % (new_dst, new_src) if self._dot_offset: yield [self.DotCellDescription(text="%-4d" % i, attr={}), self.DotCellDescription(text=line, attr={})] @@ -427,11 +447,10 @@ class DiGraphIR(DiGraph): src_irdst = self._blocks[src].dst edge_color = "blue" if isinstance(src_irdst, m2_expr.ExprCond): - if (expr_is_label(src_irdst.src1) and - src_irdst.src1.name == dst): + src1, src2 = src_irdst.src1, src_irdst.src2 + if src1.is_label(dst): edge_color = "limegreen" - elif (expr_is_label(src_irdst.src2) and - src_irdst.src2.name == dst): + elif src2.is_label(dst): edge_color = "red" return {"color": edge_color} @@ -490,11 +509,16 @@ class IntermediateRepresentation(object): """Transforms an ExprId/ExprInt/label/int into a label @addr: an ExprId/ExprInt/label/int""" - if (isinstance(addr, m2_expr.ExprId) and - isinstance(addr.name, AsmLabel)): - addr = addr.name if isinstance(addr, AsmLabel): return addr + if isinstance(addr, m2_expr.LocKey): + return self.symbol_pool.loc_key_to_label(addr) + elif isinstance(addr, m2_expr.ExprLoc): + label = self.symbol_pool.loc_key_to_label(addr.loc_key) + if label is None: + return None + else: + return label try: addr = int(addr) @@ -508,7 +532,9 @@ class IntermediateRepresentation(object): @addr: an ExprId/ExprInt/label/int""" label = self.get_label(addr) - return self.blocks.get(label, None) + if label is None: + return None + return self.blocks.get(label.loc_key, None) def get_bloc(self, addr): """ @@ -519,9 +545,11 @@ class IntermediateRepresentation(object): return self.get_block(addr) def add_instr(self, line, addr=0, gen_pc_updt=False): - block = AsmBlock(self.gen_label()) + lbl = self.gen_label() + block = AsmBlock(lbl) block.lines = [line] self.add_block(block, gen_pc_updt) + return lbl def getby_offset(self, offset): out = set() @@ -593,8 +621,12 @@ class IntermediateRepresentation(object): @gen_pc_updt: insert PC update effects between instructions """ - label = None + label = block.label ir_blocks_all = [] + + assert not isinstance(block, AsmBlockBad) + + assignments = [] for instr in block.lines: if label is None: assignments = [] @@ -602,11 +634,11 @@ class IntermediateRepresentation(object): split = self.add_instr_to_irblock(block, instr, assignments, ir_blocks_all, gen_pc_updt) if split: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) label = None assignments = [] if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) + ir_blocks_all.append(IRBlock(label.loc_key, assignments)) new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) for irblock in new_ir_blocks_all: @@ -645,12 +677,22 @@ class IntermediateRepresentation(object): continue next_lbl = block.get_next() if next_lbl is None: - dst = m2_expr.ExprId(self.get_next_label(block.lines[-1]), - self.pc.size) + lbl = None + if block.lines: + line = block.lines[-1] + if line.offset is not None: + lbl = self.symbol_pool.getby_offset_create(line.offset + line.l) + if lbl is None: + lbl = self.symbol_pool.gen_label() + block.add_cst(lbl, AsmConstraint.c_next, self.symbol_pool) else: - dst = m2_expr.ExprId(next_lbl, - self.pc.size) - assignblk = AssignBlock({self.IRDst: dst}, irblock[-1].instr) + lbl = next_lbl + dst = m2_expr.ExprLoc(lbl.loc_key, self.pc.size) + if irblock.assignblks: + instr = irblock.assignblks[-1].instr + else: + instr = None + assignblk = AssignBlock({self.IRDst: dst}, instr) ir_blocks[index] = IRBlock(irblock.label, list(irblock.assignblks) + [assignblk]) def post_add_block(self, block, ir_blocks): @@ -676,7 +718,6 @@ class IntermediateRepresentation(object): def get_instr_label(self, instr): """Returns the label associated to an instruction @instr: current instruction""" - return self.symbol_pool.getby_offset_create(instr.offset) def gen_label(self): @@ -684,6 +725,14 @@ class IntermediateRepresentation(object): label = self.symbol_pool.gen_label() return label + def gen_label_and_expr(self, size): + """ + Return a label and it's corresponding ExprLoc + @size: size of expression + """ + label = self.gen_label() + return label, m2_expr.ExprLoc(label.loc_key, size) + def get_next_label(self, instr): label = self.symbol_pool.getby_offset_create(instr.offset + instr.l) return label @@ -728,14 +777,14 @@ class IntermediateRepresentation(object): out = set() while todo: dst = todo.pop() - if expr_is_label(dst): + if dst.is_label(): done.add(dst) - elif isinstance(dst, (m2_expr.ExprMem, m2_expr.ExprInt)): + elif dst.is_mem() or dst.is_int(): done.add(dst) - elif isinstance(dst, m2_expr.ExprCond): + elif dst.is_cond(): todo.add(dst.src1) todo.add(dst.src2) - elif isinstance(dst, m2_expr.ExprId): + elif dst.is_id(): out.add(dst) else: done.add(dst) @@ -769,15 +818,16 @@ class IntermediateRepresentation(object): """ Gen irbloc digraph """ - self._graph = DiGraphIR(self.blocks) + self._graph = DiGraphIR(self.blocks, self.symbol_pool) for lbl, block in self.blocks.iteritems(): + assert isinstance(lbl, m2_expr.LocKey) self._graph.add_node(lbl) for dst in self.dst_trackback(block): if dst.is_int(): dst_lbl = self.symbol_pool.getby_offset_create(int(dst)) - dst = m2_expr.ExprId(dst_lbl, self.pc.size) - if expr_is_label(dst): - self._graph.add_edge(lbl, dst.name) + dst = m2_expr.ExprLoc(dst_lbl.loc_key, self.pc.size) + if dst.is_label(): + self._graph.add_edge(lbl, dst.loc_key) @property def graph(self): @@ -816,9 +866,9 @@ class IntermediateRepresentation(object): assert set(assignblk.keys()) == set([self.IRDst]) if len(self.graph.successors(block.label)) != 1: continue - if not expr_is_label(assignblk[self.IRDst]): + if not assignblk[self.IRDst].is_label(): continue - dst = assignblk[self.IRDst].name + dst = assignblk[self.IRDst].loc_key if dst == block.label: # Infinite loop block continue @@ -828,7 +878,7 @@ class IntermediateRepresentation(object): modified = False for label in jmp_blocks: block = self.blocks[label] - dst_label = block.dst.name + dst_loc_key = block.dst parents = self.graph.predecessors(block.label) for lbl in parents: parent = self.blocks.get(lbl, None) @@ -836,7 +886,7 @@ class IntermediateRepresentation(object): continue dst = parent.dst if dst.is_id(block.label): - dst = m2_expr.ExprId(dst_label, dst.size) + dst = m2_expr.ExprLoc(dst_loc_key, dst.size) self.graph.discard_edge(lbl, block.label) self.graph.discard_edge(block.label, dst_label) @@ -846,13 +896,13 @@ class IntermediateRepresentation(object): elif dst.is_cond(): src1, src2 = dst.src1, dst.src2 if src1.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2) + dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_label, dst.size), dst.src2) self.graph.discard_edge(lbl, block.label) self.graph.discard_edge(block.label, dst_label) self.graph.add_uniq_edge(lbl, dst_label) modified = True if src2.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size)) + dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_label, dst.size)) self.graph.discard_edge(lbl, block.label) self.graph.discard_edge(block.label, dst_label) self.graph.add_uniq_edge(lbl, dst_label) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 4070f261..d78298a3 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -2,8 +2,8 @@ import warnings import logging from collections import MutableMapping -from miasm2.expression.expression import ExprOp, ExprId, ExprInt, ExprMem, \ - ExprCompose, ExprSlice, ExprCond, ExprAff +from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCompose, ExprSlice, ExprCond, ExprAff from miasm2.expression.simplifications import expr_simp from miasm2.core import asmblock from miasm2.ir.ir import AssignBlock @@ -812,6 +812,7 @@ class SymbolicExecutionEngine(object): self.expr_to_visitor = { ExprInt: self.eval_exprint, ExprId: self.eval_exprid, + ExprLoc: self.eval_exprloc, ExprMem: self.eval_exprmem, ExprSlice: self.eval_exprslice, ExprCond: self.eval_exprcond, @@ -885,10 +886,16 @@ class SymbolicExecutionEngine(object): def eval_exprid(self, expr, **kwargs): """[DEV]: Evaluate an ExprId using the current state""" - if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: - ret = ExprInt(expr.name.offset, expr.size) + ret = self.symbols.read(expr) + return ret + + def eval_exprloc(self, expr, **kwargs): + """[DEV]: Evaluate an ExprLoc using the current state""" + label = self.ir_arch.symbol_pool.loc_key_to_label(expr.loc_key) + if label.offset is not None: + ret = ExprInt(label.offset, expr.size) else: - ret = self.symbols.read(expr) + ret = expr return ret def eval_exprmem(self, expr, **kwargs): @@ -1040,7 +1047,17 @@ class SymbolicExecutionEngine(object): self.dump(mems=False) self.dump(ids=False) print '_' * 80 - return self.eval_expr(self.ir_arch.IRDst) + dst = self.eval_expr(self.ir_arch.IRDst) + + # Best effort to resolve destination as ExprLoc + if dst.is_label(): + ret = dst + elif dst.is_int(): + label = self.ir_arch.symbol_pool.getby_offset_create(int(dst)) + ret = ExprLoc(label.loc_key, dst.size) + else: + ret = dst + return ret def run_block_at(self, addr, step=False): """ diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 226f26f1..a8e3a254 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,6 +18,14 @@ class TranslatorC(Translator): '>>>': 'rot_right', } + def __init__(self, symbol_pool=None, **kwargs): + """Instance a C translator + @symbol_pool: AsmSymbolPool instance + """ + super(TranslatorC, self).__init__(**kwargs) + # symbol pool + self.symbol_pool = symbol_pool + def _size2mask(self, size): """Return a C string corresponding to the size2mask operation, with support for @size <= 128""" @@ -44,16 +52,31 @@ class TranslatorC(Translator): ) return "0x%x" % expr.arg.arg + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.symbol_pool is None: + return str(loc_key) + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + + if offset is None: + return name + return "0x%x" % offset + def from_ExprAff(self, expr): - return "%s = %s" % tuple(map(self.from_expr, (expr.dst, expr.src))) + new_dst = self.from_expr(expr.dst) + new_src = self.from_expr(expr.src) + return "%s = %s" % (new_dst, new_src) def from_ExprCond(self, expr): - return "(%s?%s:%s)" % tuple(map(self.from_expr, - (expr.cond, expr.src1, expr.src2))) + new_cond = self.from_expr(expr.cond) + new_src1 = self.from_expr(expr.src1) + new_src2 = self.from_expr(expr.src2) + return "(%s?%s:%s)" % (new_cond, new_src1, new_src2) def from_ExprMem(self, expr): - return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, - self.from_expr(expr.arg)) + new_ptr = self.from_expr(expr.arg) + return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, new_ptr) def from_ExprOp(self, expr): if len(expr.args) == 1: @@ -63,9 +86,11 @@ class TranslatorC(Translator): self._size2mask(expr.args[0].size), ) elif expr.op in ['cntleadzeros', 'cnttrailzeros']: - return "%s(0x%x, %s)" % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0])) + return "%s(0x%x, %s)" % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]) + ) elif expr.op == '!': return "(~ %s)&%s" % ( self.from_expr(expr.args[0]), @@ -78,7 +103,10 @@ class TranslatorC(Translator): expr.op.startswith("fxam_c") or expr.op in ["-", "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", "fchs"]): - return "%s(%s)" % (expr.op, self.from_expr(expr.args[0])) + return "%s(%s)" % ( + expr.op, + self.from_expr(expr.args[0]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -91,10 +119,12 @@ class TranslatorC(Translator): self._size2mask(expr.args[1].size), ) elif expr.op in self.dct_shift: - return 'SHIFT_%s(%d, %s, %s)' % (self.dct_shift[expr.op].upper(), - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return 'SHIFT_%s(%d, %s, %s)' % ( + self.dct_shift[expr.op].upper(), + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.is_associative() or expr.op in ["%", "/"]: oper = ['(%s&%s)' % ( self.from_expr(arg), @@ -105,9 +135,11 @@ class TranslatorC(Translator): return "((%s)&%s)" % (oper, self._size2mask(expr.args[0].size)) elif expr.op in ['-']: return '(((%s&%s) %s (%s&%s))&%s)' % ( - self.from_expr(expr.args[0]), self._size2mask(expr.args[0].size), + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), str(expr.op), - self.from_expr(expr.args[1]), self._size2mask(expr.args[1].size), + self.from_expr(expr.args[1]), + self._size2mask(expr.args[1].size), self._size2mask(expr.args[0].size) ) elif expr.op in self.dct_rot: @@ -125,21 +157,29 @@ class TranslatorC(Translator): elif (expr.op.startswith("fcom") or expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", "fprem", "fprem_lsb", "fyl2x", "fpatan"]): - return "fpu_%s(%s, %s)" % (expr.op, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "fpu_%s(%s, %s)" % ( + expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op == "segm": return "segm2addr(jitcpu, %s, %s)" % ( - self.from_expr(expr.args[0]), self.from_expr(expr.args[1])) + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ['udiv', 'umod', 'idiv', 'imod']: - return '%s%d(%s, %s)' % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return '%s%d(%s, %s)' % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ["bcdadd", "bcdadd_cf"]: - return "%s_%d(%s, %s)" % (expr.op, expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "%s_%d(%s, %s)" % ( + expr.op, expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -159,9 +199,11 @@ class TranslatorC(Translator): def from_ExprSlice(self, expr): # XXX check mask for 64 bit & 32 bit compat - return "((%s>>%d) &%s)" % (self.from_expr(expr.arg), - expr.start, - self._size2mask(expr.stop - expr.start)) + return "((%s>>%d) &%s)" % ( + self.from_expr(expr.arg), + expr.start, + self._size2mask(expr.stop - expr.start) + ) def from_ExprCompose(self, expr): out = [] @@ -178,10 +220,12 @@ class TranslatorC(Translator): dst_cast = "uint%d_t" % size for index, arg in expr.iter_args(): - out.append("(((%s)(%s & %s)) << %d)" % (dst_cast, - self.from_expr(arg), - self._size2mask(arg.size), - index)) + out.append("(((%s)(%s & %s)) << %d)" % ( + dst_cast, + self.from_expr(arg), + self._size2mask(arg.size), + index) + ) out = ' | '.join(out) return '(' + out + ')' diff --git a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py index d7369e9e..e05f5e4d 100644 --- a/miasm2/ir/translators/python.py +++ b/miasm2/ir/translators/python.py @@ -20,6 +20,9 @@ class TranslatorPython(Translator): def from_ExprId(self, expr): return str(expr) + def from_ExprLoc(self, expr): + return str(expr) + def from_ExprMem(self, expr): return "memory(%s, 0x%x)" % (self.from_expr(expr.arg), expr.size / 8) diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index 18bcb9bd..6a6fec16 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -120,7 +120,7 @@ class TranslatorSMT2(Translator): # Implemented language __LANG__ = "smt2" - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", symbol_pool=None, **kwargs): """Instance a SMT2 translator @endianness: (optional) memory endianness """ @@ -129,6 +129,8 @@ class TranslatorSMT2(Translator): self._mem = SMT2Mem(endianness) # map of translated bit vectors self._bitvectors = dict() + # symbol pool + self.symbol_pool = symbol_pool def from_ExprInt(self, expr): return bit_vec_val(expr.arg.arg, expr.size) @@ -148,6 +150,23 @@ class TranslatorSMT2(Translator): self._bitvectors[str(expr)] = expr.size return str(expr) + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.symbol_pool is None: + if str(loc_key) not in self._bitvectors: + self._bitvectors[str(loc_key)] = expr.size + return str(loc_key) + + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + + if offset is None: + return bit_vec_val(str(offset), expr.size) + name = "|{}|".format(str(name)) + if name not in self._bitvectors: + self._bitvectors[name] = expr.size + return name + def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) # size to read from memory diff --git a/miasm2/ir/translators/translator.py b/miasm2/ir/translators/translator.py index e3641843..557fdabe 100644 --- a/miasm2/ir/translators/translator.py +++ b/miasm2/ir/translators/translator.py @@ -53,6 +53,12 @@ class Translator(object): """ raise NotImplementedError("Abstract method") + def from_ExprLoc(self, expr): + """Translate an ExprLoc + @expr: ExprLoc to translate + """ + raise NotImplementedError("Abstract method") + def from_ExprCompose(self, expr): """Translate an ExprCompose @expr: ExprCompose to translate @@ -100,6 +106,7 @@ class Translator(object): # Handle Expr type handlers = {m2_expr.ExprInt: self.from_ExprInt, m2_expr.ExprId: self.from_ExprId, + m2_expr.ExprLoc: self.from_ExprLoc, m2_expr.ExprCompose: self.from_ExprCompose, m2_expr.ExprSlice: self.from_ExprSlice, m2_expr.ExprOp: self.from_ExprOp, diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index 536daff1..544dd26f 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -116,7 +116,7 @@ class TranslatorZ3(Translator): # Operations translation trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", symbol_pool=None, **kwargs): """Instance a Z3 translator @endianness: (optional) memory endianness """ @@ -126,6 +126,8 @@ class TranslatorZ3(Translator): super(TranslatorZ3, self).__init__(**kwargs) self._mem = Z3Mem(endianness) + # symbol pool + self.symbol_pool = symbol_pool def from_ExprInt(self, expr): return z3.BitVecVal(expr.arg.arg, expr.size) @@ -136,6 +138,18 @@ class TranslatorZ3(Translator): else: return z3.BitVec(str(expr), expr.size) + def from_ExprLoc(self, expr): + if self.symbol_pool is None: + # No symbol_pool, fallback to default name + return z3.BitVec(str(expr), expr.size) + label = self.symbol_pool.loc_key_to_label(expr.loc_key) + if label is None: + # No symbol_pool, fallback to default name + return z3.BitVec(str(expr), expr.size) + elif label.offset is None: + return z3.BitVec(label.name, expr.size) + return z3.BitVecVal(label.offset, expr.size) + def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) return self._mem.get(addr, expr.size) diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index b10a9257..519664e3 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -4,16 +4,17 @@ Module to generate C code for a given native @block import miasm2.expression.expression as m2_expr from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.translators import Translator -from miasm2.core.asmblock import expr_is_label, AsmBlockBad, AsmLabel -# Miasm to C translator -TRANSLATOR = Translator.to_language("C") +from miasm2.ir.translators.C import TranslatorC +from miasm2.core.asmblock import AsmBlockBad -SIZE_TO_MASK = {size: TRANSLATOR.from_expr(m2_expr.ExprInt(0, size).mask) +TRANSLATOR_NO_SYMBOL = TranslatorC(symbol_pool=None) + +SIZE_TO_MASK = {size: TRANSLATOR_NO_SYMBOL.from_expr(ExprInt(0, size).mask) for size in (1, 2, 3, 7, 8, 16, 32, 64, 128)} + class Attributes(object): """ @@ -100,6 +101,7 @@ class CGen(object): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.pc + self.translator = TranslatorC(self.ir_arch.symbol_pool) self.init_arch_C() def init_arch_C(self): @@ -128,7 +130,7 @@ class CGen(object): def id_to_c(self, expr): """Translate Expr @expr into corresponding C code""" - return TRANSLATOR.from_expr(self.patch_c_id(expr)) + return self.translator.from_expr(self.patch_c_id(expr)) def add_label_index(self, dst2index, lbl): """Insert @lbl to the dictionnary @dst2index with a uniq value @@ -149,7 +151,7 @@ class CGen(object): dst = m2_expr.ExprInt(offset, self.ir_arch.IRDst.size) new_assignblk[self.ir_arch.IRDst] = dst irs = [AssignBlock(new_assignblk, instr)] - return IRBlock(self.ir_arch.get_instr_label(instr), irs) + return IRBlock(self.ir_arch.get_instr_label(instr).loc_key, irs) def block2assignblks(self, block): """ @@ -292,8 +294,8 @@ class CGen(object): offset = int(expr) self.add_label_index(dst2index, offset) return ("%s" % dst2index[offset], hex(offset)) - if expr_is_label(expr): - label = expr.name + if expr.is_label(): + label = self.ir_arch.symbol_pool.loc_key_to_label(expr.loc_key) if label.offset != None: offset = label.offset self.add_label_index(dst2index, offset) @@ -577,12 +579,13 @@ class CGen(object): for index, irblock in enumerate(irblocks): new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) - if new_irblock.label.offset is None: + label = self.ir_arch.symbol_pool.loc_key_to_label(new_irblock.label) + if label.offset is None: out.append("%-40s // %.16X %s" % - (str(new_irblock.label.name) + ":", instr.offset, instr)) + (str(label.name) + ":", instr.offset, instr)) else: out.append("%-40s // %.16X %s" % - (self.label_to_jitlabel(new_irblock.label) + ":", instr.offset, instr)) + (self.label_to_jitlabel(label) + ":", instr.offset, instr)) if index == 0: out += self.gen_pre_code(instr_attrib) out += self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, new_irblock) diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py index 452b6d84..a8d30f46 100644 --- a/miasm2/jitter/jitcore_llvm.py +++ b/miasm2/jitter/jitcore_llvm.py @@ -78,12 +78,13 @@ class JitCore_LLVM(jitcore.JitCore): """Add a block to JiT and JiT it. @block: the block to add """ + block_hash = self.hash_block(block) fname_out = os.path.join(self.tempdir, "%s.bc" % block_hash) if not os.access(fname_out, os.R_OK): # Build a function in the context - func = LLVMFunction(self.context, LLVMFunction.canonize_label_name(block.label)) + func = LLVMFunction(self.context, block.label) # Set log level func.log_regs = self.log_regs @@ -114,7 +115,7 @@ class JitCore_LLVM(jitcore.JitCore): else: # The cache file exists: function can be loaded from cache - ptr = self.context.get_ptr_from_cache(fname_out, LLVMFunction.canonize_label_name(block.label)) + ptr = self.context.get_ptr_from_cache(fname_out, block.label) # Store a pointer on the function jitted code self.lbl2jitbloc[block.label.offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index 799848ab..af9f09e6 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -48,7 +48,7 @@ class JitCore_Python(jitcore.JitCore): vmmngr = cpu.vmmngr # Keep current location in irblocks - cur_label = label + cur_label = label.loc_key # Required to detect new instructions offsets_jitted = set() @@ -57,6 +57,7 @@ class JitCore_Python(jitcore.JitCore): exec_engine = self.symbexec expr_simp = exec_engine.expr_simp + known_loc_keys = set(irb.label for irb in irblocks) # For each irbloc inside irblocks while True: @@ -64,6 +65,7 @@ class JitCore_Python(jitcore.JitCore): for irb in irblocks: if irb.label == cur_label: break + else: raise RuntimeError("Irblocks must end with returning an " "ExprInt instance") @@ -75,7 +77,7 @@ class JitCore_Python(jitcore.JitCore): for assignblk in irb: instr = assignblk.instr # For each new instruction (in assembly) - if instr.offset not in offsets_jitted: + if instr is not None and instr.offset not in offsets_jitted: # Test exceptions vmmngr.check_invalid_code_blocs() vmmngr.check_memory_breakpoint() @@ -120,8 +122,10 @@ class JitCore_Python(jitcore.JitCore): # Manage resulting address if isinstance(ad, m2_expr.ExprInt): return ad.arg.arg - elif isinstance(ad, m2_expr.ExprId): - cur_label = ad.name + elif isinstance(ad, m2_expr.ExprLoc): + cur_label = ad.loc_key + if cur_label not in known_loc_keys: + return cur_label else: raise NotImplementedError("Type not handled: %s" % ad) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index db486b4f..28200997 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -313,7 +313,6 @@ class jitter(object): Check exceptions before breakpoints.""" self.pc = pc - # Callback called before exec if self.exec_cb is not None: res = self.exec_cb(self) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index dd3afed6..3f7d0c6d 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -69,6 +69,26 @@ class LLVMContext(): self.target_machine = target.create_target_machine() self.init_exec_engine() + + def canonize_label_name(self, label): + """Canonize @label names to a common form. + @label: str or asmlabel instance""" + if isinstance(label, str): + return label + if isinstance(label, m2_expr.Expr) and expr.is_label(): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label.index) + if isinstance(label, (int, long)): + fds + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label) + + if isinstance(label, m2_asmblock.AsmLabel): + if label.offset is None: + return "label_%s" % label.name + else: + return "label_%X" % label.offset + else: + raise ValueError("label must either be str or asmlabel") + def optimise_level(self, level=2): """Set the optimisation level to @level from 0 to 2 0: non-optimized @@ -324,6 +344,7 @@ class LLVMContext_JIT(LLVMContext): def get_ptr_from_cache(self, file_name, func_name): "Load @file_name and return a pointer on the jitter @func_name" # We use an empty module to avoid loosing time on function building + func_name = self.canonize_label_name(func_name) empty_module = llvm.parse_assembly("") empty_module.fname_out = file_name @@ -379,6 +400,7 @@ class LLVMFunction(): def __init__(self, llvm_context, name="fc", new_module=True): "Create a new function with name @name" + name = self.canonize_label_name(name) self.llvm_context = llvm_context if new_module: self.llvm_context.new_module() @@ -483,14 +505,16 @@ class LLVMFunction(): var_casted = var self.builder.ret(var_casted) - @staticmethod - def canonize_label_name(label): + def canonize_label_name(self, label): """Canonize @label names to a common form. @label: str or asmlabel instance""" if isinstance(label, str): return label - if m2_asmblock.expr_is_label(label): - label = label.name + if isinstance(label, m2_expr.Expr) and expr.is_label(): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label.index) + if isinstance(label, m2_expr.LocKey): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(label) + if isinstance(label, m2_asmblock.AsmLabel): if label.offset is None: return "label_%s" % label.name @@ -629,15 +653,15 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret + if expr.is_label(): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(expr.loc_key) + offset = label.offset + ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) + self.update_cache(expr, ret) + return ret + if isinstance(expr, m2_expr.ExprId): name = expr.name - if not isinstance(name, str): - # Resolve label - offset = name.offset - ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) - self.update_cache(expr, ret) - return ret - try: # If expr.name is already known (args) return self.local_vars[name] @@ -1078,7 +1102,7 @@ class LLVMFunction(): index = dst2case.get(value, i) to_eval = to_eval.replace_expr({value: m2_expr.ExprInt(index, value.size)}) dst2case[value] = index - if m2_asmblock.expr_is_int_or_label(value): + if value.is_int() or value.is_label(): case2dst[i] = value else: case2dst[i] = self.add_ir(value) @@ -1105,12 +1129,13 @@ class LLVMFunction(): self.main_stream = False if isinstance(dst, m2_expr.ExprInt): - dst = m2_expr.ExprId(self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)), - dst.size) + label = self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)) + dst = m2_expr.ExprLoc(label.loc_key, dst.size) - if m2_asmblock.expr_is_label(dst): - bbl = self.get_basic_bloc_by_label(dst) - offset = dst.name.offset + if isinstance(dst, m2_expr.ExprLoc): + label = self.llvm_context.ir_arch.symbol_pool.loc_key_to_label(dst.loc_key) + bbl = self.get_basic_bloc_by_label(label) + offset = label.offset if bbl is not None: # "local" jump, inside this function if offset is None: diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py index 2fe4cd3f..ea6c2c98 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -162,8 +162,7 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs): new_size = pe.SHList[i + 1].addr - section.addr section.size = new_size section.rawsize = new_size - section.data = strpatchwork.StrPatchwork( - section.data[:new_size]) + section.data = section.data[:new_size] section.offset = section.addr # Last section alignement diff --git a/test/analysis/data_flow.py b/test/analysis/data_flow.py index d0a85e13..24335f45 100644 --- a/test/analysis/data_flow.py +++ b/test/analysis/data_flow.py @@ -1,10 +1,12 @@ """ Test cases for dead code elimination""" from miasm2.expression.expression import ExprId, ExprInt, ExprAff, ExprMem -from miasm2.core.asmblock import AsmLabel +from miasm2.core.asmblock import AsmLabel, AsmSymbolPool from miasm2.analysis.data_flow import * from miasm2.ir.analysis import ira from miasm2.ir.ir import IRBlock, AssignBlock +symbol_pool = AsmSymbolPool() + a = ExprId("a", 32) b = ExprId("b", 32) c = ExprId("c", 32) @@ -24,13 +26,13 @@ CST1 = ExprInt(0x11, 32) CST2 = ExprInt(0x12, 32) CST3 = ExprInt(0x13, 32) -LBL0 = AsmLabel("lbl0") -LBL1 = AsmLabel("lbl1") -LBL2 = AsmLabel("lbl2") -LBL3 = AsmLabel("lbl3") -LBL4 = AsmLabel("lbl4") -LBL5 = AsmLabel("lbl5") -LBL6 = AsmLabel("lbl6") +LBL0 = symbol_pool.add_label("lbl0", 0) +LBL1 = symbol_pool.add_label("lbl1", 1) +LBL2 = symbol_pool.add_label("lbl2", 2) +LBL3 = symbol_pool.add_label("lbl3", 3) +LBL4 = symbol_pool.add_label("lbl4", 4) +LBL5 = symbol_pool.add_label("lbl5", 5) +LBL6 = symbol_pool.add_label("lbl6", 6) IRDst = ExprId('IRDst', 32) dummy = ExprId('dummy', 32) @@ -45,7 +47,7 @@ def gen_irblock(label, exprs_list): irs.append(AssignBlock(exprs)) irs.append(AssignBlock({IRDst:dummy})) - irbl = IRBlock(label, irs) + irbl = IRBlock(label.loc_key, irs) return irbl @@ -77,7 +79,7 @@ class IRATest(ira): # graph 1 : Simple graph with dead and alive variables -G1_IRA = IRATest() +G1_IRA = IRATest(symbol_pool) G1_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G1_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) @@ -89,7 +91,7 @@ G1_IRA.graph.add_uniq_edge(G1_IRB0.label, G1_IRB1.label) G1_IRA.graph.add_uniq_edge(G1_IRB1.label, G1_IRB2.label) # Expected output for graph 1 -G1_EXP_IRA = IRATest() +G1_EXP_IRA = IRATest(symbol_pool) G1_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(b, CST2)]]) G1_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(a, b)]]) @@ -100,7 +102,7 @@ G1_EXP_IRA.blocks = {irb.label : irb for irb in [G1_EXP_IRB0, G1_EXP_IRB1, # graph 2 : Natural loop with dead variable -G2_IRA = IRATest() +G2_IRA = IRATest(symbol_pool) G2_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G2_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -113,7 +115,7 @@ G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB2.label) G2_IRA.graph.add_uniq_edge(G2_IRB1.label, G2_IRB1.label) # Expected output for graph 2 -G2_EXP_IRA = IRATest() +G2_EXP_IRA = IRATest(symbol_pool) G2_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(r, CST1)]]) G2_EXP_IRB1 = gen_irblock(LBL1, [[]]) @@ -124,7 +126,7 @@ G2_EXP_IRA.blocks = {irb.label : irb for irb in [G2_EXP_IRB0, G2_EXP_IRB1, # graph 3 : Natural loop with alive variables -G3_IRA = IRATest() +G3_IRA = IRATest(symbol_pool) G3_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -137,7 +139,7 @@ G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB2.label) G3_IRA.graph.add_uniq_edge(G3_IRB1.label, G3_IRB1.label) # Expected output for graph 3 -G3_EXP_IRA = IRATest() +G3_EXP_IRA = IRATest(symbol_pool) G3_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G3_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -148,7 +150,7 @@ G3_EXP_IRA.blocks = {irb.label : irb for irb in [G3_EXP_IRB0, G3_EXP_IRB1, # graph 4 : If/else with dead variables -G4_IRA = IRATest() +G4_IRA = IRATest(symbol_pool) G4_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G4_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -164,7 +166,7 @@ G4_IRA.graph.add_uniq_edge(G4_IRB1.label, G4_IRB3.label) G4_IRA.graph.add_uniq_edge(G4_IRB2.label, G4_IRB3.label) # Expected output for graph 4 -G4_EXP_IRA = IRATest() +G4_EXP_IRA = IRATest(symbol_pool) G4_EXP_IRB0 = gen_irblock(LBL0, [[]]) G4_EXP_IRB1 = gen_irblock(LBL1, [[]]) @@ -176,7 +178,7 @@ G4_EXP_IRA.blocks = {irb.label : irb for irb in [G4_EXP_IRB0, G4_EXP_IRB1, # graph 5 : Loop and If/else with dead variables -G5_IRA = IRATest() +G5_IRA = IRATest(symbol_pool) G5_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G5_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -197,7 +199,7 @@ G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB5.label) G5_IRA.graph.add_uniq_edge(G5_IRB4.label, G5_IRB1.label) # Expected output for graph 5 -G5_EXP_IRA = IRATest() +G5_EXP_IRA = IRATest(symbol_pool) G5_EXP_IRB0 = gen_irblock(LBL0, [[]]) G5_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -213,7 +215,7 @@ G5_EXP_IRA.blocks = {irb.label : irb for irb in [G5_EXP_IRB0, G5_EXP_IRB1, # graph 6 : Natural loop with dead variables symetric affectation # (a = b <-> b = a ) -G6_IRA = IRATest() +G6_IRA = IRATest(symbol_pool) G6_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G6_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -229,7 +231,7 @@ G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB1.label) G6_IRA.graph.add_uniq_edge(G6_IRB2.label, G6_IRB3.label) # Expected output for graph 6 -G6_EXP_IRA = IRATest() +G6_EXP_IRA = IRATest(symbol_pool) G6_EXP_IRB0 = gen_irblock(LBL0, [[]]) G6_EXP_IRB1 = gen_irblock(LBL1, [[]]) @@ -241,7 +243,7 @@ G6_EXP_IRA.blocks = {irb.label : irb for irb in [G6_EXP_IRB0, G6_EXP_IRB1, # graph 7 : Double entry loop with dead variables -G7_IRA = IRATest() +G7_IRA = IRATest(symbol_pool) G7_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(r, CST1)]]) G7_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -259,7 +261,7 @@ G7_IRA.graph.add_uniq_edge(G7_IRB0.label, G7_IRB2.label) # Expected output for graph 7 -G7_EXP_IRA = IRATest() +G7_EXP_IRA = IRATest(symbol_pool) G7_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(r, CST1)]]) G7_EXP_IRB1 = gen_irblock(LBL1, [[]]) @@ -271,7 +273,7 @@ G7_EXP_IRA.blocks = {irb.label : irb for irb in [G7_EXP_IRB0, G7_EXP_IRB1, # graph 8 : Nested loops with dead variables -G8_IRA = IRATest() +G8_IRA = IRATest(symbol_pool) G8_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST1)]]) G8_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)]]) @@ -291,7 +293,7 @@ G8_IRA.graph.add_uniq_edge(G8_IRB3.label, G8_IRB2.label) # Expected output for graph 8 -G8_EXP_IRA = IRATest() +G8_EXP_IRA = IRATest(symbol_pool) G8_EXP_IRB0 = gen_irblock(LBL0, [[], []]) G8_EXP_IRB1 = gen_irblock(LBL1, [[]]) @@ -303,7 +305,7 @@ G8_EXP_IRA.blocks = {irb.label : irb for irb in [G8_EXP_IRB0, G8_EXP_IRB1, # graph 9 : Miultiple-exits loops with dead variables -G9_IRA = IRATest() +G9_IRA = IRATest(symbol_pool) G9_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST1)]]) G9_IRB1 = gen_irblock(LBL1, [[ExprAff(a, a+CST1)], [ExprAff(b, b+CST1)]]) @@ -326,7 +328,7 @@ G9_IRA.graph.add_uniq_edge(G9_IRB3.label, G9_IRB4.label) # Expected output for graph 9 -G9_EXP_IRA = IRATest() +G9_EXP_IRA = IRATest(symbol_pool) G9_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(b, CST1)]]) G9_EXP_IRB1 = gen_irblock(LBL1, [[], [ExprAff(b, b+CST1)]]) @@ -341,7 +343,7 @@ G9_EXP_IRA.blocks = {irb.label : irb for irb in [G9_EXP_IRB0, G9_EXP_IRB1, # graph 10 : Natural loop with alive variables symetric affectation # (a = b <-> b = a ) -G10_IRA = IRATest() +G10_IRA = IRATest(symbol_pool) G10_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)]]) G10_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -357,7 +359,7 @@ G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB1.label) G10_IRA.graph.add_uniq_edge(G10_IRB2.label, G10_IRB3.label) # Expected output for graph 10 -G10_EXP_IRA = IRATest() +G10_EXP_IRA = IRATest(symbol_pool) G10_EXP_IRB0 = gen_irblock(LBL0, [[]]) G10_EXP_IRB1 = gen_irblock(LBL1, [[]]) @@ -369,7 +371,7 @@ G10_EXP_IRA.blocks = {irb.label : irb for irb in [G10_EXP_IRB0, G10_EXP_IRB1, # graph 11 : If/Else conditions with alive variables -G11_IRA = IRATest() +G11_IRA = IRATest(symbol_pool) G11_IRB0 = gen_irblock(LBL0, [[ExprAff(a, b)]]) G11_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -388,7 +390,7 @@ G11_IRA.graph.add_uniq_edge(G11_IRB1.label, G11_IRB2.label) # Expected output for graph 11 -G11_EXP_IRA = IRATest() +G11_EXP_IRA = IRATest(symbol_pool) G11_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, b)]]) G11_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(b, a)]]) @@ -402,7 +404,7 @@ G11_EXP_IRA.blocks = {irb.label : irb for irb in [G11_EXP_IRB0, G11_EXP_IRB1, # graph 12 : Graph with multiple out points and useless definitions # of return register -G12_IRA = IRATest() +G12_IRA = IRATest(symbol_pool) G12_IRB0 = gen_irblock(LBL0, [[ExprAff(r, CST1)], [ExprAff(a, CST2)]]) G12_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -421,7 +423,7 @@ G12_IRA.graph.add_uniq_edge(G12_IRB2.label, G12_IRB4.label) G12_IRA.graph.add_uniq_edge(G12_IRB4.label, G12_IRB5.label) # Expected output for graph 12 -G12_EXP_IRA = IRATest() +G12_EXP_IRA = IRATest(symbol_pool) G12_EXP_IRB0 = gen_irblock(LBL0, [[], []]) G12_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, CST2)]]) @@ -437,7 +439,7 @@ G12_EXP_IRA.blocks = {irb.label : irb for irb in [G12_EXP_IRB0, G12_EXP_IRB1, # graph 13 : Graph where a leaf has lost its son -G13_IRA = IRATest() +G13_IRA = IRATest(symbol_pool) G13_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G13_IRB1 = gen_irblock(LBL1, [[ExprAff(r, b)]]) @@ -455,7 +457,7 @@ G13_IRA.graph.add_uniq_edge(G13_IRB2.label, G13_IRB3.label) G13_IRA.graph.add_uniq_edge(G13_IRB4.label, G13_IRB2.label) # Expected output for graph 13 -G13_EXP_IRA = IRATest() +G13_EXP_IRA = IRATest(symbol_pool) G13_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(b, CST2)]]) G13_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, b)]]) @@ -472,7 +474,7 @@ G13_EXP_IRA.blocks = {irb.label: irb for irb in [G13_EXP_IRB0, G13_EXP_IRB1, # graph 14 : Graph where variable assigned multiple times in a block but still # useful in the end -G14_IRA = IRATest() +G14_IRA = IRATest(symbol_pool) G14_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) @@ -483,7 +485,7 @@ G14_IRA.blocks = {irb.label : irb for irb in [G14_IRB0, G14_IRB1]} G14_IRA.graph.add_uniq_edge(G14_IRB0.label, G14_IRB1.label) # Expected output for graph 1 -G14_EXP_IRA = IRATest() +G14_EXP_IRA = IRATest(symbol_pool) G14_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1)], [ExprAff(c, a)], [ExprAff(a, CST2)]]) @@ -494,7 +496,7 @@ G14_EXP_IRA.blocks = {irb.label: irb for irb in [G14_EXP_IRB0, G14_EXP_IRB1]} # graph 15 : Graph where variable assigned multiple and read at the same time, # but useless -G15_IRA = IRATest() +G15_IRA = IRATest(symbol_pool) G15_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST2)], [ExprAff(a, CST1), ExprAff(b, a+CST2), @@ -506,7 +508,7 @@ G15_IRA.blocks = {irb.label : irb for irb in [G15_IRB0, G15_IRB1]} G15_IRA.graph.add_uniq_edge(G15_IRB0.label, G15_IRB1.label) # Expected output for graph 1 -G15_EXP_IRA = IRATest() +G15_EXP_IRA = IRATest(symbol_pool) G15_EXP_IRB0 = gen_irblock(LBL0, [[], [ExprAff(a, CST1)]]) G15_EXP_IRB1 = gen_irblock(LBL1, [[ExprAff(r, a)]]) @@ -515,7 +517,7 @@ G15_EXP_IRA.blocks = {irb.label: irb for irb in [G15_EXP_IRB0, G15_EXP_IRB1]} # graph 16 : Graph where variable assigned multiple times in the same bloc -G16_IRA = IRATest() +G16_IRA = IRATest(symbol_pool) G16_IRB0 = gen_irblock(LBL0, [[ExprAff(a, CST1), ExprAff(b, CST2), ExprAff(c, CST3)], [ExprAff(a, c+CST1), @@ -531,7 +533,7 @@ G16_IRA.graph.add_uniq_edge(G16_IRB1.label, G16_IRB2.label) G16_IRA.blocks = {irb.label : irb for irb in [G16_IRB0, G16_IRB1]} # Expected output for graph 1 -G16_EXP_IRA = IRATest() +G16_EXP_IRA = IRATest(symbol_pool) G16_EXP_IRB0 = gen_irblock(LBL0, [[ExprAff(c, CST3)], [ExprAff(a, c + CST1), ExprAff(b, c + CST2)]]) @@ -541,7 +543,7 @@ G16_EXP_IRA.blocks = {irb.label: irb for irb in [G16_EXP_IRB0, G16_EXP_IRB1]} # graph 17 : parallel ir -G17_IRA = IRATest() +G17_IRA = IRATest(symbol_pool) G17_IRB0 = gen_irblock(LBL0, [[ExprAff(a, a*b), ExprAff(b, c), @@ -602,7 +604,7 @@ G17_IRA.blocks = {irb.label : irb for irb in [G17_IRB0]} G17_IRA.graph.add_node(G17_IRB0.label) # Expected output for graph 17 -G17_EXP_IRA = IRATest() +G17_EXP_IRA = IRATest(symbol_pool) G17_EXP_IRB0 = gen_irblock(LBL0, [[], diff --git a/test/analysis/depgraph.py b/test/analysis/depgraph.py index 9fb046d0..545269e7 100644 --- a/test/analysis/depgraph.py +++ b/test/analysis/depgraph.py @@ -1,6 +1,6 @@ """Regression test module for DependencyGraph""" -from miasm2.expression.expression import ExprId, ExprInt, ExprAff, ExprCond -from miasm2.core.asmblock import AsmLabel +from miasm2.expression.expression import ExprId, ExprInt, ExprAff, ExprCond, ExprLoc +from miasm2.core.asmblock import AsmLabel, AsmSymbolPool from miasm2.ir.analysis import ira from miasm2.ir.ir import IRBlock, AssignBlock from miasm2.core.graph import DiGraph @@ -9,6 +9,8 @@ from itertools import count from pdb import pm import re +symbol_pool = AsmSymbolPool() + EMULATION = True try: import z3 @@ -41,13 +43,13 @@ CST33 = ExprInt(0x33, 32) CST35 = ExprInt(0x35, 32) CST37 = ExprInt(0x37, 32) -LBL0 = AsmLabel("lbl0") -LBL1 = AsmLabel("lbl1") -LBL2 = AsmLabel("lbl2") -LBL3 = AsmLabel("lbl3") -LBL4 = AsmLabel("lbl4") -LBL5 = AsmLabel("lbl5") -LBL6 = AsmLabel("lbl6") +LBL0 = symbol_pool.add_label("lbl0", 0) +LBL1 = symbol_pool.add_label("lbl1", 1) +LBL2 = symbol_pool.add_label("lbl2", 2) +LBL3 = symbol_pool.add_label("lbl3", 3) +LBL4 = symbol_pool.add_label("lbl4", 4) +LBL5 = symbol_pool.add_label("lbl5", 5) +LBL6 = symbol_pool.add_label("lbl6", 6) def gen_irblock(label, exprs_list): """ Returns an IRBlock. @@ -60,7 +62,7 @@ def gen_irblock(label, exprs_list): else: irs.append(AssignBlock(exprs)) - irbl = IRBlock(label, irs) + irbl = IRBlock(label.loc_key, irs) return irbl @@ -186,7 +188,8 @@ def dg2graph(graph, label=False, lines=True): out_blocks = [] for label in graph.nodes(): if isinstance(label, DependencyNode): - label_name = "%s %s %s" % (label.label.name, + lbl = symbol_pool.loc_key_to_label(label.label) + label_name = "%s %s %s" % (lbl.name, label.element, label.line_nb) else: @@ -228,7 +231,7 @@ DNC3 = DependencyNode(LBL1, C, 0) # graph 1 -G1_IRA = IRATest() +G1_IRA = IRATest(symbol_pool) G1_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) G1_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)]]) @@ -241,7 +244,7 @@ G1_IRA.blocks = dict([(irb.label, irb) for irb in [G1_IRB0, G1_IRB1, G1_IRB2]]) # graph 2 -G2_IRA = IRATest() +G2_IRA = IRATest(symbol_pool) G2_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) G2_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2)]]) @@ -255,7 +258,7 @@ G2_IRA.blocks = dict([(irb.label, irb) for irb in [G2_IRB0, G2_IRB1, G2_IRB2]]) # graph 3 -G3_IRA = IRATest() +G3_IRA = IRATest(symbol_pool) G3_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) G3_IRB1 = gen_irblock(LBL1, [[ExprAff(B, CST2)]]) @@ -272,13 +275,13 @@ G3_IRA.blocks = dict([(irb.label, irb) for irb in [G3_IRB0, G3_IRB1, # graph 4 -G4_IRA = IRATest() +G4_IRA = IRATest(symbol_pool) G4_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) G4_IRB1 = gen_irblock(LBL1, [[ExprAff(C, C + CST2)], [ExprAff(G4_IRA.IRDst, - ExprCond(C, ExprId(LBL2, 32), - ExprId(LBL1, 32)))]]) + ExprCond(C, ExprLoc(LBL2.loc_key, 32), + ExprLoc(LBL1.loc_key, 32)))]]) G4_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) @@ -291,13 +294,13 @@ G4_IRA.blocks = dict([(irb.label, irb) for irb in [G4_IRB0, G4_IRB1, G4_IRB2]]) # graph 5 -G5_IRA = IRATest() +G5_IRA = IRATest(symbol_pool) G5_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) G5_IRB1 = gen_irblock(LBL1, [[ExprAff(B, B + CST2)], [ExprAff(G5_IRA.IRDst, - ExprCond(B, ExprId(LBL2, 32), - ExprId(LBL1, 32)))]]) + ExprCond(B, ExprLoc(LBL2.loc_key, 32), + ExprLoc(LBL1.loc_key, 32)))]]) G5_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) @@ -309,7 +312,7 @@ G5_IRA.blocks = dict([(irb.label, irb) for irb in [G5_IRB0, G5_IRB1, G5_IRB2]]) # graph 6 -G6_IRA = IRATest() +G6_IRA = IRATest(symbol_pool) G6_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) G6_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B)]]) @@ -321,7 +324,7 @@ G6_IRA.blocks = dict([(irb.label, irb) for irb in [G6_IRB0, G6_IRB1]]) # graph 7 -G7_IRA = IRATest() +G7_IRA = IRATest(symbol_pool) G7_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) G7_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)], [ExprAff(A, B)]]) @@ -335,7 +338,7 @@ G7_IRA.blocks = dict([(irb.label, irb) for irb in [G7_IRB0, G7_IRB1, G7_IRB2]]) # graph 8 -G8_IRA = IRATest() +G8_IRA = IRATest(symbol_pool) G8_IRB0 = gen_irblock(LBL0, [[ExprAff(C, CST1)]]) G8_IRB1 = gen_irblock(LBL1, [[ExprAff(B, C)], [ExprAff(C, D)]]) @@ -351,7 +354,7 @@ G8_IRA.blocks = dict([(irb.label, irb) for irb in [G8_IRB0, G8_IRB1, G8_IRB2]]) # graph 10 -G10_IRA = IRATest() +G10_IRA = IRATest(symbol_pool) G10_IRB1 = gen_irblock(LBL1, [[ExprAff(B, B + CST2)]]) G10_IRB2 = gen_irblock(LBL2, [[ExprAff(A, B)]]) @@ -363,7 +366,7 @@ G10_IRA.blocks = dict([(irb.label, irb) for irb in [G10_IRB1, G10_IRB2]]) # graph 11 -G11_IRA = IRATest() +G11_IRA = IRATest(symbol_pool) G11_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), ExprAff(B, CST2)]]) @@ -379,7 +382,7 @@ G11_IRA.blocks = dict([(irb.label, irb) # graph 12 -G12_IRA = IRATest() +G12_IRA = IRATest(symbol_pool) G12_IRB0 = gen_irblock(LBL0, [[ExprAff(B, CST1)]]) G12_IRB1 = gen_irblock(LBL1, [[ExprAff(A, B)], [ExprAff(B, B + CST2)]]) @@ -395,21 +398,21 @@ G12_IRA.blocks = dict([(irb.label, irb) for irb in [G12_IRB0, G12_IRB1, # graph 13 -G13_IRA = IRATest() +G13_IRA = IRATest(symbol_pool) G13_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)], #[ExprAff(B, A)], [ExprAff(G13_IRA.IRDst, - ExprId(LBL1, 32))]]) + ExprLoc(LBL1.loc_key, 32))]]) G13_IRB1 = gen_irblock(LBL1, [[ExprAff(C, A)], #[ExprAff(A, A + CST1)], [ExprAff(G13_IRA.IRDst, - ExprCond(R, ExprId(LBL2, 32), - ExprId(LBL1, 32)))]]) + ExprCond(R, ExprLoc(LBL2.loc_key, 32), + ExprLoc(LBL1.loc_key, 32)))]]) G13_IRB2 = gen_irblock(LBL2, [[ExprAff(B, A + CST3)], [ExprAff(A, B + CST3)], [ExprAff(G13_IRA.IRDst, - ExprId(LBL1, 32))]]) + ExprLoc(LBL1.loc_key, 32))]]) G13_IRB3 = gen_irblock(LBL3, [[ExprAff(R, C)]]) @@ -423,22 +426,22 @@ G13_IRA.blocks = dict([(irb.label, irb) for irb in [G13_IRB0, G13_IRB1, # graph 14 -G14_IRA = IRATest() +G14_IRA = IRATest(symbol_pool) G14_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)], [ExprAff(G14_IRA.IRDst, - ExprId(LBL1, 32))] + ExprLoc(LBL1.loc_key, 32))] ]) G14_IRB1 = gen_irblock(LBL1, [[ExprAff(B, A)], [ExprAff(G14_IRA.IRDst, - ExprCond(C, ExprId(LBL2, 32), - ExprId(LBL3, 32)))] + ExprCond(C, ExprLoc(LBL2.loc_key, 32), + ExprLoc(LBL3.loc_key, 32)))] ]) G14_IRB2 = gen_irblock(LBL2, [[ExprAff(D, A)], [ExprAff(A, D + CST1)], [ExprAff(G14_IRA.IRDst, - ExprId(LBL1, 32))] + ExprLoc(LBL1.loc_key, 32))] ]) G14_IRB3 = gen_irblock(LBL3, [[ExprAff(R, D + B)]]) @@ -453,7 +456,7 @@ G14_IRA.blocks = dict([(irb.label, irb) for irb in [G14_IRB0, G14_IRB1, # graph 16 -G15_IRA = IRATest() +G15_IRA = IRATest(symbol_pool) G15_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)]]) G15_IRB1 = gen_irblock(LBL1, [[ExprAff(D, A + B)], @@ -470,7 +473,7 @@ G15_IRA.blocks = dict([(irb.label, irb) for irb in [G15_IRB0, G15_IRB1, # graph 16 -G16_IRA = IRATest() +G16_IRA = IRATest(symbol_pool) G16_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1)]]) G16_IRB1 = gen_irblock(LBL1, [[ExprAff(R, D)]]) @@ -494,7 +497,7 @@ G16_IRA.blocks = dict([(irb.label, irb) for irb in [G16_IRB0, G16_IRB1, # graph 17 -G17_IRA = IRATest() +G17_IRA = IRATest(symbol_pool) G17_IRB0 = gen_irblock(LBL0, [[ExprAff(A, CST1), ExprAff(D, CST2)]]) @@ -638,7 +641,8 @@ def flatNode(node): element = int(node.element.arg) else: RuntimeError("Unsupported type '%s'" % type(enode.element)) - return (node.label.name, + label = symbol_pool.loc_key_to_label(node.label) + return (label.name, element, node.line_nb) else: @@ -736,7 +740,8 @@ def match_results(resultsA, resultsB, nodes): def get_flat_init_depnodes(depnodes): out = [] for node in depnodes: - out.append((node.label.name, + label = symbol_pool.loc_key_to_label(node.label) + out.append((label.name, node.element.name, node.line_nb, 0)) diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index d9e6aa76..05d26f5c 100755 --- a/test/arch/arm/sem.py +++ b/test/arch/arm/sem.py @@ -30,8 +30,8 @@ def compute(asm, inputstate={}, debug=False): code = mn.asm(instr)[0] instr = mn.dis(code, "l") instr.offset = inputstate.get(PC, 0) - interm.add_instr(instr) - symexec.run_at(instr.offset) + lbl = interm.add_instr(instr) + symexec.run_at(lbl) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/msp430/sem.py b/test/arch/msp430/sem.py index 3b2c2f2e..a7a9e4be 100755 --- a/test/arch/msp430/sem.py +++ b/test/arch/msp430/sem.py @@ -26,8 +26,8 @@ def compute(asm, inputstate={}, debug=False): code = mn.asm(instr)[0] instr = mn.dis(code, mode) instr.offset = inputstate.get(PC, 0) - interm.add_instr(instr) - symexec.run_at(instr.offset) + lbl = interm.add_instr(instr) + symexec.run_at(lbl, step=True) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: diff --git a/test/arch/x86/sem.py b/test/arch/x86/sem.py index b3b7e940..78cbc243 100755 --- a/test/arch/x86/sem.py +++ b/test/arch/x86/sem.py @@ -25,11 +25,11 @@ symbol_pool = AsmSymbolPool() m32 = 32 m64 = 64 -def symb_exec(interm, inputstate, debug): +def symb_exec(lbl, interm, inputstate, debug): sympool = dict(regs_init) sympool.update(inputstate) symexec = SymbolicExecutionEngine(interm, sympool) - symexec.run_at(0) + symexec.run_at(lbl) if debug: for k, v in symexec.symbols.items(): if regs_init.get(k, None) != v: @@ -43,8 +43,8 @@ def compute(ir, mode, asm, inputstate={}, debug=False): instr = mn.dis(code, mode) instr.offset = inputstate.get(EIP, 0) interm = ir() - interm.add_instr(instr) - return symb_exec(interm, inputstate, debug) + lbl = interm.add_instr(instr) + return symb_exec(lbl, interm, inputstate, debug) def compute_txt(ir, mode, txt, inputstate={}, debug=False): @@ -52,9 +52,10 @@ def compute_txt(ir, mode, txt, inputstate={}, debug=False): symbol_pool.set_offset(symbol_pool.getby_name("main"), 0x0) patches = asmblock.asm_resolve_final(mn, blocks, symbol_pool) interm = ir(symbol_pool) + lbl = symbol_pool.getby_name("main") for bbl in blocks: interm.add_block(bbl) - return symb_exec(interm, inputstate, debug) + return symb_exec(lbl, interm, inputstate, debug) op_add = lambda a, b: a+b op_sub = lambda a, b: a-b diff --git a/test/core/asmblock.py b/test/core/asmblock.py index 7f0dbc5f..5d240c56 100644 --- a/test/core/asmblock.py +++ b/test/core/asmblock.py @@ -104,7 +104,8 @@ open("graph2.dot", "w").write(blocks.dot()) # Test helper methods ## Label2block should always be updated assert blocks.label2block(first_block.label) == first_block -my_block = AsmBlock(AsmLabel("testlabel")) +testlabel = mdis.symbol_pool.getby_name_create("testlabel") +my_block = AsmBlock(testlabel) blocks.add_node(my_block) assert len(blocks) == 3 assert blocks.label2block(first_block.label) == first_block @@ -114,7 +115,8 @@ assert blocks.label2block(my_block.label) == my_block assert len(list(blocks.get_bad_blocks())) == 0 assert len(list(blocks.get_bad_blocks_predecessors())) == 0 ### Add a bad block, not linked -my_bad_block = AsmBlockBad(AsmLabel("testlabel_bad")) +testlabel_bad = mdis.symbol_pool.getby_name_create("testlabel_bad") +my_bad_block = AsmBlockBad(testlabel_bad) blocks.add_node(my_bad_block) assert list(blocks.get_bad_blocks()) == [my_bad_block] assert len(list(blocks.get_bad_blocks_predecessors())) == 0 @@ -132,7 +134,8 @@ assert len(list(blocks.get_bad_blocks_predecessors(strict=True))) == 0 ## Sanity check blocks.sanity_check() ### Next on itself -my_block_ni = AsmBlock(AsmLabel("testlabel_nextitself")) +testlabel_nextitself = mdis.symbol_pool.getby_name_create("testlabel_nextitself") +my_block_ni = AsmBlock(testlabel_nextitself) my_block_ni.bto.add(AsmConstraintNext(my_block_ni.label)) blocks.add_node(my_block_ni) error_raised = False @@ -145,10 +148,13 @@ assert error_raised blocks.del_node(my_block_ni) blocks.sanity_check() ### Multiple next on the same node -my_block_target = AsmBlock(AsmLabel("testlabel_target")) +testlabel_target = mdis.symbol_pool.getby_name_create("testlabel_target") +my_block_target = AsmBlock(testlabel_target) blocks.add_node(my_block_target) -my_block_src1 = AsmBlock(AsmLabel("testlabel_src1")) -my_block_src2 = AsmBlock(AsmLabel("testlabel_src2")) +testlabel_src1 = mdis.symbol_pool.getby_name_create("testlabel_src1") +testlabel_src2 = mdis.symbol_pool.getby_name_create("testlabel_src2") +my_block_src1 = AsmBlock(testlabel_src1) +my_block_src2 = AsmBlock(testlabel_src2) my_block_src1.bto.add(AsmConstraintNext(my_block_target.label)) blocks.add_node(my_block_src1) ### OK for now @@ -177,8 +183,10 @@ assert blocks.label2block(my_block_src1.label).max_size == 0 ## Check pendings ### Create a pending element -my_block_src = AsmBlock(AsmLabel("testlabel_pend_src")) -my_block_dst = AsmBlock(AsmLabel("testlabel_pend_dst")) +testlabel_pend_src = mdis.symbol_pool.getby_name_create("testlabel_pend_src") +testlabel_pend_dst = mdis.symbol_pool.getby_name_create("testlabel_pend_dst") +my_block_src = AsmBlock(testlabel_pend_src) +my_block_dst = AsmBlock(testlabel_pend_dst) my_block_src.bto.add(AsmConstraintTo(my_block_dst.label)) blocks.add_node(my_block_src) ### Check resulting state @@ -238,7 +246,7 @@ assert len(entry_block.lines) == 4 assert map(str, entry_block.lines) == ['XOR EAX, EAX', 'XOR EBX, EBX', 'XOR ECX, ECX', - 'JNZ loc_0000000000000014:0x00000014'] + 'JNZ label_3'] assert len(blocks.successors(entry_block)) == 2 assert len(entry_block.bto) == 2 nextb = blocks.label2block((cons.label for cons in entry_block.bto @@ -249,11 +257,11 @@ assert len(nextb.lines) == 4 assert map(str, nextb.lines) == ['XOR EDX, EDX', 'XOR ESI, ESI', 'XOR EDI, EDI', - 'JMP loc_0000000000000008:0x00000008'] + 'JMP label_4'] assert blocks.successors(nextb) == [nextb] assert len(tob.lines) == 2 assert map(str, tob.lines) == ['XOR EBP, EBP', - 'JMP loc_0000000000000014:0x00000014'] + 'JMP label_3'] assert blocks.successors(tob) == [tob] # Check split_block @@ -278,7 +286,7 @@ assert len(blocks.successors(entry_block)) == 1 newb = blocks.successors(entry_block)[0] assert len(newb.lines) == 2 assert map(str, newb.lines) == ['XOR ECX, ECX', - 'JNZ loc_0000000000000014:0x00000014'] + 'JNZ label_3'] preds = blocks.predecessors(newb) assert len(preds) == 2 assert entry_block in preds diff --git a/test/core/sembuilder.py b/test/core/sembuilder.py index ebf9f385..f3894927 100644 --- a/test/core/sembuilder.py +++ b/test/core/sembuilder.py @@ -11,13 +11,13 @@ class IR(object): IRDst = m2_expr.ExprId("IRDst", 32) def get_next_instr(self, _): - return AsmLabel("NEXT") + return AsmLabel(m2_expr.LocKey(0), "NEXT") def get_next_label(self, _): - return AsmLabel("NEXT") + return AsmLabel(m2_expr.LocKey(0), "NEXT") def gen_label(self): - return AsmLabel("GEN") + return AsmLabel(m2_expr.LocKey(1), "GEN") class Instr(object): mode = 32 diff --git a/test/expression/parser.py b/test/expression/parser.py index 9c01c8a1..1d5889fb 100644 --- a/test/expression/parser.py +++ b/test/expression/parser.py @@ -1,9 +1,10 @@ from miasm2.expression.parser import str_to_expr from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAff + ExprCond, ExprCompose, ExprOp, ExprAff, ExprLoc, LocKey for expr_test in [ExprInt(0x12, 32), ExprId('test', 32), + ExprLoc(LocKey(12), 32), ExprSlice(ExprInt(0x10, 32), 0, 8), ExprMem(ExprInt(0x10, 32), 32), ExprCond(ExprInt(0x10, 32), ExprInt(0x11, 32), ExprInt(0x12, 32)), diff --git a/test/ir/translators/z3_ir.py b/test/ir/translators/z3_ir.py index 6ae2dcd0..643c59e4 100644 --- a/test/ir/translators/z3_ir.py +++ b/test/ir/translators/z3_ir.py @@ -1,12 +1,16 @@ import z3 -from miasm2.core.asmblock import AsmLabel +from miasm2.core.asmblock import AsmLabel, AsmSymbolPool from miasm2.expression.expression import * -from miasm2.ir.translators.translator import Translator -from miasm2.ir.translators.z3_ir import Z3Mem +from miasm2.ir.translators.z3_ir import Z3Mem, TranslatorZ3 # Some examples of use/unit tests. +symbol_pool = AsmSymbolPool() +translator1 = TranslatorZ3(endianness="<", symbol_pool=symbol_pool) +translator2 = TranslatorZ3(endianness=">", symbol_pool=symbol_pool) + + def equiv(z3_expr1, z3_expr2): s = z3.Solver() s.add(z3.Not(z3_expr1 == z3_expr2)) @@ -34,17 +38,17 @@ assert equiv(z3.BitVec('a', 32) + z3.BitVecVal(3, 32) - z3.BitVecVal(1, 32), # Z3Mem short tests # -------------------------------------------------------------------------- -mem = Z3Mem(endianness='<') # little endian +mem = Z3Mem(endianness='<') # little endian eax = z3.BitVec('EAX', 32) assert equiv( # @32[EAX] mem.get(eax, 32), # @16[EAX+2] . @16[EAX] - z3.Concat(mem.get(eax+2, 16), + z3.Concat(mem.get(eax+2, 16), mem.get(eax, 16))) # -------------------------------------------------------------------------- -ax = z3.BitVec('AX', 16) +ax = z3.BitVec('AX', 16) assert not equiv( # @16[EAX] with EAX = ZeroExtend(AX) mem.get(z3.ZeroExt(16, ax), 16), @@ -54,7 +58,7 @@ assert not equiv( # TranslatorZ3 tests # -------------------------------------------------------------------------- e = ExprId('x', 32) -ez3 = Translator.to_language('z3').from_expr(e) +ez3 = translator1.from_expr(e) z3_e = z3.BitVec('x', 32) assert equiv(ez3, z3_e) @@ -63,7 +67,7 @@ assert equiv(ez3, z3_e) four = ExprInt(4, 32) five = ExprInt(5, 32) e2 = (e + five + four) * five -ez3 = Translator.to_language('z3').from_expr(e2) +ez3 = translator1.from_expr(e2) z3_four = z3.BitVecVal(4, 32) z3_five = z3.BitVecVal(5, 32) @@ -74,7 +78,7 @@ assert equiv(ez3, z3_e2) emem = ExprMem(ExprInt(0xdeadbeef, 32), size=32) emem2 = ExprMem(ExprInt(0xfee1dead, 32), size=32) e3 = (emem + e) * ExprInt(2, 32) * emem2 -ez3 = Translator.to_language('z3').from_expr(e3) +ez3 = translator1.from_expr(e3) mem = Z3Mem() z3_emem = mem.get(z3.BitVecVal(0xdeadbeef, 32), 32) @@ -84,7 +88,7 @@ assert equiv(ez3, z3_e3) # -------------------------------------------------------------------------- e4 = emem * five -ez3 = Translator.to_language('z3').from_expr(e4) +ez3 = translator1.from_expr(e4) z3_e4 = z3_emem * z3_five assert equiv(ez3, z3_e4) @@ -98,7 +102,7 @@ check_interp(model[mem.get_mem_array(32)], [(0xdeadbeef, 2), (0xdeadbeef + 3, 0)]) # -------------------------------------------------------------------------- -ez3 = Translator.to_language("z3", endianness=">").from_expr(e4) +ez3 = translator2.from_expr(e4) memb = Z3Mem(endianness=">") z3_emem = memb.get(z3.BitVecVal(0xdeadbeef, 32), 32) @@ -115,7 +119,7 @@ check_interp(model[memb.get_mem_array(32)], # -------------------------------------------------------------------------- e5 = ExprSlice(ExprCompose(e, four), 0, 32) * five -ez3 = Translator.to_language('z3').from_expr(e5) +ez3 = translator1.from_expr(e5) z3_e5 = z3.Extract(31, 0, z3.Concat(z3_four, z3_e)) * z3_five assert equiv(ez3, z3_e5) @@ -126,7 +130,7 @@ seven = ExprInt(7, 32) one0seven = ExprInt(0x107, 32) for miasm_int, res in [(five, 1), (four, 0), (seven, 0), (one0seven, 0)]: e6 = ExprOp('parity', miasm_int) - ez3 = Translator.to_language('z3').from_expr(e6) + ez3 = translator1.from_expr(e6) z3_e6 = z3.BitVecVal(res, 1) assert equiv(ez3, z3_e6) @@ -134,37 +138,40 @@ for miasm_int, res in [(five, 1), (four, 0), (seven, 0), (one0seven, 0)]: # '-' for miasm_int, res in [(five, -5), (four, -4)]: e6 = ExprOp('-', miasm_int) - ez3 = Translator.to_language('z3').from_expr(e6) + ez3 = translator1.from_expr(e6) z3_e6 = z3.BitVecVal(res, 32) assert equiv(ez3, z3_e6) # -------------------------------------------------------------------------- -e7 = ExprId(AsmLabel("label_histoire", 0xdeadbeef), 32) -ez3 = Translator.to_language('z3').from_expr(e7) +label_histoire = symbol_pool.add_label("label_histoire", 0xdeadbeef) +e7 = ExprLoc(label_histoire.loc_key, 32) +ez3 = translator1.from_expr(e7) z3_e7 = z3.BitVecVal(0xdeadbeef, 32) assert equiv(ez3, z3_e7) # Should just not throw anything to pass -e8 = ExprId(AsmLabel("label_jambe"), 32) -ez3 = Translator.to_language('z3').from_expr(e8) +lbl_e8 = symbol_pool.add_label("label_jambe") + +e8 = ExprLoc(lbl_e8.loc_key, 32) +ez3 = translator1.from_expr(e8) assert not equiv(ez3, z3_e7) # -------------------------------------------------------------------------- # cntleadzeros, cnttrailzeros # cnttrailzeros(0x1138) == 3 -cnttrailzeros1 = Translator.to_language('z3').from_expr(ExprOp("cnttrailzeros", ExprInt(0x1138, 32))) +cnttrailzeros1 = translator1.from_expr(ExprOp("cnttrailzeros", ExprInt(0x1138, 32))) cnttrailzeros2 = z3.BitVecVal(3, 32) assert(equiv(cnttrailzeros1, cnttrailzeros2)) # cntleadzeros(0x11300) == 0xf -cntleadzeros1 = Translator.to_language('z3').from_expr(ExprOp("cntleadzeros", ExprInt(0x11300, 32))) +cntleadzeros1 = translator1.from_expr(ExprOp("cntleadzeros", ExprInt(0x11300, 32))) cntleadzeros2 = z3.BitVecVal(0xf, 32) assert(equiv(cntleadzeros1, cntleadzeros2)) # cnttrailzeros(0x8000) + 1 == cntleadzeros(0x8000) -cnttrailzeros3 = Translator.to_language('z3').from_expr(ExprOp("cnttrailzeros", ExprInt(0x8000, 32)) + ExprInt(1, 32)) -cntleadzeros3 = Translator.to_language('z3').from_expr(ExprOp("cntleadzeros", ExprInt(0x8000, 32))) +cnttrailzeros3 = translator1.from_expr(ExprOp("cnttrailzeros", ExprInt(0x8000, 32)) + ExprInt(1, 32)) +cntleadzeros3 = translator1.from_expr(ExprOp("cntleadzeros", ExprInt(0x8000, 32))) assert(equiv(cnttrailzeros3, cntleadzeros3)) print "TranslatorZ3 tests are OK." |