diff options
Diffstat (limited to 'miasm2')
49 files changed, 2021 insertions, 1451 deletions
diff --git a/miasm2/analysis/binary.py b/miasm2/analysis/binary.py index 6073e126..f5a727d7 100644 --- a/miasm2/analysis/binary.py +++ b/miasm2/analysis/binary.py @@ -203,7 +203,7 @@ class ContainerELF(Container): if offset == 0: continue try: - self._symbol_pool.add_label(name, offset) + self._symbol_pool.add_location(name, offset) except ValueError: # Two symbols points on the same offset log.warning("Same offset (%s) for %s and %s", diff --git a/miasm2/analysis/cst_propag.py b/miasm2/analysis/cst_propag.py index 18829627..4b5d7834 100644 --- a/miasm2/analysis/cst_propag.py +++ b/miasm2/analysis/cst_propag.py @@ -31,7 +31,7 @@ def add_state(ir_arch, todo, states, addr, state): @addr: address of the concidered block @state: computed state """ - addr = ir_arch.get_label(addr) + addr = ir_arch.get_loc_key(addr) todo.add(addr) if addr not in states: states[addr] = state @@ -108,11 +108,11 @@ class SymbExecStateFix(SymbolicExecutionEngine): for arg in assignblk.instr.args: new_arg = self.propag_expr_cst(arg) links[new_arg] = arg - self.cst_propag_link[(irb.label, index)] = links + self.cst_propag_link[(irb.loc_key, index)] = links self.eval_updt_assignblk(assignblk) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - self.ir_arch.blocks[irb.label] = IRBlock(irb.label, assignblks) + self.ir_arch.blocks[irb.loc_key] = IRBlock(irb.loc_key, assignblks) def compute_cst_propagation_states(ir_arch, init_addr, init_infos): @@ -128,7 +128,7 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): done = set() state = SymbExecState.StateEngine(init_infos) - lbl = ir_arch.get_label(init_addr) + lbl = ir_arch.get_loc_key(init_addr) todo = set([lbl]) states = {lbl: state} @@ -153,7 +153,7 @@ def compute_cst_propagation_states(ir_arch, init_addr, init_infos): LOG_CST_PROPAG.warning('Bad destination: %s', value) continue elif value.is_int(): - value = ir_arch.get_label(value) + value = ir_arch.get_loc_key(value) add_state(ir_arch, todo, states, value, symbexec_engine.get_state()) diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index bceb0bd8..5e88665e 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -27,7 +27,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): continue for n in all_mems: - node_n_w = get_node_name(irb.label, i, n) + node_n_w = get_node_name(irb.loc_key, i, n) if not n in nodes_r: continue o_r = n.arg.get_r(mem_read=False, cst_read=True) @@ -35,7 +35,7 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, i, n_r) + node_n_r = get_node_name(irb.loc_key, i, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r flow_graph.add_uniq_edge(node_n_r, node_n_w) @@ -46,13 +46,13 @@ def intra_block_flow_raw(ir_arch, flow_graph, irb, in_nodes, out_nodes): if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, i, n_r) + node_n_r = get_node_name(irb.loc_key, i, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r flow_graph.add_node(node_n_r) - node_n_w = get_node_name(irb.label, i + 1, node_w) + node_n_w = get_node_name(irb.loc_key, i + 1, node_w) out_nodes[node_w] = node_n_w flow_graph.add_node(node_n_w) @@ -81,13 +81,13 @@ def intra_block_flow_symbexec(ir_arch, flow_graph, irb, in_nodes, out_nodes): continue read_values = v.get_r(cst_read=True) # print n_w, v, [str(x) for x in read_values] - node_n_w = get_node_name(irb.label, len(irb), n_w) + node_n_w = get_node_name(irb.loc_key, len(irb), n_w) for n_r in read_values: if n_r in current_nodes: node_n_r = current_nodes[n_r] else: - node_n_r = get_node_name(irb.label, 0, n_r) + node_n_r = get_node_name(irb.loc_key, 0, n_r) current_nodes[n_r] = node_n_r in_nodes[n_r] = node_n_r @@ -109,7 +109,7 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo irb = ir_arch.blocks[lbl] # pp(('IN', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) to_del = set() - for n_r, node_n_r in irb_in_nodes[irb.label].items(): + for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_r in current_nodes: continue # print 'add link', current_nodes[n_r], node_n_r @@ -119,7 +119,7 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo # if link exec to data, all nodes depends on exec nodes if link_exec_to_data: for n_x_r in exec_nodes: - for n_r, node_n_r in irb_in_nodes[irb.label].items(): + for n_r, node_n_r in irb_in_nodes[irb.loc_key].items(): if not n_x_r in current_nodes: continue if isinstance(n_r, ExprInt): @@ -127,14 +127,14 @@ def inter_block_flow_link(ir_arch, flow_graph, irb_in_nodes, irb_out_nodes, todo flow_graph.add_uniq_edge(current_nodes[n_x_r], node_n_r) # update current nodes using bloc out_nodes - for n_w, node_n_w in irb_out_nodes[irb.label].items(): + for n_w, node_n_w in irb_out_nodes[irb.loc_key].items(): current_nodes[n_w] = node_n_w # get nodes involved in exec flow x_nodes = tuple(sorted(list(irb.dst.get_r()))) todo = set() - for lbl_dst in ir_arch.graph.successors(irb.label): + for lbl_dst in ir_arch.graph.successors(irb.loc_key): todo.add((lbl_dst, tuple(current_nodes.items()), x_nodes)) # pp(('OUT', lbl, [(str(x[0]), str(x[1])) for x in current_nodes.items()])) @@ -150,13 +150,13 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): while todo: lbl = todo.pop() irb = ir_arch.blocks[lbl] - for lbl_son in ir_arch.graph.successors(irb.label): + for lbl_son in ir_arch.graph.successors(irb.loc_key): if not lbl_son in ir_arch.blocks: print "cannot find bloc!!", lbl continue irb_son = ir_arch.blocks[lbl_son] - for n_r in irb_in_nodes[irb_son.label]: - if n_r in irb_out_nodes[irb.label]: + for n_r in irb_in_nodes[irb_son.loc_key]: + if n_r in irb_out_nodes[irb.loc_key]: continue if not isinstance(n_r, ExprId): continue @@ -167,13 +167,13 @@ def create_implicit_flow(ir_arch, flow_graph, irb_in_nodes, irb_out_ndes): # print "###", irb_son # print "###", 'IN', [str(x) for x in irb_son.in_nodes] - node_n_w = irb.label, len(irb), n_r - irb_out_nodes[irb.label][n_r] = node_n_w - if not n_r in irb_in_nodes[irb.label]: - irb_in_nodes[irb.label][n_r] = irb.label, 0, n_r - node_n_r = irb_in_nodes[irb.label][n_r] + node_n_w = irb.loc_key, len(irb), n_r + irb_out_nodes[irb.loc_key][n_r] = node_n_w + if not n_r in irb_in_nodes[irb.loc_key]: + irb_in_nodes[irb.loc_key][n_r] = irb.loc_key, 0, n_r + node_n_r = irb_in_nodes[irb.loc_key][n_r] # print "###", node_n_r - for lbl_p in ir_arch.graph.predecessors(irb.label): + for lbl_p in ir_arch.graph.predecessors(irb.loc_key): todo.add(lbl_p) flow_graph.add_uniq_edge(node_n_r, node_n_w) diff --git a/miasm2/analysis/data_flow.py b/miasm2/analysis/data_flow.py index d9f61c56..e780f70c 100644 --- a/miasm2/analysis/data_flow.py +++ b/miasm2/analysis/data_flow.py @@ -57,15 +57,15 @@ class ReachingDefinitions(dict): the assignblk in block @block. """ predecessor_state = {} - for pred_lbl in self.ir_a.graph.predecessors(block.label): + for pred_lbl in self.ir_a.graph.predecessors(block.loc_key): pred = self.ir_a.blocks[pred_lbl] for lval, definitions in self.get_definitions(pred_lbl, len(pred)).iteritems(): predecessor_state.setdefault(lval, set()).update(definitions) - modified = self.get((block.label, 0)) != predecessor_state + modified = self.get((block.loc_key, 0)) != predecessor_state if not modified: return False - self[(block.label, 0)] = predecessor_state + self[(block.loc_key, 0)] = predecessor_state for index in xrange(len(block)): modified |= self.process_assignblock(block, index) @@ -80,13 +80,13 @@ class ReachingDefinitions(dict): """ assignblk = block[assignblk_index] - defs = self.get_definitions(block.label, assignblk_index).copy() + defs = self.get_definitions(block.loc_key, assignblk_index).copy() for lval in assignblk: - defs.update({lval: set([(block.label, assignblk_index)])}) + defs.update({lval: set([(block.loc_key, assignblk_index)])}) - modified = self.get((block.label, assignblk_index + 1)) != defs + modified = self.get((block.loc_key, assignblk_index + 1)) != defs if modified: - self[(block.label, assignblk_index + 1)] = defs + self[(block.loc_key, assignblk_index + 1)] = defs return modified @@ -149,9 +149,9 @@ class DiGraphDefUse(DiGraph): def _compute_def_use_block(self, block, reaching_defs, deref_mem=False): for index, assignblk in enumerate(block): - assignblk_reaching_defs = reaching_defs.get_definitions(block.label, index) + assignblk_reaching_defs = reaching_defs.get_definitions(block.loc_key, index) for lval, expr in assignblk.iteritems(): - self.add_node(AssignblkNode(block.label, index, lval)) + self.add_node(AssignblkNode(block.loc_key, index, lval)) read_vars = expr.get_r(mem_read=deref_mem) if deref_mem and lval.is_mem(): @@ -159,7 +159,7 @@ class DiGraphDefUse(DiGraph): for read_var in read_vars: for reach in assignblk_reaching_defs.get(read_var, set()): self.add_data_edge(AssignblkNode(reach[0], reach[1], read_var), - AssignblkNode(block.label, index, lval)) + AssignblkNode(block.loc_key, index, lval)) def del_edge(self, src, dst): super(DiGraphDefUse, self).del_edge(src, dst) @@ -257,9 +257,9 @@ def dead_simp(ir_a): for idx, assignblk in enumerate(block): new_assignblk = dict(assignblk) for lval in assignblk: - if AssignblkNode(block.label, idx, lval) not in useful: + if AssignblkNode(block.loc_key, idx, lval) not in useful: del new_assignblk[lval] modified = True irs.append(AssignBlock(new_assignblk, assignblk.instr)) - ir_a.blocks[block.label] = IRBlock(block.label, irs) + ir_a.blocks[block.loc_key] = IRBlock(block.loc_key, irs) return modified diff --git a/miasm2/analysis/depgraph.py b/miasm2/analysis/depgraph.py index f7949c88..0f4d168d 100644 --- a/miasm2/analysis/depgraph.py +++ b/miasm2/analysis/depgraph.py @@ -1,8 +1,8 @@ """Provide dependency graph""" -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprInt, ExprLoc, ExprAff from miasm2.core.graph import DiGraph -from miasm2.core.asmblock import AsmLabel, expr_is_int_or_label, expr_is_label +from miasm2.core.asmblock import AsmSymbolPool from miasm2.expression.simplifications import expr_simp from miasm2.ir.symbexec import SymbolicExecutionEngine from miasm2.ir.ir import IRBlock, AssignBlock @@ -20,23 +20,23 @@ class DependencyNode(object): """Node elements of a DependencyGraph A dependency node stands for the dependency on the @element at line number - @line_nb in the IRblock named @label, *before* the evaluation of this + @line_nb in the IRblock named @loc_key, *before* the evaluation of this line. """ - __slots__ = ["_label", "_element", "_line_nb", "_hash"] + __slots__ = ["_loc_key", "_element", "_line_nb", "_hash"] - def __init__(self, label, element, line_nb): + def __init__(self, loc_key, element, line_nb): """Create a dependency node with: - @label: AsmLabel instance + @loc_key: LocKey instance @element: Expr instance @line_nb: int """ - self._label = label + self._loc_key = loc_key self._element = element self._line_nb = line_nb self._hash = hash( - (self._label, self._element, self._line_nb)) + (self._loc_key, self._element, self._line_nb)) def __hash__(self): """Returns a hash of @self to uniquely identify @self""" @@ -46,7 +46,7 @@ class DependencyNode(object): """Returns True if @self and @depnode are equals.""" if not isinstance(depnode, self.__class__): return False - return (self.label == depnode.label and + return (self.loc_key == depnode.loc_key and self.element == depnode.element and self.line_nb == depnode.line_nb) @@ -55,13 +55,13 @@ class DependencyNode(object): if not isinstance(node, self.__class__): return cmp(self.__class__, node.__class__) - return cmp((self.label, self.element, self.line_nb), - (node.label, node.element, node.line_nb)) + return cmp((self.loc_key, self.element, self.line_nb), + (node.loc_key, node.element, node.line_nb)) def __str__(self): """Returns a string representation of DependencyNode""" return "<%s %s %s %s>" % (self.__class__.__name__, - self.label.name, self.element, + self.loc_key, self.element, self.line_nb) def __repr__(self): @@ -69,9 +69,9 @@ class DependencyNode(object): return self.__str__() @property - def label(self): + def loc_key(self): "Name of the current IRBlock" - return self._label + return self._loc_key @property def element(self): @@ -90,9 +90,9 @@ class DependencyState(object): Store intermediate depnodes states during dependencygraph analysis """ - def __init__(self, label, pending, line_nb=None): - self.label = label - self.history = [label] + def __init__(self, loc_key, pending, line_nb=None): + self.loc_key = loc_key + self.history = [loc_key] self.pending = {k: set(v) for k, v in pending.iteritems()} self.line_nb = line_nb self.links = set() @@ -101,22 +101,22 @@ class DependencyState(object): self._graph = None def __repr__(self): - return "<State: %r (%r) (%r)>" % (self.label, + return "<State: %r (%r) (%r)>" % (self.loc_key, self.pending, self.links) - def extend(self, label): + def extend(self, loc_key): """Return a copy of itself, with itself in history - @label: AsmLabel instance for the new DependencyState's label + @loc_key: LocKey instance for the new DependencyState's loc_key """ - new_state = self.__class__(label, self.pending) + new_state = self.__class__(loc_key, self.pending) new_state.links = set(self.links) - new_state.history = self.history + [label] + new_state.history = self.history + [loc_key] return new_state def get_done_state(self): """Returns immutable object representing current state""" - return (self.label, frozenset(self.links)) + return (self.loc_key, frozenset(self.links)) def as_graph(self): """Generates a Digraph of dependencies""" @@ -157,7 +157,7 @@ class DependencyState(object): @line_nb: the element's line """ - depnode = DependencyNode(self.label, element, line_nb) + depnode = DependencyNode(self.loc_key, element, line_nb) if not self.pending[element]: # Create start node self.links.add((depnode, None)) @@ -175,14 +175,14 @@ class DependencyState(object): @future_pending: the future dependencies """ - depnode = DependencyNode(self.label, element, line_nb) + depnode = DependencyNode(self.loc_key, element, line_nb) # Update pending, add link to unfollowed nodes for dependency in dependencies: if not dependency.follow: # Add non followed dependencies to the dependency graph parent = DependencyNode( - self.label, dependency.element, line_nb) + self.loc_key, dependency.element, line_nb) self.links.add((parent, depnode)) continue # Create future pending between new dependency and the current @@ -196,7 +196,7 @@ class DependencyResult(DependencyState): def __init__(self, ira, initial_state, state, inputs): self.initial_state = initial_state - self.label = state.label + self.loc_key = state.loc_key self.history = state.history self.pending = state.pending self.line_nb = state.line_nb @@ -225,17 +225,17 @@ class DependencyResult(DependencyState): return output @property - def relevant_labels(self): - """List of labels containing nodes influencing inputs. + def relevant_loc_keys(self): + """List of loc_keys containing nodes influencing inputs. The history order is preserved.""" - # Get used labels - used_labels = set(depnode.label for depnode in self.relevant_nodes) + # Get used loc_keys + used_loc_keys = set(depnode.loc_key for depnode in self.relevant_nodes) # Keep history order output = [] - for label in self.history: - if label in used_labels: - output.append(label) + for loc_key in self.history: + if loc_key in used_loc_keys: + output.append(loc_key) return output @@ -255,7 +255,7 @@ class DependencyResult(DependencyState): assignblks = [] line2elements = {} for depnode in self.relevant_nodes: - if depnode.label != irb.label: + if depnode.loc_key != irb.loc_key: continue line2elements.setdefault(depnode.line_nb, set()).add(depnode.element) @@ -266,11 +266,11 @@ class DependencyResult(DependencyState): assignmnts = {} for element in elements: if element in irb[line_nb]: - # constants, label, ... are not in destination + # constants, loc_key, ... are not in destination assignmnts[element] = irb[line_nb][element] assignblks.append(AssignBlock(assignmnts)) - return IRBlock(irb.label, assignblks) + return IRBlock(irb.loc_key, assignblks) def emul(self, ctx=None, step=False): """Symbolic execution of relevant nodes according to the history @@ -287,19 +287,20 @@ class DependencyResult(DependencyState): assignblks = [] # Build a single affectation block according to history - last_index = len(self.relevant_labels) - for index, label in enumerate(reversed(self.relevant_labels), 1): - if index == last_index and label == self.initial_state.label: + last_index = len(self.relevant_loc_keys) + for index, loc_key in enumerate(reversed(self.relevant_loc_keys), 1): + if index == last_index and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - assignblks += self.irblock_slice(self._ira.blocks[label], + assignblks += self.irblock_slice(self._ira.blocks[loc_key], line_nb).assignblks # Eval the block - temp_label = AsmLabel("Temp") + symbol_pool = AsmSymbolPool() + temp_loc = symbol_pool.getby_name_create("Temp") symb_exec = SymbolicExecutionEngine(self._ira, ctx_init) - symb_exec.eval_updt_irblock(IRBlock(temp_label, assignblks), step=step) + symb_exec.eval_updt_irblock(IRBlock(temp_loc, assignblks), step=step) # Return only inputs values (others could be wrongs) return {element: symb_exec.symbols[element] @@ -314,30 +315,31 @@ class DependencyResultImplicit(DependencyResult): # Z3 Solver instance _solver = None - unsat_expr = m2_expr.ExprAff(m2_expr.ExprInt(0, 1), - m2_expr.ExprInt(1, 1)) + unsat_expr = ExprAff(ExprInt(0, 1), ExprInt(1, 1)) def _gen_path_constraints(self, translator, expr, expected): """Generate path constraint from @expr. Handle special case with - generated labels + generated loc_keys """ out = [] - expected_is_label = expr_is_label(expected) + expected = self._ira.symbol_pool.canonize_to_exprloc(expected) + expected_is_loc_key = expected.is_loc() for consval in possible_values(expr): - if (expected_is_label and - consval.value != expected): + value = self._ira.symbol_pool.canonize_to_exprloc(consval.value) + if expected_is_loc_key and value != expected: continue - if (not expected_is_label and - expr_is_label(consval.value)): + if not expected_is_loc_key and value.is_loc_key(): continue conds = z3.And(*[translator.from_expr(cond.to_constraint()) for cond in consval.constraints]) - if expected != consval.value: - conds = z3.And(conds, - translator.from_expr( - m2_expr.ExprAff(consval.value, - expected))) + if expected != value: + conds = z3.And( + conds, + translator.from_expr( + ExprAff(value, + expected)) + ) out.append(conds) if out: @@ -360,23 +362,21 @@ class DependencyResultImplicit(DependencyResult): translator = Translator.to_language("z3") size = self._ira.IRDst.size - for hist_nb, label in enumerate(history, 1): - if hist_nb == history_size and label == self.initial_state.label: + for hist_nb, loc_key in enumerate(history, 1): + if hist_nb == history_size and loc_key == self.initial_state.loc_key: line_nb = self.initial_state.line_nb else: line_nb = None - irb = self.irblock_slice(self._ira.blocks[label], line_nb) + irb = self.irblock_slice(self._ira.blocks[loc_key], line_nb) # Emul the block and get back destination dst = symb_exec.eval_updt_irblock(irb, step=step) # Add constraint if hist_nb < history_size: - next_label = history[hist_nb] - expected = symb_exec.eval_expr(m2_expr.ExprId(next_label, - size)) - solver.add( - self._gen_path_constraints(translator, dst, expected)) + next_loc_key = history[hist_nb] + expected = symb_exec.eval_expr(ExprLoc(next_loc_key, size)) + solver.add(self._gen_path_constraints(translator, dst, expected)) # Save the solver self._solver = solver @@ -412,17 +412,17 @@ class FollowExpr(object): return '%s(%r, %r)' % (self.__class__.__name__, self.follow, self.element) @staticmethod - def to_depnodes(follow_exprs, label, line): + def to_depnodes(follow_exprs, loc_key, line): """Build a set of FollowExpr(DependencyNode) from the @follow_exprs set of FollowExpr @follow_exprs: set of FollowExpr - @label: AsmLabel instance + @loc_key: LocKey instance @line: integer """ dependencies = set() for follow_expr in follow_exprs: dependencies.add(FollowExpr(follow_expr.follow, - DependencyNode(label, + DependencyNode(loc_key, follow_expr.element, line))) return dependencies @@ -470,7 +470,7 @@ class DependencyGraph(object): self._cb_follow.append(lambda exprs: self._follow_exprs(exprs, follow_mem, follow_call)) - self._cb_follow.append(self._follow_nolabel) + self._cb_follow.append(self._follow_no_loc_key) @staticmethod def _follow_simp_expr(exprs): @@ -491,11 +491,11 @@ class DependencyGraph(object): @follow: set of nodes to follow @nofollow: set of nodes not to follow """ - if isinstance(expr, m2_expr.ExprId): + if expr.is_id(): follow.add(expr) - elif isinstance(expr, m2_expr.ExprInt): + elif expr.is_int(): nofollow.add(expr) - elif isinstance(expr, m2_expr.ExprMem): + elif expr.is_mem(): follow.add(expr) return expr @@ -508,7 +508,7 @@ class DependencyGraph(object): @follow_mem: force the visit of memory sub expressions @follow_call: force the visit of call sub expressions """ - if not follow_mem and isinstance(expr, m2_expr.ExprMem): + if not follow_mem and expr.is_mem(): nofollow.add(expr) return False if not follow_call and expr.is_function_call(): @@ -530,12 +530,13 @@ class DependencyGraph(object): return follow, nofollow @staticmethod - def _follow_nolabel(exprs): - """Do not follow labels""" + def _follow_no_loc_key(exprs): + """Do not follow loc_keys""" follow = set() for expr in exprs: - if not expr_is_int_or_label(expr): - follow.add(expr) + if expr.is_int() or expr.is_loc(): + continue + follow.add(expr) return follow, set() @@ -580,25 +581,25 @@ class DependencyGraph(object): """Follow dependencies tracked in @state in the current irbloc @state: instance of DependencyState""" - irb = self._ira.blocks[state.label] + irb = self._ira.blocks[state.loc_key] line_nb = len(irb) if state.line_nb is None else state.line_nb for cur_line_nb, assignblk in reversed(list(enumerate(irb[:line_nb]))): self._track_exprs(state, assignblk, cur_line_nb) - def get(self, label, elements, line_nb, heads): + def get(self, loc_key, elements, line_nb, heads): """Compute the dependencies of @elements at line number @line_nb in - the block named @label in the current IRA, before the execution of + the block named @loc_key in the current IRA, before the execution of this line. Dependency check stop if one of @heads is reached - @label: AsmLabel instance + @loc_key: LocKey instance @element: set of Expr instances @line_nb: int - @heads: set of AsmLabel instances + @heads: set of LocKey instances Return an iterator on DiGraph(DependencyNode) """ # Init the algorithm inputs = {element: set() for element in elements} - initial_state = DependencyState(label, inputs, line_nb) + initial_state = DependencyState(loc_key, inputs, line_nb) todo = set([initial_state]) done = set() dpResultcls = DependencyResultImplicit if self._implicit else DependencyResult @@ -611,8 +612,8 @@ class DependencyGraph(object): continue done.add(done_state) if (not state.pending or - state.label in heads or - not self._ira.graph.predecessors(state.label)): + state.loc_key in heads or + not self._ira.graph.predecessors(state.loc_key)): yield dpResultcls(self._ira, initial_state, state, elements) if not state.pending: continue @@ -622,16 +623,16 @@ class DependencyGraph(object): state.pending[self._ira.IRDst] = set() # Propagate state to parents - for pred in self._ira.graph.predecessors_iter(state.label): + for pred in self._ira.graph.predecessors_iter(state.loc_key): todo.add(state.extend(pred)) def get_from_depnodes(self, depnodes, heads): """Alias for the get() method. Use the attributes of @depnodes as argument. - PRE: Labels and lines of depnodes have to be equals + PRE: Loc_Keys and lines of depnodes have to be equals @depnodes: set of DependencyNode instances - @heads: set of AsmLabel instances + @heads: set of LocKey instances """ lead = list(depnodes)[0] elements = set(depnode.element for depnode in depnodes) - return self.get(lead.label, elements, lead.line_nb, heads) + return self.get(lead.loc_key, elements, lead.line_nb, heads) diff --git a/miasm2/analysis/dse.py b/miasm2/analysis/dse.py index 427a8bd0..0502ea42 100644 --- a/miasm2/analysis/dse.py +++ b/miasm2/analysis/dse.py @@ -56,15 +56,14 @@ except ImportError: z3 = None from miasm2.expression.expression import ExprMem, ExprInt, ExprCompose, \ - ExprAff, ExprId + ExprAff, ExprId, ExprLoc, LocKey from miasm2.core.bin_stream import bin_stream_vm -from miasm2.core.asmblock import expr_is_label from miasm2.jitter.emulatedsymbexec import EmulatedSymbExec from miasm2.expression.expression_helper import possible_values from miasm2.ir.translators import Translator from miasm2.analysis.expression_range import expr_range from miasm2.analysis.modularintervals import ModularIntervals - +from miasm2.core.asmblock import AsmBlockBad DriftInfo = namedtuple("DriftInfo", ["symbol", "computed", "expected"]) @@ -72,7 +71,7 @@ class DriftException(Exception): """Raised when the emulation drift from the reference engine""" def __init__(self, info): - super(Exception, self).__init__() + super(DriftException, self).__init__() self.info = info def __str__(self): @@ -161,11 +160,14 @@ class DSEEngine(object): self.symb_concrete = None # Concrete SymbExec for path desambiguisation self.mdis = None # DisasmEngine + self.symbol_pool = self.ir_arch.symbol_pool + def prepare(self): """Prepare the environment for attachment with a jitter""" # Disassembler self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm), - lines_wd=1) + lines_wd=1, + symbol_pool=self.symbol_pool) # Symbexec engine ## Prepare symbexec engines @@ -188,7 +190,7 @@ class DSEEngine(object): self.jitter.exec_cb = self.callback # Clean jit cache to avoid multi-line basic blocks already jitted - self.jitter.jit.lbl2jitbloc.clear() + self.jitter.jit.loc_key_to_jit_block.clear() def attach(self, emulator): """Attach the DSE to @emulator @@ -215,9 +217,9 @@ class DSEEngine(object): self.prepare() def handle(self, cur_addr): - """Handle destination + r"""Handle destination @cur_addr: Expr of the next address in concrete execution - /!\ cur_addr may be a lbl_gen + /!\ cur_addr may be a loc_key In this method, self.symb is in the "just before branching" state """ @@ -295,6 +297,9 @@ class DSEEngine(object): # Call callbacks associated to the current address cur_addr = self.jitter.pc + if isinstance(cur_addr, LocKey): + lbl = self.ir_arch.symbol_pool.loc_key_to_label(cur_addr) + cur_addr = lbl.offset if cur_addr in self.handler: self.handler[cur_addr](self) @@ -321,7 +326,8 @@ class DSEEngine(object): ## Update current state asm_block = self.mdis.dis_block(cur_addr) - self.ir_arch.add_block(asm_block) + if not isinstance(asm_block, AsmBlockBad): + self.ir_arch.add_block(asm_block) self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks) # Emulate the current instruction @@ -329,7 +335,7 @@ class DSEEngine(object): # Is the symbolic execution going (potentially) to jump on a lbl_gen? if len(self.ir_arch.blocks) == 1: - next_addr = self.symb.run_at(cur_addr) + self.symb.run_at(cur_addr) else: # Emulation could stuck in generated IR blocks # But concrete execution callback is not enough precise to obtain @@ -339,18 +345,25 @@ class DSEEngine(object): # Update the concrete execution self._update_state_from_concrete_symb(self.symb_concrete) while True: + next_addr_concrete = self.symb_concrete.run_block_at(cur_addr) self.symb.run_block_at(cur_addr) - if not(expr_is_label(next_addr_concrete) and - next_addr_concrete.name.offset is None): + if not (isinstance(next_addr_concrete, ExprLoc) and + self.ir_arch.symbol_pool.loc_key_to_offset( + next_addr_concrete + ) is None): # Not a lbl_gen, exit break + if self.symb.ir_arch.get_block(cur_addr) is None: + break + # Call handle with lbl_gen state self.handle(next_addr_concrete) cur_addr = next_addr_concrete + # At this stage, symbolic engine is one instruction after the concrete # engine @@ -428,7 +441,7 @@ class DSEEngine(object): symbexec.symbols[reg] = value def update_state_from_concrete(self, cpu=True, mem=False): - """Update the symbolic state with concrete values from the concrete + r"""Update the symbolic state with concrete values from the concrete engine @cpu: (optional) if set, update registers' value @@ -596,13 +609,19 @@ class DSEPathConstraint(DSEEngine): self.cur_solver.add(self.z3_trans.from_expr(cons)) def handle(self, cur_addr): + cur_addr = self.ir_arch.symbol_pool.canonize_to_exprloc(cur_addr) symb_pc = self.eval_expr(self.ir_arch.IRDst) possibilities = possible_values(symb_pc) cur_path_constraint = set() # path_constraint for the concrete path if len(possibilities) == 1: - assert next(iter(possibilities)).value == cur_addr + dst = next(iter(possibilities)).value + dst = self.ir_arch.symbol_pool.canonize_to_exprloc(dst) + assert dst == cur_addr else: for possibility in possibilities: + target_addr = self.ir_arch.symbol_pool.canonize_to_exprloc( + possibility.value + ) path_constraint = set() # Set of ExprAff for the possible path # Get constraint associated to the possible path @@ -642,11 +661,11 @@ class DSEPathConstraint(DSEEngine): "address 0x%x" % address) path_constraint.add(ExprAff(expr_mem, value)) - if possibility.value == cur_addr: + if target_addr == cur_addr: # Add path constraint cur_path_constraint = path_constraint - elif self.produce_solution(possibility.value): + elif self.produce_solution(target_addr): # Looking for a new solution self.cur_solver.push() for cons in path_constraint: @@ -657,8 +676,7 @@ class DSEPathConstraint(DSEEngine): result = self.cur_solver.check() if result == z3.sat: model = self.cur_solver.model() - self.handle_solution(model, possibility.value) + self.handle_solution(model, target_addr) self.cur_solver.pop() self.handle_correct_destination(cur_addr, cur_path_constraint) - diff --git a/miasm2/arch/aarch64/arch.py b/miasm2/arch/aarch64/arch.py index 94be74fd..a57e585f 100644 --- a/miasm2/arch/aarch64/arch.py +++ b/miasm2/arch/aarch64/arch.py @@ -9,7 +9,6 @@ from collections import defaultdict from miasm2.core.bin_stream import bin_stream import regs as regs_module from regs import * -from miasm2.core.asmblock import AsmLabel from miasm2.core.cpu import log as log_cpu from miasm2.expression.modint import uint32, uint64, mod_size2int from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp @@ -277,8 +276,8 @@ class aarch64_arg(m_arg): if isinstance(value.name, ExprId): fixed_size.add(value.name.size) return value.name - label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + loc_key = symbol_pool.getby_name_create(value.name) + return ExprLoc(loc_key, size_hint) if isinstance(value, AstInt): assert size_hint is not None return ExprInt(value.value, size_hint) @@ -311,44 +310,49 @@ class instruction_aarch64(instruction): super(instruction_aarch64, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos=None): + def arg2str(expr, index=None, symbol_pool=None): wb = False - if isinstance(e, m2_expr.ExprId) or isinstance(e, m2_expr.ExprInt): - return str(e) - elif isinstance(e, m2_expr.ExprOp) and e.op in shift_expr: - op_str = shift_str[shift_expr.index(e.op)] - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "slice_at": - return "%s LSL %s" % (e.args[0], e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op in extend_lst: - op_str = e.op - return "%s %s %s" % (e.args[0], op_str, e.args[1]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "postinc": - if e.args[1].arg != 0: - return "[%s], %s" % (e.args[0], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if symbol_pool is not None: + return symbol_pool.str_loc_key(expr.loc_key) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc_wb": - if e.args[1].arg != 0: - return "[%s, %s]!" % (e.args[0], e.args[1]) + return str(expr) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in shift_expr: + op_str = shift_str[shift_expr.index(expr.op)] + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "slice_at": + return "%s LSL %s" % (expr.args[0], expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op in extend_lst: + op_str = expr.op + return "%s %s %s" % (expr.args[0], op_str, expr.args[1]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "postinc": + if expr.args[1].arg != 0: + return "[%s], %s" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == "preinc": - if len(e.args) == 1: - return "[%s]" % (e.args[0]) - elif not isinstance(e.args[1], m2_expr.ExprInt) or e.args[1].arg != 0: - return "[%s, %s]" % (e.args[0], e.args[1]) + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc_wb": + if expr.args[1].arg != 0: + return "[%s, %s]!" % (expr.args[0], expr.args[1]) else: - return "[%s]" % (e.args[0]) - elif isinstance(e, m2_expr.ExprOp) and e.op == 'segm': - arg = e.args[1] + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == "preinc": + if len(expr.args) == 1: + return "[%s]" % (expr.args[0]) + elif not isinstance(expr.args[1], m2_expr.ExprInt) or expr.args[1].arg != 0: + return "[%s, %s]" % (expr.args[0], expr.args[1]) + else: + return "[%s]" % (expr.args[0]) + elif isinstance(expr, m2_expr.ExprOp) and expr.op == 'segm': + arg = expr.args[1] if isinstance(arg, m2_expr.ExprId): arg = str(arg) elif arg.op == 'LSL' and arg.args[1].arg == 0: arg = str(arg.args[0]) else: arg = "%s %s %s" % (arg.args[0], arg.op, arg.args[1]) - return '[%s, %s]' % (e.args[0], arg) + return '[%s, %s]' % (expr.args[0], arg) else: raise NotImplementedError("bad op") @@ -366,13 +370,12 @@ class instruction_aarch64(instruction): def dstflow2label(self, symbol_pool): index = self.mnemo_flow_to_dst_index(self.name) - e = self.args[index] - if not isinstance(e, m2_expr.ExprInt): + expr = self.args[index] + if not expr.is_int(): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = m2_expr.ExprId(l, e.size) - self.args[index] = s + addr = expr.arg + self.offset + loc_key = symbol_pool.getby_offset_create(addr) + self.args[index] = m2_expr.ExprLoc(loc_key, expr.size) def breakflow(self): return self.name in BRCOND + ["BR", "BLR", "RET", "ERET", "DRPS", "B", "BL"] diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index 88b0d0a7..ad582878 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -593,14 +593,14 @@ def udiv(arg1, arg2, arg3): @sbuild.parse def cbz(arg1, arg2): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if arg1 else arg2 + dst = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) if arg1 else arg2 PC = dst ir.IRDst = dst @sbuild.parse def cbnz(arg1, arg2): - dst = arg2 if arg1 else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg2 if arg1 else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -609,7 +609,7 @@ def cbnz(arg1, arg2): def tbz(arg1, arg2, arg3): bitmask = m2_expr.ExprInt(1, arg1.size) << arg2 dst = m2_expr.ExprId( - ir.get_next_label(instr), 64) if arg1 & bitmask else arg3 + ir.get_next_loc_key(instr), 64) if arg1 & bitmask else arg3 PC = dst ir.IRDst = dst @@ -618,21 +618,21 @@ def tbz(arg1, arg2, arg3): def tbnz(arg1, arg2, arg3): bitmask = m2_expr.ExprInt(1, arg1.size) << arg2 dst = arg3 if arg1 & bitmask else m2_expr.ExprId( - ir.get_next_label(instr), 64) + ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @sbuild.parse def b_ne(arg1): - dst = m2_expr.ExprId(ir.get_next_label(instr), 64) if zf else arg1 + dst = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) if zf else arg1 PC = dst ir.IRDst = dst @sbuild.parse def b_eq(arg1): - dst = arg1 if zf else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if zf else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -640,7 +640,7 @@ def b_eq(arg1): @sbuild.parse def b_ge(arg1): cond = cond2expr['GE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -648,7 +648,7 @@ def b_ge(arg1): @sbuild.parse def b_gt(arg1): cond = cond2expr['GT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -656,7 +656,7 @@ def b_gt(arg1): @sbuild.parse def b_cc(arg1): cond = cond2expr['CC'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -664,7 +664,7 @@ def b_cc(arg1): @sbuild.parse def b_cs(arg1): cond = cond2expr['CS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -672,7 +672,7 @@ def b_cs(arg1): @sbuild.parse def b_hi(arg1): cond = cond2expr['HI'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -680,7 +680,7 @@ def b_hi(arg1): @sbuild.parse def b_le(arg1): cond = cond2expr['LE'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -688,7 +688,7 @@ def b_le(arg1): @sbuild.parse def b_ls(arg1): cond = cond2expr['LS'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -696,7 +696,7 @@ def b_ls(arg1): @sbuild.parse def b_lt(arg1): cond = cond2expr['LT'] - dst = arg1 if cond else m2_expr.ExprId(ir.get_next_label(instr), 64) + dst = arg1 if cond else m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) PC = dst ir.IRDst = dst @@ -732,7 +732,7 @@ def br(arg1): def blr(arg1): PC = arg1 ir.IRDst = arg1 - LR = m2_expr.ExprId(ir.get_next_label(instr), 64) + LR = m2_expr.ExprLoc(ir.get_next_loc_key(instr), 64) @sbuild.parse def nop(): @@ -877,7 +877,7 @@ class ir_aarch64l(IntermediateRepresentation): src = self.expr_fix_regs_for_mode(src) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(irblock.label, irs) + return IRBlock(irblock.loc_key, irs) def mod_pc(self, instr, instr_ir, extra_ir): "Replace PC by the instruction's offset" @@ -908,7 +908,7 @@ class ir_aarch64l(IntermediateRepresentation): new_dsts = {dst:src for dst, src in assignblk.iteritems() if dst not in regs_to_fix} irs.append(AssignBlock(new_dsts, assignblk.instr)) - new_irblocks.append(IRBlock(irblock.label, irs)) + new_irblocks.append(IRBlock(irblock.loc_key, irs)) return instr_ir, new_irblocks diff --git a/miasm2/arch/arm/arch.py b/miasm2/arch/arm/arch.py index 35574a84..17b57ba4 100644 --- a/miasm2/arch/arm/arch.py +++ b/miasm2/arch/arm/arch.py @@ -343,62 +343,67 @@ class instruction_arm(instruction): super(instruction_arm, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(expr, index=None, symbol_pool=None): wb = False - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - if isinstance(e, ExprOp) and e.op in expr2shift_dct: - if len(e.args) == 1: - return '%s %s' % (e.args[0], expr2shift_dct[e.op]) - elif len(e.args) == 2: - return '%s %s %s' % (e.args[0], expr2shift_dct[e.op], e.args[1]) + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if symbol_pool is not None: + return symbol_pool.str_loc_key(expr.loc_key) + else: + return str(expr) + if isinstance(expr, ExprOp) and expr.op in expr2shift_dct: + if len(expr.args) == 1: + return '%s %s' % (expr.args[0], expr2shift_dct[expr.op]) + elif len(expr.args) == 2: + return '%s %s %s' % (expr.args[0], expr2shift_dct[expr.op], expr.args[1]) else: raise NotImplementedError('zarb arg2str') sb = False - if isinstance(e, ExprOp) and e.op == "sbit": + if isinstance(expr, ExprOp) and expr.op == "sbit": sb = True - e = e.args[0] - if isinstance(e, ExprOp) and e.op == "reglist": - o = [gpregs.expr.index(x) for x in e.args] + expr = expr.args[0] + if isinstance(expr, ExprOp) and expr.op == "reglist": + o = [gpregs.expr.index(x) for x in expr.args] out = reglist2str(o) if sb: out += "^" return out - if isinstance(e, ExprOp) and e.op == 'wback': + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] - if isinstance(e, ExprId): - out = str(e) + expr = expr.args[0] + if isinstance(expr, ExprId): + out = str(expr) if wb: out += "!" return out - if not isinstance(e, ExprMem): - return str(e) + if not isinstance(expr, ExprMem): + return str(expr) - e = e.arg - if isinstance(e, ExprOp) and e.op == 'wback': + expr = expr.arg + if isinstance(expr, ExprOp) and expr.op == 'wback': wb = True - e = e.args[0] + expr = expr.args[0] - if isinstance(e, ExprId): - r, s = e, None - elif len(e.args) == 1 and isinstance(e.args[0], ExprId): - r, s = e.args[0], None - elif isinstance(e.args[0], ExprId): - r, s = e.args[0], e.args[1] + if isinstance(expr, ExprId): + r, s = expr, None + elif len(expr.args) == 1 and isinstance(expr.args[0], ExprId): + r, s = expr.args[0], None + elif isinstance(expr.args[0], ExprId): + r, s = expr.args[0], expr.args[1] else: - r, s = e.args[0].args + r, s = expr.args[0].args if isinstance(s, ExprOp) and s.op in expr2shift_dct: s = ' '.join([str(x) for x in s.args[0], expr2shift_dct[s.op], s.args[1]]) - if isinstance(e, ExprOp) and e.op == 'postinc': + if isinstance(expr, ExprOp) and expr.op == 'postinc': o = '[%s]' % r if s and not (isinstance(s, ExprInt) and s.arg == 0): o += ', %s' % s @@ -418,16 +423,15 @@ class instruction_arm(instruction): return self.name in conditional_branch + unconditional_branch def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + self.offset + addr = expr.arg + self.offset else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + addr = expr.arg + self.offset + loc_key = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -492,27 +496,29 @@ class instruction_armt(instruction_arm): def dstflow2label(self, symbol_pool): if self.name in ["CBZ", "CBNZ"]: - e = self.args[1] + expr = self.args[1] else: - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == 'BLX': - ad = e.arg + (self.offset & 0xfffffffc) + addr = expr.arg + (self.offset & 0xfffffffc) elif self.name == 'BL': - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('BP'): - ad = e.arg + self.offset + addr = expr.arg + self.offset elif self.name.startswith('CB'): - ad = e.arg + self.offset + self.l + 2 + addr = expr.arg + self.offset + self.l + 2 else: - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + addr = expr.arg + self.offset + + loc_key = symbol_pool.getby_offset_create(addr) + dst = ExprLoc(loc_key, expr.size) + if self.name in ["CBZ", "CBNZ"]: - self.args[1] = s + self.args[1] = dst else: - self.args[0] = s + self.args[0] = dst def breakflow(self): if self.name in conditional_branch + unconditional_branch +["CBZ", "CBNZ", 'TBB', 'TBH']: @@ -774,8 +780,8 @@ class arm_arg(m_arg): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = symbol_pool.getby_name_create(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: diff --git a/miasm2/arch/arm/disasm.py b/miasm2/arch/arm/disasm.py index 586fa903..956a894b 100644 --- a/miasm2/arch/arm/disasm.py +++ b/miasm2/arch/arm/disasm.py @@ -24,7 +24,8 @@ def cb_arm_fix_call(mn, cur_bloc, symbol_pool, offsets_to_dis, *args, **kwargs): return if not l2.args[1] in values: return - cur_bloc.add_cst(l1.offset + 4, AsmConstraint.c_next, symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(l1.offset + 4) + cur_bloc.add_cst(loc_key_cst, AsmConstraint.c_next, symbol_pool) offsets_to_dis.add(l1.offset + 4) cb_arm_funcs = [cb_arm_fix_call] diff --git a/miasm2/arch/arm/jit.py b/miasm2/arch/arm/jit.py index 1a37b7f1..b92e2c32 100644 --- a/miasm2/arch/arm/jit.py +++ b/miasm2/arch/arm/jit.py @@ -7,6 +7,7 @@ from miasm2.arch.arm.sem import ir_armb, ir_arml, ir_armtl, ir_armtb, cond_dct_i from miasm2.jitter.codegen import CGen from miasm2.expression.expression import ExprId, ExprAff, ExprCond from miasm2.ir.ir import IRBlock, AssignBlock +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_arm') hnd = logging.StreamHandler() @@ -17,11 +18,6 @@ log.setLevel(logging.CRITICAL) class arm_CGen(CGen): - def __init__(self, ir_arch): - self.ir_arch = ir_arch - self.PC = self.ir_arch.arch.regs.PC - self.init_arch_C() - def block2assignblks(self, block): """ diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 2cf2e5a4..c80e9826 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -441,16 +441,16 @@ def sdiv(ir, instr, a, b, c=None): if c is None: b, c = a, b - lbl_div = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_except = ExprId(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - e.append(ExprAff(ir.IRDst, ExprCond(c, lbl_div, lbl_except))) + e.append(ExprAff(ir.IRDst, ExprCond(c, loc_div, loc_except))) do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(ExprAff(ir.IRDst, loc_next)) + blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) @@ -461,8 +461,8 @@ def sdiv(ir, instr, a, b, c=None): if dst is not None: do_div.append(ExprAff(ir.IRDst, r)) - do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(ExprAff(ir.IRDst, loc_next)) + blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -474,16 +474,16 @@ def udiv(ir, instr, a, b, c=None): - lbl_div = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_except = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) - e.append(ExprAff(ir.IRDst, ExprCond(c, lbl_div, lbl_except))) + e.append(ExprAff(ir.IRDst, ExprCond(c, loc_div, loc_except))) do_except = [] do_except.append(ExprAff(exception_flags, ExprInt(EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(ExprAff(ir.IRDst, loc_next)) + blk_except = IRBlock(loc_except.loc_key, [AssignBlock(do_except, instr)]) r = ExprOp("udiv", b, c) @@ -493,8 +493,8 @@ def udiv(ir, instr, a, b, c=None): if dst is not None: do_div.append(ExprAff(ir.IRDst, r)) - do_div.append(ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(ExprAff(ir.IRDst, loc_next)) + blk_div = IRBlock(loc_div.loc_key, [AssignBlock(do_div, instr)]) return e, [blk_div, blk_except] @@ -932,19 +932,20 @@ def pop(ir, instr, a): def cbz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, lbl_next, b))) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + e.append(ExprAff(ir.IRDst, ExprCond(a, loc_next_expr, b))) return e, [] def cbnz(ir, instr, a, b): e = [] - lbl_next = ExprId(ir.get_next_label(instr), 32) - e.append(ExprAff(ir.IRDst, ExprCond(a, b, lbl_next))) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + e.append(ir.IRDst, ExprCond(a, b, loc_next_expr)) return e, [] - def uxtb(ir, instr, a, b): e = [] r = b[:8].zeroExtend(32) @@ -1264,10 +1265,14 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): raise ValueError('unknown condition %r' % cond) cond = tab_cond[cond] - lbl_next = ExprId(ir.get_next_label(instr), 32) - lbl_do = ExprId(ir.gen_label(), 32) - dst_cond = ExprCond(cond, lbl_do, lbl_next) + + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 32) + loc_do = ir.symbol_pool.gen_loc_key() + loc_do_expr = ExprLoc(loc_do, 32) + + dst_cond = ExprCond(cond, loc_do_expr, loc_next_expr) assert(isinstance(instr_ir, list)) has_irdst = False @@ -1276,8 +1281,8 @@ def add_condition_expr(ir, instr, cond, instr_ir, extra_ir): has_irdst = True break if not has_irdst: - instr_ir.append(ExprAff(ir.IRDst, lbl_next)) - e_do = IRBlock(lbl_do.name, [AssignBlock(instr_ir, instr)]) + instr_ir.append(ExprAff(ir.IRDst, loc_next_expr)) + e_do = IRBlock(loc_do, [AssignBlock(instr_ir, instr)]) e = [ExprAff(ir.IRDst, dst_cond)] return e, [e_do] + extra_ir @@ -1527,7 +1532,7 @@ class ir_arml(IntermediateRepresentation): raise ValueError("IT name invalid %s" % instr) return out, instr.args[0] - def do_it_block(self, label, index, block, assignments, gen_pc_updt): + def do_it_block(self, loc, index, block, assignments, gen_pc_updt): instr = block.lines[index] it_hints, it_cond = self.parse_itt(instr) cond_num = cond_dct_inv[it_cond.name] @@ -1539,14 +1544,14 @@ class ir_arml(IntermediateRepresentation): ir_blocks_all = [] # Gen dummy irblock for IT instr - label_next = self.get_next_label(instr) - dst = ExprAff(self.IRDst, ExprId(label_next, 32)) + loc_next = self.get_next_loc_key(instr) + dst = ExprAff(self.IRDst, ExprId(loc_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(loc, assignments) ir_blocks_all.append([irblock]) - label = label_next + loc = loc_next assignments = [] for hint in it_hints: irblocks = [] @@ -1554,33 +1559,33 @@ class ir_arml(IntermediateRepresentation): instr = block.lines[index] # Add conditionnal jump to current irblock - label_do = self.symbol_pool.gen_label() - label_next = self.get_next_label(instr) + loc_do = self.symbol_pool.gen_loc_key() + loc_next = self.get_next_loc_key(instr) if hint: local_cond = ~cond_eq else: local_cond = cond_eq - dst = ExprAff(self.IRDst, ExprCond(local_cond, ExprId(label_do, 32), ExprId(label_next, 32))) + dst = ExprAff(self.IRDst, ExprCond(local_cond, ExprLoc(loc_do, 32), ExprLoc(loc_next, 32))) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(loc, assignments) irblocks.append(irblock) assignments = [] - label = label_do + loc = loc_do split = self.add_instr_to_irblock(block, instr, assignments, irblocks, gen_pc_updt) if split: raise NotImplementedError("Unsupported instr in IT block (%s)" % instr) - dst = ExprAff(self.IRDst, ExprId(label_next, 32)) + dst = ExprAff(self.IRDst, ExprId(loc_next, 32)) dst_blk = AssignBlock([dst], instr) assignments.append(dst_blk) - irblock = IRBlock(label, assignments) + irblock = IRBlock(loc, assignments) irblocks.append(irblock) - label = label_next + loc = loc_next assignments = [] ir_blocks_all.append(irblocks) return index, ir_blocks_all @@ -1594,7 +1599,8 @@ class ir_arml(IntermediateRepresentation): it_hints = None it_cond = None - label = None + label = block.loc_key + assignments = [] ir_blocks_all = [] index = -1 while index + 1 < len(block.lines): @@ -1602,7 +1608,7 @@ class ir_arml(IntermediateRepresentation): instr = block.lines[index] if label is None: assignments = [] - label = self.get_instr_label(instr) + label = self.get_loc_key_for_instr(instr) if instr.name.startswith("IT"): index, irblocks_it = self.do_it_block(label, index, block, assignments, gen_pc_updt) for irblocks in irblocks_it: @@ -1621,7 +1627,7 @@ class ir_arml(IntermediateRepresentation): new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) for irblock in new_ir_blocks_all: - self.blocks[irblock.label] = irblock + self.blocks[irblock.loc_key] = irblock return new_ir_blocks_all diff --git a/miasm2/arch/mips32/arch.py b/miasm2/arch/mips32/arch.py index 15c59cf0..939ce5b0 100644 --- a/miasm2/arch/mips32/arch.py +++ b/miasm2/arch/mips32/arch.py @@ -5,7 +5,7 @@ from collections import defaultdict from pyparsing import Literal, Group, Optional -from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp +from miasm2.expression.expression import ExprMem, ExprInt, ExprId, ExprOp, ExprLoc from miasm2.core.bin_stream import bin_stream import miasm2.arch.mips32.regs as regs import miasm2.core.cpu as cpu @@ -60,11 +60,16 @@ class instruction_mips32(cpu.instruction): @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None, symbol_pool=None): + if expr.is_id() or expr.is_int(): + return str(expr) + elif expr.is_loc(): + if symbol_pool is not None: + return symbol_pool.str_loc_key(expr.loc_key) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') @@ -90,21 +95,20 @@ class instruction_mips32(cpu.instruction): def dstflow2label(self, symbol_pool): if self.name in ["J", 'JAL']: - e = self.args[0].arg - ad = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + e - l = symbol_pool.getby_offset_create(ad) - self.args[0] = ExprId(l, e.size) + expr = self.args[0].arg + addr = (self.offset & (0xFFFFFFFF ^ ((1<< 28)-1))) + expr + label = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(label.loc_key, expr.size) return ndx = self.get_dst_num() - e = self.args[ndx] + expr = self.args[ndx] - if not isinstance(e, ExprInt): + if not isinstance(expr, ExprInt): return - ad = e.arg + self.offset - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[ndx] = s + addr = expr.arg + self.offset + loc_key = symbol_pool.getby_offset_create(addr) + self.args[ndx] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name == 'BREAK': @@ -261,8 +265,8 @@ class mips32_arg(cpu.m_arg): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = symbol_pool.getby_name_create(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -403,9 +407,9 @@ class mips32_dreg_imm(mips32_arg): return True @staticmethod - def arg2str(e): - assert(isinstance(e, ExprMem)) - arg = e.arg + def arg2str(expr, index=None): + assert(isinstance(expr, ExprMem)) + arg = expr.arg if isinstance(arg, ExprId): return "(%s)"%arg assert(len(arg.args) == 2 and arg.op == '+') diff --git a/miasm2/arch/mips32/ira.py b/miasm2/arch/mips32/ira.py index 7aefad32..b6d92ee0 100644 --- a/miasm2/arch/mips32/ira.py +++ b/miasm2/arch/mips32/ira.py @@ -4,7 +4,6 @@ from miasm2.expression.expression import ExprAff, ExprInt, ExprId from miasm2.ir.ir import IntermediateRepresentation, IRBlock, AssignBlock from miasm2.ir.analysis import ira from miasm2.arch.mips32.sem import ir_mips32l, ir_mips32b -from miasm2.core.asmblock import expr_is_int_or_label, expr_is_label class ir_a_mips32l(ir_mips32l, ira): def __init__(self, symbol_pool=None): @@ -28,14 +27,15 @@ class ir_a_mips32l(ir_mips32l, ira): if pc_val is None or lr_val is None: new_irblocks.append(irb) continue - if not expr_is_int_or_label(lr_val): - new_irblocks.append(irb) + if lr_val.is_loc(): + offset = self.symbol_pool.loc_key_to_offset(lr_val.loc_key) + if offset is not None: + lr_val = ExprInt(offset, 32) + if not lr_val.is_int(): continue - if expr_is_label(lr_val): - lr_val = ExprInt(lr_val.name.offset, 32) instr = block.lines[-2] - if lr_val.arg != instr.offset + 8: + if int(lr_val) != instr.offset + 8: raise ValueError("Wrong arg") # CALL diff --git a/miasm2/arch/mips32/jit.py b/miasm2/arch/mips32/jit.py index 16d88067..180f8b0a 100644 --- a/miasm2/arch/mips32/jit.py +++ b/miasm2/arch/mips32/jit.py @@ -57,10 +57,10 @@ class mipsCGen(CGen): self.ir_arch.pc] assignments[self.delay_slot_set] = m2_expr.ExprInt(1, 32) # Replace IRDst with next instruction - assignments[self.ir_arch.IRDst] = m2_expr.ExprId( - self.ir_arch.get_next_instr(assignblock.instr), 32) + dst_loc_key = self.ir_arch.get_next_instr(assignblock.instr) + assignments[self.ir_arch.IRDst] = m2_expr.ExprLoc(dst_loc_key, 32) irs.append(AssignBlock(assignments, assignblock.instr)) - irblocks[blk_idx] = IRBlock(irblock.label, irs) + irblocks[blk_idx] = IRBlock(irblock.loc_key, irs) return irblocks_list @@ -69,12 +69,13 @@ class mipsCGen(CGen): Generate the C code for the final block instruction """ - lbl = self.get_block_post_label(block) - out = (self.CODE_RETURN_NO_EXCEPTION % (self.label_to_jitlabel(lbl), + loc_key = self.get_block_post_label(block) + offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + out = (self.CODE_RETURN_NO_EXCEPTION % (self.loc_key_to_jitlabel(loc_key), self.C_PC, m2_expr.ExprId('branch_dst_irdst', 32), m2_expr.ExprId('branch_dst_irdst', 32), - self.id_to_c(m2_expr.ExprInt(lbl.offset, 32))) + self.id_to_c(m2_expr.ExprInt(offset, 32))) ).split('\n') return out diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index 99c81a33..fd4fa655 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -35,7 +35,7 @@ def jal(arg1): "Jumps to the calculated address @arg1 and stores the return address in $RA" PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size) @sbuild.parse def jalr(arg1, arg2): @@ -43,13 +43,13 @@ def jalr(arg1, arg2): address in another register @arg2""" PC = arg1 ir.IRDst = arg1 - arg2 = ExprId(ir.get_next_break_label(instr), 32) + arg2 = ExprLoc(ir.get_next_break_loc_key(instr), arg2.size) @sbuild.parse def bal(arg1): PC = arg1 ir.IRDst = arg1 - RA = ExprId(ir.get_next_break_label(instr), 32) + RA = ExprLoc(ir.get_next_break_loc_key(instr), RA.size) @sbuild.parse def l_b(arg1): @@ -76,7 +76,7 @@ def lb(arg1, arg2): @sbuild.parse def beq(arg1, arg2, arg3): "Branches on @arg3 if the quantities of two registers @arg1, @arg2 are eq" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1 - arg2 else arg3 + dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 - arg2 else arg3 PC = dst ir.IRDst = dst @@ -84,7 +84,7 @@ def beq(arg1, arg2, arg3): def bgez(arg1, arg2): """Branches on @arg2 if the quantities of register @arg1 is greater than or equal to zero""" - dst = ExprId(ir.get_next_break_label(instr), 32) if arg1.msb() else arg2 + dst = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1.msb() else arg2 PC = dst ir.IRDst = dst @@ -92,7 +92,7 @@ def bgez(arg1, arg2): def bne(arg1, arg2, arg3): """Branches on @arg3 if the quantities of two registers @arg1, @arg2 are NOT equal""" - dst = arg3 if arg1 - arg2 else ExprId(ir.get_next_break_label(instr), 32) + dst = arg3 if arg1 - arg2 else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst ir.IRDst = dst @@ -230,7 +230,7 @@ def seh(arg1, arg2): @sbuild.parse def bltz(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than zero""" - dst_o = arg2 if arg1.msb() else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1.msb() else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -238,7 +238,7 @@ def bltz(arg1, arg2): def blez(arg1, arg2): """Branches on @arg2 if the register @arg1 is less than or equal to zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = arg2 if cond else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if cond else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @@ -246,7 +246,7 @@ def blez(arg1, arg2): def bgtz(arg1, arg2): """Branches on @arg2 if the register @arg1 is greater than zero""" cond = (i1(1) if arg1 else i1(0)) | arg1.msb() - dst_o = ExprId(ir.get_next_break_label(instr), 32) if cond else arg2 + dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if cond else arg2 PC = dst_o ir.IRDst = dst_o @@ -346,13 +346,13 @@ def c_le_d(arg1, arg2, arg3): @sbuild.parse def bc1t(arg1, arg2): - dst_o = arg2 if arg1 else ExprId(ir.get_next_break_label(instr), 32) + dst_o = arg2 if arg1 else ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) PC = dst_o ir.IRDst = dst_o @sbuild.parse def bc1f(arg1, arg2): - dst_o = ExprId(ir.get_next_break_label(instr), 32) if arg1 else arg2 + dst_o = ExprLoc(ir.get_next_break_loc_key(instr), ir.IRDst.size) if arg1 else arg2 PC = dst_o ir.IRDst = dst_o @@ -415,22 +415,22 @@ def ehb(arg1): def teq(ir, instr, arg1, arg2): e = [] - lbl_except, lbl_except_expr = ir.gen_label_and_expr(ir.IRDst.size) - lbl_next = ir.get_next_label(instr) - lbl_next_expr = m2_expr.ExprId(lbl_next, ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next_expr)) - blk_except = IRBlock(lbl_except, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except.index, [AssignBlock(do_except, instr)]) cond = arg1 - arg2 e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(cond, lbl_next_expr, lbl_except_expr))) + m2_expr.ExprCond(cond, loc_next_expr, loc_except_expr))) return e, [blk_except] @@ -492,7 +492,7 @@ class ir_mips32l(IntermediateRepresentation): def get_next_instr(self, instr): return self.symbol_pool.getby_offset_create(instr.offset + 4) - def get_next_break_label(self, instr): + def get_next_break_loc_key(self, instr): return self.symbol_pool.getby_offset_create(instr.offset + 8) class ir_mips32b(ir_mips32l): diff --git a/miasm2/arch/msp430/arch.py b/miasm2/arch/msp430/arch.py index e4d03edb..1842f577 100644 --- a/miasm2/arch/msp430/arch.py +++ b/miasm2/arch/msp430/arch.py @@ -69,8 +69,8 @@ class msp430_arg(m_arg): index = gpregs.str.index(name) reg = gpregs.expr[index] return reg - label = symbol_pool.getby_name_create(value.name) - return ExprId(label, 16) + loc_key = symbol_pool.getby_name_create(value.name) + return ExprLoc(loc_key, 16) if isinstance(value, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in value.args] if None in args: @@ -102,40 +102,44 @@ class instruction_msp430(instruction): return self.name in ['call'] @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId): - o = str(e) - elif isinstance(e, ExprInt): - o = str(e) - elif isinstance(e, ExprOp) and e.op == "autoinc": - o = "@%s+" % str(e.args[0]) - elif isinstance(e, ExprMem): - if isinstance(e.arg, ExprId): - if pos == 0: - o = "@%s" % e.arg + def arg2str(expr, index=None, symbol_pool=None): + if isinstance(expr, ExprId): + o = str(expr) + elif isinstance(expr, ExprInt): + o = str(expr) + elif expr.is_loc(): + if symbol_pool is not None: + return symbol_pool.str_loc_key(expr.loc_key) + else: + return str(expr) + elif isinstance(expr, ExprOp) and expr.op == "autoinc": + o = "@%s+" % str(expr.args[0]) + elif isinstance(expr, ExprMem): + if isinstance(expr.arg, ExprId): + if index == 0: + o = "@%s" % expr.arg else: - o = "0x0(%s)" % e.arg - elif isinstance(e.arg, ExprInt): - o = "@%s" % e.arg - elif isinstance(e.arg, ExprOp): - o = "%s(%s)" % (e.arg.args[1], e.arg.args[0]) + o = "0x0(%s)" % expr.arg + elif isinstance(expr.arg, ExprInt): + o = "@%s" % expr.arg + elif isinstance(expr.arg, ExprOp): + o = "%s(%s)" % (expr.arg.args[1], expr.arg.args[0]) else: - raise NotImplementedError('unknown instance e = %s' % type(e)) + raise NotImplementedError('unknown instance expr = %s' % type(expr)) return o def dstflow2label(self, symbol_pool): - e = self.args[0] - if not isinstance(e, ExprInt): + expr = self.args[0] + if not isinstance(expr, ExprInt): return if self.name == "call": - ad = e.arg + addr = expr.arg else: - ad = e.arg + int(self.offset) + addr = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) - self.args[0] = s + loc_key = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index dd24abb1..877c2a70 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -238,8 +238,11 @@ def push_w(ir, instr, a): def call(ir, instr, a): e, a, dummy = mng_autoinc(a, None, 16) - n = ExprId(ir.get_next_label(instr), 16) - e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), n)) + + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) + + e.append(ExprAff(ExprMem(SP - ExprInt(2, 16), 16), loc_next_expr)) e.append(ExprAff(SP, SP - ExprInt(2, 16))) e.append(ExprAff(PC, a)) e.append(ExprAff(ir.IRDst, a)) @@ -272,50 +275,56 @@ def cmp_b(ir, instr, a, b): def jz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, a, n))) + e.append(ExprAff(PC, ExprCond(zf, a, loc_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, a, loc_next_expr))) return e, [] def jnz(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(zf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(zf, n, a))) + e.append(ExprAff(PC, ExprCond(zf, loc_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(zf, loc_next_expr, a))) return e, [] def jl(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, n))) + e.append(ExprAff(PC, ExprCond(nf ^ of, a, loc_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, a, loc_next_expr))) return e, [] def jc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, a, n))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, a, n))) + e.append(ExprAff(PC, ExprCond(cf, a, loc_next_expr))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, a, loc_next_expr))) return e, [] def jnc(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(cf, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(cf, n, a))) + e.append(ExprAff(PC, ExprCond(cf, loc_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(cf, loc_next_expr, a))) return e, [] def jge(ir, instr, a): - n = ExprId(ir.get_next_label(instr), 16) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = ExprLoc(loc_next, 16) e = [] - e.append(ExprAff(PC, ExprCond(nf ^ of, n, a))) - e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, n, a))) + e.append(ExprAff(PC, ExprCond(nf ^ of, loc_next_expr, a))) + e.append(ExprAff(ir.IRDst, ExprCond(nf ^ of, loc_next_expr, a))) return e, [] diff --git a/miasm2/arch/ppc/arch.py b/miasm2/arch/ppc/arch.py index 945824a0..5336ea21 100644 --- a/miasm2/arch/ppc/arch.py +++ b/miasm2/arch/ppc/arch.py @@ -5,7 +5,6 @@ from miasm2.expression.expression import * from miasm2.core.cpu import * from collections import defaultdict from miasm2.core.bin_stream import bin_stream -from miasm2.core.asmblock import asm_label import miasm2.arch.ppc.regs as regs_module from miasm2.arch.ppc.regs import * from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp @@ -41,8 +40,8 @@ class ppc_arg(m_arg): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = symbol_pool.getby_name_create(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -74,7 +73,7 @@ class instruction_ppc(instruction): super(instruction_ppc, self).__init__(*args, **kargs) @staticmethod - def arg2str(e, pos = None): + def arg2str(e, pos = None, symbol_pool=None): if isinstance(e, ExprId) or isinstance(e, ExprInt): return str(e) elif isinstance(e, ExprMem): @@ -132,8 +131,8 @@ class instruction_ppc(instruction): ad = e.arg + self.offset else: ad = e.arg - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, e.size) + loc_key = symbol_pool.getby_offset_create(ad) + s = ExprLoc(loc_key, e.size) self.args[address_index] = s def breakflow(self): diff --git a/miasm2/arch/ppc/sem.py b/miasm2/arch/ppc/sem.py index 741ae24b..8ddb43ef 100644 --- a/miasm2/arch/ppc/sem.py +++ b/miasm2/arch/ppc/sem.py @@ -606,21 +606,21 @@ def mn_do_store(ir, instr, arg1, arg2, arg3=None): ret.append(ExprAff(arg2, address)) if is_stwcx: - lbl_do = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_dont = ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_do = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_dont = ExprLoc(ir.symbol_pool.gen_loc_key(), ir.IRDst.size) + loc_next = ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) flags = [ ExprAff(CR0_LT, ExprInt(0,1)), ExprAff(CR0_GT, ExprInt(0,1)), ExprAff(CR0_SO, XER_SO)] ret += flags ret.append(ExprAff(CR0_EQ, ExprInt(1,1))) - ret.append(ExprAff(ir.IRDst, lbl_next)) + ret.append(ExprAff(ir.IRDst, loc_next)) dont = flags + [ ExprAff(CR0_EQ, ExprInt(0,1)), - ExprAff(ir.IRDst, lbl_next) ] - additional_ir = [ IRBlock(lbl_do.name, [ AssignBlock(ret) ]), - IRBlock(lbl_dont.name, [ AssignBlock(dont) ]) ] + ExprAff(ir.IRDst, loc_next) ] + additional_ir = [ IRBlock(loc_do, [ AssignBlock(ret) ]), + IRBlock(loc_dont, [ AssignBlock(dont) ]) ] ret = [ ExprAff(reserve, ExprInt(0, 1)), - ExprAff(ir.IRDst, ExprCond(reserve, lbl_do, lbl_dont)) ] + ExprAff(ir.IRDst, ExprCond(reserve, loc_do, loc_dont)) ] return ret, additional_ir @@ -690,7 +690,8 @@ def mn_b(ir, instr, arg1, arg2 = None): def mn_bl(ir, instr, arg1, arg2 = None): if arg2 is not None: arg1 = arg2 - return [ ExprAff(LR, ExprId(ir.get_next_instr(instr), 32)), + dst = ir.get_next_instr(instr) + return [ ExprAff(LR, ExprLoc(dst, 32)), ExprAff(PC, arg1), ExprAff(ir.IRDst, arg1) ], [] @@ -726,13 +727,15 @@ def mn_do_cond_branch(ir, instr, dest): condition = condition & cond_cond else: condition = cond_cond + dst = ir.get_next_instr(instr) dest_expr = ExprCond(condition, dest, - ExprId(ir.get_next_instr(instr), 32)) + ExprLoc(dst, 32)) else: dest_expr = dest if instr.name[-1] == 'L' or instr.name[-2:-1] == 'LA': - ret.append(ExprAff(LR, ExprId(ir.get_next_instr(instr), 32))) + dst = ir.get_next_instr(instr) + ret.append(ExprAff(LR, ExprLoc(dst, 32))) ret.append(ExprAff(PC, dest_expr)) ret.append(ExprAff(ir.IRDst, dest_expr)) @@ -916,6 +919,6 @@ class ir_ppc32b(IntermediateRepresentation): l = self.symbol_pool.getby_offset_create(instr.offset + 4) return l - def get_next_break_label(self, instr): + def get_next_break_loc_key(self, instr): l = self.symbol_pool.getby_offset_create(instr.offset + 4) return l diff --git a/miasm2/arch/sh4/arch.py b/miasm2/arch/sh4/arch.py index 14f46265..477edeaf 100644 --- a/miasm2/arch/sh4/arch.py +++ b/miasm2/arch/sh4/arch.py @@ -102,8 +102,8 @@ class sh4_arg(m_arg): return arg.name if arg.name in gpregs.str: return None - label = symbol_pool.getby_name_create(arg.name) - return ExprId(label, 32) + loc_key = symbol_pool.getby_name_create(arg.name) + return ExprLoc(loc_key, 32) if isinstance(arg, AstOp): args = [self.asm_ast_to_expr(tmp, symbol_pool) for tmp in arg.args] if None in args: @@ -406,24 +406,29 @@ class instruction_sh4(instruction): return self.name.startswith('J') @staticmethod - def arg2str(e, pos = None): - if isinstance(e, ExprId) or isinstance(e, ExprInt): - return str(e) - assert(isinstance(e, ExprMem)) - e = e.arg - - if isinstance(e, ExprOp): - if e.op == "predec": - s = '-%s' % e.args[0] - elif e.op == "postinc": - s = '%s+' % e.args[0] + def arg2str(expr, index=None, symbol_pool=None): + if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + return str(expr) + elif expr.is_loc(): + if symbol_pool is not None: + return symbol_pool.str_loc_key(expr.loc_key) + else: + return str(expr) + assert(isinstance(expr, ExprMem)) + expr = expr.arg + + if isinstance(expr, ExprOp): + if expr.op == "predec": + s = '-%s' % expr.args[0] + elif expr.op == "postinc": + s = '%s+' % expr.args[0] else: s = ','.join([str(x).replace('(', '').replace(')', '') - for x in e.args]) + for x in expr.args]) s = "(%s)"%s s = "@%s" % s - elif isinstance(e, ExprId): - s = "@%s" % e + elif isinstance(expr, ExprId): + s = "@%s" % expr else: raise NotImplementedError('zarb arg2str') return s diff --git a/miasm2/arch/x86/arch.py b/miasm2/arch/x86/arch.py index fc3a5882..2be64c0e 100644 --- a/miasm2/arch/x86/arch.py +++ b/miasm2/arch/x86/arch.py @@ -7,7 +7,6 @@ from miasm2.core.cpu import * from collections import defaultdict import miasm2.arch.x86.regs as regs_module from miasm2.arch.x86.regs import * -from miasm2.core.asmblock import AsmLabel from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp @@ -273,8 +272,8 @@ class x86_arg(m_arg): if value.name in ["FAR"]: return None - label = symbol_pool.getby_name_create(value.name) - return ExprId(label, size_hint) + loc_key = symbol_pool.getby_name_create(value.name) + return ExprLoc(loc_key, size_hint) if isinstance(value, AstOp): # First pass to retreive fixed_size if value.op == "segm": @@ -474,16 +473,11 @@ class instruction_x86(instruction): if self.additional_info.g1.value & 6 and self.name in repeat_mn: return expr = self.args[0] - if isinstance(expr, ExprId): - if not isinstance(expr.name, AsmLabel) and expr not in all_regs_ids: - raise ValueError("ExprId must be a label or a register") - elif isinstance(expr, ExprInt): - ad = expr.arg + int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, expr.size) - self.args[0] = s - else: + if not expr.is_int(): return + addr = expr.arg + int(self.offset) + loc_key = symbol_pool.getby_offset_create(addr) + self.args[0] = ExprLoc(loc_key, expr.size) def breakflow(self): if self.name in conditional_branch + unconditional_branch: @@ -519,10 +513,9 @@ class instruction_x86(instruction): def getdstflow(self, symbol_pool): if self.additional_info.g1.value & 6 and self.name in repeat_mn: - ad = int(self.offset) - l = symbol_pool.getby_offset_create(ad) - s = ExprId(l, self.v_opmode()) - return [s] + addr = int(self.offset) + loc_key = symbol_pool.getby_offset_create(addr) + return [ExprLoc(loc_key, self.v_opmode())] return [self.args[0]] def get_symbol_size(self, symbol, symbol_pool): @@ -566,9 +559,14 @@ class instruction_x86(instruction): return args @staticmethod - def arg2str(expr, pos=None): - if isinstance(expr, ExprId) or isinstance(expr, ExprInt): + def arg2str(expr, index=None, symbol_pool=None): + if expr.is_id() or expr.is_int(): o = str(expr) + elif expr.is_loc(): + if symbol_pool is not None: + o = symbol_pool.str_loc_key(expr.loc_key) + else: + o = str(expr) elif ((isinstance(expr, ExprOp) and expr.op == 'far' and isinstance(expr.args[0], ExprMem)) or isinstance(expr, ExprMem)): diff --git a/miasm2/arch/x86/jit.py b/miasm2/arch/x86/jit.py index 50501060..a12a66f5 100644 --- a/miasm2/arch/x86/jit.py +++ b/miasm2/arch/x86/jit.py @@ -5,6 +5,7 @@ from miasm2.core import asmblock from miasm2.core.utils import pck16, pck32, pck64, upck16, upck32, upck64 from miasm2.arch.x86.sem import ir_x86_16, ir_x86_32, ir_x86_64 from miasm2.jitter.codegen import CGen +from miasm2.ir.translators.C import TranslatorC log = logging.getLogger('jit_x86') hnd = logging.StreamHandler() @@ -17,6 +18,7 @@ class x86_32_CGen(CGen): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.arch.regs.RIP + self.translator = TranslatorC(self.ir_arch.symbol_pool) self.init_arch_C() def gen_post_code(self, attrib): diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 276b796f..d53677be 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -240,11 +240,13 @@ def gen_jcc(ir, instr, cond, dst, jmp_if): e = [] meip = mRIP[ir.IRDst.size] - next_lbl = m2_expr.ExprId(ir.get_next_label(instr), dst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, dst.size) + if jmp_if: - dstA, dstB = dst, next_lbl + dstA, dstB = dst, loc_next_expr else: - dstA, dstB = next_lbl, dst + dstA, dstB = loc_next_expr, dst mn_dst = m2_expr.ExprCond(cond, dstA.zeroExtend(ir.IRDst.size), dstB.zeroExtend(ir.IRDst.size)) @@ -260,17 +262,18 @@ def gen_fcmov(ir, instr, cond, arg1, arg2, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = loc_do_expr, loc_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = loc_skip_expr, loc_do_expr e = [] e_do, extra_irs = [m2_expr.ExprAff(arg1, arg2)], [] - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] def gen_cmov(ir, instr, cond, dst, src, mov_if): @@ -280,17 +283,18 @@ def gen_cmov(ir, instr, cond, dst, src, mov_if): @cond: condition @mov_if: invert condition if False""" - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) if mov_if: - dstA, dstB = lbl_do, lbl_skip + dstA, dstB = loc_do_expr, loc_skip_expr else: - dstA, dstB = lbl_skip, lbl_do + dstA, dstB = loc_skip_expr, loc_do_expr e = [] e_do, extra_irs = mov(ir, instr, dst, src) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(cond, dstA, dstB))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] def mov(_, instr, dst, src): @@ -504,12 +508,14 @@ def _rotate_tpl(ir, instr, dst, src, op, left=False): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) + return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) def l_rol(ir, instr, dst, src): @@ -551,12 +557,14 @@ def rotate_with_carry_tpl(ir, instr, op, dst, src): else: return ([], []) e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) e.append(m2_expr.ExprAff( - ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, lbl_skip))) - return (e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])]) + ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, loc_skip_expr))) + return (e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])]) def rcl(ir, instr, dst, src): return rotate_with_carry_tpl(ir, instr, '<<<', dst, src) @@ -638,12 +646,13 @@ def _shift_tpl(op, ir, instr, a, b, c=None, op_inv=None, left=False, return [], [] e = [] - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_skip = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_skip)) - e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, lbl_do, - lbl_skip))) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)])] + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_skip = ir.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, ir.IRDst.size) + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_skip_expr)) + e.append(m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(shifter, loc_do_expr, + loc_skip_expr))) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)])] def sar(ir, instr, dst, src): @@ -973,9 +982,9 @@ def bswap(_, instr, dst): def cmps(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) src1 = mRSI[instr.mode][:instr.v_admode()] src2 = mRDI[instr.mode][:instr.v_admode()] @@ -999,24 +1008,24 @@ def cmps(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src1, src1 + offset)) e0.append(m2_expr.ExprAff(src2, src2 + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src1, src1 - offset)) e1.append(m2_expr.ExprAff(src2, src2 - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] def scas(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) src = mRDI[instr.mode][:instr.v_admode()] @@ -1036,16 +1045,16 @@ def scas(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] @@ -1185,7 +1194,7 @@ def call(ir, instr, dst): meip = mRIP[ir.IRDst.size] opmode, admode = s, instr.v_admode() myesp = mRSP[instr.mode][:opmode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) if isinstance(dst, m2_expr.ExprOp): if dst.op == "segm": @@ -1229,8 +1238,6 @@ def call(ir, instr, dst): e.append(m2_expr.ExprAff(ir.ExprMem(c, size=s), n)) e.append(m2_expr.ExprAff(meip, dst.zeroExtend(ir.IRDst.size))) e.append(m2_expr.ExprAff(ir.IRDst, dst.zeroExtend(ir.IRDst.size))) - # if not expr_is_int_or_label(dst): - # dst = meip return e, [] @@ -1432,7 +1439,7 @@ def loop(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) c = myecx - m2_expr.ExprInt(1, myecx.size) dst_o = m2_expr.ExprCond(c, dst.zeroExtend(ir.IRDst.size), @@ -1449,7 +1456,7 @@ def loopne(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), @@ -1471,7 +1478,7 @@ def loope(ir, instr, dst): admode = instr.v_admode() myecx = mRCX[instr.mode][:admode] - n = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + n = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) c = m2_expr.ExprCond(myecx - m2_expr.ExprInt(1, size=myecx.size), m2_expr.ExprInt(1, 1), m2_expr.ExprInt(0, 1)) @@ -1508,24 +1515,25 @@ def div(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, loc_div_expr, loc_except_expr))) return e, [blk_div, blk_except] @@ -1554,24 +1562,25 @@ def idiv(ir, instr, src1): e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) - lbl_div = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_except = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_div, loc_div_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_except, loc_except_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) do_div = [] do_div += e - do_div.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_div = IRBlock(lbl_div.name, [AssignBlock(do_div, instr)]) + do_div.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_div = IRBlock(loc_div, [AssignBlock(do_div, instr)]) do_except = [] do_except.append(m2_expr.ExprAff(exception_flags, m2_expr.ExprInt( EXCEPT_DIV_BY_ZERO, exception_flags.size))) - do_except.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - blk_except = IRBlock(lbl_except.name, [AssignBlock(do_except, instr)]) + do_except.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + blk_except = IRBlock(loc_except, [AssignBlock(do_except, instr)]) e = [] e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(src1, lbl_div, lbl_except))) + m2_expr.ExprCond(src1, loc_div_expr, loc_except_expr))) return e, [blk_div, blk_except] @@ -1713,9 +1722,9 @@ def cqo(_, instr): def stos(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) addr_o = mRDI[instr.mode][:instr.v_admode()] addr = addr_o @@ -1732,25 +1741,25 @@ def stos(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e = [] e.append(m2_expr.ExprAff(ir.ExprMem(addr, size), b)) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] def lods(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) e = [] addr_o = mRSI[instr.mode][:instr.v_admode()] @@ -1768,13 +1777,13 @@ def lods(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(addr_o, addr_p)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(addr_o, addr_m)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e = [] if instr.mode == 64 and b.size == 32: @@ -1784,14 +1793,14 @@ def lods(ir, instr, size): e.append(m2_expr.ExprAff(b, ir.ExprMem(addr, size))) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] def movs(ir, instr, size): - lbl_df_0 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_df_1 = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_df_0, loc_df_0_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_df_1, loc_df_1_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next_expr = m2_expr.ExprLoc(ir.get_next_loc_key(instr), ir.IRDst.size) dst = mRDI[instr.mode][:instr.v_admode()] src = mRSI[instr.mode][:instr.v_admode()] @@ -1815,17 +1824,17 @@ def movs(ir, instr, size): e0 = [] e0.append(m2_expr.ExprAff(src, src + offset)) e0.append(m2_expr.ExprAff(dst, dst + offset)) - e0.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e0 = IRBlock(lbl_df_0.name, [AssignBlock(e0, instr)]) + e0.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e0 = IRBlock(loc_df_0, [AssignBlock(e0, instr)]) e1 = [] e1.append(m2_expr.ExprAff(src, src - offset)) e1.append(m2_expr.ExprAff(dst, dst - offset)) - e1.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - e1 = IRBlock(lbl_df_1.name, [AssignBlock(e1, instr)]) + e1.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + e1 = IRBlock(loc_df_1, [AssignBlock(e1, instr)]) e.append(m2_expr.ExprAff(ir.IRDst, - m2_expr.ExprCond(df, lbl_df_1, lbl_df_0))) + m2_expr.ExprCond(df, loc_df_1_expr, loc_df_0_expr))) return e, [e0, e1] @@ -2876,14 +2885,15 @@ def bsr_bsf(ir, instr, dst, src, op_func): ZF = 0 DEST = @op_func(SRC) """ - lbl_src_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_src_not_null = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_src_null, loc_src_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_src_not_null, loc_src_not_null_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) - aff_dst = m2_expr.ExprAff(ir.IRDst, lbl_next) + aff_dst = m2_expr.ExprAff(ir.IRDst, loc_next_expr) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(src, - lbl_src_not_null, - lbl_src_null))] + loc_src_not_null_expr, + loc_src_null_expr))] e_src_null = [] e_src_null.append(m2_expr.ExprAff(zf, m2_expr.ExprInt(1, zf.size))) # XXX destination is undefined @@ -2894,8 +2904,8 @@ def bsr_bsf(ir, instr, dst, src, op_func): e_src_not_null.append(m2_expr.ExprAff(dst, op_func(src))) e_src_not_null.append(aff_dst) - return e, [IRBlock(lbl_src_null.name, [AssignBlock(e_src_null, instr)]), - IRBlock(lbl_src_not_null.name, [AssignBlock(e_src_not_null, instr)])] + return e, [IRBlock(loc_src_null, [AssignBlock(e_src_null, instr)]), + IRBlock(loc_src_not_null, [AssignBlock(e_src_not_null, instr)])] def bsf(ir, instr, dst, src): @@ -3925,9 +3935,10 @@ def pshufd(_, instr, dst, src, imm): def ps_rl_ll(ir, instr, dst, src, op, size): - lbl_zero = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_do = m2_expr.ExprId(ir.gen_label(), ir.IRDst.size) - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_zero, loc_zero_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_do, loc_do_expr = ir.gen_loc_key_and_expr(ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) if src.size == 8: count = src.zeroExtend(dst.size) @@ -3940,8 +3951,8 @@ def ps_rl_ll(ir, instr, dst, src, op, size): test = expr_simp(count & m2_expr.ExprInt( ((1 << dst.size) - 1) ^ mask, dst.size)) e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(test, - lbl_zero, - lbl_do))] + loc_zero_expr, + loc_do_expr))] slices = [] for i in xrange(0, dst.size, size): @@ -3954,12 +3965,12 @@ def ps_rl_ll(ir, instr, dst, src, op, size): return [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size))], [] e_zero = [m2_expr.ExprAff(dst, m2_expr.ExprInt(0, dst.size)), - m2_expr.ExprAff(ir.IRDst, lbl_next)] + m2_expr.ExprAff(ir.IRDst, loc_next_expr)] e_do = [] e.append(m2_expr.ExprAff(dst[0:dst.size], m2_expr.ExprCompose(*slices))) - e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) - return e, [IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]), - IRBlock(lbl_zero.name, [AssignBlock(e_zero, instr)])] + e_do.append(m2_expr.ExprAff(ir.IRDst, loc_next_expr)) + return e, [IRBlock(loc_do, [AssignBlock(e_do, instr)]), + IRBlock(loc_zero, [AssignBlock(e_zero, instr)])] def psrlw(ir, instr, dst, src): @@ -4463,7 +4474,8 @@ paddsw = vec_vertical_instr('+', 16, _saturation_add_signed) # Others SSE operations def maskmovq(ir, instr, src, mask): - lbl_next = m2_expr.ExprId(ir.get_next_label(instr), ir.IRDst.size) + loc_next = ir.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, ir.IRDst.size) blks = [] # For each possibility, check if a write is necessary @@ -4477,32 +4489,32 @@ def maskmovq(ir, instr, src, mask): for i, start in enumerate(xrange(0, mask.size, 8)): bit = mask[start + 7: start + 8] cur_label = check_labels[i] - next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr write_label = write_labels[i] check = m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(bit, write_label, next_check_label)) - blks.append(IRBlock(cur_label.name, [AssignBlock([check], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([check], instr)])) # Build write blocks dst_addr = mRDI[instr.mode] for i, start in enumerate(xrange(0, mask.size, 8)): bit = mask[start + 7: start + 8] cur_label = write_labels[i] - next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else lbl_next + next_check_label = check_labels[i + 1] if (i + 1) < len(check_labels) else loc_next_expr write_addr = dst_addr + m2_expr.ExprInt(i, dst_addr.size) # @8[DI/EDI/RDI + i] = src[byte i] write_mem = m2_expr.ExprAff(m2_expr.ExprMem(write_addr, 8), src[start: start + 8]) jump = m2_expr.ExprAff(ir.IRDst, next_check_label) - blks.append(IRBlock(cur_label.name, [AssignBlock([write_mem, jump], instr)])) + blks.append(IRBlock(cur_label.name.loc_key, [AssignBlock([write_mem, jump], instr)])) # If mask is null, bypass all e = [m2_expr.ExprAff(ir.IRDst, m2_expr.ExprCond(mask, check_labels[0], - lbl_next))] + loc_next_expr))] return e, blks @@ -5145,13 +5157,15 @@ class ir_x86_16(IntermediateRepresentation): c_cond = cond_dec | (zf ^ m2_expr.ExprInt(1, 1)) # gen while - lbl_do = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_end = m2_expr.ExprId(self.gen_label(), self.IRDst.size) - lbl_skip = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - lbl_next = m2_expr.ExprId(self.get_next_label(instr), self.IRDst.size) - - fix_next_lbl = {lbl_next: lbl_end} - new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_lbl)) + loc_do, loc_do_expr = self.gen_loc_key_and_expr(self.IRDst.size) + loc_end, loc_end_expr = self.gen_loc_key_and_expr(self.IRDst.size) + loc_skip = self.get_next_loc_key(instr) + loc_skip_expr = m2_expr.ExprLoc(loc_skip, self.IRDst.size) + loc_next = self.get_next_loc_key(instr) + loc_next_expr = m2_expr.ExprLoc(loc_next, self.IRDst.size) + + fix_next_loc = {loc_next_expr: loc_end_expr} + new_extra_ir = [irblock.modify_exprs(mod_src=lambda expr: expr.replace_expr(fix_next_loc)) for irblock in extra_ir] cond_bloc = [] @@ -5159,14 +5173,14 @@ class ir_x86_16(IntermediateRepresentation): c_reg - m2_expr.ExprInt(1, c_reg.size))) cond_bloc.append(m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_cond, - lbl_skip, - lbl_do))) - cond_bloc = IRBlock(lbl_end.name, [AssignBlock(cond_bloc, instr)]) + loc_skip_expr, + loc_do_expr))) + cond_bloc = IRBlock(loc_end, [AssignBlock(cond_bloc, instr)]) e_do = instr_ir - c = IRBlock(lbl_do.name, [AssignBlock(e_do, instr)]) - e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, lbl_do, - lbl_skip))] + c = IRBlock(loc_do, [AssignBlock(e_do, instr)]) + e_n = [m2_expr.ExprAff(self.IRDst, m2_expr.ExprCond(c_reg, loc_do_expr, + loc_skip_expr))] return e_n, [cond_bloc, c] + new_extra_ir def expr_fix_regs_for_mode(self, e, mode=64): @@ -5195,7 +5209,7 @@ class ir_x86_16(IntermediateRepresentation): src = self.expr_fix_regs_for_mode(src, mode) new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(irblock.label, irs) + return IRBlock(irblock.loc_key, irs) class ir_x86_32(ir_x86_16): diff --git a/miasm2/core/asmblock.py b/miasm2/core/asmblock.py index f763d85f..1f54a7e4 100644 --- a/miasm2/core/asmblock.py +++ b/miasm2/core/asmblock.py @@ -1,11 +1,12 @@ #-*- coding:utf-8 -*- import logging -import inspect import warnings from collections import namedtuple -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprLoc, \ + get_expr_locs +from miasm2.expression.expression import LocKey from miasm2.expression.simplifications import expr_simp from miasm2.expression.modint import moduint, modint from miasm2.core.utils import Disasm_Exception, pck @@ -25,49 +26,6 @@ def is_int(a): isinstance(a, moduint) or isinstance(a, modint) -def expr_is_label(e): - return isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel) - - -def expr_is_int_or_label(e): - return isinstance(e, m2_expr.ExprInt) or \ - (isinstance(e, m2_expr.ExprId) and isinstance(e.name, AsmLabel)) - - -class AsmLabel(object): - - "Stand for an assembly label" - - def __init__(self, name="", offset=None): - self.fixedblocs = False - if is_int(name): - name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) - self.name = name - self.attrib = None - if offset is None: - self.offset = None - else: - self.offset = int(offset) - - def __str__(self): - if isinstance(self.offset, (int, long)): - return "%s:0x%08x" % (self.name, self.offset) - else: - return "%s:%s" % (self.name, str(self.offset)) - - def __repr__(self): - rep = '<%s ' % self.__class__.__name__ - if self.name: - rep += repr(self.name) + ' ' - rep += '>' - return rep - - -class asm_label(AsmLabel): - - def __init__(self, name="", offset=None): - warnings.warn('DEPRECATION WARNING: use "AsmLabel" instead of "asm_label"') - super(asm_label, self).__init__(name, offset) class AsmRaw(object): @@ -77,6 +35,9 @@ class AsmRaw(object): def __str__(self): return repr(self.raw) + def to_string(self, symbol_pool): + return str(self) + class asm_raw(AsmRaw): @@ -89,63 +50,86 @@ class AsmConstraint(object): c_to = "c_to" c_next = "c_next" - def __init__(self, label, c_t=c_to): + def __init__(self, loc_key, c_t=c_to): # Sanity check - assert isinstance(label, AsmLabel) + assert isinstance(loc_key, LocKey) - self.label = label + self.loc_key = loc_key self.c_t = c_t + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + def set_label(self, loc_key): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + self.loc_key = loc_key + + label = property(get_label, set_label) + def __str__(self): - return "%s:%s" % (str(self.c_t), str(self.label)) + return "%s:%s" % (str(self.c_t), str(self.loc_key)) class asm_constraint(AsmConstraint): - def __init__(self, label, c_t=AsmConstraint.c_to): + def __init__(self, loc_key, c_t=AsmConstraint.c_to): warnings.warn('DEPRECATION WARNING: use "AsmConstraint" instead of "asm_constraint"') - super(asm_constraint, self).__init__(label, c_t) + super(asm_constraint, self).__init__(loc_key, c_t) class AsmConstraintNext(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): super(AsmConstraintNext, self).__init__( - label, c_t=AsmConstraint.c_next) + loc_key, + c_t=AsmConstraint.c_next + ) class asm_constraint_next(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): warnings.warn('DEPRECATION WARNING: use "AsmConstraintNext" instead of "asm_constraint_next"') - super(asm_constraint_next, self).__init__(label) + super(asm_constraint_next, self).__init__(loc_key) class AsmConstraintTo(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): super(AsmConstraintTo, self).__init__( - label, c_t=AsmConstraint.c_to) + loc_key, + c_t=AsmConstraint.c_to + ) class asm_constraint_to(AsmConstraint): - def __init__(self, label): + def __init__(self, loc_key): warnings.warn('DEPRECATION WARNING: use "AsmConstraintTo" instead of "asm_constraint_to"') - super(asm_constraint_to, self).__init__(label) + super(asm_constraint_to, self).__init__(loc_key) class AsmBlock(object): - def __init__(self, label, alignment=1): - assert isinstance(label, AsmLabel) + def __init__(self, loc_key, alignment=1): + assert isinstance(loc_key, LocKey) + self.bto = set() self.lines = [] - self.label = label + self._loc_key = loc_key self.alignment = alignment + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + loc_key = property(lambda self:self._loc_key) + label = property(get_label) + + def __str__(self): out = [] - out.append(str(self.label)) + out.append(str(self.loc_key)) for l in self.lines: out.append(str(l)) if self.bto: @@ -166,23 +150,25 @@ class AsmBlock(object): assert isinstance(self.bto, set) self.bto.add(c) - def split(self, offset, l): + def split(self, symbol_pool, offset): + loc_key = symbol_pool.getby_offset_create(offset) log_asmblock.debug('split at %x', offset) i = -1 offsets = [x.offset for x in self.lines] - if not l.offset in offsets: + offset = symbol_pool.loc_key_to_offset(loc_key) + if offset not in offsets: log_asmblock.warning( 'cannot split bloc at %X ' % offset + 'middle instruction? default middle') offsets.sort() return None - new_bloc = AsmBlock(l) + new_bloc = AsmBlock(loc_key) i = offsets.index(offset) self.lines, new_bloc.lines = self.lines[:i], self.lines[i:] flow_mod_instr = self.get_flow_instr() log_asmblock.debug('flow mod %r', flow_mod_instr) - c = AsmConstraint(l, AsmConstraint.c_next) + c = AsmConstraint(loc_key, AsmConstraint.c_next) # move dst if flowgraph modifier was in original bloc # (usecase: split delayslot bloc) if flow_mod_instr: @@ -209,16 +195,9 @@ class AsmBlock(object): def get_offsets(self): return [x.offset for x in self.lines] - def add_cst(self, offset, c_t, symbol_pool): - if isinstance(offset, (int, long)): - l = symbol_pool.getby_offset_create(offset) - elif isinstance(offset, str): - l = symbol_pool.getby_name_create(offset) - elif isinstance(offset, AsmLabel): - l = offset - else: - raise ValueError('unknown offset type %r' % offset) - c = AsmConstraint(l, c_t) + def add_cst(self, loc_key, c_t, symbol_pool): + assert isinstance(loc_key, LocKey) + c = AsmConstraint(loc_key, c_t) self.bto.add(c) def get_flow_instr(self): @@ -244,9 +223,9 @@ class AsmBlock(object): return None def get_next(self): - for x in self.bto: - if x.c_t == AsmConstraint.c_next: - return x.label + for constraint in self.bto: + if constraint.c_t == AsmConstraint.c_next: + return constraint.loc_key return None @staticmethod @@ -279,7 +258,7 @@ class AsmBlock(object): # destination -> associated constraints dests = {} for constraint in self.bto: - dests.setdefault(constraint.label, set()).add(constraint) + dests.setdefault(constraint.loc_key, set()).add(constraint) self.bto = set(self._filter_constraint(constraints) for constraints in dests.itervalues()) @@ -287,9 +266,9 @@ class AsmBlock(object): class asm_bloc(object): - def __init__(self, label, alignment=1): + def __init__(self, loc_key, alignment=1): warnings.warn('DEPRECATION WARNING: use "AsmBlock" instead of "asm_bloc"') - super(asm_bloc, self).__init__(label, alignment) + super(asm_bloc, self).__init__(loc_key, alignment) class AsmBlockBad(AsmBlock): @@ -313,19 +292,19 @@ class AsmBlockBad(AsmBlock): ERROR_IO: "IOError", } - def __init__(self, label=None, alignment=1, errno=ERROR_UNKNOWN, *args, **kwargs): + def __init__(self, loc_key=None, alignment=1, errno=ERROR_UNKNOWN, *args, **kwargs): """Instanciate an AsmBlock_bad. - @label, @alignement: same as AsmBlock.__init__ + @loc_key, @alignement: same as AsmBlock.__init__ @errno: (optional) specify a error type associated with the block """ - super(AsmBlockBad, self).__init__(label, alignment, *args, **kwargs) + super(AsmBlockBad, self).__init__(loc_key, alignment, *args, **kwargs) self._errno = errno errno = property(lambda self: self._errno) def __str__(self): error_txt = self.ERROR_TYPES.get(self._errno, self._errno) - return "\n".join([str(self.label), + return "\n".join([str(self.loc_key), "\tBad block: %s" % error_txt]) def addline(self, *args, **kwargs): @@ -340,9 +319,9 @@ class AsmBlockBad(AsmBlock): class asm_block_bad(AsmBlockBad): - def __init__(self, label=None, alignment=1, errno=-1, *args, **kwargs): + def __init__(self, loc_key=None, alignment=1, errno=-1, *args, **kwargs): warnings.warn('DEPRECATION WARNING: use "AsmBlockBad" instead of "asm_block_bad"') - super(asm_block_bad, self).__init__(label, alignment, *args, **kwargs) + super(asm_block_bad, self).__init__(loc_key, alignment, *args, **kwargs) class AsmSymbolPool(object): @@ -359,132 +338,212 @@ class AsmSymbolPool(object): """ def __init__(self): - self._labels = set() - self._name2label = {} - self._offset2label = {} - self._label_num = 0 + self._loc_keys = set() + + self._loc_key_to_offset = {} + self._loc_key_to_name = {} + + self._name_to_loc_key = {} + self._offset_to_loc_key = {} + + self._loc_key_num = 0 + + def loc_key_to_offset(self, loc_key): + """ + Return offset of @loc_key, None otherwise. + @loc_key: LocKey instance + """ + return self._loc_key_to_offset.get(loc_key) + + def loc_key_to_name(self, loc_key): + """ + Return name of @loc_key. + @loc_key: LocKey instance + """ + return self._loc_key_to_name[loc_key] - def add_label(self, name, offset=None): + def add_location(self, name, offset=None): """ - Create and add a label to the symbol_pool - @name: label's name - @offset: (optional) label's offset + Create and add a location to the symbol_pool + @name: loc_key's name (never empty). If offset is None and name is int, + generate loc_key with generic name and name as offset + @offset: (optional) loc_key's offset """ - label = AsmLabel(name, offset) + + if is_int(name): + assert offset is None or offset == name + offset = name + name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF) + if offset is not None: + offset = int(offset) + + assert name # Test for collisions - if (label.offset in self._offset2label and - label != self._offset2label[label.offset]): - raise ValueError('symbol %s has same offset as %s' % - (label, self._offset2label[label.offset])) - if (label.name in self._name2label and - label != self._name2label[label.name]): - raise ValueError('symbol %s has same name as %s' % - (label, self._name2label[label.name])) - - self._labels.add(label) - if label.offset is not None: - self._offset2label[label.offset] = label - if label.name != "": - self._name2label[label.name] = label - return label - - def remove_label(self, label): + known_loc_key = self.getby_name(name) + if known_loc_key is not None: + known_offset = self.loc_key_to_offset(known_loc_key) + if known_offset != offset: + raise ValueError( + 'symbol %s with different offset %s %s' % ( + name, offset, known_offset + ) + ) + return known_loc_key + + elif self.getby_offset(offset) is not None: + raise ValueError( + 'offset %s with different names %s' % ( + offset, + name + ) + ) + + loc_key = LocKey(self._loc_key_num) + self._loc_key_num += 1 + + self._loc_keys.add(loc_key) + + if offset is not None: + assert offset not in self._offset_to_loc_key + self._offset_to_loc_key[offset] = loc_key + self._loc_key_to_offset[loc_key] = offset + + self._name_to_loc_key[name] = loc_key + self._loc_key_to_name[loc_key] = name + return loc_key + + def remove_loc_key(self, loc_key): """ - Delete a @label + Delete a @loc_key """ - self._name2label.pop(label.name, None) - self._offset2label.pop(label.offset, None) - if label in self._labels: - self._labels.remove(label) + name = self._loc_key_to_name.pop(loc_key, None) + self._name_to_loc_key.pop(name, None) - def del_label_offset(self, label): - """Unpin the @label from its offset""" - self._offset2label.pop(label.offset, None) - label.offset = None + offset = self._loc_key_to_offset.pop(loc_key, None) + self._offset_to_loc_key.pop(offset, None) + + self._loc_keys.remove(loc_key) + + def del_loc_key_offset(self, loc_key): + """Unpin the @loc_key from its offset""" + offset = self._loc_keys_to_offset.pop(loc_key) + self._offset_to_loc_key.pop(offset, None) def getby_offset(self, offset): - """Retrieve label using its @offset""" - return self._offset2label.get(offset, None) + """ + Retrieve loc_key using its @offset, None otherwise. + @offset: int + """ + return self._offset_to_loc_key.get(offset) def getby_name(self, name): - """Retrieve label using its @name""" - return self._name2label.get(name, None) + """ + Retrieve loc_key using its @name, None otherwise. + @name: str + """ + return self._name_to_loc_key.get(name) def getby_name_create(self, name): - """Get a label from its @name, create it if it doesn't exist""" - label = self.getby_name(name) - if label is None: - label = self.add_label(name) - return label + """Get a loc_key from its @name, create it if it doesn't exist""" + loc_key = self.getby_name(name) + if loc_key is None: + loc_key = self.add_location(name) + return loc_key def getby_offset_create(self, offset): - """Get a label from its @offset, create it if it doesn't exist""" - label = self.getby_offset(offset) - if label is None: - label = self.add_label(offset, offset) - return label - - def rename_label(self, label, newname): - """Rename the @label name to @newname""" - if newname in self._name2label: + """Get a loc_key from its @offset, create it if it doesn't exist""" + loc_key = self.getby_offset(offset) + if loc_key is None: + loc_key = self.add_location(offset) + return loc_key + + def rename_location(self, loc_key, newname): + """Rename the @loc_key name to @newname""" + if newname in self._name_to_loc_key: raise ValueError('Symbol already known') - self._name2label.pop(label.name, None) - label.name = newname - self._name2label[label.name] = label + name = self._loc_key_to_name[loc_key] + assert name is not None + self._name_to_loc_key.pop(name) + self._loc_key_to_name[loc_key] = newname - def set_offset(self, label, offset): - """Pin the @label from at @offset + def set_offset(self, loc_key, offset): + """Pin the @loc_key to an @offset Note that there is a special case when the offset is a list it happens when offsets are recomputed in resolve_symbol* """ - if label is None: - raise ValueError('label should not be None') - if not label.name in self._name2label: - raise ValueError('label %s not in symbol pool' % label) - if offset is not None and offset in self._offset2label: - raise ValueError('Conflict in label %s' % label) - self._offset2label.pop(label.offset, None) - label.offset = offset - if is_int(label.offset): - self._offset2label[label.offset] = label + assert isinstance(loc_key, LocKey) + assert offset not in self._offset_to_loc_key + if loc_key not in self._loc_keys: + raise ValueError('Foreign loc_key %s' % loc_key) + + old_offset = self._loc_key_to_offset.pop(loc_key, None) + self._offset_to_loc_key.pop(old_offset, None) + + self._loc_key_to_offset[loc_key] = offset + self._offset_to_loc_key[offset] = loc_key @property - def labels(self): - """Return all labels""" - return self._labels + def loc_keys(self): + """Return all loc_keys""" + return self._loc_keys @property def items(self): - """Return all labels""" - warnings.warn('DEPRECATION WARNING: use "labels" instead of "items"') - return list(self._labels) - + """Return all loc_keys""" + warnings.warn('DEPRECATION WARNING: use "loc_keys" instead of "items"') + return list(self._loc_keys) def __str__(self): - return reduce(lambda x, y: x + str(y) + '\n', self._labels, "") + return "".join("%s\n" % loc_key for loc_key in self._loc_keys) def __getitem__(self, item): - if item in self._name2label: - return self._name2label[item] - if item in self._offset2label: - return self._offset2label[item] + warnings.warn('DEPRECATION WARNING: use "offset_to_loc_key" or "name_to_loc_key"') + if item in self._name_to_loc_key: + return self._name_to_loc_key[item] + if item in self._offset_to_loc_key: + return self._offset_to_loc_key[item] raise KeyError('unknown symbol %r' % item) def __contains__(self, item): - return item in self._name2label or item in self._offset2label + warnings.warn('DEPRECATION WARNING: use "offset_to_loc_key" or "name_to_loc_key"') + return item in self._name_to_loc_key or item in self._offset_to_loc_key def merge(self, symbol_pool): """Merge with another @symbol_pool""" - self._labels.update(symbol_pool.labels) - self._name2label.update(symbol_pool._name2label) - self._offset2label.update(symbol_pool._offset2label) + self._loc_keys.update(symbol_pool.loc_keys) + self._name_to_loc_key.update(symbol_pool._name_to_loc_key) + self._offset_to_loc_key.update(symbol_pool._offset_to_loc_key) + + def canonize_to_exprloc(self, expr): + """ + If expr is ExprInt, return ExprLoc with corresponding loc_key + Else, return expr - def gen_label(self): - """Generate a new unpinned label""" - label = self.add_label("lbl_gen_%.8X" % (self._label_num)) - self._label_num += 1 - return label + @expr: Expr instance + """ + if expr.is_int(): + loc_key = self.getby_offset_create(int(expr)) + ret = ExprLoc(loc_key, expr.size) + return ret + return expr + + def gen_loc_key(self): + """Generate a new unpinned loc_key""" + loc_key = self.add_location("lbl_gen_%.8X" % (self._loc_key_num)) + return loc_key + + def str_loc_key(self, loc_key): + name = self.loc_key_to_name(loc_key) + offset = self.loc_key_to_offset(loc_key) + if name is None: + name = str(loc_key) + if offset is not None: + offset = hex(offset) + out = name + if offset is not None: + out = "%s:%s" % (out, offset) + return out class asm_symbol_pool(AsmSymbolPool): @@ -503,7 +562,7 @@ class AsmCFG(DiGraph): Specialized the .dot export and force the relation between block to be uniq, and associated with a constraint. - Offer helpers on AsmCFG management, such as research by label, sanity + Offer helpers on AsmCFG management, such as research by loc_key, sanity checking and mnemonic size guessing. """ @@ -511,14 +570,23 @@ class AsmCFG(DiGraph): AsmCFGPending = namedtuple("AsmCFGPending", ["waiter", "constraint"]) - def __init__(self, *args, **kwargs): + def __init__(self, symbol_pool=None, *args, **kwargs): super(AsmCFG, self).__init__(*args, **kwargs) # Edges -> constraint self.edges2constraint = {} - # Expected AsmLabel -> set( (src, dst), constraint ) + # Expected LocKey -> set( (src, dst), constraint ) self._pendings = {} - # Label2block built on the fly - self._label2block = {} + # Loc_Key2block built on the fly + self._loc_key_to_block = {} + # symbol_pool + self.symbol_pool = symbol_pool + + + def copy(self): + """Copy the current graph instance""" + graph = self.__class__(self.symbol_pool) + return graph + self + # Compatibility with old list API def append(self, *args, **kwargs): @@ -530,121 +598,173 @@ class AsmCFG(DiGraph): def __getitem__(self, *args, **kwargs): raise DeprecationWarning("Order of AsmCFG elements is not reliable") + def __contains__(self, _): + """ + DEPRECATED. Use: + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") + def __iter__(self): - """Iterator on AsmBlock composing the current graph""" - return iter(self._nodes) + """ + DEPRECATED. Use: + - AsmCFG.blocks() to iter on blocks + - loc_key in AsmCFG.nodes() to test loc_key existence + """ + raise RuntimeError("DEPRECATED") def __len__(self): """Return the number of blocks in AsmCFG""" return len(self._nodes) + blocks = property(lambda x:x._loc_key_to_block.itervalues()) + # Manage graph with associated constraints def add_edge(self, src, dst, constraint): """Add an edge to the graph - @src: AsmBlock instance, source - @dst: AsmBlock instance, destination + @src: LocKey instance, source + @dst: LocKey instance, destination @constraint: constraint associated to this edge """ # Sanity check - assert (src, dst) not in self.edges2constraint + assert isinstance(src, LocKey) + assert isinstance(dst, LocKey) + known_cst = self.edges2constraint.get((src, dst), None) + if known_cst is not None: + assert known_cst == constraint + return # Add the edge to src.bto if needed - if dst.label not in [cons.label for cons in src.bto]: - src.bto.add(AsmConstraint(dst.label, constraint)) + block_src = self.loc_key_to_block(src) + if block_src: + if dst not in [cons.loc_key for cons in block_src.bto]: + block_src.bto.add(AsmConstraint(dst, constraint)) # Add edge self.edges2constraint[(src, dst)] = constraint super(AsmCFG, self).add_edge(src, dst) def add_uniq_edge(self, src, dst, constraint): - """Add an edge from @src to @dst if it doesn't already exist""" + """ + Add an edge from @src to @dst if it doesn't already exist + @src: LocKey instance, source + @dst: LocKey instance, destination + @constraint: constraint associated to this edge + """ if (src not in self._nodes_succ or dst not in self._nodes_succ[src]): self.add_edge(src, dst, constraint) def del_edge(self, src, dst): """Delete the edge @src->@dst and its associated constraint""" + src_blk = self.loc_key_to_block(src) + dst_blk = self.loc_key_to_block(dst) + assert src_blk is not None + assert dst_blk is not None # Delete from src.bto - to_remove = [cons for cons in src.bto if cons.label == dst.label] + to_remove = [cons for cons in src_blk.bto if cons.loc_key == dst] if to_remove: assert len(to_remove) == 1 - src.bto.remove(to_remove[0]) + src_blk.bto.remove(to_remove[0]) # Del edge del self.edges2constraint[(src, dst)] super(AsmCFG, self).del_edge(src, dst) - def add_node(self, block): - """Add the block @block to the current instance, if it is not already in + def del_block(self, block): + super(AsmCFG, self).del_node(block.loc_key) + del self._loc_key_to_block[block.loc_key] + + + def add_node(self, node): + assert isinstance(node, LocKey) + return super(AsmCFG, self).add_node(node) + + def add_block(self, block): + """ + Add the block @block to the current instance, if it is not already in @block: AsmBlock instance Edges will be created for @block.bto, if destinations are already in this instance. If not, they will be resolved when adding these aforementionned destinations. `self.pendings` indicates which blocks are not yet resolved. + """ - status = super(AsmCFG, self).add_node(block) + status = super(AsmCFG, self).add_node(block.loc_key) + if not status: return status # Update waiters - if block.label in self._pendings: - for bblpend in self._pendings[block.label]: - self.add_edge(bblpend.waiter, block, bblpend.constraint) - del self._pendings[block.label] + if block.loc_key in self._pendings: + for bblpend in self._pendings[block.loc_key]: + self.add_edge(bblpend.waiter.loc_key, block.loc_key, bblpend.constraint) + del self._pendings[block.loc_key] # Synchronize edges with block destinations - self._label2block[block.label] = block + self._loc_key_to_block[block.loc_key] = block + for constraint in block.bto: - dst = self._label2block.get(constraint.label, - None) + dst = self._loc_key_to_block.get(constraint.loc_key, + None) if dst is None: # Block is yet unknown, add it to pendings to_add = self.AsmCFGPending(waiter=block, constraint=constraint.c_t) - self._pendings.setdefault(constraint.label, + self._pendings.setdefault(constraint.loc_key, set()).add(to_add) else: # Block is already in known nodes - self.add_edge(block, dst, constraint.c_t) + self.add_edge(block.loc_key, dst.loc_key, constraint.c_t) return status - def del_node(self, block): - super(AsmCFG, self).del_node(block) - del self._label2block[block.label] - def merge(self, graph): """Merge with @graph, taking in account constraints""" # -> add_edge(x, y, constraint) - for node in graph._nodes: - self.add_node(node) + self._loc_key_to_block.update(graph._loc_key_to_block) + for loc_key in graph._nodes: + self.add_node(loc_key) + if loc_key in graph._loc_key_to_block: + self.add_block(graph._loc_key_to_block[loc_key]) for edge in graph._edges: # Use "_uniq_" beacause the edge can already exist due to add_node - self.add_uniq_edge(*edge, constraint=graph.edges2constraint[edge]) + self.add_edge(*edge, constraint=graph.edges2constraint[edge]) + def node2lines(self, node): - yield self.DotCellDescription(text=str(node.label.name), + if self.symbol_pool is None: + loc_key_name = str(node) + else: + loc_key_name = self.symbol_pool.str_loc_key(node) + yield self.DotCellDescription(text=loc_key_name, attr={'align': 'center', 'colspan': 2, 'bgcolor': 'grey'}) - - if isinstance(node, AsmBlockBad): - yield [self.DotCellDescription( - text=node.ERROR_TYPES.get(node._errno, - node._errno), - attr={})] + block = self._loc_key_to_block.get(node, None) + if block is None: raise StopIteration - for line in node.lines: + if isinstance(block, AsmBlockBad): + yield [ + self.DotCellDescription( + text=block.ERROR_TYPES.get(block._errno, + block._errno + ), + attr={}) + ] + raise StopIteration + for line in block.lines: if self._dot_offset: yield [self.DotCellDescription(text="%.8X" % line.offset, attr={}), - self.DotCellDescription(text=str(line), attr={})] + self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={})] else: - yield self.DotCellDescription(text=str(line), attr={}) + yield self.DotCellDescription(text=line.to_string(self.symbol_pool), attr={}) def node_attr(self, node): - if isinstance(node, AsmBlockBad): + block = self._loc_key_to_block.get(node, None) + if isinstance(block, AsmBlockBad): return {'style': 'filled', 'fillcolor': 'red'} return {} @@ -670,22 +790,19 @@ class AsmCFG(DiGraph): # Helpers @property def pendings(self): - """Dictionary of label -> set(AsmCFGPending instance) indicating - which label are missing in the current instance. - A label is missing if a block which is already in nodes has constraints + """Dictionary of loc_key -> set(AsmCFGPending instance) indicating + which loc_key are missing in the current instance. + A loc_key is missing if a block which is already in nodes has constraints with him (thanks to its .bto) and the corresponding block is not yet in nodes """ return self._pendings - def _build_label2block(self): - self._label2block = {block.label: block - for block in self._nodes} - - def label2block(self, label): - """Return the block corresponding to label @label - @label: AsmLabel instance or ExprId(AsmLabel) instance""" - return self._label2block[label] + def label2block(self, loc_key): + """Return the block corresponding to loc_key @loc_key + @loc_key: LocKey instance""" + warnings.warn('DEPRECATION WARNING: use "loc_key_to_block" instead of "label2block"') + return self.loc_key_to_block(loc_key) def rebuild_edges(self): """Consider blocks '.bto' and rebuild edges according to them, ie: @@ -696,20 +813,25 @@ class AsmCFG(DiGraph): This method should be called if a block's '.bto' in nodes have been modified without notifying this instance to resynchronize edges. """ - self._build_label2block() - for block in self._nodes: + for block in self.blocks: edges = [] # Rebuild edges from bto for constraint in block.bto: - dst = self._label2block.get(constraint.label, - None) + dst = self._loc_key_to_block.get(constraint.loc_key, + None) if dst is None: # Missing destination, add to pendings - self._pendings.setdefault(constraint.label, - set()).add(self.AsmCFGPending(block, - constraint.c_t)) + self._pendings.setdefault( + constraint.loc_key, + set() + ).add( + self.AsmCFGPending( + block, + constraint.c_t + ) + ) continue - edge = (block, dst) + edge = (block.loc_key, dst.loc_key) edges.append(edge) if edge in self._edges: # Already known edge, constraint may have changed @@ -719,43 +841,52 @@ class AsmCFG(DiGraph): self.add_edge(edge[0], edge[1], constraint.c_t) # Remove useless edges - for succ in self.successors(block): - edge = (block, succ) + for succ in self.successors(block.loc_key): + edge = (block.loc_key, succ) if edge not in edges: self.del_edge(*edge) def get_bad_blocks(self): """Iterator on AsmBlockBad elements""" # A bad asm block is always a leaf - for block in self.leaves(): + for loc_key in self.leaves(): + block = self._loc_key_to_block.get(loc_key, None) if isinstance(block, AsmBlockBad): yield block def get_bad_blocks_predecessors(self, strict=False): - """Iterator on block with an AsmBlockBad destination - @strict: (optional) if set, return block with only bad + """Iterator on loc_keys with an AsmBlockBad destination + @strict: (optional) if set, return loc_key with only bad successors """ # Avoid returning the same block done = set() for badblock in self.get_bad_blocks(): - for predecessor in self.predecessors_iter(badblock): + for predecessor in self.predecessors_iter(badblock.loc_key): if predecessor not in done: if (strict and - not all(isinstance(block, AsmBlockBad) + not all(isinstance(self._loc_key_to_block.get(block, None), AsmBlockBad) for block in self.successors_iter(predecessor))): continue yield predecessor done.add(predecessor) def getby_offset(self, offset): - """Return block containing @offset""" - for block in self: + """Return asmblock containing @offset""" + for block in self.blocks: if block.lines[0].offset <= offset < \ (block.lines[-1].offset + block.lines[-1].l): return block return None + def loc_key_to_block(self, loc_key): + """ + Return the asmblock corresponding to loc_key @loc_key, None if unknown + loc_key + @loc_key: LocKey instance + """ + return self._loc_key_to_block.get(loc_key, None) + def sanity_check(self): """Do sanity checks on blocks' constraints: * no pendings @@ -764,33 +895,37 @@ class AsmCFG(DiGraph): """ if len(self._pendings) != 0: - raise RuntimeError("Some blocks are missing: %s" % map(str, - self._pendings.keys())) + raise RuntimeError("Some blocks are missing: %s" % map( + str, + self._pendings.keys() + )) next_edges = {edge: constraint for edge, constraint in self.edges2constraint.iteritems() if constraint == AsmConstraint.c_next} - for block in self._nodes: + for loc_key in self._nodes: + if loc_key not in self._loc_key_to_block: + raise RuntimeError("Not supported yet: every node must have a corresponding AsmBlock") # No next constraint to self - if (block, block) in next_edges: + if (loc_key, loc_key) in next_edges: raise RuntimeError('Bad constraint: self in next') # No multiple next constraint to same block - pred_next = list(pblock - for (pblock, dblock) in next_edges - if dblock == block) + pred_next = list(ploc_key + for (ploc_key, dloc_key) in next_edges + if dloc_key == loc_key) if len(pred_next) > 1: raise RuntimeError("Too many next constraints for bloc %r" - "(%s)" % (block.label, - map(lambda x: x.label, pred_next))) + "(%s)" % (loc_key, + pred_next)) def guess_blocks_size(self, mnemo): """Asm and compute max block size Add a 'size' and 'max_size' attribute on each block @mnemo: metamn instance""" - for block in self._nodes: + for block in self.blocks: size = 0 for instr in block.lines: if isinstance(instr, AsmRaw): @@ -827,21 +962,23 @@ class AsmCFG(DiGraph): def apply_splitting(self, symbol_pool, dis_block_callback=None, **kwargs): """Consider @self' bto destinations and split block in @self if one of these destinations jumps in the middle of this block. - In order to work, they must be only one block in @self per label in + In order to work, they must be only one block in @self per loc_key in @symbol_pool (which is true if @self come from the same disasmEngine). - @symbol_pool: AsmSymbolPool instance associated with @self'labels + @symbol_pool: AsmSymbolPool instance associated with @self'loc_keys @dis_block_callback: (optional) if set, this callback will be called on new block destinations @kwargs: (optional) named arguments to pass to dis_block_callback """ # Get all possible destinations not yet resolved, with a resolved # offset - block_dst = [label.offset - for label in self.pendings - if label.offset is not None] + block_dst = [] + for loc_key in self.pendings: + offset = symbol_pool.loc_key_to_offset(loc_key) + if offset is not None: + block_dst.append(offset) - todo = self.nodes().copy() + todo = set(self.blocks) rebuild_needed = False while todo: @@ -854,8 +991,7 @@ class AsmCFG(DiGraph): continue # `cur_block` must be splitted at offset `off` - label = symbol_pool.getby_offset_create(off) - new_b = cur_block.split(off, label) + new_b = cur_block.split(symbol_pool, off) log_asmblock.debug("Split block %x", off) if new_b is None: log_asmblock.error("Cannot split %x!!", off) @@ -864,22 +1000,24 @@ class AsmCFG(DiGraph): # Remove pending from cur_block # Links from new_b will be generated in rebuild_edges for dst in new_b.bto: - if dst.label not in self.pendings: + if dst.loc_key not in self.pendings: continue - self.pendings[dst.label] = set(pending for pending in self.pendings[dst.label] - if pending.waiter != cur_block) + self.pendings[dst.loc_key] = set(pending for pending in self.pendings[dst.loc_key] + if pending.waiter != cur_block) # The new block destinations may need to be disassembled if dis_block_callback: - offsets_to_dis = set(constraint.label.offset - for constraint in new_b.bto) + offsets_to_dis = set( + self.symbol_pool.loc_key_to_offset(constraint.loc_key) + for constraint in new_b.bto + ) dis_block_callback(cur_bloc=new_b, offsets_to_dis=offsets_to_dis, symbol_pool=symbol_pool, **kwargs) # Update structure rebuild_needed = True - self.add_node(new_b) + self.add_block(new_b) # The new block must be considered todo.add(new_b) @@ -891,18 +1029,18 @@ class AsmCFG(DiGraph): def __str__(self): out = [] - for node in self.nodes(): - out.append(str(node)) - for nodeA, nodeB in self.edges(): - out.append("%s -> %s" % (nodeA.label, nodeB.label)) + for block in self.blocks: + out.append(str(block)) + for loc_key_a, loc_key_b in self.edges(): + out.append("%s -> %s" % (loc_key_a, loc_key_b)) return '\n'.join(out) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, hex(id(self))) # Out of _merge_blocks to be computed only once -_acceptable_block = lambda block: (not isinstance(block, AsmBlockBad) and - len(block.lines) > 0) +_acceptable_block = lambda graph, loc_key: (not isinstance(graph.loc_key_to_block(loc_key), AsmBlockBad) and + len(graph.loc_key_to_block(loc_key).lines) > 0) _parent = MatchGraphJoker(restrict_in=False, filt=_acceptable_block) _son = MatchGraphJoker(restrict_out=False, filt=_acceptable_block) _expgraph = _parent >> _son @@ -918,7 +1056,9 @@ def _merge_blocks(dg, graph): for match in _expgraph.match(graph): # Get matching blocks - block, succ = match[_parent], match[_son] + lbl_block, lbl_succ = match[_parent], match[_son] + block = graph.loc_key_to_block(lbl_block) + succ = graph.loc_key_to_block(lbl_succ) # Ignore already deleted blocks if (block in to_ignore or @@ -938,11 +1078,11 @@ def _merge_blocks(dg, graph): # Merge block block.lines += succ.lines - for nextb in graph.successors_iter(succ): - graph.add_edge(block, nextb, graph.edges2constraint[(succ, nextb)]) + for nextb in graph.successors_iter(lbl_succ): + graph.add_edge(lbl_block, nextb, graph.edges2constraint[(lbl_succ, nextb)]) - graph.del_node(succ) - to_ignore.add(succ) + graph.del_block(succ) + to_ignore.add(lbl_succ) bbl_simplifier = DiGraphSimplifier() @@ -971,26 +1111,32 @@ def conservative_asm(mnemo, instr, symbols, conservative): def fix_expr_val(expr, symbols): """Resolve an expression @expr using @symbols""" def expr_calc(e): - if isinstance(e, m2_expr.ExprId): - s = symbols._name2label[e.name] - e = m2_expr.ExprInt(s.offset, e.size) + if isinstance(e, ExprId): + # Example: + # toto: + # .dword label + loc_key = symbols.getby_name(e.name) + offset = symbols.loc_key_to_offset(loc_key) + e = ExprInt(offset, e.size) return e result = expr.visit(expr_calc) result = expr_simp(result) - if not isinstance(result, m2_expr.ExprInt): + if not isinstance(result, ExprInt): raise RuntimeError('Cannot resolve symbol %s' % expr) return result -def fix_label_offset(symbol_pool, label, offset, modified): - """Fix the @label offset to @offset. If the @offset has changed, add @label +def fix_loc_offset(symbol_pool, loc_key, offset, modified): + """ + Fix the @loc_key offset to @offset. If the @offset has changed, add @loc_key to @modified @symbol_pool: current symbol_pool """ - if label.offset == offset: + loc_offset = symbol_pool.loc_key_to_offset(loc_key) + if loc_offset == offset: return - symbol_pool.set_offset(label, offset) - modified.add(label) + symbol_pool.set_offset(loc_key, offset) + modified.add(loc_key) class BlockChain(object): @@ -1010,7 +1156,8 @@ class BlockChain(object): def _set_pinned_block_idx(self): self.pinned_block_idx = None for i, block in enumerate(self.blocks): - if is_int(block.label.offset): + loc_key = block.loc_key + if self.symbol_pool.loc_key_to_offset(loc_key) is not None: if self.pinned_block_idx is not None: raise ValueError("Multiples pinned block detected") self.pinned_block_idx = i @@ -1028,7 +1175,8 @@ class BlockChain(object): if not self.pinned: return - offset_base = self.blocks[self.pinned_block_idx].label.offset + loc = self.blocks[self.pinned_block_idx].loc_key + offset_base = self.symbol_pool.loc_key_to_offset(loc) assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0) self.offset_min = offset_base @@ -1048,40 +1196,40 @@ class BlockChain(object): self.place() return [self] - def fix_blocks(self, modified_labels): + def fix_blocks(self, modified_loc_keys): """Propagate a pinned to its blocks' neighbour - @modified_labels: store new pinned labels""" + @modified_loc_keys: store new pinned loc_keys""" if not self.pinned: raise ValueError('Trying to fix unpinned block') # Propagate offset to blocks before pinned block pinned_block = self.blocks[self.pinned_block_idx] - offset = pinned_block.label.offset + offset = self.symbol_pool.loc_key_to_offset(pinned_block.loc_key) if offset % pinned_block.alignment != 0: raise RuntimeError('Bad alignment') for block in self.blocks[:self.pinned_block_idx - 1:-1]: new_offset = offset - block.size new_offset = new_offset - new_offset % pinned_block.alignment - fix_label_offset(self.symbol_pool, - block.label, - new_offset, - modified_labels) + fix_loc_offset(self.symbol_pool, + block.loc_key, + new_offset, + modified_loc_keys) # Propagate offset to blocks after pinned block - offset = pinned_block.label.offset + pinned_block.size + offset = self.symbol_pool.loc_key_to_offset(pinned_block.loc_key) + pinned_block.size last_block = pinned_block for block in self.blocks[self.pinned_block_idx + 1:]: offset += (- offset) % last_block.alignment - fix_label_offset(self.symbol_pool, - block.label, - offset, - modified_labels) + fix_loc_offset(self.symbol_pool, + block.loc_key, + offset, + modified_loc_keys) offset += block.size last_block = block - return modified_labels + return modified_loc_keys class BlockChainWedge(object): @@ -1098,23 +1246,22 @@ class BlockChainWedge(object): def merge(self, chain): """Best effort merge two block chains Return the list of resulting blockchains""" - self.symbol_pool.set_offset(chain.blocks[0].label, self.offset_max) + self.symbol_pool.set_offset(chain.blocks[0].loc_key, self.offset_max) chain.place() return [self, chain] -def group_constrained_blocks(symbol_pool, blocks): +def group_constrained_blocks(symbol_pool, asmcfg): """ - Return the BlockChains list built from grouped asm blocks linked by + Return the BlockChains list built from grouped blocks in asmcfg linked by asm_constraint_next - @blocks: a list of asm block + @asmcfg: an AsmCfg instance """ log_asmblock.info('group_constrained_blocks') - # Group adjacent blocks - remaining_blocks = list(blocks) + # Group adjacent asmcfg + remaining_blocks = list(asmcfg.blocks) known_block_chains = {} - lbl2block = {block.label: block for block in blocks} while remaining_blocks: # Create a new block chain @@ -1123,10 +1270,10 @@ def group_constrained_blocks(symbol_pool, blocks): # Find sons in remainings blocks linked with a next constraint while True: # Get next block - next_label = block_list[-1].get_next() - if next_label is None or next_label not in lbl2block: + next_loc_key = block_list[-1].get_next() + if next_loc_key is None or asmcfg.loc_key_to_block(next_loc_key) is None: break - next_block = lbl2block[next_label] + next_block = asmcfg.loc_key_to_block(next_loc_key) # Add the block at the end of the current chain if next_block not in remaining_blocks: @@ -1135,15 +1282,15 @@ def group_constrained_blocks(symbol_pool, blocks): remaining_blocks.remove(next_block) # Check if son is in a known block group - if next_label is not None and next_label in known_block_chains: - block_list += known_block_chains[next_label] - del known_block_chains[next_label] + if next_loc_key is not None and next_loc_key in known_block_chains: + block_list += known_block_chains[next_loc_key] + del known_block_chains[next_loc_key] - known_block_chains[block_list[0].label] = block_list + known_block_chains[block_list[0].loc_key] = block_list out_block_chains = [] - for label in known_block_chains: - chain = BlockChain(symbol_pool, known_block_chains[label]) + for loc_key in known_block_chains: + chain = BlockChain(symbol_pool, known_block_chains[loc_key]) out_block_chains.append(chain) return out_block_chains @@ -1210,24 +1357,18 @@ def resolve_symbol(blockChains, symbol_pool, dst_interval=None): return [chain for chain in fixed_chains if isinstance(chain, BlockChain)] -def filter_exprid_label(exprs): - """Extract labels from list of ExprId @exprs""" - return set(expr.name for expr in exprs if isinstance(expr.name, AsmLabel)) - - -def get_block_labels(block): - """Extract labels used by @block""" +def get_block_loc_keys(block): + """Extract loc_keys used by @block""" symbols = set() for instr in block.lines: if isinstance(instr, AsmRaw): if isinstance(instr.raw, list): for expr in instr.raw: - symbols.update(m2_expr.get_expr_ids(expr)) + symbols.update(get_expr_locs(expr)) else: for arg in instr.args: - symbols.update(m2_expr.get_expr_ids(arg)) - labels = filter_exprid_label(symbols) - return labels + symbols.update(get_expr_locs(arg)) + return symbols def assemble_block(mnemo, block, symbol_pool, conservative=False): @@ -1252,7 +1393,7 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): # Assemble an instruction saved_args = list(instr.args) - instr.offset = block.label.offset + offset_i + instr.offset = symbol_pool.loc_key_to_offset(block.loc_key) + offset_i # Replace instruction's arguments by resolved ones instr.args = instr.resolve_args_with_symbols(symbol_pool) @@ -1275,19 +1416,19 @@ def assemble_block(mnemo, block, symbol_pool, conservative=False): offset_i += instr.l -def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): +def asmblock_final(mnemo, asmcfg, blockChains, symbol_pool, conservative=False): """Resolve and assemble @blockChains using @symbol_pool until fixed point is reached""" log_asmblock.debug("asmbloc_final") # Init structures - lbl2block = {block.label: block for block in blocks} - blocks_using_label = {} - for block in blocks: - labels = get_block_labels(block) - for label in labels: - blocks_using_label.setdefault(label, set()).add(block) + blocks_using_loc_key = {} + for block in asmcfg.blocks: + exprlocs = get_block_loc_keys(block) + loc_keys = set(expr.loc_key for expr in exprlocs) + for loc_key in loc_keys: + blocks_using_loc_key.setdefault(loc_key, set()).add(block) block2chain = {} for chain in blockChains: @@ -1295,25 +1436,26 @@ def asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): block2chain[block] = chain # Init worklist - blocks_to_rework = set(blocks) + blocks_to_rework = set(asmcfg.blocks) # Fix and re-assemble blocks until fixed point is reached while True: # Propagate pinned blocks into chains - modified_labels = set() + modified_loc_keys = set() for chain in blockChains: - chain.fix_blocks(modified_labels) + chain.fix_blocks(modified_loc_keys) - for label in modified_labels: + for loc_key in modified_loc_keys: # Retrive block with modified reference - if label in lbl2block: - blocks_to_rework.add(lbl2block[label]) + mod_block = asmcfg.loc_key_to_block(loc_key) + if mod_block is not None: + blocks_to_rework.add(mod_block) - # Enqueue blocks referencing a modified label - if label not in blocks_using_label: + # Enqueue blocks referencing a modified loc_key + if loc_key not in blocks_using_loc_key: continue - for block in blocks_using_label[label]: + for block in blocks_using_loc_key[loc_key]: blocks_to_rework.add(block) # No more work @@ -1332,23 +1474,26 @@ def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False): warnings.warn('DEPRECATION WARNING: use "asmblock_final" instead of "asmbloc_final"') asmblock_final(mnemo, blocks, blockChains, symbol_pool, conservative) -def asm_resolve_final(mnemo, blocks, symbol_pool, dst_interval=None): - """Resolve and assemble @blocks using @symbol_pool into interval +def asm_resolve_final(mnemo, asmcfg, symbol_pool, dst_interval=None): + """Resolve and assemble @asmcfg using @symbol_pool into interval @dst_interval""" - blocks.sanity_check() + asmcfg.sanity_check() - blocks.guess_blocks_size(mnemo) - blockChains = group_constrained_blocks(symbol_pool, blocks) + asmcfg.guess_blocks_size(mnemo) + blockChains = group_constrained_blocks(symbol_pool, asmcfg) resolved_blockChains = resolve_symbol( - blockChains, symbol_pool, dst_interval) + blockChains, + symbol_pool, + dst_interval + ) - asmblock_final(mnemo, blocks, resolved_blockChains, symbol_pool) + asmblock_final(mnemo, asmcfg, resolved_blockChains, symbol_pool) patches = {} output_interval = interval() - for block in blocks: - offset = block.label.offset + for block in asmcfg.blocks: + offset = symbol_pool.loc_key_to_offset(block.loc_key) for instr in block.lines: if not instr.data: # Empty line @@ -1451,8 +1596,8 @@ class disasmEngine(object): delayslot_count = self.arch.delayslot offsets_to_dis = set() add_next_offset = False - label = self.symbol_pool.getby_offset_create(offset) - cur_block = AsmBlock(label) + loc_key = self.symbol_pool.getby_offset_create(offset) + cur_block = AsmBlock(loc_key) log_asmblock.debug("dis at %X", int(offset)) while not in_delayslot or delayslot_count > 0: if in_delayslot: @@ -1462,17 +1607,25 @@ class disasmEngine(object): if not cur_block.lines: job_done.add(offset) # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=AsmBlockBad.ERROR_FORBIDDEN) + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_FORBIDDEN) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(offset) + cur_block.add_cst( + loc_key_cst, + AsmConstraint.c_next, + self.symbol_pool + ) break if lines_cpt > 0 and offset in self.split_dis: - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(offset) + cur_block.add_cst( + loc_key_cst, + AsmConstraint.c_next, + self.symbol_pool + ) offsets_to_dis.add(offset) break @@ -1482,8 +1635,12 @@ class disasmEngine(object): break if offset in job_done: - cur_block.add_cst(offset, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(offset) + cur_block.add_cst( + loc_key_cst, + AsmConstraint.c_next, + self.symbol_pool + ) break off_i = offset @@ -1505,12 +1662,16 @@ class disasmEngine(object): if not cur_block.lines: job_done.add(offset) # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=error) + cur_block = AsmBlockBad(loc_key, errno=error) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(off_i, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(off_i) + cur_block.add_cst( + loc_key_cst, + AsmConstraint.c_next, + self.symbol_pool + ) break # XXX TODO nul start block option @@ -1518,12 +1679,16 @@ class disasmEngine(object): log_asmblock.warning("reach nul instr at %X", int(off_i)) if not cur_block.lines: # Block is empty -> bad block - cur_block = AsmBlockBad(label, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) + cur_block = AsmBlockBad(loc_key, errno=AsmBlockBad.ERROR_NULL_STARTING_BLOCK) else: # Block is not empty, stop the desassembly pass and add a # constraint to the next block - cur_block.add_cst(off_i, AsmConstraint.c_next, - self.symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(off_i) + cur_block.add_cst( + loc_key_cst, + AsmConstraint.c_next, + self.symbol_pool + ) break # special case: flow graph modificator in delayslot @@ -1544,31 +1709,36 @@ class disasmEngine(object): # test split if instr.splitflow() and not (instr.is_subcall() and self.dontdis_retcall): add_next_offset = True - pass if instr.dstflow(): instr.dstflow2label(self.symbol_pool) - dst = instr.getdstflow(self.symbol_pool) - dstn = [] - for d in dst: - if isinstance(d, m2_expr.ExprId) and \ - isinstance(d.name, AsmLabel): - dstn.append(d.name) - if d.name.offset in self.dont_dis_retcall_funcs: - add_next_offset = False - dst = dstn + destinations = instr.getdstflow(self.symbol_pool) + known_dsts = [] + for dst in destinations: + if not dst.is_loc(): + continue + loc_key = dst.loc_key + loc_key_offset = self.symbol_pool.loc_key_to_offset(loc_key) + known_dsts.append(loc_key) + if loc_key_offset in self.dont_dis_retcall_funcs: + add_next_offset = False if (not instr.is_subcall()) or self.follow_call: - cur_block.bto.update( - [AsmConstraint(x, AsmConstraint.c_to) for x in dst]) + cur_block.bto.update([AsmConstraint(loc_key, AsmConstraint.c_to) for loc_key in known_dsts]) # get in delayslot mode in_delayslot = True delayslot_count = instr.delayslot for c in cur_block.bto: - offsets_to_dis.add(c.label.offset) + loc_key_offset = self.symbol_pool.loc_key_to_offset(c.loc_key) + offsets_to_dis.add(loc_key_offset) if add_next_offset: - cur_block.add_cst(offset, AsmConstraint.c_next, self.symbol_pool) + loc_key_cst = self.symbol_pool.getby_offset_create(offset) + cur_block.add_cst( + loc_key_cst, + AsmConstraint.c_next, + self.symbol_pool + ) offsets_to_dis.add(offset) # Fix multiple constraints @@ -1608,7 +1778,7 @@ class disasmEngine(object): log_asmblock.info("dis bloc all") job_done = set() if blocks is None: - blocks = AsmCFG() + blocks = AsmCFG(self.symbol_pool) todo = [offset] bloc_cpt = 0 @@ -1624,7 +1794,7 @@ class disasmEngine(object): continue cur_block, nexts = self._dis_block(target_offset, job_done) todo += nexts - blocks.add_node(cur_block) + blocks.add_block(cur_block) blocks.apply_splitting(self.symbol_pool, dis_block_callback=self.dis_block_callback, diff --git a/miasm2/core/cpu.py b/miasm2/core/cpu.py index d75b99cf..a142ab77 100644 --- a/miasm2/core/cpu.py +++ b/miasm2/core/cpu.py @@ -8,13 +8,12 @@ from collections import defaultdict import pyparsing import miasm2.expression.expression as m2_expr -from miasm2.core import asmblock from miasm2.core.bin_stream import bin_stream, bin_stream_str from miasm2.core.utils import Disasm_Exception from miasm2.expression.simplifications import expr_simp -from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstNode, AstInt, AstId, AstOp log = logging.getLogger("cpuhelper") console_handler = logging.StreamHandler() @@ -985,18 +984,24 @@ class instruction(object): self.mode = mode self.args = args self.additional_info = additional_info + self.offset = None + self.l = None + self.b = None def gen_args(self, args): out = ', '.join([str(x) for x in args]) return out def __str__(self): + return self.to_string() + + def to_string(self, symbol_pool=None): o = "%-10s " % self.name args = [] for i, arg in enumerate(self.args): if not isinstance(arg, m2_expr.Expr): raise ValueError('zarb arg type') - x = self.arg2str(arg, pos = i) + x = self.arg2str(arg, i, symbol_pool) args.append(x) o += self.gen_args(args) return o @@ -1011,40 +1016,40 @@ class instruction(object): if symbols is None: symbols = {} args_out = [] - for a in self.args: - e = a + for expr in self.args: # try to resolve symbols using symbols (0 for default value) - ids = m2_expr.get_expr_ids(e) - fixed_ids = {} - for x in ids: - if isinstance(x.name, asmblock.AsmLabel): - name = x.name.name - # special symbol $ - if name == '$': - fixed_ids[x] = self.get_asm_offset(x) - continue - if name == '_': - fixed_ids[x] = self.get_asm_next_offset(x) - continue - if not name in symbols: - raise ValueError('unresolved symbol! %r' % x) - else: - name = x.name - if not name in symbols: + loc_keys = m2_expr.get_expr_locs(expr) + fixed_expr = {} + for exprloc in loc_keys: + loc_key = exprloc.loc_key + name = symbols.loc_key_to_name(loc_key) + # special symbols + if name == '$': + fixed_expr[exprloc] = self.get_asm_offset(exprloc) + continue + if name == '_': + fixed_expr[exprloc] = self.get_asm_next_offset(exprloc) continue - if symbols[name].offset is None: - raise ValueError('The offset of label "%s" cannot be ' - 'determined' % name) + if not name in symbols: + raise ValueError('Unresolved symbol: %r' % exprloc) + + offset = symbols.loc_key_to_offset(loc_key) + if offset is None: + raise ValueError( + 'The offset of loc_key "%s" cannot be determined' % name + ) else: - size = x.size + # Fix symbol with its offset + size = exprloc.size if size is None: - default_size = self.get_symbol_size(x, symbols) + default_size = self.get_symbol_size(exprloc, symbols) size = default_size - value = m2_expr.ExprInt(symbols[name].offset, size) - fixed_ids[x] = value - e = e.replace_expr(fixed_ids) - e = expr_simp(e) - args_out.append(e) + value = m2_expr.ExprInt(offset, size) + fixed_expr[exprloc] = value + + expr = expr.replace_expr(fixed_expr) + expr = expr_simp(expr) + args_out.append(expr) return args_out def get_info(self, c): diff --git a/miasm2/core/graph.py b/miasm2/core/graph.py index ce17fc75..d88f8721 100644 --- a/miasm2/core/graph.py +++ b/miasm2/core/graph.py @@ -715,11 +715,12 @@ class MatchGraphJoker(object): matched node must be the same than the joker node in the associated MatchGraph @restrict_out: (optional) counterpart of @restrict_in for successors - @filt: (optional) function(node) -> boolean for filtering candidate node + @filt: (optional) function(graph, node) -> boolean for filtering + candidate node @name: (optional) helper for displaying the current joker """ if filt is None: - filt = lambda node: True + filt = lambda graph, node: True self.filt = filt if name is None: name = str(id(self)) @@ -816,7 +817,7 @@ class MatchGraph(DiGraph): return False # Check lambda filtering - if not expected.filt(candidate): + if not expected.filt(graph, candidate): return False # Check arity diff --git a/miasm2/core/parse_asm.py b/miasm2/core/parse_asm.py index df419680..86871c37 100644 --- a/miasm2/core/parse_asm.py +++ b/miasm2/core/parse_asm.py @@ -1,10 +1,11 @@ #-*- coding:utf-8 -*- import re -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprOp, ExprLoc, \ + LocKey import miasm2.core.asmblock as asmblock from miasm2.core.cpu import instruction, base_expr -from miasm2.core.asm_ast import AstInt, AstId, AstMem, AstOp +from miasm2.core.asm_ast import AstInt, AstId, AstOp declarator = {'byte': 8, 'word': 16, @@ -68,21 +69,20 @@ def guess_next_new_label(symbol_pool): name = gen_name % i label = symbol_pool.getby_name(name) if label is None: - return symbol_pool.add_label(name) + return symbol_pool.add_location(name) i += 1 def replace_expr_labels(expr, symbol_pool, replace_id): - """Create AsmLabel of the expression @expr in the @symbol_pool + """Create LocKey of the expression @expr in the @symbol_pool Update @replace_id""" - if not (isinstance(expr, m2_expr.ExprId) and - isinstance(expr.name, asmblock.AsmLabel)): + if not expr.is_loc(): return expr - old_lbl = expr.name - new_lbl = symbol_pool.getby_name_create(old_lbl.name) - replace_id[expr] = m2_expr.ExprId(new_lbl, expr.size) + old_name = symbol_pool.loc_key_to_name(expr.loc_key) + new_lbl = symbol_pool.getby_name_create(old_name) + replace_id[expr] = ExprLoc(new_lbl, expr.size) return replace_id[expr] @@ -103,17 +103,17 @@ STATE_IN_BLOC = 1 def asm_ast_to_expr_with_size(arg, symbol_pool, size): if isinstance(arg, AstId): - return m2_expr.ExprId(arg.name, size) + return ExprId(arg.name, size) if isinstance(arg, AstOp): args = [asm_ast_to_expr_with_size(tmp, symbol_pool, size) for tmp in arg.args] - return m2_expr.ExprOp(arg.op, *args) + return ExprOp(arg.op, *args) if isinstance(arg, AstInt): - return m2_expr.ExprInt(arg.value, size) + return ExprInt(arg.value, size) return None def parse_txt(mnemo, attrib, txt, symbol_pool=None): - """Parse an assembly listing. Returns a couple (blocks, symbol_pool), where - blocks is a list of asm_bloc and symbol_pool the associated AsmSymbolPool + """Parse an assembly listing. Returns a couple (asmcfg, symbol_pool), where + asmcfg is an AsmCfg instance and symbol_pool the associated AsmSymbolPool @mnemo: architecture used @attrib: architecture attribute @@ -224,7 +224,6 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): line = line.strip(' ').strip('\t') instr = mnemo.fromstring(line, symbol_pool, attrib) - # replace orphan AsmLabel with labels from symbol_pool replace_orphan_labels(instr, symbol_pool) if instr.dstflow(): @@ -232,12 +231,12 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): lines.append(instr) asmblock.log_asmblock.info("___pre asm oki___") - # make blocks + # make asmcfg cur_block = None state = STATE_NO_BLOC i = 0 - blocks = asmblock.AsmCFG() + asmcfg = asmblock.AsmCFG(symbol_pool) block_to_nlink = None delayslot = 0 while i < len(lines): @@ -256,21 +255,24 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): block_to_nlink = None i += 1 continue - elif not isinstance(line, asmblock.AsmLabel): + elif not isinstance(line, LocKey): # First line must be a label. If it's not the case, generate # it. - label = guess_next_new_label(symbol_pool) - cur_block = asmblock.AsmBlock(label, alignment=mnemo.alignment) + loc = guess_next_new_label(symbol_pool) + cur_block = asmblock.AsmBlock(loc, alignment=mnemo.alignment) else: cur_block = asmblock.AsmBlock(line, alignment=mnemo.alignment) i += 1 # Generate the current bloc - blocks.add_node(cur_block) + asmcfg.add_block(cur_block) state = STATE_IN_BLOC if block_to_nlink: block_to_nlink.addto( - asmblock.AsmConstraint(cur_block.label, - C_NEXT)) + asmblock.AsmConstraint( + cur_block.loc_key, + C_NEXT + ) + ) block_to_nlink = None continue @@ -287,10 +289,11 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): elif isinstance(line, asmblock.AsmRaw): cur_block.addline(line) block_to_nlink = cur_block - elif isinstance(line, asmblock.AsmLabel): + elif isinstance(line, LocKey): if block_to_nlink: cur_block.addto( - asmblock.AsmConstraint(line, C_NEXT)) + asmblock.AsmConstraint(line, C_NEXT) + ) block_to_nlink = None state = STATE_NO_BLOC continue @@ -305,7 +308,7 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): raise RuntimeError("Cannot have breakflow in delayslot") if line.dstflow(): for dst in line.getdstflow(symbol_pool): - if not isinstance(dst, m2_expr.ExprId): + if not isinstance(dst, ExprId): continue if dst in mnemo.regs.all_regs_ids: continue @@ -319,10 +322,10 @@ def parse_txt(mnemo, attrib, txt, symbol_pool=None): raise RuntimeError("unknown class %s" % line.__class__) i += 1 - for block in blocks: + for block in asmcfg.blocks: # Fix multiple constraints block.fix_constraints() # Log block asmblock.log_asmblock.info(block) - return blocks, symbol_pool + return asmcfg, symbol_pool diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 8d6d3e07..530685db 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -139,14 +139,22 @@ class SemBuilder(object): return self._functions.copy() @staticmethod - def _create_labels(lbl_else=False): + def _create_labels(loc_else=False): """Return the AST standing for label creations - @lbl_else (optional): if set, create a label 'lbl_else'""" - lbl_end = "lbl_end = ExprId(ir.get_next_label(instr), ir.IRDst.size)" - out = ast.parse(lbl_end).body - out += ast.parse("lbl_if = ExprId(ir.gen_label(), ir.IRDst.size)").body - if lbl_else: - out += ast.parse("lbl_else = ExprId(ir.gen_label(), ir.IRDst.size)").body + @loc_else (optional): if set, create a label 'loc_else'""" + loc_end = "loc_end = ir.get_next_loc_key(instr)" + loc_end_expr = "loc_end_expr = ExprLoc(loc_end, ir.IRDst.size)" + out = ast.parse(loc_end).body + out += ast.parse(loc_end_expr).body + loc_if = "loc_if = ir.symbol_pool.gen_loc_key()" + loc_if_expr = "loc_if_expr = ExprLoc(loc_if, ir.IRDst.size)" + out += ast.parse(loc_if).body + out += ast.parse(loc_if_expr).body + if loc_else: + loc_else = "loc_else = ir.symbol_pool.gen_loc_key()" + loc_else_expr = "loc_else_expr = ExprLoc(loc_else, ir.IRDst.size)" + out += ast.parse(loc_else).body + out += ast.parse(loc_else_expr).body return out def _parse_body(self, body, argument_names): @@ -195,20 +203,20 @@ class SemBuilder(object): real_body.append(statement) elif isinstance(statement, ast.If): - # Create jumps : ir.IRDst = lbl_if if cond else lbl_end + # Create jumps : ir.IRDst = loc_if if cond else loc_end # if .. else .. are also handled cond = statement.test - real_body += self._create_labels(lbl_else=True) + real_body += self._create_labels(loc_else=True) - lbl_end = ast.Name(id='lbl_end', ctx=ast.Load()) - lbl_if = ast.Name(id='lbl_if', ctx=ast.Load()) - lbl_else = ast.Name(id='lbl_else', ctx=ast.Load()) \ - if statement.orelse else lbl_end + loc_end = ast.Name(id='loc_end_expr', ctx=ast.Load()) + loc_if = ast.Name(id='loc_if_expr', ctx=ast.Load()) + loc_else = ast.Name(id='loc_else_expr', ctx=ast.Load()) \ + if statement.orelse else loc_end dst = ast.Call(func=ast.Name(id='ExprCond', ctx=ast.Load()), args=[cond, - lbl_if, - lbl_else], + loc_if, + loc_else], keywords=[], starargs=None, kwargs=None) @@ -230,10 +238,10 @@ class SemBuilder(object): kwargs=None)) # Create the new blocks - elements = [(statement.body, 'lbl_if')] + elements = [(statement.body, 'loc_if')] if statement.orelse: - elements.append((statement.orelse, 'lbl_else')) - for content, lbl_name in elements: + elements.append((statement.orelse, 'loc_else')) + for content, loc_name in elements: sub_blocks, sub_body = self._parse_body(content, argument_names) if len(sub_blocks) > 1: @@ -242,7 +250,7 @@ class SemBuilder(object): ## Close the last block jmp_end = ast.Call(func=ast.Name(id='ExprAff', ctx=ast.Load()), - args=[IRDst, lbl_end], + args=[IRDst, loc_end], keywords=[], starargs=None, kwargs=None) @@ -261,16 +269,14 @@ class SemBuilder(object): ## Replace the block with a call to 'IRBlock' - lbl_if_name = ast.Attribute(value=ast.Name(id=lbl_name, - ctx=ast.Load()), - attr='name', ctx=ast.Load()) + loc_if_name = ast.Name(id=loc_name, ctx=ast.Load()) assignblks = ast.List(elts=[assignblk], ctx=ast.Load()) sub_blocks[-1] = ast.Call(func=ast.Name(id='IRBlock', ctx=ast.Load()), - args=[lbl_if_name, + args=[loc_if_name, assignblks], keywords=[], starargs=None, diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 54cd5a2d..5ea596ae 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -19,6 +19,7 @@ # IR components are : # - ExprInt # - ExprId +# - ExprLoc # - ExprAff # - ExprCond # - ExprMem @@ -48,12 +49,13 @@ TOK_POS_STRICT = "Spos" # Hashing constants EXPRINT = 1 EXPRID = 2 -EXPRAFF = 3 -EXPRCOND = 4 -EXPRMEM = 5 -EXPROP = 6 -EXPRSLICE = 7 -EXPRCOMPOSE = 8 +EXPRLOC = 3 +EXPRAFF = 4 +EXPRCOND = 5 +EXPRMEM = 6 +EXPROP = 7 +EXPRSLICE = 8 +EXPRCOMPOSE = 9 priorities_list = [ @@ -115,6 +117,8 @@ class DiGraphExpr(DiGraph): return node.op elif isinstance(node, ExprId): return node.name + elif isinstance(node, ExprLoc): + return "%s" % node.loc_key elif isinstance(node, ExprMem): return "@%d" % node.size elif isinstance(node, ExprCompose): @@ -141,6 +145,32 @@ class DiGraphExpr(DiGraph): return "" + +class LocKey(object): + def __init__(self, key): + self._key = key + + key = property(lambda self: self._key) + + def __hash__(self): + return hash(self._key) + + def __eq__(self, other): + if self is other: + return True + if self.__class__ is not other.__class__: + return False + return self.key == other.key + + def __ne__(self, other): + return not self.__eq__(other) + + def __repr__(self): + return "<%s %d>" % (self.__class__.__name__, self._key) + + def __str__(self): + return "loc_%d" % self.key + # IR definitions class Expr(object): @@ -383,6 +413,9 @@ class Expr(object): def is_id(self, name=None): return False + def is_loc(self, label=None): + return False + def is_aff(self): return False @@ -532,6 +565,7 @@ class ExprId(Expr): if size is None: warnings.warn('DEPRECATION WARNING: size is a mandatory argument: use ExprId(name, SIZE)') size = 32 + assert isinstance(name, str) super(ExprId, self).__init__(size) self._name = name @@ -584,6 +618,68 @@ class ExprId(Expr): return True +class ExprLoc(Expr): + + """An ExprLoc represent a Label in Miasm IR. + """ + + __slots__ = Expr.__slots__ + ["_loc_key"] + + def __init__(self, loc_key, size): + """Create an identifier + @loc_key: int, label loc_key + @size: int, identifier's size + """ + assert isinstance(loc_key, LocKey) + super(ExprLoc, self).__init__(size) + self._loc_key = loc_key + + loc_key= property(lambda self: self._loc_key) + + def __reduce__(self): + state = self._loc_key, self._size + return self.__class__, state + + def __new__(cls, loc_key, size): + return Expr.get_object(cls, (loc_key, size)) + + def __str__(self): + return "label_%d" % self._loc_key.key + + def get_r(self, mem_read=False, cst_read=False): + return set() + + def get_w(self): + return set() + + def _exprhash(self): + return hash((EXPRLOC, self._loc_key, self._size)) + + def _exprrepr(self): + return "%s(%r, %d)" % (self.__class__.__name__, self._loc_key, self._size) + + def __contains__(self, expr): + return self == expr + + @visit_chk + def visit(self, callback, test_visit=None): + return self + + def copy(self): + return ExprLoc(self._loc_key, self._size) + + def depth(self): + return 1 + + def graph_recursive(self, graph): + graph.add_node(self) + + def is_loc(self, loc_key=None): + if loc_key is not None and self._loc_key != loc_key: + return False + return True + + class ExprAff(Expr): """An ExprAff represent an affection from an Expression to another one. @@ -1226,10 +1322,11 @@ class ExprCompose(Expr): # Expression order for comparaison EXPR_ORDER_DICT = {ExprId: 1, - ExprCond: 2, - ExprMem: 3, - ExprOp: 4, - ExprSlice: 5, + ExprLoc: 2, + ExprCond: 3, + ExprMem: 4, + ExprOp: 5, + ExprSlice: 6, ExprCompose: 7, ExprInt: 8, } @@ -1289,6 +1386,11 @@ def compare_exprs(expr1, expr2): if ret: return ret return cmp(expr1.size, expr2.size) + elif cls1 == ExprLoc: + ret = cmp(expr1.loc_key, expr2.loc_key) + if ret: + return ret + return cmp(expr1.size, expr2.size) elif cls1 == ExprAff: raise NotImplementedError( "Comparaison from an ExprAff not yet implemented") @@ -1379,11 +1481,19 @@ def ExprInt_from(expr, i): def get_expr_ids_visit(expr, ids): """Visitor to retrieve ExprId in @expr @expr: Expr""" - if isinstance(expr, ExprId): + if expr.is_id(): ids.add(expr) return expr +def get_expr_locs_visit(expr, locs): + """Visitor to retrieve ExprLoc in @expr + @expr: Expr""" + if expr.is_loc(): + locs.add(expr) + return expr + + def get_expr_ids(expr): """Retrieve ExprId in @expr @expr: Expr""" @@ -1392,6 +1502,14 @@ def get_expr_ids(expr): return ids +def get_expr_locs(expr): + """Retrieve ExprLoc in @expr + @expr: Expr""" + locs = set() + expr.visit(lambda x: get_expr_locs_visit(x, locs)) + return locs + + def test_set(expr, pattern, tks, result): """Test if v can correspond to e. If so, update the context in result. Otherwise, return False @@ -1431,6 +1549,9 @@ def match_expr(expr, pattern, tks, result=None): elif expr.is_id(): return test_set(expr, pattern, tks, result) + elif expr.is_loc(): + return test_set(expr, pattern, tks, result) + elif expr.is_op(): # expr need to be the same operation than pattern diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 722d169d..2fe5e26d 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -268,6 +268,9 @@ class Variables_Identifier(object): elif isinstance(expr, m2_expr.ExprId): pass + elif isinstance(expr, m2_expr.ExprLoc): + pass + elif isinstance(expr, m2_expr.ExprMem): self.find_variables_rec(expr.arg) @@ -552,7 +555,8 @@ def possible_values(expr): # Terminal expression if (isinstance(expr, m2_expr.ExprInt) or - isinstance(expr, m2_expr.ExprId)): + isinstance(expr, m2_expr.ExprId) or + isinstance(expr, m2_expr.ExprLoc)): consvals.add(ConstrainedValue(frozenset(), expr)) # Unary expression elif isinstance(expr, m2_expr.ExprSlice): diff --git a/miasm2/expression/parser.py b/miasm2/expression/parser.py index b3f3af1c..cbfd58d0 100644 --- a/miasm2/expression/parser.py +++ b/miasm2/expression/parser.py @@ -1,6 +1,6 @@ import pyparsing -from miasm2.expression.expression import ExprInt, ExprId, ExprSlice, ExprMem, \ - ExprCond, ExprCompose, ExprOp, ExprAff +from miasm2.expression.expression import ExprInt, ExprId, ExprLoc, ExprSlice, \ + ExprMem, ExprCond, ExprCompose, ExprOp, ExprAff, LocKey integer = pyparsing.Word(pyparsing.nums).setParseAction(lambda t: int(t[0])) @@ -16,6 +16,7 @@ str_int = str_int_pos | str_int_neg STR_EXPRINT = pyparsing.Suppress("ExprInt") STR_EXPRID = pyparsing.Suppress("ExprId") +STR_EXPRLOC = pyparsing.Suppress("ExprLoc") STR_EXPRSLICE = pyparsing.Suppress("ExprSlice") STR_EXPRMEM = pyparsing.Suppress("ExprMem") STR_EXPRCOND = pyparsing.Suppress("ExprCond") @@ -23,11 +24,17 @@ STR_EXPRCOMPOSE = pyparsing.Suppress("ExprCompose") STR_EXPROP = pyparsing.Suppress("ExprOp") STR_EXPRAFF = pyparsing.Suppress("ExprAff") +LOCKEY = pyparsing.Suppress("LocKey") + STR_COMMA = pyparsing.Suppress(",") LPARENTHESIS = pyparsing.Suppress("(") RPARENTHESIS = pyparsing.Suppress(")") +T_INF = pyparsing.Suppress("<") +T_SUP = pyparsing.Suppress(">") + + string_quote = pyparsing.QuotedString(quoteChar="'", escChar='\\', escQuote='\\') string_dquote = pyparsing.QuotedString(quoteChar='"', escChar='\\', escQuote='\\') @@ -36,26 +43,33 @@ string = string_quote | string_dquote expr = pyparsing.Forward() -expr_int = pyparsing.Group(STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_id = pyparsing.Group(STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS) -expr_slice = pyparsing.Group(STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS) -expr_mem = pyparsing.Group(STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS) -expr_cond = pyparsing.Group(STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS) -expr_compose = pyparsing.Group(STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_op = pyparsing.Group(STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS) -expr_aff = pyparsing.Group(STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS) - -expr << (expr_int | expr_id | expr_slice | expr_mem | expr_cond | \ +expr_int = STR_EXPRINT + LPARENTHESIS + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_id = STR_EXPRID + LPARENTHESIS + string + STR_COMMA + str_int + RPARENTHESIS +expr_loc = STR_EXPRLOC + LPARENTHESIS + T_INF + LOCKEY + str_int + T_SUP + STR_COMMA + str_int + RPARENTHESIS +expr_slice = STR_EXPRSLICE + LPARENTHESIS + expr + STR_COMMA + str_int + STR_COMMA + str_int + RPARENTHESIS +expr_mem = STR_EXPRMEM + LPARENTHESIS + expr + STR_COMMA + str_int + RPARENTHESIS +expr_cond = STR_EXPRCOND + LPARENTHESIS + expr + STR_COMMA + expr + STR_COMMA + expr + RPARENTHESIS +expr_compose = STR_EXPRCOMPOSE + LPARENTHESIS + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_op = STR_EXPROP + LPARENTHESIS + string + STR_COMMA + pyparsing.delimitedList(expr, delim=',') + RPARENTHESIS +expr_aff = STR_EXPRAFF + LPARENTHESIS + expr + STR_COMMA + expr + RPARENTHESIS + +expr << (expr_int | expr_id | expr_loc | expr_slice | expr_mem | expr_cond | \ expr_compose | expr_op | expr_aff) -expr_int.setParseAction(lambda t: ExprInt(*t[0])) -expr_id.setParseAction(lambda t: ExprId(*t[0])) -expr_slice.setParseAction(lambda t: ExprSlice(*t[0])) -expr_mem.setParseAction(lambda t: ExprMem(*t[0])) -expr_cond.setParseAction(lambda t: ExprCond(*t[0])) -expr_compose.setParseAction(lambda t: ExprCompose(*t[0])) -expr_op.setParseAction(lambda t: ExprOp(*t[0])) -expr_aff.setParseAction(lambda t: ExprAff(*t[0])) +def parse_loc_key(t): + assert len(t) == 2 + loc_key, size = LocKey(t[0]), t[1] + return ExprLoc(loc_key, size) + +expr_int.setParseAction(lambda t: ExprInt(*t)) +expr_id.setParseAction(lambda t: ExprId(*t)) +expr_loc.setParseAction(parse_loc_key) +expr_slice.setParseAction(lambda t: ExprSlice(*t)) +expr_mem.setParseAction(lambda t: ExprMem(*t)) +expr_cond.setParseAction(lambda t: ExprCond(*t)) +expr_compose.setParseAction(lambda t: ExprCompose(*t)) +expr_op.setParseAction(lambda t: ExprOp(*t)) +expr_aff.setParseAction(lambda t: ExprAff(*t)) def str_to_expr(str_in): diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 1c6895e0..0a7d1d84 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -23,11 +23,10 @@ from itertools import chain import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import get_missing_interval -from miasm2.core.asmblock import AsmSymbolPool, expr_is_label, AsmLabel, \ - AsmBlock +from miasm2.core.asmblock import AsmSymbolPool, AsmBlock, \ + AsmConstraint, AsmBlockBad from miasm2.core.graph import DiGraph - class AssignBlock(object): """Represent parallel IR assignment, such as: EAX = EBX @@ -264,16 +263,16 @@ class IRBlock(object): Stand for an intermediate representation basic block. """ - __slots__ = ["label", "_assignblks", "_dst", "_dst_linenb"] + __slots__ = ["_loc_key", "_assignblks", "_dst", "_dst_linenb"] - def __init__(self, label, assignblks): + def __init__(self, loc_key, assignblks): """ - @label: AsmLabel of the IR basic block + @loc_key: LocKey of the IR basic block @assignblks: list of AssignBlock """ - assert isinstance(label, AsmLabel) - self.label = label + assert isinstance(loc_key, m2_expr.LocKey) + self._loc_key = loc_key for assignblk in assignblks: assert isinstance(assignblk, AssignBlock) self._assignblks = tuple(assignblks) @@ -281,6 +280,13 @@ class IRBlock(object): self._dst_linenb = None + def get_label(self): + warnings.warn('DEPRECATION WARNING: use ".loc_key" instead of ".label"') + return self.loc_key + + loc_key = property(lambda self:self._loc_key) + label = property(get_label) + @property def assignblks(self): return self._assignblks @@ -340,7 +346,7 @@ class IRBlock(object): else: new_assignblk[dst] = src irs.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(self.label, irs) + return IRBlock(self.loc_key, irs) @property def dst_linenb(self): @@ -351,7 +357,7 @@ class IRBlock(object): def __str__(self): out = [] - out.append('%s' % self.label) + out.append('loc_key_%s' % self.loc_key.key) for assignblk in self: for dst, src in assignblk.iteritems(): out.append('\t%s = %s' % (dst, src)) @@ -378,7 +384,7 @@ class IRBlock(object): for dst, src in assignblk.iteritems(): new_assignblk[mod_dst(dst)] = mod_src(src) assignblks.append(AssignBlock(new_assignblk, assignblk.instr)) - return IRBlock(self.label, assignblks) + return IRBlock(self.loc_key, assignblks) class irbloc(IRBlock): @@ -387,33 +393,53 @@ class irbloc(IRBlock): Use IRBlock instead of irbloc """ - def __init__(self, label, irs, lines=None): + def __init__(self, loc_key, irs, lines=None): warnings.warn('DEPRECATION WARNING: use "IRBlock" instead of "irblock"') - super(irbloc, self).__init__(label, irs) + super(irbloc, self).__init__(loc_key, irs) class DiGraphIR(DiGraph): """DiGraph for IR instances""" - def __init__(self, blocks, *args, **kwargs): + def __init__(self, blocks, symbol_pool=None, *args, **kwargs): """Instanciate a DiGraphIR @blocks: IR blocks """ + self.symbol_pool = symbol_pool self._blocks = blocks super(DiGraphIR, self).__init__(*args, **kwargs) + def _expr_loc_to_symb(self, expr): + if not expr.is_loc(): + return expr + if self.symbol_pool is None: + name = str(expr) + else: + name = self.symbol_pool.loc_key_to_name(expr.loc_key) + return m2_expr.ExprId(name, expr.size) + def node2lines(self, node): - yield self.DotCellDescription(text=str(node.name), - attr={'align': 'center', - 'colspan': 2, - 'bgcolor': 'grey'}) + if self.symbol_pool is None: + node_name = str(node) + else: + node_name = self.symbol_pool.loc_key_to_name(node) + yield self.DotCellDescription( + text="%s" % node_name, + attr={ + 'align': 'center', + 'colspan': 2, + 'bgcolor': 'grey', + } + ) if node not in self._blocks: yield [self.DotCellDescription(text="NOT PRESENT", attr={})] raise StopIteration for i, assignblk in enumerate(self._blocks[node]): for dst, src in assignblk.iteritems(): - line = "%s = %s" % (dst, src) + new_src = src.visit(self._expr_loc_to_symb) + new_dst = dst.visit(self._expr_loc_to_symb) + line = "%s = %s" % (new_dst, new_src) if self._dot_offset: yield [self.DotCellDescription(text="%-4d" % i, attr={}), self.DotCellDescription(text=line, attr={})] @@ -427,11 +453,10 @@ class DiGraphIR(DiGraph): src_irdst = self._blocks[src].dst edge_color = "blue" if isinstance(src_irdst, m2_expr.ExprCond): - if (expr_is_label(src_irdst.src1) and - src_irdst.src1.name == dst): + src1, src2 = src_irdst.src1, src_irdst.src2 + if src1.is_loc(dst): edge_color = "limegreen" - elif (expr_is_label(src_irdst.src2) and - src_irdst.src2.name == dst): + elif src2.is_loc(dst): edge_color = "red" return {"color": edge_color} @@ -482,19 +507,18 @@ class IntermediateRepresentation(object): irs = [] for assignblk in irb: irs.append(AssignBlock(assignblk, instr)) - extra_irblocks[index] = IRBlock(irb.label, irs) + extra_irblocks[index] = IRBlock(irb.loc_key, irs) assignblk = AssignBlock(ir_bloc_cur, instr) return assignblk, extra_irblocks - def get_label(self, addr): - """Transforms an ExprId/ExprInt/label/int into a label - @addr: an ExprId/ExprInt/label/int""" + def get_loc_key(self, addr): + """Transforms an ExprId/ExprInt/loc_key/int into a loc_key + @addr: an ExprId/ExprInt/loc_key/int""" - if (isinstance(addr, m2_expr.ExprId) and - isinstance(addr.name, AsmLabel)): - addr = addr.name - if isinstance(addr, AsmLabel): + if isinstance(addr, m2_expr.LocKey): return addr + elif isinstance(addr, m2_expr.ExprLoc): + return addr.loc_key try: addr = int(addr) @@ -504,11 +528,13 @@ class IntermediateRepresentation(object): return self.symbol_pool.getby_offset_create(addr) def get_block(self, addr): - """Returns the irbloc associated to an ExprId/ExprInt/label/int - @addr: an ExprId/ExprInt/label/int""" + """Returns the irbloc associated to an ExprId/ExprInt/loc_key/int + @addr: an ExprId/ExprInt/loc_key/int""" - label = self.get_label(addr) - return self.blocks.get(label, None) + loc_key = self.get_loc_key(addr) + if loc_key is None: + return None + return self.blocks.get(loc_key, None) def get_bloc(self, addr): """ @@ -518,16 +544,21 @@ class IntermediateRepresentation(object): warnings.warn('DEPRECATION WARNING: use "get_block" instead of "get_bloc"') return self.get_block(addr) - def add_instr(self, line, addr=0, gen_pc_updt=False): - block = AsmBlock(self.gen_label()) + def add_instr(self, line, loc_key=None, gen_pc_updt=False): + if loc_key is None: + loc_key = self.symbol_pool.gen_loc_key() + block = AsmBlock(loc_key) block.lines = [line] self.add_block(block, gen_pc_updt) + return loc_key def getby_offset(self, offset): out = set() for irb in self.blocks.values(): for assignblk in irb: instr = assignblk.instr + if instr is None: + continue if instr.offset <= offset < instr.offset + instr.l: out.add(irb) return out @@ -593,24 +624,26 @@ class IntermediateRepresentation(object): @gen_pc_updt: insert PC update effects between instructions """ - label = None + loc_key = block.loc_key ir_blocks_all = [] + + assignments = [] for instr in block.lines: - if label is None: + if loc_key is None: assignments = [] - label = self.get_instr_label(instr) + loc_key = self.get_loc_key_for_instr(instr) split = self.add_instr_to_irblock(block, instr, assignments, ir_blocks_all, gen_pc_updt) if split: - ir_blocks_all.append(IRBlock(label, assignments)) - label = None + ir_blocks_all.append(IRBlock(loc_key, assignments)) + loc_key = None assignments = [] - if label is not None: - ir_blocks_all.append(IRBlock(label, assignments)) + if loc_key is not None: + ir_blocks_all.append(IRBlock(loc_key, assignments)) new_ir_blocks_all = self.post_add_block(block, ir_blocks_all) for irblock in new_ir_blocks_all: - self.blocks[irblock.label] = irblock + self.blocks[irblock.loc_key] = irblock return new_ir_blocks_all def add_bloc(self, block, gen_pc_updt=False): @@ -643,15 +676,25 @@ class IntermediateRepresentation(object): for index, irblock in enumerate(ir_blocks): if irblock.dst is not None: continue - next_lbl = block.get_next() - if next_lbl is None: - dst = m2_expr.ExprId(self.get_next_label(block.lines[-1]), - self.pc.size) + next_loc_key = block.get_next() + if next_loc_key is None: + loc_key = None + if block.lines: + line = block.lines[-1] + if line.offset is not None: + loc_key = self.symbol_pool.getby_offset_create(line.offset + line.l) + if loc_key is None: + loc_key = self.symbol_pool.gen_loc_key() + block.add_cst(loc_key, AsmConstraint.c_next, self.symbol_pool) + else: + loc_key = next_loc_key + dst = m2_expr.ExprLoc(loc_key, self.pc.size) + if irblock.assignblks: + instr = irblock.assignblks[-1].instr else: - dst = m2_expr.ExprId(next_lbl, - self.pc.size) - assignblk = AssignBlock({self.IRDst: dst}, irblock[-1].instr) - ir_blocks[index] = IRBlock(irblock.label, list(irblock.assignblks) + [assignblk]) + instr = None + assignblk = AssignBlock({self.IRDst: dst}, instr) + ir_blocks[index] = IRBlock(irblock.loc_key, list(irblock.assignblks) + [assignblk]) def post_add_block(self, block, ir_blocks): self.set_empty_dst_to_next(block, ir_blocks) @@ -659,7 +702,7 @@ class IntermediateRepresentation(object): new_irblocks = [] for irblock in ir_blocks: new_irblock = self.irbloc_fix_regs_for_mode(irblock, self.attrib) - self.blocks[irblock.label] = new_irblock + self.blocks[irblock.loc_key] = new_irblock new_irblocks.append(new_irblock) # Forget graph if any self._graph = None @@ -673,20 +716,22 @@ class IntermediateRepresentation(object): warnings.warn('DEPRECATION WARNING: use "post_add_block" instead of "post_add_bloc"') return self.post_add_block(block, ir_blocks) - def get_instr_label(self, instr): - """Returns the label associated to an instruction + def get_loc_key_for_instr(self, instr): + """Returns the loc_key associated to an instruction @instr: current instruction""" - return self.symbol_pool.getby_offset_create(instr.offset) - def gen_label(self): - # TODO: fix hardcoded offset - label = self.symbol_pool.gen_label() - return label + def gen_loc_key_and_expr(self, size): + """ + Return a loc_key and it's corresponding ExprLoc + @size: size of expression + """ + loc_key = self.symbol_pool.gen_loc_key() + return loc_key, m2_expr.ExprLoc(loc_key, size) - def get_next_label(self, instr): - label = self.symbol_pool.getby_offset_create(instr.offset + instr.l) - return label + def get_next_loc_key(self, instr): + loc_key = self.symbol_pool.getby_offset_create(instr.offset + instr.l) + return loc_key def simplify(self, simplifier): """ @@ -694,14 +739,14 @@ class IntermediateRepresentation(object): @simplifier: ExpressionSimplifier instance """ modified = False - for label, block in self.blocks.iteritems(): + for loc_key, block in self.blocks.iteritems(): assignblks = [] for assignblk in block: new_assignblk = assignblk.simplify(simplifier) if assignblk != new_assignblk: modified = True assignblks.append(new_assignblk) - self.blocks[label] = IRBlock(label, assignblks) + self.blocks[loc_key] = IRBlock(loc_key, assignblks) return modified def replace_expr_in_ir(self, bloc, rep): @@ -728,14 +773,14 @@ class IntermediateRepresentation(object): out = set() while todo: dst = todo.pop() - if expr_is_label(dst): + if dst.is_loc(): done.add(dst) - elif isinstance(dst, (m2_expr.ExprMem, m2_expr.ExprInt)): + elif dst.is_mem() or dst.is_int(): done.add(dst) - elif isinstance(dst, m2_expr.ExprCond): + elif dst.is_cond(): todo.add(dst.src1) todo.add(dst.src2) - elif isinstance(dst, m2_expr.ExprId): + elif dst.is_id(): out.add(dst) else: done.add(dst) @@ -769,15 +814,16 @@ class IntermediateRepresentation(object): """ Gen irbloc digraph """ - self._graph = DiGraphIR(self.blocks) + self._graph = DiGraphIR(self.blocks, self.symbol_pool) for lbl, block in self.blocks.iteritems(): + assert isinstance(lbl, m2_expr.LocKey) self._graph.add_node(lbl) for dst in self.dst_trackback(block): if dst.is_int(): dst_lbl = self.symbol_pool.getby_offset_create(int(dst)) - dst = m2_expr.ExprId(dst_lbl, self.pc.size) - if expr_is_label(dst): - self._graph.add_edge(lbl, dst.name) + dst = m2_expr.ExprLoc(dst_lbl.loc_key, self.pc.size) + if dst.is_loc(): + self._graph.add_edge(lbl, dst.loc_key) @property def graph(self): @@ -789,14 +835,14 @@ class IntermediateRepresentation(object): def remove_empty_assignblks(self): modified = False - for label, block in self.blocks.iteritems(): + for loc_key, block in self.blocks.iteritems(): irs = [] for assignblk in block: if len(assignblk): irs.append(assignblk) else: modified = True - self.blocks[label] = IRBlock(label, irs) + self.blocks[loc_key] = IRBlock(loc_key, irs) return modified def remove_jmp_blocks(self): @@ -814,62 +860,62 @@ class IntermediateRepresentation(object): if len(assignblk) > 1: continue assert set(assignblk.keys()) == set([self.IRDst]) - if len(self.graph.successors(block.label)) != 1: + if len(self.graph.successors(block.loc_key)) != 1: continue - if not expr_is_label(assignblk[self.IRDst]): + if not assignblk[self.IRDst].is_loc(): continue - dst = assignblk[self.IRDst].name - if dst == block.label: + dst = assignblk[self.IRDst].loc_key + if dst == block.loc_key: # Infinite loop block continue - jmp_blocks.add(block.label) + jmp_blocks.add(block.loc_key) # Remove them, relink graph modified = False - for label in jmp_blocks: - block = self.blocks[label] - dst_label = block.dst.name - parents = self.graph.predecessors(block.label) + for loc_key in jmp_blocks: + block = self.blocks[loc_key] + dst_loc_key = block.dst + parents = self.graph.predecessors(block.loc_key) for lbl in parents: parent = self.blocks.get(lbl, None) if parent is None: continue dst = parent.dst - if dst.is_id(block.label): - dst = m2_expr.ExprId(dst_label, dst.size) + if dst.is_id(block.loc_key): + dst = m2_expr.ExprLoc(dst_loc_key, dst.size) - self.graph.discard_edge(lbl, block.label) - self.graph.discard_edge(block.label, dst_label) + self.graph.discard_edge(lbl, block.loc_key) + self.graph.discard_edge(block.loc_key, dst_loc_key) - self.graph.add_uniq_edge(lbl, dst_label) + self.graph.add_uniq_edge(lbl, dst_loc_key) modified = True elif dst.is_cond(): src1, src2 = dst.src1, dst.src2 - if src1.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprId(dst_label, dst.size), dst.src2) - self.graph.discard_edge(lbl, block.label) - self.graph.discard_edge(block.label, dst_label) - self.graph.add_uniq_edge(lbl, dst_label) + if src1.is_id(block.loc_key): + dst = m2_expr.ExprCond(dst.cond, m2_expr.ExprLoc(dst_loc_key, dst.size), dst.src2) + self.graph.discard_edge(lbl, block.loc_key) + self.graph.discard_edge(block.loc_key, dst_loc_key) + self.graph.add_uniq_edge(lbl, dst_loc_key) modified = True - if src2.is_id(block.label): - dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprId(dst_label, dst.size)) - self.graph.discard_edge(lbl, block.label) - self.graph.discard_edge(block.label, dst_label) - self.graph.add_uniq_edge(lbl, dst_label) + if src2.is_id(block.loc_key): + dst = m2_expr.ExprCond(dst.cond, dst.src1, m2_expr.ExprLoc(dst_loc_key, dst.size)) + self.graph.discard_edge(lbl, block.loc_key) + self.graph.discard_edge(block.loc_key, dst_loc_key) + self.graph.add_uniq_edge(lbl, dst_loc_key) modified = True if dst.src1 == dst.src2: dst = dst.src1 else: continue new_parent = parent.set_dst(dst) - self.blocks[parent.label] = new_parent + self.blocks[parent.loc_key] = new_parent # Remove unlinked useless nodes - for label in jmp_blocks: - if (len(self.graph.predecessors(label)) == 0 and - len(self.graph.successors(label)) == 0): - self.graph.del_node(label) - del self.blocks[label] + for loc_key in jmp_blocks: + if (len(self.graph.predecessors(loc_key)) == 0 and + len(self.graph.successors(loc_key)) == 0): + self.graph.del_node(loc_key) + del self.blocks[loc_key] return modified def merge_blocks(self): @@ -926,6 +972,6 @@ class ir(IntermediateRepresentation): Use IntermediateRepresentation instead of ir """ - def __init__(self, label, irs, lines=None): + def __init__(self, loc_key, irs, lines=None): warnings.warn('DEPRECATION WARNING: use "IntermediateRepresentation" instead of "ir"') - super(ir, self).__init__(label, irs, lines) + super(ir, self).__init__(loc_key, irs, lines) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 4070f261..7ee55f97 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -2,8 +2,8 @@ import warnings import logging from collections import MutableMapping -from miasm2.expression.expression import ExprOp, ExprId, ExprInt, ExprMem, \ - ExprCompose, ExprSlice, ExprCond, ExprAff +from miasm2.expression.expression import ExprOp, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCompose, ExprSlice, ExprCond, ExprAff from miasm2.expression.simplifications import expr_simp from miasm2.core import asmblock from miasm2.ir.ir import AssignBlock @@ -17,9 +17,10 @@ log.setLevel(logging.INFO) def get_block(ir_arch, mdis, addr): """Get IRBlock at address @addr""" - lbl = ir_arch.get_label(addr) + lbl = ir_arch.get_loc_key(addr) if not lbl in ir_arch.blocks: - block = mdis.dis_block(lbl.offset) + offset = mdis.symbol_pool.loc_key_to_offset(lbl) + block = mdis.dis_block(offset) ir_arch.add_block(block) irblock = ir_arch.get_block(lbl) if irblock is None: @@ -812,6 +813,7 @@ class SymbolicExecutionEngine(object): self.expr_to_visitor = { ExprInt: self.eval_exprint, ExprId: self.eval_exprid, + ExprLoc: self.eval_exprloc, ExprMem: self.eval_exprmem, ExprSlice: self.eval_exprslice, ExprCond: self.eval_exprcond, @@ -885,10 +887,16 @@ class SymbolicExecutionEngine(object): def eval_exprid(self, expr, **kwargs): """[DEV]: Evaluate an ExprId using the current state""" - if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: - ret = ExprInt(expr.name.offset, expr.size) + ret = self.symbols.read(expr) + return ret + + def eval_exprloc(self, expr, **kwargs): + """[DEV]: Evaluate an ExprLoc using the current state""" + offset = self.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + if offset is not None: + ret = ExprInt(offset, expr.size) else: - ret = self.symbols.read(expr) + ret = expr return ret def eval_exprmem(self, expr, **kwargs): @@ -1040,7 +1048,17 @@ class SymbolicExecutionEngine(object): self.dump(mems=False) self.dump(ids=False) print '_' * 80 - return self.eval_expr(self.ir_arch.IRDst) + dst = self.eval_expr(self.ir_arch.IRDst) + + # Best effort to resolve destination as ExprLoc + if dst.is_loc(): + ret = dst + elif dst.is_int(): + label = self.ir_arch.symbol_pool.getby_offset_create(int(dst)) + ret = ExprLoc(label, dst.size) + else: + ret = dst + return ret def run_block_at(self, addr, step=False): """ @@ -1057,14 +1075,14 @@ class SymbolicExecutionEngine(object): """ Symbolic execution starting at @addr @addr: address to execute (int or ExprInt or label) - @lbl_stop: AsmLabel to stop execution on + @lbl_stop: LocKey to stop execution on @step: display intermediate steps """ while True: irblock = self.ir_arch.get_block(addr) if irblock is None: break - if irblock.label == lbl_stop: + if irblock.loc_key == lbl_stop: break addr = self.eval_updt_irblock(irblock, step=step) return addr diff --git a/miasm2/ir/symbexec_top.py b/miasm2/ir/symbexec_top.py index 1e1e76e9..64d428b4 100644 --- a/miasm2/ir/symbexec_top.py +++ b/miasm2/ir/symbexec_top.py @@ -121,14 +121,20 @@ class SymbExecTopNoMem(SymbolicExecutionEngine): def eval_exprid(self, expr, **kwargs): """[DEV]: Evaluate an ExprId using the current state""" - if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None: - ret = ExprInt(expr.name.offset, expr.size) - elif expr in self.regstop: + if expr in self.regstop: ret = exprid_top(expr) else: ret = self.symbols.read(expr) return ret + def eval_exprloc(self, expr, **kwargs): + offset = self.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + if offset is not None: + ret = ExprInt(offset, expr.size) + else: + ret = expr + return ret + def eval_exprcond(self, expr, **kwargs): """[DEV]: Evaluate an ExprCond using the current state""" cond = self.eval_expr_visitor(expr.cond, **kwargs) diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 226f26f1..2f354d47 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -18,6 +18,14 @@ class TranslatorC(Translator): '>>>': 'rot_right', } + def __init__(self, symbol_pool=None, **kwargs): + """Instance a C translator + @symbol_pool: AsmSymbolPool instance + """ + super(TranslatorC, self).__init__(**kwargs) + # symbol pool + self.symbol_pool = symbol_pool + def _size2mask(self, size): """Return a C string corresponding to the size2mask operation, with support for @size <= 128""" @@ -31,8 +39,6 @@ class TranslatorC(Translator): return "0x%x" % mask def from_ExprId(self, expr): - if isinstance(expr.name, asmblock.AsmLabel): - return "0x%x" % expr.name.offset return str(expr) def from_ExprInt(self, expr): @@ -44,16 +50,32 @@ class TranslatorC(Translator): ) return "0x%x" % expr.arg.arg + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.symbol_pool is None: + return str(loc_key) + + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + + if offset is None: + return name + return "0x%x" % offset + def from_ExprAff(self, expr): - return "%s = %s" % tuple(map(self.from_expr, (expr.dst, expr.src))) + new_dst = self.from_expr(expr.dst) + new_src = self.from_expr(expr.src) + return "%s = %s" % (new_dst, new_src) def from_ExprCond(self, expr): - return "(%s?%s:%s)" % tuple(map(self.from_expr, - (expr.cond, expr.src1, expr.src2))) + new_cond = self.from_expr(expr.cond) + new_src1 = self.from_expr(expr.src1) + new_src2 = self.from_expr(expr.src2) + return "(%s?%s:%s)" % (new_cond, new_src1, new_src2) def from_ExprMem(self, expr): - return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, - self.from_expr(expr.arg)) + new_ptr = self.from_expr(expr.arg) + return "MEM_LOOKUP_%.2d(jitcpu, %s)" % (expr.size, new_ptr) def from_ExprOp(self, expr): if len(expr.args) == 1: @@ -63,9 +85,11 @@ class TranslatorC(Translator): self._size2mask(expr.args[0].size), ) elif expr.op in ['cntleadzeros', 'cnttrailzeros']: - return "%s(0x%x, %s)" % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0])) + return "%s(0x%x, %s)" % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]) + ) elif expr.op == '!': return "(~ %s)&%s" % ( self.from_expr(expr.args[0]), @@ -78,7 +102,10 @@ class TranslatorC(Translator): expr.op.startswith("fxam_c") or expr.op in ["-", "ftan", "frndint", "f2xm1", "fsin", "fsqrt", "fabs", "fcos", "fchs"]): - return "%s(%s)" % (expr.op, self.from_expr(expr.args[0])) + return "%s(%s)" % ( + expr.op, + self.from_expr(expr.args[0]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -91,10 +118,12 @@ class TranslatorC(Translator): self._size2mask(expr.args[1].size), ) elif expr.op in self.dct_shift: - return 'SHIFT_%s(%d, %s, %s)' % (self.dct_shift[expr.op].upper(), - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return 'SHIFT_%s(%d, %s, %s)' % ( + self.dct_shift[expr.op].upper(), + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.is_associative() or expr.op in ["%", "/"]: oper = ['(%s&%s)' % ( self.from_expr(arg), @@ -105,9 +134,11 @@ class TranslatorC(Translator): return "((%s)&%s)" % (oper, self._size2mask(expr.args[0].size)) elif expr.op in ['-']: return '(((%s&%s) %s (%s&%s))&%s)' % ( - self.from_expr(expr.args[0]), self._size2mask(expr.args[0].size), + self.from_expr(expr.args[0]), + self._size2mask(expr.args[0].size), str(expr.op), - self.from_expr(expr.args[1]), self._size2mask(expr.args[1].size), + self.from_expr(expr.args[1]), + self._size2mask(expr.args[1].size), self._size2mask(expr.args[0].size) ) elif expr.op in self.dct_rot: @@ -125,21 +156,29 @@ class TranslatorC(Translator): elif (expr.op.startswith("fcom") or expr.op in ["fadd", "fsub", "fdiv", 'fmul', "fscale", "fprem", "fprem_lsb", "fyl2x", "fpatan"]): - return "fpu_%s(%s, %s)" % (expr.op, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "fpu_%s(%s, %s)" % ( + expr.op, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op == "segm": return "segm2addr(jitcpu, %s, %s)" % ( - self.from_expr(expr.args[0]), self.from_expr(expr.args[1])) + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ['udiv', 'umod', 'idiv', 'imod']: - return '%s%d(%s, %s)' % (expr.op, - expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return '%s%d(%s, %s)' % ( + expr.op, + expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) elif expr.op in ["bcdadd", "bcdadd_cf"]: - return "%s_%d(%s, %s)" % (expr.op, expr.args[0].size, - self.from_expr(expr.args[0]), - self.from_expr(expr.args[1])) + return "%s_%d(%s, %s)" % ( + expr.op, expr.args[0].size, + self.from_expr(expr.args[0]), + self.from_expr(expr.args[1]) + ) else: raise NotImplementedError('Unknown op: %r' % expr.op) @@ -159,9 +198,11 @@ class TranslatorC(Translator): def from_ExprSlice(self, expr): # XXX check mask for 64 bit & 32 bit compat - return "((%s>>%d) &%s)" % (self.from_expr(expr.arg), - expr.start, - self._size2mask(expr.stop - expr.start)) + return "((%s>>%d) &%s)" % ( + self.from_expr(expr.arg), + expr.start, + self._size2mask(expr.stop - expr.start) + ) def from_ExprCompose(self, expr): out = [] @@ -178,10 +219,12 @@ class TranslatorC(Translator): dst_cast = "uint%d_t" % size for index, arg in expr.iter_args(): - out.append("(((%s)(%s & %s)) << %d)" % (dst_cast, - self.from_expr(arg), - self._size2mask(arg.size), - index)) + out.append("(((%s)(%s & %s)) << %d)" % ( + dst_cast, + self.from_expr(arg), + self._size2mask(arg.size), + index) + ) out = ' | '.join(out) return '(' + out + ')' diff --git a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py index d7369e9e..e05f5e4d 100644 --- a/miasm2/ir/translators/python.py +++ b/miasm2/ir/translators/python.py @@ -20,6 +20,9 @@ class TranslatorPython(Translator): def from_ExprId(self, expr): return str(expr) + def from_ExprLoc(self, expr): + return str(expr) + def from_ExprMem(self, expr): return "memory(%s, 0x%x)" % (self.from_expr(expr.arg), expr.size / 8) diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index 18bcb9bd..f5d633e0 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -1,7 +1,6 @@ import logging import operator -from miasm2.core.asmblock import AsmLabel from miasm2.ir.translators.translator import Translator from miasm2.expression.smt2_helper import * @@ -120,7 +119,7 @@ class TranslatorSMT2(Translator): # Implemented language __LANG__ = "smt2" - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", symbol_pool=None, **kwargs): """Instance a SMT2 translator @endianness: (optional) memory endianness """ @@ -129,24 +128,33 @@ class TranslatorSMT2(Translator): self._mem = SMT2Mem(endianness) # map of translated bit vectors self._bitvectors = dict() + # symbol pool + self.symbol_pool = symbol_pool def from_ExprInt(self, expr): return bit_vec_val(expr.arg.arg, expr.size) def from_ExprId(self, expr): - if isinstance(expr.name, AsmLabel): - if expr.name.offset is not None: - return bit_vec_val(str(expr.name.offset), expr.size) - else: - # SMT2-escape expression name - name = "|{}|".format(str(expr.name)) - if name not in self._bitvectors: - self._bitvectors[name] = expr.size - return name - else: - if str(expr) not in self._bitvectors: - self._bitvectors[str(expr)] = expr.size - return str(expr) + if str(expr) not in self._bitvectors: + self._bitvectors[str(expr)] = expr.size + return str(expr) + + def from_ExprLoc(self, expr): + loc_key = expr.loc_key + if self.symbol_pool is None: + if str(loc_key) not in self._bitvectors: + self._bitvectors[str(loc_key)] = expr.size + return str(loc_key) + + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + + if offset is None: + return bit_vec_val(str(offset), expr.size) + name = "|{}|".format(str(name)) + if name not in self._bitvectors: + self._bitvectors[name] = expr.size + return name def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) diff --git a/miasm2/ir/translators/translator.py b/miasm2/ir/translators/translator.py index e3641843..557fdabe 100644 --- a/miasm2/ir/translators/translator.py +++ b/miasm2/ir/translators/translator.py @@ -53,6 +53,12 @@ class Translator(object): """ raise NotImplementedError("Abstract method") + def from_ExprLoc(self, expr): + """Translate an ExprLoc + @expr: ExprLoc to translate + """ + raise NotImplementedError("Abstract method") + def from_ExprCompose(self, expr): """Translate an ExprCompose @expr: ExprCompose to translate @@ -100,6 +106,7 @@ class Translator(object): # Handle Expr type handlers = {m2_expr.ExprInt: self.from_ExprInt, m2_expr.ExprId: self.from_ExprId, + m2_expr.ExprLoc: self.from_ExprLoc, m2_expr.ExprCompose: self.from_ExprCompose, m2_expr.ExprSlice: self.from_ExprSlice, m2_expr.ExprOp: self.from_ExprOp, diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index 536daff1..d01b73fa 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -5,7 +5,6 @@ import operator # Raise an ImportError if z3 is not available WITHOUT actually importing it imp.find_module("z3") -from miasm2.core.asmblock import AsmLabel from miasm2.ir.translators.translator import Translator log = logging.getLogger("translator_z3") @@ -116,7 +115,7 @@ class TranslatorZ3(Translator): # Operations translation trivial_ops = ["+", "-", "/", "%", "&", "^", "|", "*", "<<"] - def __init__(self, endianness="<", **kwargs): + def __init__(self, endianness="<", symbol_pool=None, **kwargs): """Instance a Z3 translator @endianness: (optional) memory endianness """ @@ -126,15 +125,28 @@ class TranslatorZ3(Translator): super(TranslatorZ3, self).__init__(**kwargs) self._mem = Z3Mem(endianness) + # symbol pool + self.symbol_pool = symbol_pool def from_ExprInt(self, expr): return z3.BitVecVal(expr.arg.arg, expr.size) def from_ExprId(self, expr): - if isinstance(expr.name, AsmLabel) and expr.name.offset is not None: - return z3.BitVecVal(expr.name.offset, expr.size) - else: + return z3.BitVec(str(expr), expr.size) + + def from_ExprLoc(self, expr): + if self.symbol_pool is None: + # No symbol_pool, fallback to default name return z3.BitVec(str(expr), expr.size) + loc_key = expr.loc_key + offset = self.symbol_pool.loc_key_to_offset(loc_key) + name = self.symbol_pool.loc_key_to_name(loc_key) + if offset is not None: + return z3.BitVecVal(offset, expr.size) + if name is not None: + return z3.BitVec(name, expr.size) + # fallback to default name + return z3.BitVec(str(loc_key), expr.size) def from_ExprMem(self, expr): addr = self.from_expr(expr.arg) diff --git a/miasm2/jitter/codegen.py b/miasm2/jitter/codegen.py index b10a9257..c6232642 100644 --- a/miasm2/jitter/codegen.py +++ b/miasm2/jitter/codegen.py @@ -2,18 +2,20 @@ Module to generate C code for a given native @block """ -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import Expr, ExprId, ExprLoc, ExprInt, \ + ExprMem, ExprCond, LocKey from miasm2.ir.ir import IRBlock, AssignBlock -from miasm2.ir.translators import Translator -from miasm2.core.asmblock import expr_is_label, AsmBlockBad, AsmLabel -# Miasm to C translator -TRANSLATOR = Translator.to_language("C") +from miasm2.ir.translators.C import TranslatorC +from miasm2.core.asmblock import AsmBlockBad -SIZE_TO_MASK = {size: TRANSLATOR.from_expr(m2_expr.ExprInt(0, size).mask) +TRANSLATOR_NO_SYMBOL = TranslatorC(symbol_pool=None) + +SIZE_TO_MASK = {size: TRANSLATOR_NO_SYMBOL.from_expr(ExprInt(0, size).mask) for size in (1, 2, 3, 7, 8, 16, 32, 64, 128)} + class Attributes(object): """ @@ -100,26 +102,27 @@ class CGen(object): def __init__(self, ir_arch): self.ir_arch = ir_arch self.PC = self.ir_arch.pc + self.translator = TranslatorC(self.ir_arch.symbol_pool) self.init_arch_C() def init_arch_C(self): """Iinitialize jitter internals""" self.id_to_c_id = {} for reg in self.ir_arch.arch.regs.all_regs_ids: - self.id_to_c_id[reg] = m2_expr.ExprId('mycpu->%s' % reg, reg.size) + self.id_to_c_id[reg] = ExprId('mycpu->%s' % reg, reg.size) self.C_PC = self.id_to_c(self.PC) - @staticmethod - def label_to_jitlabel(lbl): - """Convert AsmLabel to a jitter label name""" - assert lbl.offset is not None - return "jitblock_%X" % lbl.offset + def loc_key_to_jitlabel(self, lbl): + """Convert LocKey to a jitter label name""" + offset = self.ir_arch.symbol_pool.loc_key_to_offset(lbl) + assert offset is not None + return "jitblock_%X" % offset def dst_to_c(self, src): """Translate Expr @src into C code""" - if not isinstance(src, m2_expr.Expr): - src = m2_expr.ExprInt(src, self.PC.size) + if not isinstance(src, Expr): + src = ExprInt(src, self.PC.size) return self.id_to_c(src) def patch_c_id(self, expr): @@ -128,14 +131,14 @@ class CGen(object): def id_to_c(self, expr): """Translate Expr @expr into corresponding C code""" - return TRANSLATOR.from_expr(self.patch_c_id(expr)) + return self.translator.from_expr(self.patch_c_id(expr)) - def add_label_index(self, dst2index, lbl): + def add_label_index(self, dst2index, loc_key): """Insert @lbl to the dictionnary @dst2index with a uniq value - @dst2index: AsmLabel -> uniq value - @lbl: AsmLabel istance""" + @dst2index: LocKey -> uniq value + @loc_key: LocKey istance""" - dst2index[lbl] = len(dst2index) + dst2index[loc_key] = len(dst2index) def assignblk_to_irbloc(self, instr, assignblk): """ @@ -146,10 +149,11 @@ class CGen(object): new_assignblk = dict(assignblk) if self.ir_arch.IRDst not in assignblk: offset = instr.offset + instr.l - dst = m2_expr.ExprInt(offset, self.ir_arch.IRDst.size) + loc_key = self.ir_arch.symbol_pool.getby_offset_create(offset) + dst = ExprLoc(loc_key, self.ir_arch.IRDst.size) new_assignblk[self.ir_arch.IRDst] = dst irs = [AssignBlock(new_assignblk, instr)] - return IRBlock(self.ir_arch.get_instr_label(instr), irs) + return IRBlock(self.ir_arch.get_loc_key_for_instr(instr), irs) def block2assignblks(self, block): """ @@ -167,6 +171,7 @@ class CGen(object): for irblock in irblocks: assert irblock.dst is not None irblocks_list.append(irblocks) + return irblocks_list def add_local_var(self, dst_var, dst_index, expr): @@ -182,7 +187,7 @@ class CGen(object): if size not in dst_index: raise RuntimeError("Unsupported operand size %s", size) var_num = dst_index[size] - dst = m2_expr.ExprId("var_%.2d_%.2d" % (size, var_num), size) + dst = ExprId("var_%.2d_%.2d" % (size, var_num), size) dst_index[size] += 1 dst_var[expr] = dst return dst @@ -198,12 +203,13 @@ class CGen(object): # Prefetch memory read for expr in assignblk.get_r(mem_read=True): - if not isinstance(expr, m2_expr.ExprMem): + if not isinstance(expr, ExprMem): continue var_num = mem_index[expr.size] mem_index[expr.size] += 1 - var = m2_expr.ExprId( - "prefetch_%.2d_%.2d" % (expr.size, var_num), expr.size) + var = ExprId( + "prefetch_%.2d_%.2d" % (expr.size, var_num), expr.size + ) mem_var[expr] = var # Generate memory prefetch @@ -237,7 +243,7 @@ class CGen(object): src = src.replace_expr(prefetchers) if dst is self.ir_arch.IRDst: pass - elif isinstance(dst, m2_expr.ExprId): + elif isinstance(dst, ExprId): new_dst = self.add_local_var(dst_var, dst_index, dst) if dst in self.ir_arch.arch.regs.regs_flt_expr: # Dont mask float affectation @@ -248,9 +254,9 @@ class CGen(object): '%s = (%s)&%s;' % (self.id_to_c(new_dst), self.id_to_c(src), SIZE_TO_MASK[src.size])) - elif isinstance(dst, m2_expr.ExprMem): + elif isinstance(dst, ExprMem): ptr = dst.arg.replace_expr(prefetchers) - new_dst = m2_expr.ExprMem(ptr, dst.size) + new_dst = ExprMem(ptr, dst.size) str_dst = self.id_to_c(new_dst).replace('MEM_LOOKUP', 'MEM_WRITE') c_mem.append('%s, %s);' % (str_dst[:-1], self.id_to_c(src))) else: @@ -282,25 +288,25 @@ class CGen(object): @dst2index: dictionnary to link label to its index """ - if isinstance(expr, m2_expr.ExprCond): + if isinstance(expr, ExprCond): cond = self.id_to_c(expr.cond) src1, src1b = self.traverse_expr_dst(expr.src1, dst2index) src2, src2b = self.traverse_expr_dst(expr.src2, dst2index) return ("((%s)?(%s):(%s))" % (cond, src1, src2), "((%s)?(%s):(%s))" % (cond, src1b, src2b)) - if isinstance(expr, m2_expr.ExprInt): + if isinstance(expr, ExprInt): offset = int(expr) - self.add_label_index(dst2index, offset) - return ("%s" % dst2index[offset], hex(offset)) - if expr_is_label(expr): - label = expr.name - if label.offset != None: - offset = label.offset - self.add_label_index(dst2index, offset) - return ("%s" % dst2index[offset], hex(offset)) - self.add_label_index(dst2index, label) - return ("%s" % dst2index[label], "0") - + loc_key = self.ir_arch.symbol_pool.getby_offset_create(offset) + self.add_label_index(dst2index, loc_key) + return ("%s" % dst2index[loc_key], hex(offset)) + if expr.is_loc(): + loc_key = expr.loc_key + offset = self.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + if offset is not None: + self.add_label_index(dst2index, loc_key) + return ("%s" % dst2index[loc_key], hex(offset)) + self.add_label_index(dst2index, loc_key) + return ("%s" % dst2index[loc_key], "0") dst2index[expr] = -1 return ("-1", self.id_to_c(expr)) @@ -353,24 +359,28 @@ class CGen(object): @attrib: instruction Attributes @instr_offsets: instructions offsets list @dst: potential instruction destination""" - if isinstance(dst, AsmLabel) and dst.offset is None: - # Generate goto for local labels - return ['goto %s;' % dst.name] - offset = None - if isinstance(dst, AsmLabel) and dst.offset is not None: - offset = dst.offset - elif isinstance(dst, (int, long)): - offset = dst + out = [] - if (offset is not None and - offset > attrib.instr.offset and + if isinstance(dst, Expr): + out += self.gen_post_code(attrib) + out.append('BlockDst->address = DST_value;') + out += self.gen_post_instr_checks(attrib) + out.append('\t\treturn JIT_RET_NO_EXCEPTION;') + return out + + assert isinstance(dst, LocKey) + offset = self.ir_arch.symbol_pool.loc_key_to_offset(dst) + if offset is None: + # Generate goto for local labels + name = self.ir_arch.symbol_pool.loc_key_to_name(dst) + return ['goto %s;' % name] + if (offset > attrib.instr.offset and offset in instr_offsets): # Only generate goto for next instructions. # (consecutive instructions) - lbl = self.ir_arch.symbol_pool.getby_offset_create(dst) out += self.gen_post_code(attrib) out += self.gen_post_instr_checks(attrib) - out.append('goto %s;' % self.label_to_jitlabel(lbl)) + out.append('goto %s;' % self.loc_key_to_jitlabel(dst)) else: out += self.gen_post_code(attrib) out.append('BlockDst->address = DST_value;') @@ -466,10 +476,10 @@ class CGen(object): element_read = assignblk.get_r(mem_read=True) # Check mem read - attrib.mem_read = any(isinstance(expr, m2_expr.ExprMem) + attrib.mem_read = any(isinstance(expr, ExprMem) for expr in element_read) # Check mem write - attrib.mem_write = any(isinstance(dst, m2_expr.ExprMem) + attrib.mem_write = any(isinstance(dst, ExprMem) for dst in assignblk) def get_attributes(self, instr, irblocks, log_mn=False, log_regs=False): @@ -520,9 +530,11 @@ class CGen(object): """ instr_offsets = [line.offset for line in block.lines] - instr_offsets.append(self.get_block_post_label(block).offset) + post_label = self.get_block_post_label(block) + post_offset = self.ir_arch.symbol_pool.loc_key_to_offset(post_label) + instr_offsets.append(post_offset) lbl_start = self.ir_arch.symbol_pool.getby_offset_create(instr_offsets[0]) - return (self.CODE_INIT % self.label_to_jitlabel(lbl_start)).split("\n"), instr_offsets + return (self.CODE_INIT % self.loc_key_to_jitlabel(lbl_start)).split("\n"), instr_offsets def gen_irblock(self, instr_attrib, attributes, instr_offsets, irblock): """ @@ -555,8 +567,9 @@ class CGen(object): """ lbl = self.get_block_post_label(block) - dst = self.dst_to_c(lbl.offset) - code = self.CODE_RETURN_NO_EXCEPTION % (self.label_to_jitlabel(lbl), self.C_PC, dst, dst) + offset = self.ir_arch.symbol_pool.loc_key_to_offset(lbl) + dst = self.dst_to_c(offset) + code = self.CODE_RETURN_NO_EXCEPTION % (self.loc_key_to_jitlabel(lbl), self.C_PC, dst, dst) return code.split('\n') def gen_c(self, block, log_mn=False, log_regs=False): @@ -569,23 +582,25 @@ class CGen(object): if isinstance(block, AsmBlockBad): return self.gen_bad_block() irblocks_list = self.block2assignblks(block) - out, instr_offsets = self.gen_init(block) assert len(block.lines) == len(irblocks_list) for instr, irblocks in zip(block.lines, irblocks_list): instr_attrib, irblocks_attributes = self.get_attributes(instr, irblocks, log_mn, log_regs) - for index, irblock in enumerate(irblocks): new_irblock = self.ir_arch.irbloc_fix_regs_for_mode(irblock, self.ir_arch.attrib) - if new_irblock.label.offset is None: + label = new_irblock.loc_key + offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) + if offset is None: + name = self.ir_arch.symbol_pool.loc_key_to_name(label) out.append("%-40s // %.16X %s" % - (str(new_irblock.label.name) + ":", instr.offset, instr)) + (str(name) + ":", instr.offset, instr)) else: out.append("%-40s // %.16X %s" % - (self.label_to_jitlabel(new_irblock.label) + ":", instr.offset, instr)) + (self.loc_key_to_jitlabel(label) + ":", instr.offset, instr)) if index == 0: out += self.gen_pre_code(instr_attrib) out += self.gen_irblock(instr_attrib, irblocks_attributes[index], instr_offsets, new_irblock) out += self.gen_finalize(block) + return ['\t' + line for line in out] diff --git a/miasm2/jitter/jitcore.py b/miasm2/jitter/jitcore.py index 4402ef49..8fd3453e 100644 --- a/miasm2/jitter/jitcore.py +++ b/miasm2/jitter/jitcore.py @@ -17,9 +17,10 @@ # from hashlib import md5 -from miasm2.core.asmblock import disasmEngine, AsmLabel, AsmBlockBad +from miasm2.core.asmblock import disasmEngine, AsmBlockBad from miasm2.core.interval import interval from miasm2.core.utils import BoundedDict +from miasm2.expression.expression import LocKey from miasm2.jitter.csts import * @@ -40,7 +41,7 @@ class JitCore(object): self.arch_name = "%s%s" % (self.ir_arch.arch.name, self.ir_arch.attrib) self.bs = bs self.known_blocs = {} - self.lbl2jitbloc = BoundedDict(self.jitted_block_max_size, + self.loc_key_to_jit_block = BoundedDict(self.jitted_block_max_size, delete_cb=self.jitted_block_delete_cb) self.lbl2bloc = {} self.log_mn = False @@ -75,7 +76,7 @@ class JitCore(object): def clear_jitted_blocks(self): "Reset all jitted blocks" - self.lbl2jitbloc.clear() + self.loc_key_to_jit_block.clear() self.lbl2bloc.clear() self.blocs_mem_interval = interval() @@ -100,8 +101,9 @@ class JitCore(object): cur_block.ad_max = cur_block.lines[-1].offset + cur_block.lines[-1].l else: # 1 byte block for unknown mnemonic - cur_block.ad_min = cur_block.label.offset - cur_block.ad_max = cur_block.label.offset+1 + offset = ir_arch.symbol_pool.loc_key_to_offset(cur_block.loc_key) + cur_block.ad_min = offset + cur_block.ad_max = offset+1 def add_bloc_to_mem_interval(self, vm, block): @@ -125,20 +127,21 @@ class JitCore(object): """Add a block to JiT and JiT it. @block: asm_bloc to add """ - irblocks = self.ir_arch.add_block(block, gen_pc_updt = True) block.blocks = irblocks - self.jitirblocs(block.label, irblocks) + self.jitirblocs(block.loc_key, irblocks) def disbloc(self, addr, vm): """Disassemble a new block and JiT it - @addr: address of the block to disassemble (AsmLabel or int) + @addr: address of the block to disassemble (LocKey or int) @vm: VmMngr instance """ # Get the block - if isinstance(addr, AsmLabel): - addr = addr.offset + if isinstance(addr, LocKey): + addr = self.ir_arch.symbol_pool.loc_key_to_offset(addr) + if addr is None: + raise RuntimeError("Unknown offset for LocKey") # Prepare disassembler self.mdis.lines_wd = self.options["jit_maxline"] @@ -153,7 +156,7 @@ class JitCore(object): print cur_block # Update label -> block - self.lbl2bloc[cur_block.label] = cur_block + self.lbl2bloc[cur_block.loc_key] = cur_block # Store min/max block address needed in jit automod code self.get_bloc_min_max(cur_block) @@ -174,7 +177,7 @@ class JitCore(object): if lbl is None: lbl = getattr(cpu, self.ir_arch.pc.name) - if not lbl in self.lbl2jitbloc: + if not lbl in self.loc_key_to_jit_block: # Need to JiT the block cur_block = self.disbloc(lbl, cpu.vmmngr) if isinstance(cur_block, AsmBlockBad): @@ -188,7 +191,7 @@ class JitCore(object): return lbl # Run the block and update cpu/vmmngr state - return self.exec_wrapper(lbl, cpu, self.lbl2jitbloc.data, breakpoints, + return self.exec_wrapper(lbl, cpu, self.loc_key_to_jit_block.data, breakpoints, self.options["max_exec_per_call"]) def blocs2memrange(self, blocks): @@ -245,16 +248,18 @@ class JitCore(object): try: for irblock in block.blocks: # Remove offset -> jitted block link - if irblock.label.offset in self.lbl2jitbloc: - del(self.lbl2jitbloc[irblock.label.offset]) + offset = self.ir_arch.symbol_pool.loc_key_to_offset(irblock.loc_key) + if offset in self.loc_key_to_jit_block: + del(self.loc_key_to_jit_block[offset]) except AttributeError: # The block has never been translated in IR - if block.label.offset in self.lbl2jitbloc: - del(self.lbl2jitbloc[block.label.offset]) + offset = self.ir_arch.symbol_pool.loc_key_to_offset(block.loc_key) + if offset in self.loc_key_to_jit_block: + del(self.loc_key_to_jit_block[offset]) # Remove label -> block link - del(self.lbl2bloc[block.label]) + del(self.lbl2bloc[block.loc_key]) return modified_blocks @@ -283,7 +288,8 @@ class JitCore(object): @block: asmblock """ block_raw = "".join(line.b for line in block.lines) - block_hash = md5("%X_%s_%s_%s_%s" % (block.label.offset, + offset = self.ir_arch.symbol_pool.loc_key_to_offset(block.loc_key) + block_hash = md5("%X_%s_%s_%s_%s" % (offset, self.arch_name, self.log_mn, self.log_regs, diff --git a/miasm2/jitter/jitcore_cc_base.py b/miasm2/jitter/jitcore_cc_base.py index 4dd8825a..f0a75cf4 100644 --- a/miasm2/jitter/jitcore_cc_base.py +++ b/miasm2/jitter/jitcore_cc_base.py @@ -85,20 +85,20 @@ class JitCore_Cc_Base(JitCore): """ self.codegen = codegen - def label2fname(self, label): + def loc_key_to_filename(self, loc_key): """ - Generate function name from @label - @label: AsmLabel instance + Generate function name from @loc_key + @loc_key: LocKey instance """ - return "block_%s" % self.codegen.label_to_jitlabel(label) + return "block_%s" % self.codegen.loc_key_to_jitlabel(loc_key) - def gen_c_code(self, label, block): + def gen_c_code(self, loc_key, block): """ Return the C code corresponding to the @irblocks - @label: AsmLabel of the block to jit + @loc_key: LocKey of the block to jit @irblocks: list of irblocks """ - f_name = self.label2fname(label) + f_name = self.loc_key_to_filename(loc_key) f_declaration = 'int %s(block_id * BlockDst, JitCpu* jitcpu)' % f_name out = self.codegen.gen_c(block, log_mn=self.log_mn, log_regs=self.log_regs) out = [f_declaration + '{'] + out + ['}\n'] diff --git a/miasm2/jitter/jitcore_gcc.py b/miasm2/jitter/jitcore_gcc.py index ccccc37a..d9da3160 100644 --- a/miasm2/jitter/jitcore_gcc.py +++ b/miasm2/jitter/jitcore_gcc.py @@ -25,12 +25,13 @@ class JitCore_Gcc(JitCore_Cc_Base): del self.states[offset] def load_code(self, label, fname_so): - f_name = self.label2fname(label) + f_name = self.loc_key_to_filename(label) lib = ctypes.cdll.LoadLibrary(fname_so) func = getattr(lib, f_name) addr = ctypes.cast(func, ctypes.c_void_p).value - self.lbl2jitbloc[label.offset] = addr - self.states[label.offset] = lib + offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) + self.loc_key_to_jit_block[offset] = addr + self.states[offset] = lib def add_bloc(self, block): """Add a bloc to JiT and JiT it. @@ -40,7 +41,7 @@ class JitCore_Gcc(JitCore_Cc_Base): fname_out = os.path.join(self.tempdir, "%s.so" % block_hash) if not os.access(fname_out, os.R_OK | os.X_OK): - func_code = self.gen_c_code(block.label, block) + func_code = self.gen_c_code(block.loc_key, block) # Create unique C file fdesc, fname_in = tempfile.mkstemp(suffix=".c") @@ -60,7 +61,7 @@ class JitCore_Gcc(JitCore_Cc_Base): os.rename(fname_tmp, fname_out) os.remove(fname_in) - self.load_code(block.label, fname_out) + self.load_code(block.loc_key, fname_out) @staticmethod def gen_C_source(ir_arch, func_code): diff --git a/miasm2/jitter/jitcore_llvm.py b/miasm2/jitter/jitcore_llvm.py index 452b6d84..5152cf9e 100644 --- a/miasm2/jitter/jitcore_llvm.py +++ b/miasm2/jitter/jitcore_llvm.py @@ -78,12 +78,13 @@ class JitCore_LLVM(jitcore.JitCore): """Add a block to JiT and JiT it. @block: the block to add """ + block_hash = self.hash_block(block) fname_out = os.path.join(self.tempdir, "%s.bc" % block_hash) if not os.access(fname_out, os.R_OK): # Build a function in the context - func = LLVMFunction(self.context, LLVMFunction.canonize_label_name(block.label)) + func = LLVMFunction(self.context, block.loc_key) # Set log level func.log_regs = self.log_regs @@ -114,7 +115,9 @@ class JitCore_LLVM(jitcore.JitCore): else: # The cache file exists: function can be loaded from cache - ptr = self.context.get_ptr_from_cache(fname_out, LLVMFunction.canonize_label_name(block.label)) + ptr = self.context.get_ptr_from_cache(fname_out, block.loc_key) # Store a pointer on the function jitted code - self.lbl2jitbloc[block.label.offset] = ptr + loc_key = block.loc_key + offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + self.loc_key_to_jit_block[offset] = ptr diff --git a/miasm2/jitter/jitcore_python.py b/miasm2/jitter/jitcore_python.py index 799848ab..785e3fa1 100644 --- a/miasm2/jitter/jitcore_python.py +++ b/miasm2/jitter/jitcore_python.py @@ -34,9 +34,9 @@ class JitCore_Python(jitcore.JitCore): "Preload symbols according to current architecture" self.symbexec.reset_regs() - def jitirblocs(self, label, irblocks): + def jitirblocs(self, loc_key, irblocks): """Create a python function corresponding to an irblocks' group. - @label: the label of the irblocks + @loc_key: the loc_key of the irblocks @irblocks: a gorup of irblocks """ @@ -48,7 +48,7 @@ class JitCore_Python(jitcore.JitCore): vmmngr = cpu.vmmngr # Keep current location in irblocks - cur_label = label + cur_loc_key = loc_key # Required to detect new instructions offsets_jitted = set() @@ -57,13 +57,14 @@ class JitCore_Python(jitcore.JitCore): exec_engine = self.symbexec expr_simp = exec_engine.expr_simp + known_loc_keys = set(irb.loc_key for irb in irblocks) # For each irbloc inside irblocks while True: - # Get the current bloc for irb in irblocks: - if irb.label == cur_label: + if irb.loc_key == cur_loc_key: break + else: raise RuntimeError("Irblocks must end with returning an " "ExprInt instance") @@ -75,7 +76,7 @@ class JitCore_Python(jitcore.JitCore): for assignblk in irb: instr = assignblk.instr # For each new instruction (in assembly) - if instr.offset not in offsets_jitted: + if instr is not None and instr.offset not in offsets_jitted: # Test exceptions vmmngr.check_invalid_code_blocs() vmmngr.check_memory_breakpoint() @@ -120,23 +121,25 @@ class JitCore_Python(jitcore.JitCore): # Manage resulting address if isinstance(ad, m2_expr.ExprInt): return ad.arg.arg - elif isinstance(ad, m2_expr.ExprId): - cur_label = ad.name + elif isinstance(ad, m2_expr.ExprLoc): + cur_loc_key = ad.loc_key else: raise NotImplementedError("Type not handled: %s" % ad) - # Associate myfunc with current label - self.lbl2jitbloc[label.offset] = myfunc + # Associate myfunc with current loc_key + offset = self.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + assert offset is not None + self.loc_key_to_jit_block[offset] = myfunc - def exec_wrapper(self, label, cpu, _lbl2jitbloc, _breakpoints, + def exec_wrapper(self, loc_key, cpu, _loc_key_to_jit_block, _breakpoints, _max_exec_per_call): - """Call the function @label with @cpu - @label: function's label + """Call the function @loc_key with @cpu + @loc_key: function's loc_key @cpu: JitCpu instance """ - # Get Python function corresponding to @label - fc_ptr = self.lbl2jitbloc[label] + # Get Python function corresponding to @loc_key + fc_ptr = self.loc_key_to_jit_block[loc_key] # Execute the function return fc_ptr(cpu) diff --git a/miasm2/jitter/jitload.py b/miasm2/jitter/jitload.py index db486b4f..28200997 100644 --- a/miasm2/jitter/jitload.py +++ b/miasm2/jitter/jitload.py @@ -313,7 +313,6 @@ class jitter(object): Check exceptions before breakpoints.""" self.pc = pc - # Callback called before exec if self.exec_cb is not None: res = self.exec_cb(self) diff --git a/miasm2/jitter/llvmconvert.py b/miasm2/jitter/llvmconvert.py index dd3afed6..2045f083 100644 --- a/miasm2/jitter/llvmconvert.py +++ b/miasm2/jitter/llvmconvert.py @@ -14,7 +14,8 @@ import os from llvmlite import binding as llvm from llvmlite import ir as llvm_ir -import miasm2.expression.expression as m2_expr +from miasm2.expression.expression import ExprId, ExprInt, ExprMem, ExprSlice, \ + ExprCond, ExprLoc, ExprOp, ExprCompose, LocKey import miasm2.jitter.csts as m2_csts import miasm2.core.asmblock as m2_asmblock from miasm2.jitter.codegen import CGen @@ -43,7 +44,7 @@ class LLVMType(llvm_ir.Type): @classmethod def generic(cls, e): "Generic value for execution" - if isinstance(e, m2_expr.ExprInt): + if isinstance(e, ExprInt): return llvm_e.GenericValue.int(LLVMType.IntType(e.size), int(e.arg)) elif isinstance(e, llvm_e.GenericValue): return e @@ -69,6 +70,22 @@ class LLVMContext(): self.target_machine = target.create_target_machine() self.init_exec_engine() + def canonize_label_name(self, label): + """Canonize @label names to a common form. + @label: str or asmlabel instance""" + if isinstance(label, str): + return label + if not isinstance(label, LocKey): + raise ValueError("label must either be str or LocKey") + + offset = self.ir_arch.symbol_pool.loc_key_to_offset(label) + + if offset is None: + name = self.ir_arch.symbol_pool.loc_key_to_name(label) + return "%s" % name + else: + return "label_off_%X" % offset + def optimise_level(self, level=2): """Set the optimisation level to @level from 0 to 2 0: non-optimized @@ -324,6 +341,7 @@ class LLVMContext_JIT(LLVMContext): def get_ptr_from_cache(self, file_name, func_name): "Load @file_name and return a pointer on the jitter @func_name" # We use an empty module to avoid loosing time on function building + func_name = self.canonize_label_name(func_name) empty_module = llvm.parse_assembly("") empty_module.fname_out = file_name @@ -380,6 +398,7 @@ class LLVMFunction(): def __init__(self, llvm_context, name="fc", new_module=True): "Create a new function with name @name" self.llvm_context = llvm_context + name = self.llvm_context.canonize_label_name(name) if new_module: self.llvm_context.new_module() self.mod = self.llvm_context.get_module() @@ -405,7 +424,7 @@ class LLVMFunction(): @label: str or asmlabel @overwrite: if False, do nothing if a bbl with the same name already exists Return the corresponding LLVM Basic Block""" - name = self.canonize_label_name(label) + name = self.llvm_context.canonize_label_name(label) bbl = self.name2bbl.get(name, None) if not overwrite and bbl is not None: return bbl @@ -483,25 +502,9 @@ class LLVMFunction(): var_casted = var self.builder.ret(var_casted) - @staticmethod - def canonize_label_name(label): - """Canonize @label names to a common form. - @label: str or asmlabel instance""" - if isinstance(label, str): - return label - if m2_asmblock.expr_is_label(label): - label = label.name - if isinstance(label, m2_asmblock.AsmLabel): - if label.offset is None: - return "label_%s" % label.name - else: - return "label_%X" % label.offset - else: - raise ValueError("label must either be str or asmlabel") - - def get_basic_bloc_by_label(self, label): + def get_basic_block_by_loc_key(self, loc_key): "Return the bbl corresponding to label, None otherwise" - return self.name2bbl.get(self.canonize_label_name(label), None) + return self.name2bbl.get(self.llvm_context.canonize_label_name(loc_key), None) def global_constant(self, name, value): """ @@ -567,11 +570,11 @@ class LLVMFunction(): # Destination builder = self.builder - if isinstance(dst, m2_expr.ExprId): + if isinstance(dst, ExprId): ptr_casted = self.get_ptr_by_expr(dst) builder.store(src, ptr_casted) - elif isinstance(dst, m2_expr.ExprMem): + elif isinstance(dst, ExprMem): addr = self.add_ir(dst.arg) self.llvm_context.memory_write(self, addr, dst.size, src) else: @@ -624,20 +627,19 @@ class LLVMFunction(): builder = self.builder - if isinstance(expr, m2_expr.ExprInt): + if isinstance(expr, ExprInt): ret = llvm_ir.Constant(LLVMType.IntType(expr.size), int(expr.arg)) self.update_cache(expr, ret) return ret - if isinstance(expr, m2_expr.ExprId): - name = expr.name - if not isinstance(name, str): - # Resolve label - offset = name.offset - ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) - self.update_cache(expr, ret) - return ret + if expr.is_loc(): + offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(expr.loc_key) + ret = llvm_ir.Constant(LLVMType.IntType(expr.size), offset) + self.update_cache(expr, ret) + return ret + if isinstance(expr, ExprId): + name = expr.name try: # If expr.name is already known (args) return self.local_vars[name] @@ -650,7 +652,7 @@ class LLVMFunction(): self.update_cache(expr, var) return var - if isinstance(expr, m2_expr.ExprOp): + if isinstance(expr, ExprOp): op = expr.op if (op in self.op_translate or @@ -857,12 +859,12 @@ class LLVMFunction(): raise NotImplementedError() - if isinstance(expr, m2_expr.ExprMem): + if isinstance(expr, ExprMem): addr = self.add_ir(expr.arg) return self.llvm_context.memory_lookup(self, addr, expr.size) - if isinstance(expr, m2_expr.ExprCond): + if isinstance(expr, ExprCond): # Compute cond cond = self.add_ir(expr.cond) zero_casted = LLVMType.IntType(expr.cond.size)(0) @@ -875,7 +877,7 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret - if isinstance(expr, m2_expr.ExprSlice): + if isinstance(expr, ExprSlice): src = self.add_ir(expr.arg) @@ -901,7 +903,7 @@ class LLVMFunction(): self.update_cache(expr, ret) return ret - if isinstance(expr, m2_expr.ExprCompose): + if isinstance(expr, ExprCompose): args = [] @@ -982,9 +984,9 @@ class LLVMFunction(): builder.position_at_end(then_block) PC = self.llvm_context.PC if isinstance(offset, (int, long)): - offset = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + offset = self.add_ir(ExprInt(offset, PC.size)) self.affect(offset, PC) - self.affect(self.add_ir(m2_expr.ExprInt(1, 8)), m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) self.set_ret(offset) builder.position_at_end(merge_block) @@ -1029,9 +1031,9 @@ class LLVMFunction(): builder.position_at_end(then_block) PC = self.llvm_context.PC if isinstance(offset, (int, long)): - offset = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + offset = self.add_ir(ExprInt(offset, PC.size)) self.affect(offset, PC) - self.affect(self.add_ir(m2_expr.ExprInt(1, 8)), m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(1, 8)), ExprId("status", 32)) self.set_ret(offset) builder.position_at_end(merge_block) @@ -1076,9 +1078,9 @@ class LLVMFunction(): for i, solution in enumerate(possible_values(expr)): value = solution.value index = dst2case.get(value, i) - to_eval = to_eval.replace_expr({value: m2_expr.ExprInt(index, value.size)}) + to_eval = to_eval.replace_expr({value: ExprInt(index, value.size)}) dst2case[value] = index - if m2_asmblock.expr_is_int_or_label(value): + if value.is_int() or value.is_loc(): case2dst[i] = value else: case2dst[i] = self.add_ir(value) @@ -1104,13 +1106,14 @@ class LLVMFunction(): # We are no longer in the main stream, deactivate cache self.main_stream = False - if isinstance(dst, m2_expr.ExprInt): - dst = m2_expr.ExprId(self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)), - dst.size) + if isinstance(dst, ExprInt): + loc_key = self.llvm_context.ir_arch.symbol_pool.getby_offset_create(int(dst)) + dst = ExprLoc(loc_key, dst.size) - if m2_asmblock.expr_is_label(dst): - bbl = self.get_basic_bloc_by_label(dst) - offset = dst.name.offset + if isinstance(dst, ExprLoc): + loc_key = dst.loc_key + bbl = self.get_basic_block_by_loc_key(loc_key) + offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(loc_key) if bbl is not None: # "local" jump, inside this function if offset is None: @@ -1130,7 +1133,7 @@ class LLVMFunction(): # extern # "extern" jump on a defined offset, return to the caller - dst = self.add_ir(m2_expr.ExprInt(offset, PC.size)) + dst = self.add_ir(ExprInt(offset, PC.size)) # "extern" jump with a computed value, return to the caller assert isinstance(dst, (llvm_ir.Instruction, llvm_ir.Value)) @@ -1142,7 +1145,7 @@ class LLVMFunction(): self.gen_post_code(attrib) self.affect(dst, PC) self.gen_post_instr_checks(attrib, dst) - self.affect(self.add_ir(m2_expr.ExprInt(0, 8)), m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(0, 8)), ExprId("status", 32)) self.set_ret(dst) @@ -1166,7 +1169,7 @@ class LLVMFunction(): # Prefetch memory for element in assignblk.get_r(mem_read=True): - if isinstance(element, m2_expr.ExprMem): + if isinstance(element, ExprMem): self.add_ir(element) # Evaluate expressions @@ -1184,7 +1187,7 @@ class LLVMFunction(): # Update the memory for dst, src in values.iteritems(): - if isinstance(dst, m2_expr.ExprMem): + if isinstance(dst, ExprMem): self.affect(src, dst) # Check memory write exception @@ -1194,7 +1197,7 @@ class LLVMFunction(): # Update registers values for dst, src in values.iteritems(): - if not isinstance(dst, m2_expr.ExprMem): + if not isinstance(dst, ExprMem): self.affect(src, dst) # Check post assignblk exception flags @@ -1235,11 +1238,12 @@ class LLVMFunction(): builder = self.builder m2_exception_flag = self.llvm_context.ir_arch.arch.regs.exception_flags t_size = LLVMType.IntType(m2_exception_flag.size) - self.affect(self.add_ir(m2_expr.ExprInt(1, 8)), - m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(1, 8)), + ExprId("status", 32)) self.affect(t_size(m2_csts.EXCEPT_UNK_MNEMO), m2_exception_flag) - self.set_ret(LLVMType.IntType(64)(asmblock.label.offset)) + offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(asmblock.loc_key) + self.set_ret(LLVMType.IntType(64)(offset)) def gen_finalize(self, asmblock, codegen): """ @@ -1250,11 +1254,11 @@ class LLVMFunction(): next_label = codegen.get_block_post_label(asmblock) builder = self.builder - builder.position_at_end(self.get_basic_bloc_by_label(next_label)) + builder.position_at_end(self.get_basic_block_by_loc_key(next_label)) # Common code - self.affect(self.add_ir(m2_expr.ExprInt(0, 8)), - m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(0, 8)), + ExprId("status", 32)) # Check if IRDst has been set zero_casted = LLVMType.IntType(codegen.delay_slot_set.size)(0) @@ -1277,14 +1281,15 @@ class LLVMFunction(): PC = self.llvm_context.PC to_ret = self.add_ir(codegen.delay_slot_dst) self.affect(to_ret, PC) - self.affect(self.add_ir(m2_expr.ExprInt(0, 8)), - m2_expr.ExprId("status", 32)) + self.affect(self.add_ir(ExprInt(0, 8)), + ExprId("status", 32)) self.set_ret(to_ret) # Else Block builder.position_at_end(else_block) PC = self.llvm_context.PC - to_ret = LLVMType.IntType(PC.size)(next_label.offset) + next_label_offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(next_label) + to_ret = LLVMType.IntType(PC.size)(next_label_offset) self.affect(to_ret, PC) self.set_ret(to_ret) @@ -1293,16 +1298,16 @@ class LLVMFunction(): Prototype : f(i8* jitcpu, i8* vmcpu, i8* vmmngr, i8* status)""" # Build function signature - self.my_args.append((m2_expr.ExprId("jitcpu", 32), + self.my_args.append((ExprId("jitcpu", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "jitcpu")) - self.my_args.append((m2_expr.ExprId("vmcpu", 32), + self.my_args.append((ExprId("vmcpu", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "vmcpu")) - self.my_args.append((m2_expr.ExprId("vmmngr", 32), + self.my_args.append((ExprId("vmmngr", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "vmmngr")) - self.my_args.append((m2_expr.ExprId("status", 32), + self.my_args.append((ExprId("status", 32), llvm_ir.PointerType(LLVMType.IntType(8)), "status")) ret_size = 64 @@ -1335,9 +1340,10 @@ class LLVMFunction(): ptr = self.CreateEntryBlockAlloca(eltype, default_value=eltype(0)) self.local_vars_pointers[element.name] = ptr - lbl = codegen.get_block_post_label(asmblock) - instr_offsets.append(lbl.offset) - self.append_basic_block(lbl) + loc_key = codegen.get_block_post_label(asmblock) + offset = self.llvm_context.ir_arch.symbol_pool.loc_key_to_offset(loc_key) + instr_offsets.append(offset) + self.append_basic_block(loc_key) # Add content builder.position_at_end(entry_bbl) @@ -1350,7 +1356,7 @@ class LLVMFunction(): # Pre-create basic blocks for irblock in irblocks: - self.append_basic_block(irblock.label, overwrite=False) + self.append_basic_block(irblock.loc_key, overwrite=False) # Generate the corresponding code for index, irblock in enumerate(irblocks): @@ -1358,8 +1364,7 @@ class LLVMFunction(): irblock, self.llvm_context.ir_arch.attrib) # Set the builder at the begining of the correct bbl - name = self.canonize_label_name(new_irblock.label) - self.builder.position_at_end(self.get_basic_bloc_by_label(name)) + self.builder.position_at_end(self.get_basic_block_by_loc_key(new_irblock.loc_key)) if index == 0: self.gen_pre_code(instr_attrib) @@ -1370,7 +1375,7 @@ class LLVMFunction(): # Branch entry_bbl on first label builder.position_at_end(entry_bbl) - first_label_bbl = self.get_basic_bloc_by_label(asmblock.label) + first_label_bbl = self.get_basic_block_by_loc_key(asmblock.loc_key) builder.branch(first_label_bbl) diff --git a/miasm2/jitter/loader/pe.py b/miasm2/jitter/loader/pe.py index 2fe4cd3f..9bc0ef8b 100644 --- a/miasm2/jitter/loader/pe.py +++ b/miasm2/jitter/loader/pe.py @@ -163,7 +163,8 @@ def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs): section.size = new_size section.rawsize = new_size section.data = strpatchwork.StrPatchwork( - section.data[:new_size]) + section.data[:new_size] + ) section.offset = section.addr # Last section alignement |