diff options
| author | Camille Mougey <commial@gmail.com> | 2015-02-23 13:35:38 +0100 |
|---|---|---|
| committer | Camille Mougey <commial@gmail.com> | 2015-02-23 13:35:38 +0100 |
| commit | 4713279e25625b33b8867c35d7da686781b8b9a7 (patch) | |
| tree | df358b3d6cc18ca7f8d4060b5266c394a3ce299f | |
| parent | 855bc7be4f06a0ea7eb4c8a6f009b21703272a30 (diff) | |
| parent | 319c5da3966c9d29ab50c241562dc92b9e0fdc27 (diff) | |
| download | miasm-4713279e25625b33b8867c35d7da686781b8b9a7.tar.gz miasm-4713279e25625b33b8867c35d7da686781b8b9a7.zip | |
Merge pull request #83 from serpilliere/expr_immutable
Expr immutable
| -rw-r--r-- | example/ida/depgraph.py | 6 | ||||
| -rw-r--r-- | example/ida/graph_ir.py | 4 | ||||
| -rw-r--r-- | miasm2/arch/arm/sem.py | 5 | ||||
| -rw-r--r-- | miasm2/arch/msp430/sem.py | 5 | ||||
| -rw-r--r-- | miasm2/arch/x86/sem.py | 31 | ||||
| -rw-r--r-- | miasm2/expression/expression.py | 480 | ||||
| -rw-r--r-- | miasm2/expression/expression_helper.py | 6 | ||||
| -rw-r--r-- | miasm2/expression/simplifications_cond.py | 29 | ||||
| -rw-r--r-- | miasm2/ir/ir.py | 9 | ||||
| -rw-r--r-- | miasm2/ir/translators/C.py | 2 |
10 files changed, 314 insertions, 263 deletions
diff --git a/example/ida/depgraph.py b/example/ida/depgraph.py index 65a3c88f..33aaa6cb 100644 --- a/example/ida/depgraph.py +++ b/example/ida/depgraph.py @@ -3,6 +3,8 @@ import sys from idaapi import GraphViewer from miasm2.core.bin_stream_ida import bin_stream_ida from miasm2.core.asmbloc import * +from miasm2.expression import expression as m2_expr + from miasm2.expression.simplifications import expr_simp from miasm2.analysis.machine import Machine from miasm2.analysis.depgraph import DependencyGraph, DependencyGraph_NoMemory @@ -133,8 +135,8 @@ for bloc in blocs: # Simplify affectations for irb in ir_arch.blocs.values(): for irs in irb.irs: - for i, e in enumerate(irs): - e.dst, e.src = expr_simp(e.dst), expr_simp(e.src) + for i, expr in enumerate(irs): + irs[i] = m2_expr.ExprAff(expr_simp(expr.dst), expr_simp(expr.src)) # Build the IRA Graph ir_arch.gen_graph() diff --git a/example/ida/graph_ir.py b/example/ida/graph_ir.py index a1db6cbd..3a936e37 100644 --- a/example/ida/graph_ir.py +++ b/example/ida/graph_ir.py @@ -139,8 +139,8 @@ print "IR ok... %x" % ad for irb in ir_arch.blocs.values(): for irs in irb.irs: - for i, e in enumerate(irs): - e.dst, e.src = expr_simp(e.dst), expr_simp(e.src) + for i, expr in enumerate(irs): + irs[i] = ExprAff(expr_simp(expr.dst), expr_simp(expr.src)) ir_arch.gen_graph() out = ir_arch.graph() diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 06f6bddf..bbab8d59 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -1193,8 +1193,9 @@ class ir_arml(ir): [(args[-1].args[0][1:], 0, 31), (cf, 31, 32)]) elif (args[-1].op in ['<<', '>>', '<<a', 'a>>', '<<<', '>>>'] and isinstance(args[-1].args[-1], ExprId)): - args[-1].args = args[-1].args[:-1] + ( - args[-1].args[-1][:8].zeroExtend(32),) + args[-1] = ExprOp(args[-1].op, + args[-1].args[0], + args[-1].args[-1][:8].zeroExtend(32)) instr_ir, extra_ir = get_mnemo_expr(self, instr, *args) # if self.name.startswith('B'): # return instr_ir, extra_ir diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index ac4c7e9c..cec8f36f 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -432,11 +432,12 @@ class ir_msp430(ir): def mod_sr(self, instr, instr_ir, extra_ir): for i, x in enumerate(instr_ir): - x.src = x.src.replace_expr({SR: composed_sr}) + x = ExprAff(x.dst, x.src.replace_expr({SR: composed_sr})) + instr_ir[i] = x if x.dst != SR: continue xx = ComposeExprAff(composed_sr, x.src) - instr_ir[i:i + 1] = xx + instr_ir[i] = xx for i, x in enumerate(instr_ir): x = ExprAff(x.dst, x.src.replace_expr( {self.pc: ExprInt16(instr.offset + instr.l)})) diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 52cec344..50cb8f05 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -3515,8 +3515,9 @@ class ir_x86_16(ir): for b in extra_ir: for ir in b.irs: - for e in ir: - e.src = e.src.replace_expr({lbl_next: lbl_end}) + for i, e in enumerate(ir): + src = e.src.replace_expr({lbl_next: lbl_end}) + ir[i] = m2_expr.ExprAff(e.dst, src) cond_bloc = [] cond_bloc.append(m2_expr.ExprAff(c_reg, c_reg - m2_expr.ExprInt_from(c_reg, @@ -3586,19 +3587,21 @@ class ir_x86_64(ir_x86_16): def mod_pc(self, instr, instr_ir, extra_ir): # fix RIP for 64 bit - for i, x in enumerate(instr_ir): - if x.dst != self.pc: - x.dst = x.dst.replace_expr( + for i, expr in enumerate(instr_ir): + dst, src = expr.dst, expr.src + if dst != self.pc: + dst = dst.replace_expr( {self.pc: m2_expr.ExprInt64(instr.offset + instr.l)}) - x = m2_expr.ExprAff(x.dst, x.src.replace_expr( - {self.pc: m2_expr.ExprInt64(instr.offset + instr.l)})) - instr_ir[i] = x + src = src.replace_expr( + {self.pc: m2_expr.ExprInt64(instr.offset + instr.l)}) + instr_ir[i] = m2_expr.ExprAff(dst, src) for b in extra_ir: for irs in b.irs: - for i, x in enumerate(irs): - if x.dst != self.pc: + for i, expr in enumerate(irs): + dst, src = expr.dst, expr.src + if dst != self.pc: new_pc = m2_expr.ExprInt64(instr.offset + instr.l) - x.dst = x.dst.replace_expr({self.pc: new_pc}) - x = m2_expr.ExprAff(x.dst, x.src.replace_expr( - {self.pc: m2_expr.ExprInt64(instr.offset + instr.l)})) - irs[i] = x + dst = dst.replace_expr({self.pc: new_pc}) + src = src.replace_expr( + {self.pc: m2_expr.ExprInt64(instr.offset + instr.l)}) + irs[i] = m2_expr.ExprAff(dst, src) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 74d67b5d..fdd342cc 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -29,9 +29,31 @@ import itertools +from operator import itemgetter from miasm2.expression.modint import * from miasm2.core.graph import DiGraph +# Define tokens +TOK_INF = "<" +TOK_INF_SIGNED = TOK_INF + "s" +TOK_INF_UNSIGNED = TOK_INF + "u" +TOK_INF_EQUAL = "<=" +TOK_INF_EQUAL_SIGNED = TOK_INF_EQUAL + "s" +TOK_INF_EQUAL_UNSIGNED = TOK_INF_EQUAL + "u" +TOK_EQUAL = "==" +TOK_POS = "pos" +TOK_POS_STRICT = "Spos" + +# Hashing constants +EXPRINT = 1 +EXPRID = 2 +EXPRAFF = 3 +EXPRCOND = 4 +EXPRMEM = 5 +EXPROP = 6 +EXPRSLICE = 5 +EXPRCOMPOSE = 5 + def visit_chk(visitor): "Function decorator launching callback on Expression visit" @@ -45,15 +67,6 @@ def visit_chk(visitor): return e_new2 return wrapped -# Hashing constants -EXPRINT = 1 -EXPRID = 2 -EXPRAFF = 3 -EXPRCOND = 4 -EXPRMEM = 5 -EXPROP = 6 -EXPRSLICE = 5 -EXPRCOMPOSE = 5 # Expression display @@ -108,12 +121,14 @@ class Expr(object): def set_size(self, value): raise ValueError('size is not mutable') - size = property(lambda self: self._size) def __init__(self, arg): self.arg = arg + size = property(lambda self: self._size) + # Common operations + def __str__(self): return str(self.arg) @@ -142,7 +157,7 @@ class Expr(object): def __eq__(self, a): if isinstance(a, Expr): - return self._hash == a._hash + return hash(self) == hash(a) else: return False @@ -201,27 +216,27 @@ class Expr(object): if e in dct: return dct[e] return e + return self.visit(lambda e: my_replace(e, dct)) def canonize(self): "Canonize the Expression" def must_canon(e): - # print 'test VISIT', e - return not e.is_simp + return not e.is_canon - def my_canon(e): - if e.is_simp: + def canonize_visitor(e): + if e.is_canon: return e if isinstance(e, ExprOp): if e.is_associative(): # ((a+b) + c) => (a + b + c) args = [] - for a in e.args: - if isinstance(a, ExprOp) and e.op == a.op: - args += a.args + for arg in e.args: + if isinstance(arg, ExprOp) and e.op == arg.op: + args += arg.args else: - args.append(a) + args.append(arg) args = canonize_expr_list(args) new_e = ExprOp(e.op, *args) else: @@ -230,8 +245,10 @@ class Expr(object): new_e = ExprCompose(canonize_expr_list_compose(e.args)) else: new_e = e + new_e.is_canon = True return new_e - return self.visit(my_canon, must_canon) + + return self.visit(canonize_visitor, must_canon) def msb(self): "Return the Most Significant Bit" @@ -288,6 +305,7 @@ class Expr(object): def set_mask(self, value): raise ValueError('mask is not mutable') + mask = property(lambda self: ExprInt_fromsize(self.size, -1)) @@ -308,16 +326,18 @@ class ExprInt(Expr): if not is_modint(arg): raise ValueError('arg must by numpy int! %s' % arg) - self.arg = arg + self._arg = arg self._size = self.arg.size - self._hash = self.myhash() + self._hash = self._exprhash() + + arg = property(lambda self: self._arg) def __get_int(self): "Return self integer representation" - return int(self.arg & size2mask(self.size)) + return int(self._arg & size2mask(self._size)) def __str__(self): - if self.arg < 0: + if self._arg < 0: return str("-0x%X" % (- self.__get_int())) else: return str("0x%X" % self.__get_int()) @@ -331,12 +351,12 @@ class ExprInt(Expr): def get_w(self): return set() + def _exprhash(self): + return hash((EXPRINT, self._arg, self._size)) + def __contains__(self, e): return self == e - def myhash(self): - return hash((EXPRINT, self.arg, self.size)) - def __repr__(self): return Expr.__repr__(self)[:-1] + " 0x%X>" % self.__get_int() @@ -345,7 +365,7 @@ class ExprInt(Expr): return self def copy(self): - return ExprInt(self.arg) + return ExprInt(self._arg) def depth(self): return 1 @@ -364,19 +384,19 @@ class ExprId(Expr): - variable v1 """ - def __init__(self, name, size=32, is_term=False): + def __init__(self, name, size=32): """Create an identifier @name: str, identifier's name @size: int, identifier's size - @is_term: boolean, is the identifier a terminal expression ? """ - self.name, self._size = name, size - self.is_term = is_term - self._hash = self.myhash() + self._name, self._size = name, size + self._hash = self._exprhash() + + name = property(lambda self: self._name) def __str__(self): - return str(self.name) + return str(self._name) def get_r(self, mem_read=False, cst_read=False): return set([self]) @@ -384,22 +404,22 @@ class ExprId(Expr): def get_w(self): return set([self]) + def _exprhash(self): + # TODO XXX: hash size ?? + return hash((EXPRID, self._name, self._size)) + def __contains__(self, e): return self == e - def myhash(self): - # TODO XXX: hash size ?? - return hash((EXPRID, self.name, self._size)) - def __repr__(self): - return Expr.__repr__(self)[:-1] + " %s>" % self.name + return Expr.__repr__(self)[:-1] + " %s>" % self._name @visit_chk def visit(self, cb, tv=None): return self def copy(self): - return ExprId(self.name, self._size) + return ExprId(self._name, self._size) def depth(self): return 1 @@ -421,85 +441,87 @@ class ExprAff(Expr): @dst: Expr, affectation destination @src: Expr, affectation source """ - if dst.size != src.size: raise ValueError( "sanitycheck: ExprAff args must have same size! %s" % - ([(str(x), x.size) for x in [dst, src]])) + ([(str(arg), arg.size) for arg in [dst, src]])) if isinstance(dst, ExprSlice): # Complete the source with missing slice parts - self.dst = dst.arg + self._dst = dst.arg rest = [(ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) for r in dst.slice_rest()] all_a = [(src, dst.start, dst.stop)] + rest all_a.sort(key=lambda x: x[1]) - self.src = ExprCompose(all_a) + self._src = ExprCompose(all_a) else: - self.dst, self.src = dst, src + self._dst, self._src = dst, src - self._hash = self.myhash() self._size = self.dst.size + self._hash = self._exprhash() + + dst = property(lambda self: self._dst) + src = property(lambda self: self._src) def __str__(self): - return "%s = %s" % (str(self.dst), str(self.src)) + return "%s = %s" % (str(self._dst), str(self._src)) def get_r(self, mem_read=False, cst_read=False): - r = self.src.get_r(mem_read, cst_read) - if isinstance(self.dst, ExprMem): - r.update(self.dst.arg.get_r(mem_read, cst_read)) - return r + elements = self._src.get_r(mem_read, cst_read) + if isinstance(self._dst, ExprMem): + elements.update(self._dst.arg.get_r(mem_read, cst_read)) + return elements def get_w(self): - if isinstance(self.dst, ExprMem): - return set([self.dst]) # [memreg] + if isinstance(self._dst, ExprMem): + return set([self._dst]) # [memreg] else: - return self.dst.get_w() + return self._dst.get_w() - def __contains__(self, e): - return self == e or self.src.__contains__(e) or self.dst.__contains__(e) + def _exprhash(self): + return hash((EXPRAFF, hash(self._dst), hash(self._src))) - def myhash(self): - return hash((EXPRAFF, self.dst._hash, self.src._hash)) + def __contains__(self, e): + return self == e or self._src.__contains__(e) or self._dst.__contains__(e) # XXX /!\ for hackish expraff to slice def get_modified_slice(self): """Return an Expr list of extra expressions needed during the object instanciation""" - dst = self.dst - if not isinstance(self.src, ExprCompose): + dst = self._dst + if not isinstance(self._src, ExprCompose): raise ValueError("Get mod slice not on expraff slice", str(self)) modified_s = [] - for x in self.src.args: - if (not isinstance(x[0], ExprSlice) or - x[0].arg != dst or - x[1] != x[0].start or - x[2] != x[0].stop): + for arg in self._src.args: + if (not isinstance(arg[0], ExprSlice) or + arg[0].arg != dst or + arg[1] != arg[0].start or + arg[2] != arg[0].stop): # If x is not the initial expression - modified_s.append(x) + modified_s.append(arg) return modified_s @visit_chk def visit(self, cb, tv=None): - dst, src = self.dst.visit(cb, tv), self.src.visit(cb, tv) - if dst == self.dst and src == self.src: + dst, src = self._dst.visit(cb, tv), self._src.visit(cb, tv) + if dst == self._dst and src == self._src: return self else: return ExprAff(dst, src) def copy(self): - return ExprAff(self.dst.copy(), self.src.copy()) + return ExprAff(self._dst.copy(), self._src.copy()) def depth(self): - return max(self.src.depth(), self.dst.depth()) + 1 + return max(self._src.depth(), self._dst.depth()) + 1 def graph_recursive(self, graph): graph.add_node(self) - for a in [self.src, self.dst]: - a.graph_recursive(graph) - graph.add_uniq_edge(self, a) + for arg in [self._src, self._dst]: + arg.graph_recursive(graph) + graph.add_uniq_edge(self, arg) class ExprCond(Expr): @@ -519,59 +541,64 @@ class ExprCond(Expr): @src2: Expr, value if condition is evaled zero """ - self.cond, self.src1, self.src2 = cond, src1, src2 assert(src1.size == src2.size) - self._hash = self.myhash() + + self._cond, self._src1, self._src2 = cond, src1, src2 self._size = self.src1.size + self._hash = self._exprhash() + + cond = property(lambda self: self._cond) + src1 = property(lambda self: self._src1) + src2 = property(lambda self: self._src2) def __str__(self): - return "(%s?(%s,%s))" % (str(self.cond), str(self.src1), str(self.src2)) + return "(%s?(%s,%s))" % (str(self._cond), str(self._src1), str(self._src2)) def get_r(self, mem_read=False, cst_read=False): - out_src1 = self.src1.get_r(mem_read, cst_read) - out_src2 = self.src2.get_r(mem_read, cst_read) - return self.cond.get_r(mem_read, - cst_read).union(out_src1).union(out_src2) + out_src1 = self._src1.get_r(mem_read, cst_read) + out_src2 = self._src2.get_r(mem_read, cst_read) + return self._cond.get_r(mem_read, + cst_read).union(out_src1).union(out_src2) def get_w(self): return set() + def _exprhash(self): + return hash((EXPRCOND, hash(self.cond), + hash(self._src1), hash(self._src2))) + def __contains__(self, e): return (self == e or - self.cond.__contains__(e) or - self.src1.__contains__(e) or - self.src2.__contains__(e)) - - def myhash(self): - return hash((EXPRCOND, self.cond._hash, - self.src1._hash, self.src2._hash)) + self._cond.__contains__(e) or + self._src1.__contains__(e) or + self._src2.__contains__(e)) @visit_chk def visit(self, cb, tv=None): - cond = self.cond.visit(cb, tv) - src1 = self.src1.visit(cb, tv) - src2 = self.src2.visit(cb, tv) - if cond == self.cond and \ - src1 == self.src1 and \ - src2 == self.src2: + cond = self._cond.visit(cb, tv) + src1 = self._src1.visit(cb, tv) + src2 = self._src2.visit(cb, tv) + if (cond == self._cond and + src1 == self._src1 and + src2 == self._src2): return self return ExprCond(cond, src1, src2) def copy(self): - return ExprCond(self.cond.copy(), - self.src1.copy(), - self.src2.copy()) + return ExprCond(self._cond.copy(), + self._src1.copy(), + self._src2.copy()) def depth(self): - return max(self.cond.depth(), - self.src1.depth(), - self.src2.depth()) + 1 + return max(self._cond.depth(), + self._src1.depth(), + self._src2.depth()) + 1 def graph_recursive(self, graph): graph.add_node(self) - for a in [self.cond, self.src1, self.src2]: - a.graph_recursive(graph) - graph.add_uniq_edge(self, a) + for arg in [self._cond, self._src1, self._src2]: + arg.graph_recursive(graph) + graph.add_uniq_edge(self, arg) class ExprMem(Expr): @@ -592,48 +619,50 @@ class ExprMem(Expr): raise ValueError( 'ExprMem: arg must be an Expr (not %s)' % type(arg)) - self.arg, self._size = arg, size - self._hash = self.myhash() + self._arg, self._size = arg, size + self._hash = self._exprhash() + + arg = property(lambda self: self._arg) def __str__(self): - return "@%d[%s]" % (self._size, str(self.arg)) + return "@%d[%s]" % (self._size, str(self._arg)) def get_r(self, mem_read=False, cst_read=False): if mem_read: - return set(self.arg.get_r(mem_read, cst_read).union(set([self]))) + return set(self._arg.get_r(mem_read, cst_read).union(set([self]))) else: return set([self]) def get_w(self): return set([self]) # [memreg] - def __contains__(self, e): - return self == e or self.arg.__contains__(e) + def _exprhash(self): + return hash((EXPRMEM, hash(self._arg), self._size)) - def myhash(self): - return hash((EXPRMEM, self.arg._hash, self._size)) + def __contains__(self, e): + return self == e or self._arg.__contains__(e) @visit_chk def visit(self, cb, tv=None): - arg = self.arg.visit(cb, tv) - if arg == self.arg: + arg = self._arg.visit(cb, tv) + if arg == self._arg: return self return ExprMem(arg, self._size) def copy(self): - arg = self.arg.copy() + arg = self._arg.copy() return ExprMem(arg, size=self._size) def is_op_segm(self): - return isinstance(self.arg, ExprOp) and self.arg.op == 'segm' + return isinstance(self._arg, ExprOp) and self._arg.op == 'segm' def depth(self): - return self.arg.depth() + 1 + return self._arg.depth() + 1 def graph_recursive(self, graph): graph.add_node(self) - self.arg.graph_recursive(graph) - graph.add_uniq_edge(self, self.arg) + self._arg.graph_recursive(graph) + graph.add_uniq_edge(self, self._arg) class ExprOp(Expr): @@ -652,42 +681,48 @@ class ExprOp(Expr): @*args: Expr, operand list """ - sizes = set([x.size for x in args]) + sizes = set([arg.size for arg in args]) - if None not in sizes and len(sizes) != 1: + if len(sizes) != 1: # Special cases : operande sizes can differ if op not in ["segm"]: raise ValueError( "sanitycheck: ExprOp args must have same size! %s" % - ([(str(x), x.size) for x in args])) + ([(str(arg), arg.size) for arg in args])) if not isinstance(op, str): raise ValueError("ExprOp: 'op' argument must be a string") - self.op, self.args = op, tuple(args) - self._hash = self.myhash() + self._op, self._args = op, tuple(args) # Set size for special cases - if self.op in [ - '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', - "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", + if self._op in [ + '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', + "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", "ucomiss_zf", "ucomiss_pf", "ucomiss_cf"]: sz = 1 - elif self.op in ['mem_16_to_double', 'mem_32_to_double', - 'mem_64_to_double', 'mem_80_to_double', - 'int_16_to_double', 'int_32_to_double', - 'int_64_to_double', 'int_80_to_double']: + elif self._op in [TOK_INF, TOK_INF_SIGNED, + TOK_INF_UNSIGNED, TOK_INF_EQUAL, + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, + TOK_EQUAL, TOK_POS, + TOK_POS_STRICT, + ]: + sz = 1 + elif self._op in ['mem_16_to_double', 'mem_32_to_double', + 'mem_64_to_double', 'mem_80_to_double', + 'int_16_to_double', 'int_32_to_double', + 'int_64_to_double', 'int_80_to_double']: sz = 64 - elif self.op in ['double_to_mem_16', 'double_to_int_16', 'double_trunc_to_int_16']: + elif self._op in ['double_to_mem_16', 'double_to_int_16', 'double_trunc_to_int_16']: sz = 16 - elif self.op in ['double_to_mem_32', 'double_to_int_32', 'double_trunc_to_int_32']: + elif self._op in ['double_to_mem_32', 'double_to_int_32', 'double_trunc_to_int_32']: sz = 32 - elif self.op in ['double_to_mem_64', 'double_to_int_64', 'double_trunc_to_int_64']: + elif self._op in ['double_to_mem_64', 'double_to_int_64', 'double_trunc_to_int_64']: sz = 64 - elif self.op in ['double_to_mem_80', 'double_to_int_80', 'double_trunc_to_int_80']: + elif self._op in ['double_to_mem_80', 'double_to_int_80', 'double_trunc_to_int_80']: sz = 80 - elif self.op in ['segm']: - sz = self.args[1].size + elif self._op in ['segm']: + sz = self._args[1].size else: if None in sizes: sz = None @@ -696,130 +731,139 @@ class ExprOp(Expr): sz = list(sizes)[0] self._size = sz + self._hash = self._exprhash() + + op = property(lambda self: self._op) + args = property(lambda self: self._args) def __str__(self): if self.is_associative(): - return '(' + self.op.join([str(x) for x in self.args]) + ')' - if len(self.args) == 2: - return '(' + str(self.args[0]) + \ - ' ' + self.op + ' ' + str(self.args[1]) + ')' - elif len(self.args) > 2: - return self.op + '(' + ', '.join([str(x) for x in self.args]) + ')' + return '(' + self._op.join([str(arg) for arg in self._args]) + ')' + if len(self._args) == 2: + return ('(' + str(self._args[0]) + + ' ' + self.op + ' ' + str(self._args[1]) + ')') + elif len(self._args) > 2: + return self._op + '(' + ', '.join([str(arg) for arg in self._args]) + ')' else: return reduce(lambda x, y: x + ' ' + str(y), - self.args, - '(' + str(self.op)) + ')' + self._args, + '(' + str(self._op)) + ')' def get_r(self, mem_read=False, cst_read=False): - return reduce(lambda x, y: - x.union(y.get_r(mem_read, cst_read)), self.args, set()) + return reduce(lambda elements, arg: + elements.union(arg.get_r(mem_read, cst_read)), self._args, set()) def get_w(self): raise ValueError('op cannot be written!', self) + def _exprhash(self): + h_hargs = [hash(arg) for arg in self._args] + return hash((EXPROP, self._op, tuple(h_hargs))) + def __contains__(self, e): if self == e: return True - for a in self.args: - if a.__contains__(e): + for arg in self._args: + if arg.__contains__(e): return True return False - def myhash(self): - h_hargs = [x._hash for x in self.args] - return hash((EXPROP, self.op, tuple(h_hargs))) - def is_associative(self): "Return True iff current operation is associative" - return (self.op in ['+', '*', '^', '&', '|']) + return (self._op in ['+', '*', '^', '&', '|']) def is_commutative(self): "Return True iff current operation is commutative" - return (self.op in ['+', '*', '^', '&', '|']) + return (self._op in ['+', '*', '^', '&', '|']) @visit_chk def visit(self, cb, tv=None): - args = [a.visit(cb, tv) for a in self.args] - modified = any([x[0] != x[1] for x in zip(self.args, args)]) + args = [arg.visit(cb, tv) for arg in self._args] + modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) if modified: - return ExprOp(self.op, *args) + return ExprOp(self._op, *args) return self def copy(self): - args = [a.copy() for a in self.args] - return ExprOp(self.op, *args) + args = [arg.copy() for arg in self._args] + return ExprOp(self._op, *args) def depth(self): - depth = [a.depth() for a in self.args] + depth = [arg.depth() for arg in self._args] return max(depth) + 1 def graph_recursive(self, graph): graph.add_node(self) - for a in self.args: - a.graph_recursive(graph) - graph.add_uniq_edge(self, a) + for arg in self._args: + arg.graph_recursive(graph) + graph.add_uniq_edge(self, arg) class ExprSlice(Expr): def __init__(self, arg, start, stop): assert(start < stop) - self.arg, self.start, self.stop = arg, start, stop - self._hash = self.myhash() - self._size = self.stop - self.start + + self._arg, self._start, self._stop = arg, start, stop + self._size = self._stop - self._start + self._hash = self._exprhash() + + arg = property(lambda self: self._arg) + start = property(lambda self: self._start) + stop = property(lambda self: self._stop) def __str__(self): - return "%s[%d:%d]" % (str(self.arg), self.start, self.stop) + return "%s[%d:%d]" % (str(self._arg), self._start, self._stop) def get_r(self, mem_read=False, cst_read=False): - return self.arg.get_r(mem_read, cst_read) + return self._arg.get_r(mem_read, cst_read) def get_w(self): - return self.arg.get_w() + return self._arg.get_w() + + def _exprhash(self): + return hash((EXPRSLICE, hash(self._arg), self._start, self._stop)) def __contains__(self, e): if self == e: return True - return self.arg.__contains__(e) - - def myhash(self): - return hash((EXPRSLICE, self.arg._hash, self.start, self.stop)) + return self._arg.__contains__(e) @visit_chk def visit(self, cb, tv=None): - arg = self.arg.visit(cb, tv) - if arg == self.arg: + arg = self._arg.visit(cb, tv) + if arg == self._arg: return self - return ExprSlice(arg, self.start, self.stop) + return ExprSlice(arg, self._start, self._stop) def copy(self): - return ExprSlice(self.arg.copy(), self.start, self.stop) + return ExprSlice(self._arg.copy(), self._start, self._stop) def depth(self): - return self.arg.depth() + 1 + return self._arg.depth() + 1 def slice_rest(self): "Return the completion of the current slice" - size = self.arg.size - if self.start >= size or self.stop > size: + size = self._arg.size + if self._start >= size or self._stop > size: raise ValueError('bad slice rest %s %s %s' % - (size, self.start, self.stop)) + (size, self._start, self._stop)) - if self.start == self.stop: + if self._start == self._stop: return [(0, size)] rest = [] - if self.start != 0: - rest.append((0, self.start)) - if self.stop < size: - rest.append((self.stop, size)) + if self._start != 0: + rest.append((0, self._start)) + if self._stop < size: + rest.append((self._stop, size)) return rest def graph_recursive(self, graph): graph.add_node(self) - self.arg.graph_recursive(graph) - graph.add_uniq_edge(self, self.arg) + self._arg.graph_recursive(graph) + graph.add_uniq_edge(self, self._arg) class ExprCompose(Expr): @@ -836,73 +880,83 @@ class ExprCompose(Expr): def __init__(self, args): """Create an ExprCompose + The ExprCompose is contiguous and starts at 0 @args: tuple(Expr, int, int) """ + last_stop = 0 + args = sorted(args, key=itemgetter(1)) for e, start, stop in args: if e.size != stop - start: raise ValueError( "sanitycheck: ExprCompose args must have correct size!" + " %r %r %r" % (e, e.size, stop - start)) + if last_stop != start: + raise ValueError( + "sanitycheck: ExprCompose args must be contiguous!" + + " %r" % (args)) + last_stop = stop # Transform args to lists o = [] for e, a, b in args: assert(a >= 0 and b >= 0) o.append(tuple([e, a, b])) - self.args = tuple(o) + self._args = tuple(o) - self._hash = self.myhash() - self._size = max([x[2] - for x in self.args]) - min([x[1] for x in self.args]) + self._size = self._args[-1][2] + self._hash = self._exprhash() + + args = property(lambda self: self._args) def __str__(self): return '{' + ', '.join(['%s,%d,%d' % - (str(x[0]), x[1], x[2]) for x in self.args]) + '}' + (str(arg[0]), arg[1], arg[2]) for arg in self._args]) + '}' def get_r(self, mem_read=False, cst_read=False): - return reduce(lambda x, y: - x.union(y[0].get_r(mem_read, cst_read)), self.args, set()) + return reduce(lambda elements, arg: + elements.union(arg[0].get_r(mem_read, cst_read)), self._args, set()) def get_w(self): - return reduce(lambda x, y: - x.union(y[0].get_w()), self.args, set()) + return reduce(lambda elements, arg: + elements.union(arg[0].get_w()), self._args, set()) + + def _exprhash(self): + h_args = [EXPRCOMPOSE] + [(hash(arg[0]), arg[1], arg[2]) + for arg in self._args] + return hash(tuple(h_args)) def __contains__(self, e): if self == e: return True - for a in self.args: - if a == e: + for arg in self._args: + if arg == e: return True - if a[0].__contains__(e): + if arg[0].__contains__(e): return True return False - def myhash(self): - h_args = [EXPRCOMPOSE] + [(x[0]._hash, x[1], x[2]) for x in self.args] - return hash(tuple(h_args)) - @visit_chk def visit(self, cb, tv=None): - args = [(a[0].visit(cb, tv), a[1], a[2]) for a in self.args] - modified = any([x[0] != x[1] for x in zip(self.args, args)]) + args = [(arg[0].visit(cb, tv), arg[1], arg[2]) for arg in self._args] + modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) if modified: return ExprCompose(args) return self def copy(self): - args = [(a[0].copy(), a[1], a[2]) for a in self.args] + args = [(arg[0].copy(), arg[1], arg[2]) for arg in self._args] return ExprCompose(args) def depth(self): - depth = [a[0].depth() for a in self.args] + depth = [arg[0].depth() for arg in self._args] return max(depth) + 1 def graph_recursive(self, graph): graph.add_node(self) - for a in self.args: - a[0].graph_recursive(graph) - graph.add_uniq_edge(self, a[0]) + for arg in self.args: + arg[0].graph_recursive(graph) + graph.add_uniq_edge(self, arg[0]) # Expression order for comparaison @@ -966,7 +1020,7 @@ def compare_exprs(e1, e2): x = cmp(e1.name, e2.name) if x: return x - return cmp(e1._size, e2._size) + return cmp(e1.size, e2.size) elif c1 == ExprAff: raise NotImplementedError( "Comparaison from an ExprAff not yet implemented") @@ -983,7 +1037,7 @@ def compare_exprs(e1, e2): x = compare_exprs(e1.arg, e2.arg) if x: return x - return cmp(e1._size, e2._size) + return cmp(e1.size, e2.size) elif c1 == ExprOp: if e1.op != e2.op: return cmp(e1.op, e2.op) @@ -1138,7 +1192,7 @@ def MatchExpr(e, m, tks, result=None): elif isinstance(e, ExprMem): if not isinstance(m, ExprMem): return False - if e._size != m._size: + if e.size != m.size: return False return MatchExpr(e.arg, m.arg, tks, result) diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 825cad60..3555530a 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -64,9 +64,11 @@ def merge_sliceto_slice(args): final_sources = [] sorted_s = [] for x in sources_int.values(): + x = list(x) # mask int v = x[0].arg & ((1 << (x[2] - x[1])) - 1) - x[0].arg = v + x[0] = m2_expr.ExprInt_from(x[0], v) + x = tuple(x) sorted_s.append((x[1], x)) sorted_s.sort() while sorted_s: @@ -81,7 +83,7 @@ def merge_sliceto_slice(args): a = m2_expr.mod_size2uint[size]( (int(out[0].arg) << (out[1] - s_start)) + int(sorted_s[-1][1][0].arg)) - out[0].arg = a + out[0] = m2_expr.ExprInt(a) sorted_s.pop() out[1] = s_start out[0] = m2_expr.ExprInt_fromsize(size, out[0].arg) diff --git a/miasm2/expression/simplifications_cond.py b/miasm2/expression/simplifications_cond.py index e6c890ab..c6e455b6 100644 --- a/miasm2/expression/simplifications_cond.py +++ b/miasm2/expression/simplifications_cond.py @@ -16,16 +16,6 @@ import miasm2.expression.expression as m2_expr -# Define tokens -TOK_INF = "<" -TOK_INF_SIGNED = TOK_INF + "s" -TOK_INF_UNSIGNED = TOK_INF + "u" -TOK_INF_EQUAL = "<=" -TOK_INF_EQUAL_SIGNED = TOK_INF_EQUAL + "s" -TOK_INF_EQUAL_UNSIGNED = TOK_INF_EQUAL + "u" -TOK_EQUAL = "==" -TOK_POS = "pos" -TOK_POS_STRICT = "Spos" # Jokers for expression matching @@ -40,22 +30,21 @@ jok_small = m2_expr.ExprId("jok_small", 1) def __ExprOp_cond(op, arg1, arg2): "Return an ExprOp standing for arg1 op arg2 with size to 1" ec = m2_expr.ExprOp(op, arg1, arg2) - ec._size = 1 return ec def ExprOp_inf_signed(arg1, arg2): "Return an ExprOp standing for arg1 <s arg2" - return __ExprOp_cond(TOK_INF_SIGNED, arg1, arg2) + return __ExprOp_cond(m2_expr.TOK_INF_SIGNED, arg1, arg2) def ExprOp_inf_unsigned(arg1, arg2): "Return an ExprOp standing for arg1 <s arg2" - return __ExprOp_cond(TOK_INF_UNSIGNED, arg1, arg2) + return __ExprOp_cond(m2_expr.TOK_INF_UNSIGNED, arg1, arg2) def ExprOp_equal(arg1, arg2): "Return an ExprOp standing for arg1 == arg2" - return __ExprOp_cond(TOK_EQUAL, arg1, arg2) + return __ExprOp_cond(m2_expr.TOK_EQUAL, arg1, arg2) # Catching conditions forms @@ -153,9 +142,9 @@ def expr_simp_inverse(expr_simp, e): if r is False: return e - cur_sig = TOK_INF_SIGNED + cur_sig = m2_expr.TOK_INF_SIGNED else: - cur_sig = TOK_INF_UNSIGNED + cur_sig = m2_expr.TOK_INF_UNSIGNED arg = __check_msb(r[jok_small]) @@ -172,7 +161,7 @@ def expr_simp_inverse(expr_simp, e): if r[jok1] not in op_args or r[jok2] not in op_args: return e - if cur_sig == TOK_INF_UNSIGNED: + if cur_sig == m2_expr.TOK_INF_UNSIGNED: return ExprOp_inf_signed(r[jok1], r[jok2]) else: return ExprOp_inf_unsigned(r[jok1], r[jok2]) @@ -193,7 +182,7 @@ def expr_simp_equal(expr_simp, e): def exec_inf_unsigned(expr_simp, e): "Compute x <u y" - if e.op != TOK_INF_UNSIGNED: + if e.op != m2_expr.TOK_INF_UNSIGNED: return e arg1, arg2 = e.args @@ -221,7 +210,7 @@ def __comp_signed(arg1, arg2): def exec_inf_signed(expr_simp, e): "Compute x <s y" - if e.op != TOK_INF_SIGNED: + if e.op != m2_expr.TOK_INF_SIGNED: return e arg1, arg2 = e.args @@ -234,7 +223,7 @@ def exec_inf_signed(expr_simp, e): def exec_equal(expr_simp, e): "Compute x == y" - if e.op != TOK_EQUAL: + if e.op != m2_expr.TOK_EQUAL: return e arg1, arg2 = e.args diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 73a38242..3a8cbb14 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -55,12 +55,12 @@ class irbloc(object): """Find and replace the IRDst affectation's source by @value""" dst = None for ir in self.irs: - for i in ir: - if isinstance(i.dst, m2_expr.ExprId) and i.dst.name == "IRDst": + for i, expr in enumerate(ir): + if isinstance(expr.dst, m2_expr.ExprId) and expr.dst.name == "IRDst": if dst is not None: raise ValueError('Multiple destinations!') dst = value - i.src = value + ir[i] = m2_expr.ExprAff(expr.dst, value) self._dst = value dst = property(get_dst, set_dst) @@ -296,8 +296,7 @@ class ir(object): for b in self.blocs.values(): for ir in b.irs: for i, r in enumerate(ir): - ir[i].src = expr_simp(r.src) - ir[i].dst = expr_simp(r.dst) + ir[i] = m2_expr.ExprAff(expr_simp(r.dst), expr_simp(r.src)) def replace_expr_in_ir(self, bloc, rep): for irs in bloc.irs: diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 007f8378..feb7539b 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -50,7 +50,7 @@ class TranslatorC(Translator): @classmethod def from_ExprMem(cls, expr): - return "MEM_LOOKUP_%.2d(vm_mngr, %s)" % (expr._size, + return "MEM_LOOKUP_%.2d(vm_mngr, %s)" % (expr.size, cls.from_expr(expr.arg)) @classmethod |