From 8e8a60b5d55db6209d05577d038ed3b4dc961b60 Mon Sep 17 00:00:00 2001 From: serpilliere Date: Sat, 21 Feb 2015 20:51:31 +0100 Subject: Expression: Use singleton pattern for Expression Start the transformation of Expression into immutable. Multiple problems were present in the Expression class. One of them was comparison done through hash, which could generate collisions. The attributes is_simp/is_canon were linked to the instance, and could not survive expression simplification. --- miasm2/expression/expression.py | 268 ++++++++++++++++++++++++---------------- 1 file changed, 164 insertions(+), 104 deletions(-) (limited to 'miasm2/expression/expression.py') diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index d04530c3..ad76f01c 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -115,27 +115,65 @@ class Expr(object): "Parent class for Miasm Expressions" __slots__ = ["is_term", "is_simp", "is_canon", - "is_eval", "_hash", "_repr", "_size", + "is_eval", "__hash", "__repr", "__size", "is_var_ident"] + all_exprs = set() + args2expr = {} + simp_exprs = set() + canon_exprs = set() + use_singleton = True + + is_term = False # Terminal expression def set_size(self, value): raise ValueError('size is not mutable') def __init__(self): - self.is_term = False # Terminal expression - self.is_simp = False # Expression already simplified - self.is_canon = False # Expression already canonised - self.is_eval = False # Expression already evalued - self.is_var_ident = False # Expression not identifier - - self._hash = None - self._repr = None + self.__hash = None + self.__repr = None + self.__size = None size = property(lambda self: self._size) + @staticmethod + def get_object(cls, args): + if not cls.use_singleton: + return object.__new__(cls, args) + + expr = Expr.args2expr.get((cls, args)) + if expr is None: + expr = object.__new__(cls, args) + Expr.args2expr[(cls, args)] = expr + return expr + + def __new__(cls, 
*args, **kwargs): + expr = object.__new__(cls, *args, **kwargs) + return expr + + def get_is_simp(self): + return self in Expr.simp_exprs + + def set_is_simp(self, value): + assert(value is True) + Expr.simp_exprs.add(self) + + is_simp = property(get_is_simp, set_is_simp) + + def get_is_canon(self): + return self in Expr.canon_exprs + + def set_is_canon(self, value): + assert(value is True) + Expr.canon_exprs.add(self) + + is_canon = property(get_is_canon, set_is_canon) + # Common operations + def __str__(self): + raise NotImplementedError("Abstract Method") + def __getitem__(self, i): if not isinstance(i, slice): raise TypeError("Expression: Bad slice: %s" % i) @@ -158,9 +196,9 @@ class Expr(object): return self._repr def __hash__(self): - if self._hash is None: - self._hash = self._exprhash() - return self._hash + if self.__hash is None: + self.__hash = self._exprhash() + return self.__hash def pre_eq(self, other): """Return True if ids are equal; @@ -341,7 +379,7 @@ class ExprInt(Expr): - Constant 0x12345678 on 32bits """ - __slots__ = ["_arg"] + __slots__ = ["__arg"] def __init__(self, num, size=None): """Create an ExprInt from a modint or num/size @@ -361,14 +399,14 @@ class ExprInt(Expr): else: raise ValueError('arg must by modint or (int,size)! 
%s' % num) - arg = property(lambda self: self._arg) + self.__arg = arg + self.__size = self.arg.size - def __eq__(self, other): - res = self.pre_eq(other) - if res is not None: - return res - return (self._arg == other._arg and - self._size == other._size) + size = property(lambda self: self.__size) + arg = property(lambda self: self.__arg) + + def __new__(cls, arg): + return Expr.get_object(cls, (arg, arg.size)) def __get_int(self): "Return self integer representation" @@ -393,7 +431,7 @@ class ExprInt(Expr): return hash((EXPRINT, self._arg, self._size)) def _exprrepr(self): - return "%s(%r)" % (self.__class__.__name__, self._arg) + return "%s(0x%X)" % (self.__class__.__name__, self.__get_int()) def __contains__(self, e): return self == e @@ -437,16 +475,14 @@ class ExprId(Expr): """ super(ExprId, self).__init__() - self._name, self._size = name, size + self.__name, self.__size = name, size + self.is_term = is_term - name = property(lambda self: self._name) + size = property(lambda self: self.__size) + name = property(lambda self: self.__name) - def __eq__(self, other): - res = self.pre_eq(other) - if res is not None: - return res - return (self._name == other._name and - self._size == other._size) + def __new__(cls, name, size=32): + return Expr.get_object(cls, (name, size)) def __str__(self): return str(self._name) @@ -459,10 +495,10 @@ class ExprId(Expr): def _exprhash(self): # TODO XXX: hash size ?? 
- return hash((EXPRID, self._name, self._size)) + return hash((EXPRID, self.__name, self.__size)) def _exprrepr(self): - return "%s(%r, %d)" % (self.__class__.__name__, self._name, self._size) + return "%s(%r, %d)" % (self.__class__.__name__, self.__name, self.__size) def __contains__(self, e): return self == e @@ -472,7 +508,7 @@ class ExprId(Expr): return self def copy(self): - return ExprId(self._name, self._size) + return ExprId(self.__name, self.__size) def depth(self): return 1 @@ -506,20 +542,24 @@ class ExprAff(Expr): if isinstance(dst, ExprSlice): # Complete the source with missing slice parts - self._dst = dst.arg + self.__dst = dst.arg rest = [(ExprSlice(dst.arg, r[0], r[1]), r[0], r[1]) for r in dst.slice_rest()] all_a = [(src, dst.start, dst.stop)] + rest all_a.sort(key=lambda x: x[1]) - self._src = ExprCompose(all_a) + self.__src = ExprCompose(all_a) else: - self._dst, self._src = dst, src + self.__dst, self.__src = dst, src + + self.__size = self.dst.size - self._size = self.dst.size + size = property(lambda self: self.__size) + dst = property(lambda self: self.__dst) + src = property(lambda self: self.__src) - dst = property(lambda self: self._dst) - src = property(lambda self: self._src) + def __new__(cls, dst, src): + return Expr.get_object(cls, (dst, src)) def __str__(self): return "%s = %s" % (str(self._dst), str(self._src)) @@ -542,8 +582,10 @@ class ExprAff(Expr): def _exprrepr(self): return "%s(%r, %r)" % (self.__class__.__name__, self._dst, self._src) - def __contains__(self, e): - return self == e or self._src.__contains__(e) or self._dst.__contains__(e) + def __contains__(self, expr): + return (self == expr or + self._src.__contains__(expr) or + self._dst.__contains__(expr)) # XXX /!\ for hackish expraff to slice def get_modified_slice(self): @@ -605,23 +647,26 @@ class ExprCond(Expr): super(ExprCond, self).__init__() + self.__cond, self.__src1, self.__src2 = cond, src1, src2 assert(src1.size == src2.size) + self.__size = self.src1.size - 
self._cond, self._src1, self._src2 = cond, src1, src2 - self._size = self.src1.size + size = property(lambda self: self.__size) + cond = property(lambda self: self.__cond) + src1 = property(lambda self: self.__src1) + src2 = property(lambda self: self.__src2) - cond = property(lambda self: self._cond) - src1 = property(lambda self: self._src1) - src2 = property(lambda self: self._src2) + def __new__(cls, cond, src1, src2): + return Expr.get_object(cls, (cond, src1, src2)) def __str__(self): return "(%s?(%s,%s))" % (str(self._cond), str(self._src1), str(self._src2)) def get_r(self, mem_read=False, cst_read=False): - out_src1 = self._src1.get_r(mem_read, cst_read) - out_src2 = self._src2.get_r(mem_read, cst_read) - return self._cond.get_r(mem_read, - cst_read).union(out_src1).union(out_src2) + out_src1 = self.src1.get_r(mem_read, cst_read) + out_src2 = self.src2.get_r(mem_read, cst_read) + return self.cond.get_r(mem_read, + cst_read).union(out_src1).union(out_src2) def get_w(self): return set() @@ -636,9 +681,9 @@ class ExprCond(Expr): def __contains__(self, e): return (self == e or - self._cond.__contains__(e) or - self._src1.__contains__(e) or - self._src2.__contains__(e)) + self.cond.__contains__(e) or + self.src1.__contains__(e) or + self.src2.__contains__(e)) @visit_chk def visit(self, cb, tv=None): @@ -691,12 +736,16 @@ class ExprMem(Expr): raise ValueError( 'ExprMem: arg must be an Expr (not %s)' % type(arg)) - self._arg, self._size = arg, size + self.__arg, self.__size = arg, size + + size = property(lambda self: self.__size) + arg = property(lambda self: self.__arg) - arg = property(lambda self: self._arg) + def __new__(cls, arg, size=32): + return Expr.get_object(cls, (arg, size)) def __str__(self): - return "@%d[%s]" % (self._size, str(self._arg)) + return "@%d[%s]" % (self.size, str(self.arg)) def get_r(self, mem_read=False, cst_read=False): if mem_read: @@ -714,19 +763,19 @@ class ExprMem(Expr): return "%s(%r, %r)" % (self.__class__.__name__, self._arg, 
self._size) - def __contains__(self, e): - return self == e or self._arg.__contains__(e) + def __contains__(self, expr): + return self == expr or self._arg.__contains__(expr) @visit_chk def visit(self, cb, tv=None): arg = self._arg.visit(cb, tv) if arg == self._arg: return self - return ExprMem(arg, self._size) + return ExprMem(arg, self.size) def copy(self): - arg = self._arg.copy() - return ExprMem(arg, size=self._size) + arg = self.arg.copy() + return ExprMem(arg, size=self.size) def is_op_segm(self): return isinstance(self._arg, ExprOp) and self._arg.op == 'segm' @@ -772,43 +821,43 @@ class ExprOp(Expr): if not isinstance(op, str): raise ValueError("ExprOp: 'op' argument must be a string") - self._op, self._args = op, tuple(args) + self.__op, self._args = op, tuple(args) # Set size for special cases - if self._op in [ + if self.__op in [ '==', 'parity', 'fcom_c0', 'fcom_c1', 'fcom_c2', 'fcom_c3', 'fxam_c0', 'fxam_c1', 'fxam_c2', 'fxam_c3', "access_segment_ok", "load_segment_limit_ok", "bcdadd_cf", "ucomiss_zf", "ucomiss_pf", "ucomiss_cf"]: sz = 1 - elif self._op in [TOK_INF, TOK_INF_SIGNED, - TOK_INF_UNSIGNED, TOK_INF_EQUAL, - TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, - TOK_EQUAL, TOK_POS, - TOK_POS_STRICT, - ]: + elif self.__op in [TOK_INF, TOK_INF_SIGNED, + TOK_INF_UNSIGNED, TOK_INF_EQUAL, + TOK_INF_EQUAL_SIGNED, TOK_INF_EQUAL_UNSIGNED, + TOK_EQUAL, TOK_POS, + TOK_POS_STRICT, + ]: sz = 1 - elif self._op in ['mem_16_to_double', 'mem_32_to_double', - 'mem_64_to_double', 'mem_80_to_double', - 'int_16_to_double', 'int_32_to_double', - 'int_64_to_double', 'int_80_to_double']: + elif self.__op in ['mem_16_to_double', 'mem_32_to_double', + 'mem_64_to_double', 'mem_80_to_double', + 'int_16_to_double', 'int_32_to_double', + 'int_64_to_double', 'int_80_to_double']: sz = 64 - elif self._op in ['double_to_mem_16', 'double_to_int_16', - 'float_trunc_to_int_16', 'double_trunc_to_int_16']: + elif self.__op in ['double_to_mem_16', 'double_to_int_16', + 
'float_trunc_to_int_16', 'double_trunc_to_int_16']: sz = 16 - elif self._op in ['double_to_mem_32', 'double_to_int_32', - 'float_trunc_to_int_32', 'double_trunc_to_int_32', - 'double_to_float']: + elif self.__op in ['double_to_mem_32', 'double_to_int_32', + 'float_trunc_to_int_32', 'double_trunc_to_int_32', + 'double_to_float']: sz = 32 - elif self._op in ['double_to_mem_64', 'double_to_int_64', - 'float_trunc_to_int_64', 'double_trunc_to_int_64', - 'float_to_double']: + elif self.__op in ['double_to_mem_64', 'double_to_int_64', + 'float_trunc_to_int_64', 'double_trunc_to_int_64', + 'float_to_double']: sz = 64 - elif self._op in ['double_to_mem_80', 'double_to_int_80', - 'float_trunc_to_int_80', - 'double_trunc_to_int_80']: + elif self.__op in ['double_to_mem_80', 'double_to_int_80', + 'float_trunc_to_int_80', + 'double_trunc_to_int_80']: sz = 80 - elif self._op in ['segm']: + elif self.__op in ['segm']: sz = self._args[1].size else: if None in sizes: @@ -817,10 +866,14 @@ class ExprOp(Expr): # All arguments have the same size sz = list(sizes)[0] - self._size = sz + self.__size = sz + + size = property(lambda self: self.__size) + op = property(lambda self: self.__op) + args = property(lambda self: self.__args) - op = property(lambda self: self._op) - args = property(lambda self: self._args) + def __new__(cls, op, *args): + return Expr.get_object(cls, (op, args)) def __str__(self): if self.is_associative(): @@ -840,7 +893,7 @@ class ExprOp(Expr): def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: - elements.union(arg.get_r(mem_read, cst_read)), self._args, set()) + elements.union(arg.get_r(mem_read, cst_read)), self.__args, set()) def get_w(self): raise ValueError('op cannot be written!', self) @@ -903,13 +956,16 @@ class ExprSlice(Expr): super(ExprSlice, self).__init__() assert(start < stop) + self.__arg, self.__start, self.__stop = arg, start, stop + self.__size = self.__stop - self.__start - self._arg, self._start, self._stop = 
arg, start, stop - self._size = self._stop - self._start + size = property(lambda self: self.__size) + arg = property(lambda self: self.__arg) + start = property(lambda self: self.__start) + stop = property(lambda self: self.__stop) - arg = property(lambda self: self._arg) - start = property(lambda self: self._start) - stop = property(lambda self: self._stop) + def __new__(cls, arg, start, stop): + return Expr.get_object(cls, (arg, start, stop)) def __str__(self): return "%s[%d:%d]" % (str(self._arg), self._start, self._stop) @@ -927,10 +983,10 @@ class ExprSlice(Expr): return "%s(%r, %d, %d)" % (self.__class__.__name__, self._arg, self._start, self._stop) - def __contains__(self, e): - if self == e: + def __contains__(self, expr): + if self == expr: return True - return self._arg.__contains__(e) + return self.__arg.__contains__(expr) @visit_chk def visit(self, cb, tv=None): @@ -1009,31 +1065,35 @@ class ExprCompose(Expr): for e, a, b in args: assert(a >= 0 and b >= 0) o.append(tuple([e, a, b])) - self._args = tuple(o) + self.__args = tuple(o) + + self.__size = self.__args[-1][2] - self._size = self._args[-1][2] + size = property(lambda self: self.__size) + args = property(lambda self: self.__args) - args = property(lambda self: self._args) + def __new__(cls, args): + return Expr.get_object(cls, tuple(args)) def __str__(self): return '{' + ', '.join(['%s,%d,%d' % - (str(arg[0]), arg[1], arg[2]) for arg in self._args]) + '}' + (str(arg[0]), arg[1], arg[2]) for arg in self.__args]) + '}' def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: - elements.union(arg[0].get_r(mem_read, cst_read)), self._args, set()) + elements.union(arg[0].get_r(mem_read, cst_read)), self.__args, set()) def get_w(self): return reduce(lambda elements, arg: - elements.union(arg[0].get_w()), self._args, set()) + elements.union(arg[0].get_w()), self.__args, set()) def _exprhash(self): h_args = [EXPRCOMPOSE] + [(hash(arg[0]), arg[1], arg[2]) - for arg in 
self._args] + for arg in self.__args] return hash(tuple(h_args)) def _exprrepr(self): - return "%s(%r)" % (self.__class__.__name__, self._args) + return "%s(%r)" % (self.__class__.__name__, self.__args) def __contains__(self, e): if self == e: -- cgit 1.4.1 From 895263f702876a2cdf3bd1e9359ce1b0340f87fd Mon Sep 17 00:00:00 2001 From: serpilliere Date: Sat, 21 Feb 2015 21:34:52 +0100 Subject: Expression: remove is_term attribute --- miasm2/analysis/data_analysis.py | 7 +- miasm2/arch/sh4/regs.py | 1 - miasm2/arch/x86/regs.py | 1 - miasm2/expression/expression.py | 248 +++++++------- miasm2/expression/expression_helper.py | 10 +- miasm2/ir/analysis.py | 7 +- miasm2/ir/symbexec.py | 603 ++++++++++++++++----------------- test/ir/symbexec.py | 24 +- 8 files changed, 435 insertions(+), 466 deletions(-) (limited to 'miasm2/expression/expression.py') diff --git a/miasm2/analysis/data_analysis.py b/miasm2/analysis/data_analysis.py index 88f9b680..9fee22af 100644 --- a/miasm2/analysis/data_analysis.py +++ b/miasm2/analysis/data_analysis.py @@ -75,12 +75,7 @@ def intra_bloc_flow_symbexec(ir_arch, flow_graph, irb): out_nodes = {} current_nodes = {} - symbols_init = {} - for r in ir_arch.arch.regs.all_regs_ids: - # symbols_init[r] = ir_arch.arch.regs.all_regs_ids_init[i] - x = ExprId(r.name, r.size) - x.is_term = True - symbols_init[r] = x + symbols_init = dict(ir_arch.arch.regs.all_regs_ids_init) sb = symbexec(ir_arch, dict(symbols_init)) sb.emulbloc(irb) diff --git a/miasm2/arch/sh4/regs.py b/miasm2/arch/sh4/regs.py index cfbca187..89ff42f8 100644 --- a/miasm2/arch/sh4/regs.py +++ b/miasm2/arch/sh4/regs.py @@ -78,7 +78,6 @@ all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] regs_init = {} for i, r in enumerate(all_regs_ids): - all_regs_ids_init[i].is_term = True regs_init[r] = all_regs_ids_init[i] regs_flt_expr = [] diff --git a/miasm2/arch/x86/regs.py b/miasm2/arch/x86/regs.py index 18083a04..34585dae 100644 --- a/miasm2/arch/x86/regs.py +++ 
b/miasm2/arch/x86/regs.py @@ -430,7 +430,6 @@ all_regs_ids_init = [ExprId("%s_init" % x.name, x.size) for x in all_regs_ids] regs_init = {} for i, r in enumerate(all_regs_ids): - all_regs_ids_init[i].is_term = True regs_init[r] = all_regs_ids_init[i] regs_flt_expr = [float_st0, float_st1, float_st2, float_st3, diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index ad76f01c..59e809a5 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -114,18 +114,12 @@ class Expr(object): "Parent class for Miasm Expressions" - __slots__ = ["is_term", "is_simp", "is_canon", - "is_eval", "__hash", "__repr", "__size", - "is_var_ident"] - all_exprs = set() args2expr = {} simp_exprs = set() canon_exprs = set() use_singleton = True - is_term = False # Terminal expression - def set_size(self, value): raise ValueError('size is not mutable') @@ -134,7 +128,7 @@ class Expr(object): self.__repr = None self.__size = None - size = property(lambda self: self._size) + size = property(lambda self: self.__size) @staticmethod def get_object(cls, args): @@ -191,9 +185,9 @@ class Expr(object): return False def __repr__(self): - if self._repr is None: - self._repr = self._exprrepr() - return self._repr + if self.__repr is None: + self.__repr = self._exprrepr() + return self.__repr def __hash__(self): if self.__hash is None: @@ -379,7 +373,6 @@ class ExprInt(Expr): - Constant 0x12345678 on 32bits """ - __slots__ = ["__arg"] def __init__(self, num, size=None): """Create an ExprInt from a modint or num/size @@ -389,31 +382,30 @@ class ExprInt(Expr): super(ExprInt, self).__init__() if is_modint(num): - self._arg = num - self._size = self.arg.size + self.__arg = num + self.__size = self.arg.size if size is not None and num.size != size: raise RuntimeError("size must match modint size") elif size is not None: - self._arg = mod_size2uint[size](num) - self._size = self.arg.size + self.__arg = mod_size2uint[size](num) + self.__size = self.arg.size 
else: raise ValueError('arg must by modint or (int,size)! %s' % num) - self.__arg = arg - self.__size = self.arg.size - size = property(lambda self: self.__size) arg = property(lambda self: self.__arg) - def __new__(cls, arg): - return Expr.get_object(cls, (arg, arg.size)) + def __new__(cls, arg, size=None): + if size is None: + size = arg.size + return Expr.get_object(cls, (arg, size)) def __get_int(self): "Return self integer representation" - return int(self._arg & size2mask(self._size)) + return int(self.__arg & size2mask(self.__size)) def __str__(self): - if self._arg < 0: + if self.__arg < 0: return str("-0x%X" % (- self.__get_int())) else: return str("0x%X" % self.__get_int()) @@ -428,7 +420,7 @@ class ExprInt(Expr): return set() def _exprhash(self): - return hash((EXPRINT, self._arg, self._size)) + return hash((EXPRINT, self.__arg, self.__size)) def _exprrepr(self): return "%s(0x%X)" % (self.__class__.__name__, self.__get_int()) @@ -441,7 +433,7 @@ class ExprInt(Expr): return self def copy(self): - return ExprInt(self._arg) + return ExprInt(self.__arg) def depth(self): return 1 @@ -466,7 +458,6 @@ class ExprId(Expr): - variable v1 """ - __slots__ = ["_name"] def __init__(self, name, size=32): """Create an identifier @@ -476,7 +467,6 @@ class ExprId(Expr): super(ExprId, self).__init__() self.__name, self.__size = name, size - self.is_term = is_term size = property(lambda self: self.__size) name = property(lambda self: self.__name) @@ -485,7 +475,7 @@ class ExprId(Expr): return Expr.get_object(cls, (name, size)) def __str__(self): - return str(self._name) + return str(self.__name) def get_r(self, mem_read=False, cst_read=False): return set([self]) @@ -525,7 +515,6 @@ class ExprAff(Expr): - var1 <- 2 """ - __slots__ = ["_src", "_dst"] def __init__(self, dst, src): """Create an ExprAff for dst <- src @@ -562,41 +551,41 @@ class ExprAff(Expr): return Expr.get_object(cls, (dst, src)) def __str__(self): - return "%s = %s" % (str(self._dst), str(self._src)) + 
return "%s = %s" % (str(self.__dst), str(self.__src)) def get_r(self, mem_read=False, cst_read=False): - elements = self._src.get_r(mem_read, cst_read) - if isinstance(self._dst, ExprMem) and mem_read: - elements.update(self._dst.arg.get_r(mem_read, cst_read)) + elements = self.__src.get_r(mem_read, cst_read) + if isinstance(self.__dst, ExprMem) and mem_read: + elements.update(self.__dst.arg.get_r(mem_read, cst_read)) return elements def get_w(self): - if isinstance(self._dst, ExprMem): - return set([self._dst]) # [memreg] + if isinstance(self.__dst, ExprMem): + return set([self.__dst]) # [memreg] else: - return self._dst.get_w() + return self.__dst.get_w() def _exprhash(self): - return hash((EXPRAFF, hash(self._dst), hash(self._src))) + return hash((EXPRAFF, hash(self.__dst), hash(self.__src))) def _exprrepr(self): - return "%s(%r, %r)" % (self.__class__.__name__, self._dst, self._src) + return "%s(%r, %r)" % (self.__class__.__name__, self.__dst, self.__src) def __contains__(self, expr): return (self == expr or - self._src.__contains__(expr) or - self._dst.__contains__(expr)) + self.__src.__contains__(expr) or + self.__dst.__contains__(expr)) # XXX /!\ for hackish expraff to slice def get_modified_slice(self): """Return an Expr list of extra expressions needed during the object instanciation""" - dst = self._dst - if not isinstance(self._src, ExprCompose): + dst = self.__dst + if not isinstance(self.__src, ExprCompose): raise ValueError("Get mod slice not on expraff slice", str(self)) modified_s = [] - for arg in self._src.args: + for arg in self.__src.args: if (not isinstance(arg[0], ExprSlice) or arg[0].arg != dst or arg[1] != arg[0].start or @@ -607,21 +596,21 @@ class ExprAff(Expr): @visit_chk def visit(self, cb, tv=None): - dst, src = self._dst.visit(cb, tv), self._src.visit(cb, tv) - if dst == self._dst and src == self._src: + dst, src = self.__dst.visit(cb, tv), self.__src.visit(cb, tv) + if dst == self.__dst and src == self.__src: return self else: return 
ExprAff(dst, src) def copy(self): - return ExprAff(self._dst.copy(), self._src.copy()) + return ExprAff(self.__dst.copy(), self.__src.copy()) def depth(self): - return max(self._src.depth(), self._dst.depth()) + 1 + return max(self.__src.depth(), self.__dst.depth()) + 1 def graph_recursive(self, graph): graph.add_node(self) - for arg in [self._src, self._dst]: + for arg in [self.__src, self.__dst]: arg.graph_recursive(graph) graph.add_uniq_edge(self, arg) @@ -636,7 +625,6 @@ class ExprCond(Expr): - if (cond) then ... else ... """ - __slots__ = ["_cond", "_src1", "_src2"] def __init__(self, cond, src1, src2): """Create an ExprCond @@ -660,7 +648,7 @@ class ExprCond(Expr): return Expr.get_object(cls, (cond, src1, src2)) def __str__(self): - return "(%s?(%s,%s))" % (str(self._cond), str(self._src1), str(self._src2)) + return "(%s?(%s,%s))" % (str(self.__cond), str(self.__src1), str(self.__src2)) def get_r(self, mem_read=False, cst_read=False): out_src1 = self.src1.get_r(mem_read, cst_read) @@ -673,11 +661,11 @@ class ExprCond(Expr): def _exprhash(self): return hash((EXPRCOND, hash(self.cond), - hash(self._src1), hash(self._src2))) + hash(self.__src1), hash(self.__src2))) def _exprrepr(self): return "%s(%r, %r, %r)" % (self.__class__.__name__, - self._cond, self._src1, self._src2) + self.__cond, self.__src1, self.__src2) def __contains__(self, e): return (self == e or @@ -687,28 +675,28 @@ class ExprCond(Expr): @visit_chk def visit(self, cb, tv=None): - cond = self._cond.visit(cb, tv) - src1 = self._src1.visit(cb, tv) - src2 = self._src2.visit(cb, tv) - if (cond == self._cond and - src1 == self._src1 and - src2 == self._src2): + cond = self.__cond.visit(cb, tv) + src1 = self.__src1.visit(cb, tv) + src2 = self.__src2.visit(cb, tv) + if (cond == self.__cond and + src1 == self.__src1 and + src2 == self.__src2): return self return ExprCond(cond, src1, src2) def copy(self): - return ExprCond(self._cond.copy(), - self._src1.copy(), - self._src2.copy()) + return 
ExprCond(self.__cond.copy(), + self.__src1.copy(), + self.__src2.copy()) def depth(self): - return max(self._cond.depth(), - self._src1.depth(), - self._src2.depth()) + 1 + return max(self.__cond.depth(), + self.__src1.depth(), + self.__src2.depth()) + 1 def graph_recursive(self, graph): graph.add_node(self) - for arg in [self._cond, self._src1, self._src2]: + for arg in [self.__cond, self.__src1, self.__src2]: arg.graph_recursive(graph) graph.add_uniq_edge(self, arg) @@ -722,7 +710,6 @@ class ExprMem(Expr): - Memory write """ - __slots__ = ["_arg", "_size"] def __init__(self, arg, size=32): """Create an ExprMem @@ -749,7 +736,7 @@ class ExprMem(Expr): def get_r(self, mem_read=False, cst_read=False): if mem_read: - return set(self._arg.get_r(mem_read, cst_read).union(set([self]))) + return set(self.__arg.get_r(mem_read, cst_read).union(set([self]))) else: return set([self]) @@ -757,19 +744,19 @@ class ExprMem(Expr): return set([self]) # [memreg] def _exprhash(self): - return hash((EXPRMEM, hash(self._arg), self._size)) + return hash((EXPRMEM, hash(self.__arg), self.__size)) def _exprrepr(self): return "%s(%r, %r)" % (self.__class__.__name__, - self._arg, self._size) + self.__arg, self.__size) def __contains__(self, expr): - return self == expr or self._arg.__contains__(expr) + return self == expr or self.__arg.__contains__(expr) @visit_chk def visit(self, cb, tv=None): - arg = self._arg.visit(cb, tv) - if arg == self._arg: + arg = self.__arg.visit(cb, tv) + if arg == self.__arg: return self return ExprMem(arg, self.size) @@ -778,15 +765,15 @@ class ExprMem(Expr): return ExprMem(arg, size=self.size) def is_op_segm(self): - return isinstance(self._arg, ExprOp) and self._arg.op == 'segm' + return isinstance(self.__arg, ExprOp) and self.__arg.op == 'segm' def depth(self): - return self._arg.depth() + 1 + return self.__arg.depth() + 1 def graph_recursive(self, graph): graph.add_node(self) - self._arg.graph_recursive(graph) - graph.add_uniq_edge(self, self._arg) + 
self.__arg.graph_recursive(graph) + graph.add_uniq_edge(self, self.__arg) class ExprOp(Expr): @@ -799,7 +786,6 @@ class ExprOp(Expr): - parity bit(var1) """ - __slots__ = ["_op", "_args"] def __init__(self, op, *args): """Create an ExprOp @@ -821,7 +807,7 @@ class ExprOp(Expr): if not isinstance(op, str): raise ValueError("ExprOp: 'op' argument must be a string") - self.__op, self._args = op, tuple(args) + self.__op, self.__args = op, tuple(args) # Set size for special cases if self.__op in [ @@ -858,7 +844,7 @@ class ExprOp(Expr): 'double_trunc_to_int_80']: sz = 80 elif self.__op in ['segm']: - sz = self._args[1].size + sz = self.__args[1].size else: if None in sizes: sz = None @@ -877,19 +863,19 @@ class ExprOp(Expr): def __str__(self): if self.is_associative(): - return '(' + self._op.join([str(arg) for arg in self._args]) + ')' - if (self._op.startswith('call_func_') or - self._op == 'cpuid' or - len(self._args) > 2 or - self._op in ['parity', 'segm']): - return self._op + '(' + ', '.join([str(arg) for arg in self._args]) + ')' - if len(self._args) == 2: - return ('(' + str(self._args[0]) + - ' ' + self.op + ' ' + str(self._args[1]) + ')') + return '(' + self.__op.join([str(arg) for arg in self.__args]) + ')' + if (self.__op.startswith('call_func_') or + self.__op == 'cpuid' or + len(self.__args) > 2 or + self.__op in ['parity', 'segm']): + return self.__op + '(' + ', '.join([str(arg) for arg in self.__args]) + ')' + if len(self.__args) == 2: + return ('(' + str(self.__args[0]) + + ' ' + self.op + ' ' + str(self.__args[1]) + ')') else: return reduce(lambda x, y: x + ' ' + str(y), - self._args, - '(' + str(self._op)) + ')' + self.__args, + '(' + str(self.__op)) + ')' def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: @@ -899,58 +885,57 @@ class ExprOp(Expr): raise ValueError('op cannot be written!', self) def _exprhash(self): - h_hargs = [hash(arg) for arg in self._args] - return hash((EXPROP, self._op, tuple(h_hargs))) + 
h_hargs = [hash(arg) for arg in self.__args] + return hash((EXPROP, self.__op, tuple(h_hargs))) def _exprrepr(self): - return "%s(%r, %s)" % (self.__class__.__name__, self._op, - ', '.join(repr(arg) for arg in self._args)) + return "%s(%r, %s)" % (self.__class__.__name__, self.__op, + ', '.join(repr(arg) for arg in self.__args)) def __contains__(self, e): if self == e: return True - for arg in self._args: + for arg in self.__args: if arg.__contains__(e): return True return False def is_function_call(self): - return self._op.startswith('call') + return self.__op.startswith('call') def is_associative(self): "Return True iff current operation is associative" - return (self._op in ['+', '*', '^', '&', '|']) + return (self.__op in ['+', '*', '^', '&', '|']) def is_commutative(self): "Return True iff current operation is commutative" - return (self._op in ['+', '*', '^', '&', '|']) + return (self.__op in ['+', '*', '^', '&', '|']) @visit_chk def visit(self, cb, tv=None): - args = [arg.visit(cb, tv) for arg in self._args] - modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) + args = [arg.visit(cb, tv) for arg in self.__args] + modified = any([arg[0] != arg[1] for arg in zip(self.__args, args)]) if modified: - return ExprOp(self._op, *args) + return ExprOp(self.__op, *args) return self def copy(self): - args = [arg.copy() for arg in self._args] - return ExprOp(self._op, *args) + args = [arg.copy() for arg in self.__args] + return ExprOp(self.__op, *args) def depth(self): - depth = [arg.depth() for arg in self._args] + depth = [arg.depth() for arg in self.__args] return max(depth) + 1 def graph_recursive(self, graph): graph.add_node(self) - for arg in self._args: + for arg in self.__args: arg.graph_recursive(graph) graph.add_uniq_edge(self, arg) class ExprSlice(Expr): - __slots__ = ["_arg", "_start", "_stop"] def __init__(self, arg, start, stop): super(ExprSlice, self).__init__() @@ -968,20 +953,20 @@ class ExprSlice(Expr): return Expr.get_object(cls, (arg, 
start, stop)) def __str__(self): - return "%s[%d:%d]" % (str(self._arg), self._start, self._stop) + return "%s[%d:%d]" % (str(self.__arg), self.__start, self.__stop) def get_r(self, mem_read=False, cst_read=False): - return self._arg.get_r(mem_read, cst_read) + return self.__arg.get_r(mem_read, cst_read) def get_w(self): - return self._arg.get_w() + return self.__arg.get_w() def _exprhash(self): - return hash((EXPRSLICE, hash(self._arg), self._start, self._stop)) + return hash((EXPRSLICE, hash(self.__arg), self.__start, self.__stop)) def _exprrepr(self): - return "%s(%r, %d, %d)" % (self.__class__.__name__, self._arg, - self._start, self._stop) + return "%s(%r, %d, %d)" % (self.__class__.__name__, self.__arg, + self.__start, self.__stop) def __contains__(self, expr): if self == expr: @@ -990,39 +975,39 @@ class ExprSlice(Expr): @visit_chk def visit(self, cb, tv=None): - arg = self._arg.visit(cb, tv) - if arg == self._arg: + arg = self.__arg.visit(cb, tv) + if arg == self.__arg: return self - return ExprSlice(arg, self._start, self._stop) + return ExprSlice(arg, self.__start, self.__stop) def copy(self): - return ExprSlice(self._arg.copy(), self._start, self._stop) + return ExprSlice(self.__arg.copy(), self.__start, self.__stop) def depth(self): - return self._arg.depth() + 1 + return self.__arg.depth() + 1 def slice_rest(self): "Return the completion of the current slice" - size = self._arg.size - if self._start >= size or self._stop > size: + size = self.__arg.size + if self.__start >= size or self.__stop > size: raise ValueError('bad slice rest %s %s %s' % - (size, self._start, self._stop)) + (size, self.__start, self.__stop)) - if self._start == self._stop: + if self.__start == self.__stop: return [(0, size)] rest = [] - if self._start != 0: - rest.append((0, self._start)) - if self._stop < size: - rest.append((self._stop, size)) + if self.__start != 0: + rest.append((0, self.__start)) + if self.__stop < size: + rest.append((self.__stop, size)) return rest def 
graph_recursive(self, graph): graph.add_node(self) - self._arg.graph_recursive(graph) - graph.add_uniq_edge(self, self._arg) + self.__arg.graph_recursive(graph) + graph.add_uniq_edge(self, self.__arg) class ExprCompose(Expr): @@ -1037,7 +1022,6 @@ class ExprCompose(Expr): In the example, salad.size == 3. """ - __slots__ = ["_args"] def __init__(self, args): """Create an ExprCompose @@ -1098,7 +1082,7 @@ class ExprCompose(Expr): def __contains__(self, e): if self == e: return True - for arg in self._args: + for arg in self.__args: if arg == e: return True if arg[0].__contains__(e): @@ -1107,18 +1091,18 @@ class ExprCompose(Expr): @visit_chk def visit(self, cb, tv=None): - args = [(arg[0].visit(cb, tv), arg[1], arg[2]) for arg in self._args] - modified = any([arg[0] != arg[1] for arg in zip(self._args, args)]) + args = [(arg[0].visit(cb, tv), arg[1], arg[2]) for arg in self.__args] + modified = any([arg[0] != arg[1] for arg in zip(self.__args, args)]) if modified: return ExprCompose(args) return self def copy(self): - args = [(arg[0].copy(), arg[1], arg[2]) for arg in self._args] + args = [(arg[0].copy(), arg[1], arg[2]) for arg in self.__args] return ExprCompose(args) def depth(self): - depth = [arg[0].depth() for arg in self._args] + depth = [arg[0].depth() for arg in self.__args] return max(depth) + 1 def graph_recursive(self, graph): diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 3b85d720..178ee25f 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -210,9 +210,6 @@ class Variables_Identifier(object): - original expression with variables translated """ - # Attribute used to distinguish created variables from original ones - is_var_ident = "is_var_ident" - def __init__(self, expr, var_prefix="v"): """Set the expression @expr to handle and launch variable identification process @@ -287,13 +284,11 @@ class Variables_Identifier(object): for element_done in done: 
todo.remove(element_done) - @classmethod - def is_var_identifier(cls, expr): + def is_var_identifier(self, expr): "Return True iff @expr is a variable identifier" if not isinstance(expr, m2_expr.ExprId): return False - - return expr.is_var_ident + return expr in self._vars def find_variables_rec(self, expr): """Recursive method called by find_variable to expand @expr. @@ -310,7 +305,6 @@ class Variables_Identifier(object): identifier = m2_expr.ExprId("%s%s" % (self.var_prefix, self.var_indice.next()), size = expr.size) - identifier.is_var_ident = True self._vars[identifier] = expr # Recursion stop case diff --git a/miasm2/ir/analysis.py b/miasm2/ir/analysis.py index 2aa853f0..70fda11c 100644 --- a/miasm2/ir/analysis.py +++ b/miasm2/ir/analysis.py @@ -283,11 +283,8 @@ class ira(ir): def gen_equations(self): for irb in self.blocs.values(): - symbols_init = {} - for r in self.arch.regs.all_regs_ids: - x = ExprId(r.name, r.size) - x.is_term = True - symbols_init[r] = x + symbols_init = dict(self.arch.regs.all_regs_ids_init) + sb = symbexec(self, dict(symbols_init)) sb.emulbloc(irb) eqs = [] diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 1dc8dde1..d3c56f70 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -3,6 +3,8 @@ from miasm2.expression.modint import int32 from miasm2.expression.simplifications import expr_simp from miasm2.core import asmbloc from miasm2.ir.ir import AssignBlock +from miasm2.core.interval import interval + import logging @@ -13,72 +15,82 @@ log.addHandler(console_handler) log.setLevel(logging.INFO) -class symbols(): +class symbols(object): def __init__(self, init=None): if init is None: init = {} self.symbols_id = {} self.symbols_mem = {} - for k, v in init.items(): - self[k] = v + for expr, value in init.items(): + self[expr] = value - def __contains__(self, a): - if not isinstance(a, m2_expr.ExprMem): - return self.symbols_id.__contains__(a) - if not self.symbols_mem.__contains__(a.arg): + def __contains__(self, 
expr): + if not isinstance(expr, m2_expr.ExprMem): + return self.symbols_id.__contains__(expr) + if not self.symbols_mem.__contains__(expr.arg): return False - return self.symbols_mem[a.arg][0].size == a.size - - def __getitem__(self, a): - if not isinstance(a, m2_expr.ExprMem): - return self.symbols_id.__getitem__(a) - if not a.arg in self.symbols_mem: - raise KeyError(a) - m = self.symbols_mem.__getitem__(a.arg) - if m[0].size != a.size: - raise KeyError(a) - return m[1] - - def __setitem__(self, a, v): - if not isinstance(a, m2_expr.ExprMem): - self.symbols_id.__setitem__(a, v) + return self.symbols_mem[expr.arg][0].size == expr.size + + def __getitem__(self, expr): + if not isinstance(expr, m2_expr.ExprMem): + return self.symbols_id.__getitem__(expr) + if not expr.arg in self.symbols_mem: + raise KeyError(expr) + mem, value = self.symbols_mem.__getitem__(expr.arg) + if mem.size != expr.size: + raise KeyError(expr) + return value + + def get(self, expr, default=None): + if not isinstance(expr, m2_expr.ExprMem): + return self.symbols_id.get(expr, default) + if not expr.arg in self.symbols_mem: + return default + mem, value = self.symbols_mem.__getitem__(expr.arg) + if mem.size != expr.size: + return default + return value + + def __setitem__(self, expr, value): + if not isinstance(expr, m2_expr.ExprMem): + self.symbols_id.__setitem__(expr, value) return - self.symbols_mem.__setitem__(a.arg, (a, v)) + assert expr.size == value.size + self.symbols_mem.__setitem__(expr.arg, (expr, value)) def __iter__(self): - for a in self.symbols_id: - yield a - for a in self.symbols_mem: - yield self.symbols_mem[a][0] - - def __delitem__(self, a): - if not isinstance(a, m2_expr.ExprMem): - self.symbols_id.__delitem__(a) + for expr in self.symbols_id: + yield expr + for expr in self.symbols_mem: + yield self.symbols_mem[expr][0] + + def __delitem__(self, expr): + if not isinstance(expr, m2_expr.ExprMem): + self.symbols_id.__delitem__(expr) else: - 
self.symbols_mem.__delitem__(a.arg) + self.symbols_mem.__delitem__(expr.arg) def items(self): - k = self.symbols_id.items() + [x for x in self.symbols_mem.values()] - return k + return self.symbols_id.items() + [x for x in self.symbols_mem.values()] def keys(self): - k = self.symbols_id.keys() + [x[0] for x in self.symbols_mem.values()] - return k + return (self.symbols_id.keys() + + [x[0] for x in self.symbols_mem.values()]) def copy(self): - p = symbols() - p.symbols_id = dict(self.symbols_id) - p.symbols_mem = dict(self.symbols_mem) - return p + new_symbols = symbols() + new_symbols.symbols_id = dict(self.symbols_id) + new_symbols.symbols_mem = dict(self.symbols_mem) + return new_symbols def inject_info(self, info): - s = symbols() - for k, v in self.items(): - k = expr_simp(k.replace_expr(info)) - v = expr_simp(v.replace_expr(info)) - s[k] = v - return s + new_symbols = symbols() + for expr, value in self.items(): + expr = expr_simp(expr.replace_expr(info)) + value = expr_simp(value.replace_expr(info)) + new_symbols[expr] = value + return new_symbols class symbexec(object): @@ -88,154 +100,152 @@ class symbexec(object): func_write=None, sb_expr_simp=expr_simp): self.symbols = symbols() - for k, v in known_symbols.items(): - self.symbols[k] = v + for expr, value in known_symbols.items(): + self.symbols[expr] = value self.func_read = func_read self.func_write = func_write self.ir_arch = ir_arch self.expr_simp = sb_expr_simp - def find_mem_by_addr(self, e): - if e in self.symbols.symbols_mem: - return self.symbols.symbols_mem[e][0] + def find_mem_by_addr(self, expr): + """ + Return memory keys with pointer equal to @expr + @expr: address of the searched memory variable + """ + if expr in self.symbols.symbols_mem: + return self.symbols.symbols_mem[expr][0] return None - def eval_ExprId(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - if isinstance(e.name, asmbloc.asm_label) and e.name.offset is not None: - return m2_expr.ExprInt_from(e, 
e.name.offset) - if not e in self.symbols: - # raise ValueError('unknown symbol %s'% e) - return e - return self.symbols[e] - - def eval_ExprInt(self, e, eval_cache=None): - return e - - def eval_ExprMem(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - a_val = self.expr_simp(self.eval_expr(e.arg, eval_cache)) - if a_val != e.arg: - a = self.expr_simp(m2_expr.ExprMem(a_val, size=e.size)) - else: - a = e - if a in self.symbols: - return self.symbols[a] - tmp = None - # test if mem lookup is known - if a_val in self.symbols.symbols_mem: - tmp = self.symbols.symbols_mem[a_val][0] - if tmp is None: - - v = self.find_mem_by_addr(a_val) - if not v: - out = [] - ov = self.get_mem_overlapping(a, eval_cache) - off_base = 0 - ov.sort() - # ov.reverse() - for off, x in ov: - # off_base = off * 8 - # x_size = self.symbols[x].size - if off >= 0: - m = min(a.size - off * 8, x.size) - ee = m2_expr.ExprSlice(self.symbols[x], 0, m) - ee = self.expr_simp(ee) - out.append((ee, off_base, off_base + m)) - off_base += m - else: - m = min(a.size - off * 8, x.size) - ee = m2_expr.ExprSlice(self.symbols[x], -off * 8, m) - ff = self.expr_simp(ee) - new_off_base = off_base + m + off * 8 - out.append((ff, off_base, new_off_base)) - off_base = new_off_base - if out: - missing_slice = self.rest_slice(out, 0, a.size) - for sa, sb in missing_slice: - ptr = self.expr_simp( - a_val + m2_expr.ExprInt_from(a_val, sa / 8) - ) - mm = m2_expr.ExprMem(ptr, size=sb - sa) - mm.is_term = True - mm.is_simp = True - out.append((mm, sa, sb)) - out.sort(key=lambda x: x[1]) - # for e, sa, sb in out: - # print str(e), sa, sb - ee = m2_expr.ExprSlice(m2_expr.ExprCompose(out), 0, a.size) - ee = self.expr_simp(ee) - return ee - if self.func_read and isinstance(a.arg, m2_expr.ExprInt): - return self.func_read(a) + def get_mem_state(self, expr): + """ + Evaluate the @expr memory in the current state using @cache + @expr: the memory key + """ + ptr, size = expr.arg, expr.size + ret = 
self.find_mem_by_addr(ptr) + if not ret: + out = [] + overlaps = self.get_mem_overlapping(expr) + off_base = 0 + for off, mem in overlaps: + if off >= 0: + new_size = min(size - off * 8, mem.size) + tmp = self.expr_simp(self.symbols[mem][0:new_size]) + out.append((tmp, off_base, off_base + new_size)) + off_base += new_size + else: + new_size = min(size - off * 8, mem.size) + tmp = self.expr_simp(self.symbols[mem][-off * 8:new_size]) + new_off_base = off_base + new_size + off * 8 + out.append((tmp, off_base, new_off_base)) + off_base = new_off_base + if out: + missing_slice = self.rest_slice(out, 0, size) + for slice_start, slice_stop in missing_slice: + ptr = self.expr_simp(ptr + m2_expr.ExprInt(slice_start / 8, ptr.size)) + mem = m2_expr.ExprMem(ptr, slice_stop - slice_start) + out.append((mem, slice_start, slice_stop)) + out.sort(key=lambda x: x[1]) + tmp = m2_expr.ExprSlice(m2_expr.ExprCompose(out), 0, size) + tmp = self.expr_simp(tmp) + return tmp + + + if self.func_read and isinstance(ptr, m2_expr.ExprInt): + return self.func_read(expr) else: - # XXX hack test - a.is_term = True - return a + return expr # bigger lookup - if a.size > tmp.size: - rest = a.size - ptr = a_val + if size > ret.size: + rest = size + ptr = ptr out = [] ptr_index = 0 while rest: - v = self.find_mem_by_addr(ptr) - if v is None: - # raise ValueError("cannot find %s in mem"%str(ptr)) - val = m2_expr.ExprMem(ptr, 8) - v = val + mem = self.find_mem_by_addr(ptr) + if mem is None: + value = m2_expr.ExprMem(ptr, 8) + mem = value diff_size = 8 - elif rest >= v.size: - val = self.symbols[v] - diff_size = v.size + elif rest >= mem.size: + value = self.symbols[mem] + diff_size = mem.size else: diff_size = rest - val = self.symbols[v][0:diff_size] - val = (val, ptr_index, ptr_index + diff_size) - out.append(val) + value = self.symbols[mem][0:diff_size] + out.append((value, ptr_index, ptr_index + diff_size)) ptr_index += diff_size rest -= diff_size - ptr = self.expr_simp( - self.eval_expr( - 
m2_expr.ExprOp('+', ptr, - m2_expr.ExprInt_from(ptr, v.size / 8)), - eval_cache) - ) - e = self.expr_simp(m2_expr.ExprCompose(out)) - return e + ptr = self.expr_simp(ptr + m2_expr.ExprInt(mem.size / 8, ptr.size)) + ret = self.expr_simp(m2_expr.ExprCompose(out)) + return ret # part lookup - tmp = self.expr_simp(m2_expr.ExprSlice(self.symbols[tmp], 0, a.size)) - return tmp - - def eval_expr_visit(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - # print 'visit', e, e.is_term - if e.is_term: - return e - if e in eval_cache: - return eval_cache[e] - c = e.__class__ - deal_class = {m2_expr.ExprId: self.eval_ExprId, - m2_expr.ExprInt: self.eval_ExprInt, - m2_expr.ExprMem: self.eval_ExprMem, - } - # print 'eval', e - if c in deal_class: - e = deal_class[c](e, eval_cache) - # print "ret", e - if not (isinstance(e, m2_expr.ExprId) or isinstance(e, - m2_expr.ExprInt)): - e.is_term = True - return e - - def eval_expr(self, e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - r = e.visit(lambda x: self.eval_expr_visit(x, eval_cache)) - return r + ret = self.expr_simp(self.symbols[ret][:size]) + return ret + + + def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0): + """ + Deep First evaluate nodes: + 1. evaluate node's sons + 2. 
simplify + """ + + #print '\t'*level, "Eval:", expr + if expr in cache: + ret = cache[expr] + #print "In cache!", ret + elif isinstance(expr, m2_expr.ExprInt): + return expr + elif isinstance(expr, m2_expr.ExprId): + if isinstance(expr.name, asmbloc.asm_label) and expr.name.offset is not None: + ret = m2_expr.ExprInt_from(expr, expr.name.offset) + else: + ret = state.get(expr, expr) + elif isinstance(expr, m2_expr.ExprMem): + ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = m2_expr.ExprMem(ptr, expr.size) + ret = self.get_mem_state(ret) + assert expr.size == ret.size + elif isinstance(expr, m2_expr.ExprCond): + cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1) + src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1) + src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1) + ret = m2_expr.ExprCond(cond, src1, src2) + elif isinstance(expr, m2_expr.ExprSlice): + arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1) + ret = m2_expr.ExprSlice(arg, expr.start, expr.stop) + elif isinstance(expr, m2_expr.ExprOp): + args = [] + for oarg in expr.args: + arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1) + assert oarg.size == arg.size + args.append(arg) + ret = m2_expr.ExprOp(expr.op, *args) + elif isinstance(expr, m2_expr.ExprCompose): + args = [] + for (arg, start, stop) in expr.args: + arg = self.apply_expr_on_state_visit_cache(arg, state, cache, level+1) + args.append((arg, start, stop)) + ret = m2_expr.ExprCompose(args) + else: + raise TypeError("Unknown expr type") + #print '\t'*level, "Result", ret + ret = self.expr_simp(ret) + #print '\t'*level, "Result simpl", ret + + assert expr.size == ret.size + cache[expr] = ret + return ret + + def apply_expr_on_state(self, expr, cache): + if cache is None: + cache = {} + ret = self.apply_expr_on_state_visit_cache(expr, self.symbols, cache) + return ret + + def eval_expr(self, 
expr, eval_cache=None): + return self.apply_expr_on_state(expr, eval_cache) def modified_regs(self, init_state=None): if init_state is None: @@ -250,121 +260,111 @@ class symbexec(object): yield i def modified_mems(self, init_state=None): + if init_state is None: + init_state = self.ir_arch.arch.regs.regs_init mems = self.symbols.symbols_mem.values() mems.sort() - for m, _ in mems: - yield m + for mem, _ in mems: + if mem in init_state and \ + mem in self.symbols.symbols_mem and \ + self.symbols.symbols_mem[mem] == init_state[mem]: + continue + yield mem def modified(self, init_state=None): - for r in self.modified_regs(init_state): - yield r - for m in self.modified_mems(init_state): - yield m + for reg in self.modified_regs(init_state): + yield reg + for mem in self.modified_mems(init_state): + yield mem def dump_id(self): + """ + Dump modififed registers symbols only + """ ids = self.symbols.symbols_id.keys() ids.sort() - for i in ids: - if i in self.ir_arch.arch.regs.regs_init and \ - i in self.symbols.symbols_id and \ - self.symbols.symbols_id[i] == self.ir_arch.arch.regs.regs_init[i]: + for expr in ids: + if (expr in self.ir_arch.arch.regs.regs_init and + expr in self.symbols.symbols_id and + self.symbols.symbols_id[expr] == self.ir_arch.arch.regs.regs_init[expr]): continue - print i, self.symbols.symbols_id[i] + print expr, "=", self.symbols.symbols_id[expr] def dump_mem(self): + """ + Dump modififed memory symbols + """ mems = self.symbols.symbols_mem.values() mems.sort() - for m, v in mems: - print m, v + for mem, value in mems: + print mem, value def rest_slice(self, slices, start, stop): - o = [] + """ + Return the complementary slices of @slices in the range @start, @stop + @slices: base slices + @start, @stop: interval range + """ + out = [] last = start - for _, a, b in slices: - if a == last: - last = b + for _, slice_start, slice_stop in slices: + if slice_start == last: + last = slice_stop continue - o.append((last, a)) - last = b + 
out.append((last, slice_start)) + last = slice_stop if last != stop: - o.append((b, stop)) - return o - - def substract_mems(self, a, b): - ex = b.arg - a.arg - ex = self.expr_simp(self.eval_expr(ex, {})) - if not isinstance(ex, m2_expr.ExprInt): - return None - ptr_diff = int(int32(ex.arg)) - out = [] - if ptr_diff < 0: - # [a ] - #[b ]XXX - sub_size = b.size + ptr_diff * 8 - if sub_size >= a.size: - pass - else: - ex = m2_expr.ExprOp('+', a.arg, - m2_expr.ExprInt_from(a.arg, sub_size / 8)) - ex = self.expr_simp(self.eval_expr(ex, {})) + out.append((slice_stop, stop)) + return out - rest_ptr = ex - rest_size = a.size - sub_size + def substract_mems(self, arg1, arg2): + """ + Return the remaining memory areas of @arg1 - @arg2 + @arg1, @arg2: ExprMem + """ - val = self.symbols[a][sub_size:a.size] - out = [(m2_expr.ExprMem(rest_ptr, rest_size), val)] - else: - #[a ] - # XXXX[b ]YY + ptr_diff = self.expr_simp(arg2.arg - arg1.arg) + ptr_diff = int(int32(ptr_diff.arg)) - #[a ] - # XXXX[b ] + zone1 = interval([(0, arg1.size/8-1)]) + zone2 = interval([(ptr_diff, ptr_diff + arg2.size/8-1)]) + zones = zone1 - zone2 + + out = [] + for start, stop in zones: + ptr = arg1.arg + m2_expr.ExprInt(start, arg1.arg.size) + ptr = self.expr_simp(ptr) + value = self.expr_simp(self.symbols[arg1][start*8:(stop+1)*8]) + mem = m2_expr.ExprMem(ptr, (stop - start + 1)*8) + assert mem.size == value.size + out.append((mem, value)) - out = [] - # part X - if ptr_diff > 0: - val = self.symbols[a][0:ptr_diff * 8] - out.append((m2_expr.ExprMem(a.arg, ptr_diff * 8), val)) - # part Y - if ptr_diff * 8 + b.size < a.size: - - ex = m2_expr.ExprOp('+', b.arg, - m2_expr.ExprInt_from(b.arg, b.size / 8)) - ex = self.expr_simp(self.eval_expr(ex, {})) - - rest_ptr = ex - rest_size = a.size - (ptr_diff * 8 + b.size) - val = self.symbols[a][ptr_diff * 8 + b.size:a.size] - out.append((m2_expr.ExprMem(ex, val.size), val)) return out - # give mem stored overlapping requested mem ptr - def get_mem_overlapping(self, 
e, eval_cache=None): - if eval_cache is None: - eval_cache = {} - if not isinstance(e, m2_expr.ExprMem): - raise ValueError('mem overlap bad arg') - ov = [] - # suppose max mem size is 64 bytes, compute all reachable addresses - to_test = [] - base_ptr = self.expr_simp(e.arg) - for i in xrange(-7, e.size / 8): - ex = self.expr_simp( - self.eval_expr(base_ptr + m2_expr.ExprInt_from(e.arg, i), - eval_cache)) - to_test.append((i, ex)) - - for i, x in to_test: - if not x in self.symbols.symbols_mem: + + def get_mem_overlapping(self, expr): + """ + Gives mem stored overlapping memory in @expr + Hypothesis: Max mem size is 64 bytes, compute all reachable addresses + @expr: target memory + """ + + overlaps = [] + base_ptr = self.expr_simp(expr.arg) + for i in xrange(-7, expr.size / 8): + new_ptr = base_ptr + m2_expr.ExprInt(i, expr.arg.size) + new_ptr = self.expr_simp(new_ptr) + + mem, origin = self.symbols.symbols_mem.get(new_ptr, (None, None)) + if mem is None: continue - ex = self.expr_simp(self.eval_expr(e.arg - x, eval_cache)) - if not isinstance(ex, m2_expr.ExprInt): - raise ValueError('ex is not ExprInt') - ptr_diff = int32(ex.arg) - if ptr_diff >= self.symbols.symbols_mem[x][1].size / 8: - # print "too long!" 
+ + ptr_diff = -i + if ptr_diff >= origin.size / 8: + # access is too small to overlap the memory target continue - ov.append((i, self.symbols.symbols_mem[x][0])) - return ov + overlaps.append((i, mem)) + + return overlaps def eval_ir_expr(self, assignblk): """ @@ -372,16 +372,14 @@ class symbexec(object): @assignblk: AssignBlock instance """ pool_out = {} - - eval_cache = dict(self.symbols.items()) + eval_cache = {} for dst, src in assignblk.iteritems(): src = self.eval_expr(src, eval_cache) if isinstance(dst, m2_expr.ExprMem): - a = self.eval_expr(dst.arg, eval_cache) - a = self.expr_simp(a) + ptr = self.eval_expr(dst.arg, eval_cache) # test if mem lookup is known - tmp = m2_expr.ExprMem(a, dst.size) + tmp = m2_expr.ExprMem(ptr, dst.size) pool_out[tmp] = src elif isinstance(dst, m2_expr.ExprId): @@ -398,18 +396,18 @@ class symbexec(object): """ mem_dst = [] src_dst = self.eval_ir_expr(assignblk) - eval_cache = dict(self.symbols.items()) for dst, src in src_dst: if isinstance(dst, m2_expr.ExprMem): - mem_overlap = self.get_mem_overlapping(dst, eval_cache) + mem_overlap = self.get_mem_overlapping(dst) for _, base in mem_overlap: diff_mem = self.substract_mems(base, dst) del self.symbols[base] for new_mem, new_val in diff_mem: - new_val.is_term = True self.symbols[new_mem] = new_val src_o = self.expr_simp(src) self.symbols[dst] = src_o + if dst == src_o: + del self.symbols[dst] if isinstance(dst, m2_expr.ExprMem): if self.func_write and isinstance(dst.arg, m2_expr.ExprInt): self.func_write(self, dst, src_o) @@ -424,51 +422,52 @@ class symbexec(object): @step: display intermediate steps """ for assignblk in irb.irs: - self.eval_ir(assignblk) if step: + print 'Assignblk:' + print assignblk print '_' * 80 + self.eval_ir(assignblk) + if step: self.dump_id() - eval_cache = dict(self.symbols.items()) - return self.eval_expr(self.ir_arch.IRDst, eval_cache) + self.dump_mem() + print '_' * 80 + return self.eval_expr(self.ir_arch.IRDst) - def emul_ir_bloc(self, myir, ad, 
step=False): - b = myir.get_bloc(ad) - if b is not None: - ad = self.emulbloc(b, step=step) - return ad + def emul_ir_bloc(self, myir, addr, step=False): + irblock = myir.get_bloc(addr) + if irblock is not None: + addr = self.emulbloc(irblock, step=step) + return addr - def emul_ir_blocs(self, myir, ad, lbl_stop=None, step=False): + def emul_ir_blocs(self, myir, addr, lbl_stop=None, step=False): while True: - b = myir.get_bloc(ad) - if b is None: + irblock = myir.get_bloc(addr) + if irblock is None: break - if b.label == lbl_stop: + if irblock.label == lbl_stop: break - ad = self.emulbloc(b, step=step) - return ad - - def del_mem_above_stack(self, sp): - sp_val = self.symbols[sp] - for mem_ad, (mem, _) in self.symbols.symbols_mem.items(): - # print mem_ad, sp_val - diff = self.eval_expr(mem_ad - sp_val, {}) - diff = expr_simp(diff) + addr = self.emulbloc(irblock, step=step) + return addr + + def del_mem_above_stack(self, stack_ptr): + stack_ptr = self.eval_expr(stack_ptr) + for mem_addr, (mem, _) in self.symbols.symbols_mem.items(): + diff = self.expr_simp(mem_addr - stack_ptr) if not isinstance(diff, m2_expr.ExprInt): continue - m = expr_simp(diff.msb()) - if m.arg == 1: + sign_bit = self.expr_simp(diff.msb()) + if sign_bit.arg == 1: del self.symbols[mem] def apply_expr(self, expr): """Evaluate @expr and apply side effect if needed (ie. if expr is an assignment). 
Return the evaluated value""" - # Eval expression - to_eval = expr.src if isinstance(expr, m2_expr.ExprAff) else expr - ret = self.expr_simp(self.eval_expr(to_eval)) - # Update value if needed if isinstance(expr, m2_expr.ExprAff): - self.eval_ir(AssignBlock([m2_expr.ExprAff(expr.dst, ret)])) + ret = self.eval_expr(expr.src) + self.eval_ir(AssignBlock([expr])) + else: + ret = self.eval_expr(expr) return ret diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index 9165fccb..19cc47db 100644 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -21,7 +21,7 @@ class TestSymbExec(unittest.TestCase): addr40 = ExprInt32(40) addr50 = ExprInt32(50) mem0 = ExprMem(addr0) - mem1 = ExprMem(addr1) + mem1 = ExprMem(addr1, 8) mem8 = ExprMem(addr8) mem9 = ExprMem(addr9) mem20 = ExprMem(addr20) @@ -34,22 +34,24 @@ class TestSymbExec(unittest.TestCase): id_a = ExprId('a') id_eax = ExprId('eax_init') - e = symbexec( - ir_x86_32(), {mem0: id_x, mem1: id_y, mem9: id_x, mem40w: id_x, mem50v: id_y, id_a: addr0, id_eax: addr0}) + e = symbexec(ir_x86_32(), + {mem0: id_x, mem1: id_y, mem9: id_x, + mem40w: id_x[:16], mem50v: id_y, + id_a: addr0, id_eax: addr0}) self.assertEqual(e.find_mem_by_addr(addr0), mem0) self.assertEqual(e.find_mem_by_addr(addrX), None) - self.assertEqual(e.eval_ExprMem(ExprMem(addr1 - addr1)), id_x) - self.assertEqual(e.eval_ExprMem(ExprMem(addr1, 8)), id_y) - self.assertEqual(e.eval_ExprMem(ExprMem(addr1 + addr1)), ExprCompose( + self.assertEqual(e.eval_expr(ExprMem(addr1 - addr1)), id_x) + self.assertEqual(e.eval_expr(ExprMem(addr1, 8)), id_y) + self.assertEqual(e.eval_expr(ExprMem(addr1 + addr1)), ExprCompose( [(id_x[16:32], 0, 16), (ExprMem(ExprInt32(4), 16), 16, 32)])) - self.assertEqual(e.eval_ExprMem(mem8), ExprCompose( + self.assertEqual(e.eval_expr(mem8), ExprCompose( [(id_x[0:24], 0, 24), (ExprMem(ExprInt32(11), 8), 24, 32)])) - self.assertEqual(e.eval_ExprMem(mem40v), id_x[:8]) - self.assertEqual(e.eval_ExprMem(mem50w), ExprCompose( + 
self.assertEqual(e.eval_expr(mem40v), id_x[:8]) + self.assertEqual(e.eval_expr(mem50w), ExprCompose( [(id_y, 0, 8), (ExprMem(ExprInt32(51), 8), 8, 16)])) - self.assertEqual(e.eval_ExprMem(mem20), mem20) + self.assertEqual(e.eval_expr(mem20), mem20) e.func_read = lambda x: x - self.assertEqual(e.eval_ExprMem(mem20), mem20) + self.assertEqual(e.eval_expr(mem20), mem20) self.assertEqual(set(e.modified()), set(e.symbols)) self.assertRaises( KeyError, e.symbols.__getitem__, ExprMem(ExprInt32(100))) -- cgit 1.4.1 From ca9edfa2223a48550d53e62ab4e4856647428cca Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 14 Oct 2016 12:45:15 +0200 Subject: Expression: slot expressions --- miasm2/expression/expression.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'miasm2/expression/expression.py') diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 59e809a5..92735734 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -114,6 +114,8 @@ class Expr(object): "Parent class for Miasm Expressions" + __slots__ = ["__hash", "__repr", "__size"] + all_exprs = set() args2expr = {} simp_exprs = set() @@ -373,6 +375,8 @@ class ExprInt(Expr): - Constant 0x12345678 on 32bits """ + __slots__ = Expr.__slots__ + ["__arg"] + def __init__(self, num, size=None): """Create an ExprInt from a modint or num/size @@ -458,6 +462,7 @@ class ExprId(Expr): - variable v1 """ + __slots__ = Expr.__slots__ + ["__name"] def __init__(self, name, size=32): """Create an identifier @@ -515,6 +520,7 @@ class ExprAff(Expr): - var1 <- 2 """ + __slots__ = Expr.__slots__ + ["__dst", "__src"] def __init__(self, dst, src): """Create an ExprAff for dst <- src @@ -625,6 +631,7 @@ class ExprCond(Expr): - if (cond) then ... else ... 
""" + __slots__ = Expr.__slots__ + ["__cond", "__src1", "__src2"] def __init__(self, cond, src1, src2): """Create an ExprCond @@ -710,6 +717,7 @@ class ExprMem(Expr): - Memory write """ + __slots__ = Expr.__slots__ + ["__arg"] def __init__(self, arg, size=32): """Create an ExprMem @@ -786,6 +794,7 @@ class ExprOp(Expr): - parity bit(var1) """ + __slots__ = Expr.__slots__ + ["__op", "__args"] def __init__(self, op, *args): """Create an ExprOp @@ -936,6 +945,7 @@ class ExprOp(Expr): class ExprSlice(Expr): + __slots__ = Expr.__slots__ + ["__arg", "__start", "__stop"] def __init__(self, arg, start, stop): super(ExprSlice, self).__init__() @@ -1022,6 +1032,7 @@ class ExprCompose(Expr): In the example, salad.size == 3. """ + __slots__ = Expr.__slots__ + ["__args"] def __init__(self, args): """Create an ExprCompose -- cgit 1.4.1 From abbc51905c2c392b259f43c681ed9617168add5e Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Mon, 17 Oct 2016 12:48:20 +0200 Subject: IR: is_simp in the simplifier class --- miasm2/expression/expression.py | 10 ---------- miasm2/expression/simplifications.py | 12 ++++++------ 2 files changed, 6 insertions(+), 16 deletions(-) (limited to 'miasm2/expression/expression.py') diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 92735734..1473838b 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -118,7 +118,6 @@ class Expr(object): all_exprs = set() args2expr = {} - simp_exprs = set() canon_exprs = set() use_singleton = True @@ -147,15 +146,6 @@ class Expr(object): expr = object.__new__(cls, *args, **kwargs) return expr - def get_is_simp(self): - return self in Expr.simp_exprs - - def set_is_simp(self, value): - assert(value is True) - Expr.simp_exprs.add(self) - - is_simp = property(get_is_simp, set_is_simp) - def get_is_canon(self): return self in Expr.canon_exprs diff --git a/miasm2/expression/simplifications.py b/miasm2/expression/simplifications.py index cbffb219..069be197 
100644 --- a/miasm2/expression/simplifications.py +++ b/miasm2/expression/simplifications.py @@ -48,6 +48,7 @@ class ExpressionSimplifier(object): def __init__(self): self.expr_simp_cb = {} + self.simplified_exprs = set() def enable_passes(self, passes): """Add passes from @passes @@ -80,7 +81,7 @@ class ExpressionSimplifier(object): @expression: Expr instance Return an Expr instance""" - if expression.is_simp: + if expression in self.simplified_exprs: return expression # Find a stable state @@ -92,10 +93,9 @@ class ExpressionSimplifier(object): # Launch recursivity expression = self.expr_simp_wrapper(e_new) - expression.is_simp = True - + self.simplified_exprs.add(expression) # Mark expression as simplified - e_new.is_simp = True + self.simplified_exprs.add(e_new) return e_new def expr_simp_wrapper(self, expression, callback=None): @@ -104,13 +104,13 @@ class ExpressionSimplifier(object): @manual_callback: If set, call this function instead of normal one Return an Expr instance""" - if expression.is_simp: + if expression in self.simplified_exprs: return expression if callback is None: callback = self.expr_simp - return expression.visit(callback, lambda e: not(e.is_simp)) + return expression.visit(callback, lambda e: e not in self.simplified_exprs) def __call__(self, expression, callback=None): "Wrapper on expr_simp_wrapper" -- cgit 1.4.1 From 589d4fd9ebd61216155bba9f1988bbd5c3094a2b Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Tue, 18 Oct 2016 13:11:46 +0200 Subject: Expression: add pickling --- miasm2/expression/expression.py | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'miasm2/expression/expression.py') diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index 1473838b..d51f0bdb 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -389,6 +389,12 @@ class ExprInt(Expr): size = property(lambda self: self.__size) arg = property(lambda self: 
self.__arg) + def __getstate__(self): + return int(self.__arg), self.__size + + def __setstate__(self, state): + self.__init__(*state) + def __new__(cls, arg, size=None): if size is None: size = arg.size @@ -466,6 +472,12 @@ class ExprId(Expr): size = property(lambda self: self.__size) name = property(lambda self: self.__name) + def __getstate__(self): + return self.__name, self.__size + + def __setstate__(self, state): + self.__init__(*state) + def __new__(cls, name, size=32): return Expr.get_object(cls, (name, size)) @@ -543,6 +555,12 @@ class ExprAff(Expr): dst = property(lambda self: self.__dst) src = property(lambda self: self.__src) + def __getstate__(self): + return self.__dst, self.__src + + def __setstate__(self, state): + self.__init__(*state) + def __new__(cls, dst, src): return Expr.get_object(cls, (dst, src)) @@ -641,6 +659,12 @@ class ExprCond(Expr): src1 = property(lambda self: self.__src1) src2 = property(lambda self: self.__src2) + def __getstate__(self): + return self.__cond, self.__src1, self.__src2 + + def __setstate__(self, state): + self.__init__(*state) + def __new__(cls, cond, src1, src2): return Expr.get_object(cls, (cond, src1, src2)) @@ -726,6 +750,12 @@ class ExprMem(Expr): size = property(lambda self: self.__size) arg = property(lambda self: self.__arg) + def __getstate__(self): + return self.__arg, self.__size + + def __setstate__(self, state): + self.__init__(*state) + def __new__(cls, arg, size=32): return Expr.get_object(cls, (arg, size)) @@ -857,6 +887,13 @@ class ExprOp(Expr): op = property(lambda self: self.__op) args = property(lambda self: self.__args) + def __getstate__(self): + return self.__op, self.__args + + def __setstate__(self, state): + op, args = state + self.__init__(op, *args) + def __new__(cls, op, *args): return Expr.get_object(cls, (op, args)) @@ -949,6 +986,12 @@ class ExprSlice(Expr): start = property(lambda self: self.__start) stop = property(lambda self: self.__stop) + def __getstate__(self): + return 
self.__arg, self.__start, self.__stop + + def __setstate__(self, state): + self.__init__(*state) + def __new__(cls, arg, start, stop): return Expr.get_object(cls, (arg, start, stop)) @@ -1057,6 +1100,12 @@ class ExprCompose(Expr): size = property(lambda self: self.__size) args = property(lambda self: self.__args) + def __getstate__(self): + return self.__args + + def __setstate__(self, state): + self.__init__(state) + def __new__(cls, args): return Expr.get_object(cls, tuple(args)) -- cgit 1.4.1 From 3771288cffdd53c1ff87857374bd13c550b355dc Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 21 Oct 2016 16:19:09 +0200 Subject: ExprCompose: add new api The ExprComposes uses directly its arguments sizes to guess the slices locations. Old api: ExprCompose([(a, 0, 32), (b, 32, 64)]) becomes: ExprCompose(a, b) --- example/expression/basic_simplification.py | 3 +- example/expression/expr_grapher.py | 2 +- example/expression/simplification_tools.py | 8 +- miasm2/arch/aarch64/sem.py | 3 +- miasm2/arch/arm/sem.py | 20 ++- miasm2/arch/mips32/sem.py | 7 +- miasm2/arch/msp430/sem.py | 22 +-- miasm2/arch/x86/sem.py | 213 ++++++++++------------------- miasm2/core/sembuilder.py | 2 +- miasm2/expression/expression.py | 47 ++++--- miasm2/ir/translators/miasm.py | 6 +- test/arch/arm/sem.py | 2 +- test/expression/expression.py | 8 +- test/expression/expression_helper.py | 5 +- test/expression/simplifications.py | 178 ++++++++++++------------ test/ir/symbexec.py | 6 +- test/ir/translators/z3_ir.py | 2 +- 17 files changed, 224 insertions(+), 310 deletions(-) (limited to 'miasm2/expression/expression.py') diff --git a/example/expression/basic_simplification.py b/example/expression/basic_simplification.py index 27c86096..17b1a35b 100644 --- a/example/expression/basic_simplification.py +++ b/example/expression/basic_simplification.py @@ -11,8 +11,7 @@ b = ExprId('ebx') exprs = [a + b - a, ExprInt32(0x12) + ExprInt32(0x30) - a, - ExprCompose([(a[:8], 0, 8), - (a[8:16], 8, 16)])] + 
ExprCompose(a[:8], a[8:16])] for e in exprs: print '*' * 40 diff --git a/example/expression/expr_grapher.py b/example/expression/expr_grapher.py index 22dff7cf..3137e6d2 100644 --- a/example/expression/expr_grapher.py +++ b/example/expression/expr_grapher.py @@ -8,7 +8,7 @@ c = ExprId("C") d = ExprId("D") m = ExprMem(a + b + c + a) -e1 = ExprCompose([(a + b - (c * a) / m | b, 0, 32), (a + m, 32, 64)]) +e1 = ExprCompose(a + b - (c * a) / m | b, a + m) e2 = ExprInt64(15) e = ExprCond(d, e1, e2)[0:32] diff --git a/example/expression/simplification_tools.py b/example/expression/simplification_tools.py index b2df58d3..9b8aeed5 100644 --- a/example/expression/simplification_tools.py +++ b/example/expression/simplification_tools.py @@ -25,11 +25,9 @@ i1 = ExprInt(uint32(0x1)) i2 = ExprInt(uint32(0x2)) cc = ExprCond(a, b, c) -o = ExprCompose([(a[:8], 8, 16), - (a[8:16], 0, 8)]) +o = ExprCompose(a[8:16], a[:8]) -o2 = ExprCompose([(a[8:16], 0, 8), - (a[:8], 8, 16)]) +o2 = ExprCompose(a[8:16], a[:8]) l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] l2 = l[::-1] @@ -56,7 +54,7 @@ print y == y.copy() print repr(y), repr(y.copy()) -z = ExprCompose([(a[5:5 + 8], 0, 8), (b[:16], 8, 24), (x[:8], 24, 32)]) +z = ExprCompose(a[5:5 + 8], b[:16], x[:8]) print z print z.copy() print z[:31].copy().visit(replace_expr) diff --git a/miasm2/arch/aarch64/sem.py b/miasm2/arch/aarch64/sem.py index aff2d8ca..0bceb8dc 100644 --- a/miasm2/arch/aarch64/sem.py +++ b/miasm2/arch/aarch64/sem.py @@ -672,8 +672,7 @@ def nop(): @sbuild.parse def extr(arg1, arg2, arg3, arg4): - compose = m2_expr.ExprCompose([(arg2, 0, arg2.size), - (arg3, arg2.size, arg2.size+arg3.size)]) + compose = m2_expr.ExprCompose(arg2, arg3) arg1 = compose[int(arg4.arg):int(arg4)+arg1.size] mnemo_func = sbuild.functions diff --git a/miasm2/arch/arm/sem.py b/miasm2/arch/arm/sem.py index 260531ac..c6f3dceb 100644 --- a/miasm2/arch/arm/sem.py +++ b/miasm2/arch/arm/sem.py @@ -481,7 +481,7 @@ def umull(ir, instr, a, b, c, d): 
def umlal(ir, instr, a, b, c, d): e = [] - r = c.zeroExtend(64) * d.zeroExtend(64) + ExprCompose([(a, 0, 32), (b, 32, 64)]) + r = c.zeroExtend(64) * d.zeroExtend(64) + ExprCompose(a, b) e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output @@ -497,7 +497,7 @@ def smull(ir, instr, a, b, c, d): def smlal(ir, instr, a, b, c, d): e = [] - r = c.signExtend(64) * d.signExtend(64) + ExprCompose([(a, 0, 32), (b, 32, 64)]) + r = c.signExtend(64) * d.signExtend(64) + ExprCompose(a, b) e.append(ExprAff(a, r[0:32])) e.append(ExprAff(b, r[32:64])) # r15/IRDst not allowed as output @@ -910,14 +910,14 @@ def bfc(ir, instr, a, b, c): out = [] last = 0 if start: - out.append((a[:start], 0, start)) + out.append(a[:start]) last = start if stop - start: - out.append((ExprInt32(0)[last:stop], last, stop)) + out.append(ExprInt32(0)[last:stop]) last = stop if last < 32: - out.append((a[last:], last, 32)) - r = ExprCompose(out) + out.append(a[last:]) + r = ExprCompose(*out) e.append(ExprAff(a, r)) dst = None if PC in a.get_r(): @@ -927,10 +927,7 @@ def bfc(ir, instr, a, b, c): def rev(ir, instr, a, b): e = [] - c = ExprCompose([(b[:8], 24, 32), - (b[8:16], 16, 24), - (b[16:24], 8, 16), - (b[24:32], 0, 8)]) + c = ExprCompose(b[24:32], b[16:24], b[8:16], b[:8]) e.append(ExprAff(a, c)) return e @@ -1225,8 +1222,7 @@ class ir_arml(ir): # ir = get_mnemo_expr(self, self.name.lower(), *args) if len(args) and isinstance(args[-1], ExprOp): if args[-1].op == 'rrx': - args[-1] = ExprCompose( - [(args[-1].args[0][1:], 0, 31), (cf, 31, 32)]) + args[-1] = ExprCompose(args[-1].args[0][1:], cf) elif (args[-1].op in ['<<', '>>', '<>', '<<<', '>>>'] and isinstance(args[-1].args[-1], ExprId)): args[-1] = ExprOp(args[-1].op, diff --git a/miasm2/arch/mips32/sem.py b/miasm2/arch/mips32/sem.py index d03772ca..dea822b4 100644 --- a/miasm2/arch/mips32/sem.py +++ b/miasm2/arch/mips32/sem.py @@ -99,7 +99,7 @@ def bne(arg1, arg2, arg3): def lui(arg1, arg2): """The immediate 
value @arg2 is shifted left 16 bits and stored in the register @arg1. The lower 16 bits are zeroes.""" - arg1 = ExprCompose([(i16(0), 0, 16), (arg2[:16], 16, 32)]) + arg1 = ExprCompose(i16(0), arg2[:16]) @sbuild.parse def nop(): @@ -251,10 +251,7 @@ def bgtz(arg1, arg2): @sbuild.parse def wsbh(arg1, arg2): - arg1 = ExprCompose([(arg2[8:16], 0, 8), - (arg2[0:8] , 8, 16), - (arg2[24:32], 16, 24), - (arg2[16:24], 24, 32)]) + arg1 = ExprCompose(arg2[8:16], arg2[0:8], arg2[24:32], arg2[16:24]) @sbuild.parse def rotr(arg1, arg2, arg3): diff --git a/miasm2/arch/msp430/sem.py b/miasm2/arch/msp430/sem.py index 169a631f..a99e500c 100644 --- a/miasm2/arch/msp430/sem.py +++ b/miasm2/arch/msp430/sem.py @@ -250,8 +250,7 @@ def call(ir, instr, a): def swpb(ir, instr, a): e = [] x, y = a[:8], a[8:16] - e.append(ExprAff(a, ExprCompose([(y, 0, 8), - (x, 8, 16)]))) + e.append(ExprAff(a, ExprCompose(y, x))) return e, [] @@ -330,8 +329,7 @@ def jmp(ir, instr, a): def rrc_w(ir, instr, a): e = [] - c = ExprCompose([(a[1:16], 0, 15), - (cf, 15, 16)]) + c = ExprCompose(a[1:16], cf) e.append(ExprAff(a, c)) e.append(ExprAff(cf, a[:1])) # e += update_flag_zn_r(c) @@ -347,8 +345,7 @@ def rrc_w(ir, instr, a): def rra_w(ir, instr, a): e = [] - c = ExprCompose([(a[1:16], 0, 15), - (a[15:16], 15, 16)]) + c = ExprCompose(a[1:16], a[15:16]) e.append(ExprAff(a, c)) # TODO: error in disasm microcorruption? 
# e.append(ExprAff(cf, a[:1])) @@ -406,18 +403,7 @@ mnemo_func = { } -composed_sr = ExprCompose([ - (cf, 0, 1), - (zf, 1, 2), - (nf, 2, 3), - (gie, 3, 4), - (cpuoff, 4, 5), - (osc, 5, 6), - (scg0, 6, 7), - (scg1, 7, 8), - (of, 8, 9), - (res, 9, 16), -]) +composed_sr = ExprCompose(cf, zf, nf, gie, cpuoff, osc, scg0, scg1, of, res) def ComposeExprAff(dst, src): diff --git a/miasm2/arch/x86/sem.py b/miasm2/arch/x86/sem.py index 69c21ac8..5dc49efc 100644 --- a/miasm2/arch/x86/sem.py +++ b/miasm2/arch/x86/sem.py @@ -331,9 +331,7 @@ def xadd(ir, instr, a, b): def adc(ir, instr, a, b): e = [] - c = a + (b + m2_expr.ExprCompose([(m2_expr.ExprInt(0, a.size - 1), - 1, a.size), - (cf, 0, 1)])) + c = a + (b + m2_expr.ExprCompose(cf, m2_expr.ExprInt(0, a.size - 1))) e += update_flag_arith(c) e += update_flag_af(a, b, c) e += update_flag_add(a, b, c) @@ -355,9 +353,7 @@ def sub(ir, instr, a, b): def sbb(ir, instr, a, b): e = [] - c = a - (b + m2_expr.ExprCompose([(m2_expr.ExprInt(0, a.size - 1), - 1, a.size), - (cf, 0, 1)])) + c = a - (b + m2_expr.ExprCompose(cf, m2_expr.ExprInt(0, a.size - 1))) e += update_flag_arith(c) e += update_flag_af(a, b, c) e += update_flag_sub(a, b, c) @@ -898,25 +894,12 @@ def setalc(ir, instr): def bswap(ir, instr, a): e = [] if a.size == 16: - c = m2_expr.ExprCompose([(a[:8], 8, 16), - (a[8:16], 0, 8), - ]) + c = m2_expr.ExprCompose(a[8:16], a[:8]) elif a.size == 32: - c = m2_expr.ExprCompose([(a[:8], 24, 32), - (a[8:16], 16, 24), - (a[16:24], 8, 16), - (a[24:32], 0, 8), - ]) + c = m2_expr.ExprCompose(a[24:32], a[16:24], a[8:16], a[:8]) elif a.size == 64: - c = m2_expr.ExprCompose([(a[:8], 56, 64), - (a[8:16], 48, 56), - (a[16:24], 40, 48), - (a[24:32], 32, 40), - (a[32:40], 24, 32), - (a[40:48], 16, 24), - (a[48:56], 8, 16), - (a[56:64], 0, 8), - ]) + c = m2_expr.ExprCompose(a[56:64], a[48:56], a[40:48], a[32:40], + a[24:32], a[16:24], a[8:16], a[:8]) else: raise ValueError('the size DOES matter') e.append(m2_expr.ExprAff(a, c)) @@ -986,24 +969,18 
@@ def scas(ir, instr, size): def compose_eflag(s=32): args = [] - regs = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1( - 0), af, m2_expr.ExprInt1(0), zf, nf, tf, i_f, df, of] - for i in xrange(len(regs)): - args.append((regs[i], i, i + 1)) - - args.append((iopl, 12, 14)) + args = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1(0), af, + m2_expr.ExprInt1(0), zf, nf, tf, i_f, df, of, iopl] if s == 32: - regs = [nt, m2_expr.ExprInt1(0), rf, vm, ac, vif, vip, i_d] + args += [nt, m2_expr.ExprInt1(0), rf, vm, ac, vif, vip, i_d] elif s == 16: - regs = [nt, m2_expr.ExprInt1(0)] + args += [nt, m2_expr.ExprInt1(0)] else: raise ValueError('unk size') - for i in xrange(len(regs)): - args.append((regs[i], i + 14, i + 15)) if s == 32: - args.append((m2_expr.ExprInt(0, 10), 22, 32)) - return m2_expr.ExprCompose(args) + args.append(m2_expr.ExprInt(0, 10)) + return m2_expr.ExprCompose(*args) def pushfd(ir, instr): @@ -1426,8 +1403,7 @@ def div(ir, instr, a): b = mRAX[instr.mode][:16] elif size in [16, 32, 64]: s1, s2 = mRDX[size], mRAX[size] - b = m2_expr.ExprCompose([(s2, 0, size), - (s1, size, size * 2)]) + b = m2_expr.ExprCompose(s2, s1) else: raise ValueError('div arg not impl', a) @@ -1436,8 +1412,7 @@ def div(ir, instr, a): # if 8 bit div, only ax is affected if size == 8: - e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose([(c_d[:8], 0, 8), - (c_r[:8], 8, 16)]))) + e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) else: e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) @@ -1454,8 +1429,7 @@ def idiv(ir, instr, a): b = mRAX[instr.mode][:16] elif size in [16, 32, 64]: s1, s2 = mRDX[size], mRAX[size] - b = m2_expr.ExprCompose([(s2, 0, size), - (s1, size, size * 2)]) + b = m2_expr.ExprCompose(s2, s1) else: raise ValueError('div arg not impl', a) @@ -1464,8 +1438,7 @@ def idiv(ir, instr, a): # if 8 bit div, only ax is affected if size == 8: - e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose([(c_d[:8], 0, 8), - (c_r[:8], 8, 
16)]))) + e.append(m2_expr.ExprAff(b, m2_expr.ExprCompose(c_d[:8], c_r[:8]))) else: e.append(m2_expr.ExprAff(s1, c_r[:size])) e.append(m2_expr.ExprAff(s2, c_d[:size])) @@ -2192,14 +2165,10 @@ def fyl2x(ir, instr): def fnstenv(ir, instr, a): e = [] # XXX TODO tag word, ... - status_word = m2_expr.ExprCompose([(m2_expr.ExprInt8(0), 0, 8), - (float_c0, 8, 9), - (float_c1, 9, 10), - (float_c2, 10, 11), - (float_stack_ptr, 11, 14), - (float_c3, 14, 15), - (m2_expr.ExprInt1(0), 15, 16), - ]) + status_word = m2_expr.ExprCompose(m2_expr.ExprInt8(0), + float_c0, float_c1, float_c2, + float_stack_ptr, float_c3, + m2_expr.ExprInt1(0)) s = instr.mode # The behaviour in 64bit is identical to 32 bit @@ -2497,15 +2466,15 @@ def fabs(ir, instr): def fnstsw(ir, instr, dst): args = [ # Exceptions -> 0 - (m2_expr.ExprInt8(0), 0, 8), - (float_c0, 8, 9), - (float_c1, 9, 10), - (float_c2, 10, 11), - (float_stack_ptr, 11, 14), - (float_c3, 14, 15), + m2_expr.ExprInt8(0), + float_c0, + float_c1, + float_c2, + float_stack_ptr, + float_c3, # B: FPU is not busy -> 0 - (m2_expr.ExprInt1(0), 15, 16)] - e = [m2_expr.ExprAff(dst, m2_expr.ExprCompose(args))] + m2_expr.ExprInt1(0)] + e = [m2_expr.ExprAff(dst, m2_expr.ExprCompose(*args))] return e, [] @@ -2656,11 +2625,9 @@ def das(ir, instr): def aam(ir, instr, a): e = [] tempAL = mRAX[instr.mode][0:8] - newEAX = m2_expr.ExprCompose([ - (tempAL % a, 0, 8), - (tempAL / a, 8, 16), - (mRAX[instr.mode][16:], 16, mRAX[instr.mode].size), - ]) + newEAX = m2_expr.ExprCompose(tempAL % a, + tempAL / a, + mRAX[instr.mode][16:]) e += [m2_expr.ExprAff(mRAX[instr.mode], newEAX)] e += update_flag_arith(newEAX) e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0))) @@ -2671,12 +2638,9 @@ def aad(ir, instr, a): e = [] tempAL = mRAX[instr.mode][0:8] tempAH = mRAX[instr.mode][8:16] - newEAX = m2_expr.ExprCompose([ - ((tempAL + (tempAH * a)) & m2_expr.ExprInt8(0xFF), 0, 8), - (m2_expr.ExprInt8(0), 8, 16), - (mRAX[instr.mode][16:], - 16, mRAX[instr.mode].size), - ]) + 
newEAX = m2_expr.ExprCompose((tempAL + (tempAH * a)) & m2_expr.ExprInt8(0xFF), + m2_expr.ExprInt8(0), + mRAX[instr.mode][16:]) e += [m2_expr.ExprAff(mRAX[instr.mode], newEAX)] e += update_flag_arith(newEAX) e.append(m2_expr.ExprAff(af, m2_expr.ExprInt1(0))) @@ -2908,9 +2872,8 @@ def l_outs(ir, instr, size): def xlat(ir, instr): e = [] - a = m2_expr.ExprCompose([(m2_expr.ExprInt(0, 24), 8, 32), - (mRAX[instr.mode][0:8], 0, 8)]) - b = m2_expr.ExprMem(m2_expr.ExprOp('+', mRBX[instr.mode], a), 8) + a = mRAX[instr.mode][0:8].zeroExtend(mRBX[instr.mode].size) + b = m2_expr.ExprMem(mRBX[instr.mode] + a, 8) e.append(m2_expr.ExprAff(mRAX[instr.mode][0:8], b)) return e, [] @@ -3073,13 +3036,10 @@ def lgs(ir, instr, a, b): def lahf(ir, instr): e = [] - args = [] - regs = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1(0), af, + args = [cf, m2_expr.ExprInt1(1), pf, m2_expr.ExprInt1(0), af, m2_expr.ExprInt1(0), zf, nf] - for i in xrange(len(regs)): - args.append((regs[i], i, i + 1)) e.append( - m2_expr.ExprAff(mRAX[instr.mode][8:16], m2_expr.ExprCompose(args))) + m2_expr.ExprAff(mRAX[instr.mode][8:16], m2_expr.ExprCompose(*args))) return e, [] @@ -3128,11 +3088,9 @@ def l_str(ir, instr, a): def movd(ir, instr, a, b): e = [] if a in regs_mm_expr: - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32), - (m2_expr.ExprInt32(0), 32, 64)]))) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(b, m2_expr.ExprInt32(0)))) elif a in regs_xmm_expr: - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32), - (m2_expr.ExprInt(0, 96), 32, 128)]))) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(b, m2_expr.ExprInt(0, 96)))) else: e.append(m2_expr.ExprAff(a, b[:32])) return e, [] @@ -3187,8 +3145,7 @@ def wrmsr(ir, instr): msr_addr = m2_expr.ExprId('MSR') + m2_expr.ExprInt32( 8) * mRCX[instr.mode][:32] e = [] - src = m2_expr.ExprCompose([(mRAX[instr.mode][:32], 0, 32), - (mRDX[instr.mode][:32], 32, 64)]) + src = m2_expr.ExprCompose(mRAX[instr.mode][:32], mRDX[instr.mode][:32]) 
e.append(m2_expr.ExprAff(m2_expr.ExprMem(msr_addr, 64), src)) return e, [] @@ -3216,17 +3173,14 @@ def vec_vertical_sem(op, elt_size, reg_size, a, b): assert(reg_size % elt_size == 0) n = reg_size / elt_size if op == '-': - ops = [( + ops = [ (a[i * elt_size:(i + 1) * elt_size] - - b[i * elt_size:(i + 1) * elt_size]), - i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)] + - b[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)] else: - ops = [(m2_expr.ExprOp(op, a[i * elt_size:(i + 1) * elt_size], - b[i * elt_size:(i + 1) * elt_size]), - i * elt_size, - (i + 1) * elt_size) for i in xrange(0, n)] + ops = [m2_expr.ExprOp(op, a[i * elt_size:(i + 1) * elt_size], + b[i * elt_size:(i + 1) * elt_size]) for i in xrange(0, n)] - return m2_expr.ExprCompose(ops) + return m2_expr.ExprCompose(*ops) def float_vec_vertical_sem(op, elt_size, reg_size, a, b): @@ -3236,24 +3190,22 @@ def float_vec_vertical_sem(op, elt_size, reg_size, a, b): x_to_int, int_to_x = {32: ('float_to_int_%d', 'int_%d_to_float'), 64: ('double_to_int_%d', 'int_%d_to_double')}[elt_size] if op == '-': - ops = [(m2_expr.ExprOp(x_to_int % elt_size, - m2_expr.ExprOp(int_to_x % elt_size, a[i * elt_size:(i + 1) * elt_size]) - - m2_expr.ExprOp( - int_to_x % elt_size, b[i * elt_size:( - i + 1) * elt_size])), - i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)] + ops = [m2_expr.ExprOp(x_to_int % elt_size, + m2_expr.ExprOp(int_to_x % elt_size, a[i * elt_size:(i + 1) * elt_size]) - + m2_expr.ExprOp( + int_to_x % elt_size, b[i * elt_size:( + i + 1) * elt_size])) for i in xrange(0, n)] else: - ops = [(m2_expr.ExprOp(x_to_int % elt_size, - m2_expr.ExprOp(op, - m2_expr.ExprOp( - int_to_x % elt_size, a[i * elt_size:( - i + 1) * elt_size]), - m2_expr.ExprOp( - int_to_x % elt_size, b[i * elt_size:( - i + 1) * elt_size]))), - i * elt_size, (i + 1) * elt_size) for i in xrange(0, n)] + ops = [m2_expr.ExprOp(x_to_int % elt_size, + m2_expr.ExprOp(op, + m2_expr.ExprOp( + int_to_x % elt_size, a[i * elt_size:( + i + 1) 
* elt_size]), + m2_expr.ExprOp( + int_to_x % elt_size, b[i * elt_size:( + i + 1) * elt_size]))) for i in xrange(0, n)] - return m2_expr.ExprCompose(ops) + return m2_expr.ExprCompose(*ops) def __vec_vertical_instr_gen(op, elt_size, sem): @@ -3558,8 +3510,7 @@ def movss(ir, instr, a, b): e.append(m2_expr.ExprAff(a, b[:32])) else: # Source Mem Destination XMM - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose([(b, 0, 32), - (m2_expr.ExprInt(0, 96), 32, 128)]))) + e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(b, m2_expr.ExprInt(0, 96)))) return e, [] @@ -3624,19 +3575,18 @@ def ps_rl_ll(ir, instr, a, b, op, size): slices = [] for i in xrange(0, a.size, size): - slices.append((m2_expr.ExprOp(op, a[i:i + size], count[:size]), - i, i + size)) + slices.append(m2_expr.ExprOp(op, a[i:i + size], count[:size])) if isinstance(test, m2_expr.ExprInt): if int(test) == 0: - return [m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(slices))], [] + return [m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(*slices))], [] else: return [m2_expr.ExprAff(a, m2_expr.ExprInt(0, a.size))], [] e_zero = [m2_expr.ExprAff(a, m2_expr.ExprInt(0, a.size)), m2_expr.ExprAff(ir.IRDst, lbl_next)] e_do = [] - e.append(m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(slices))) + e.append(m2_expr.ExprAff(a[0:a.size], m2_expr.ExprCompose(*slices))) e_do.append(m2_expr.ExprAff(ir.IRDst, lbl_next)) return e, [irbloc(lbl_do.name, [e_do]), irbloc(lbl_zero.name, [e_zero])] @@ -3759,11 +3709,9 @@ def punpck(ir, instr, a, b, size, off): e = [] slices = [] for i in xrange(a.size / (2 * size)): - src1 = a[size * i + off: size * i + off + size] - src2 = b[size * i + off: size * i + off + size] - slices.append((src1, size * 2 * i, size * 2 * i + size)) - slices.append((src2, size * (2 * i + 1), size * (2 * i + 1) + size)) - e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(slices))) + slices.append(a[size * i + off: size * i + off + size]) + slices.append(b[size * i + off: size * i + off + size]) + 
e.append(m2_expr.ExprAff(a, m2_expr.ExprCompose(*slices))) return e, [] @@ -3861,36 +3809,28 @@ def pextrq(ir, instr, a, b, c): def unpckhps(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[64:96], 0, 32), - (b[64:96], 32, 64), - (a[96:128], 64, 96), - (b[96:128], 96, 128)]) + src = m2_expr.ExprCompose(a[64:96], b[64:96], a[96:128], b[96:128]) e.append(m2_expr.ExprAff(a, src)) return e, [] def unpckhpd(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[64:128], 0, 64), - (b[64:128], 64, 128)]) + src = m2_expr.ExprCompose(a[64:128], b[64:128]) e.append(m2_expr.ExprAff(a, src)) return e, [] def unpcklps(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[0:32], 0, 32), - (b[0:32], 32, 64), - (a[32:64], 64, 96), - (b[32:64], 96, 128)]) + src = m2_expr.ExprCompose(a[0:32], b[0:32], a[32:64], b[32:64]) e.append(m2_expr.ExprAff(a, src)) return e, [] def unpcklpd(ir, instr, a, b): e = [] - src = m2_expr.ExprCompose([(a[0:64], 0, 64), - (b[0:64], 64, 128)]) + src = m2_expr.ExprCompose(a[0:64], b[0:64]) e.append(m2_expr.ExprAff(a, src)) return e, [] @@ -3940,10 +3880,9 @@ def sqrt_gen(ir, instr, a, b, size): e = [] out = [] for i in b.size / size: - out.append((m2_expr.ExprOp('fsqrt' % size, - b[i * size: (i + 1) * size]), - i * size, (i + 1) * size)) - src = m2_expr.ExprCompose(out) + out.append(m2_expr.ExprOp('fsqrt' % size, + b[i * size: (i + 1) * size])) + src = m2_expr.ExprCompose(*out) e.append(m2_expr.ExprAff(a, src)) return e, [] @@ -3976,8 +3915,8 @@ def pmovmskb(ir, instr, a, b): e = [] out = [] for i in xrange(b.size / 8): - out.append((b[8 * i + 7:8 * (i + 1)], i, i + 1)) - src = m2_expr.ExprCompose(out) + out.append(b[8 * i + 7:8 * (i + 1)]) + src = m2_expr.ExprCompose(*out) e.append(m2_expr.ExprAff(a, src.zeroExtend(a.size))) return e, [] diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 7f80b64e..27401049 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -16,7 +16,7 @@ class 
MiasmTransformer(ast.NodeTransformer): X if Y else Z -> ExprCond(Y, X, Z) 'X'(Y) -> ExprOp('X', Y) ('X' % Y)(Z) -> ExprOp('X' % Y, Z) - {a, b} -> ExprCompose([(a, 0, a.size), (b, a.size, a.size + b.size)]) + {a, b} -> ExprCompose(((a, 0, a.size), (b, a.size, a.size + b.size))) """ # Parsers diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index d51f0bdb..d18bc751 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -322,12 +322,10 @@ class Expr(object): if self.size == size: return self ad_size = size - self.size - c = ExprCompose([(self, 0, self.size), - (ExprCond(self.msb(), - ExprInt(size2mask(ad_size), ad_size), - ExprInt(0, ad_size)), - self.size, size) - ]) + c = ExprCompose(self, + ExprCond(self.msb(), + ExprInt(size2mask(ad_size), ad_size), + ExprInt(0, ad_size))) return c def graph_recursive(self, graph): @@ -1056,25 +1054,33 @@ class ExprSlice(Expr): class ExprCompose(Expr): """ - Compose is like a hambuger. - It's arguments are tuple of: (Expression, start, stop) - start and stop are intergers, determining Expression position in the compose. - - Burger Example: - ExprCompose([(salad, 0, 3), (cheese, 3, 10), (beacon, 10, 16)]) - In the example, salad.size == 3. + Compose is like a hambuger. It concatenate Expressions """ __slots__ = Expr.__slots__ + ["__args"] - def __init__(self, args): + def __init__(self, *args): """Create an ExprCompose The ExprCompose is contiguous and starts at 0 - @args: tuple(Expr, int, int) + @args: [Expr, Expr, ...] + DEPRECATED: + @args: [(Expr, int, int), (Expr, int, int), ...] 
""" super(ExprCompose, self).__init__() + is_new_style = args and isinstance(args[0], Expr) + if is_new_style: + new_args = [] + index = 0 + for arg in args: + new_args.append((arg, index, index + arg.size)) + index += arg.size + args = new_args + else: + assert len(args) == 1 + args = args[0] + last_stop = 0 args = sorted(args, key=itemgetter(1)) for e, start, stop in args: @@ -1106,12 +1112,15 @@ class ExprCompose(Expr): def __setstate__(self, state): self.__init__(state) - def __new__(cls, args): + def __new__(cls, *args): + is_new_style = args and isinstance(args[0], Expr) + if not is_new_style: + assert len(args) == 1 + args = args[0] return Expr.get_object(cls, tuple(args)) def __str__(self): - return '{' + ', '.join(['%s,%d,%d' % - (str(arg[0]), arg[1], arg[2]) for arg in self.__args]) + '}' + return '{' + ', '.join([str(arg[0]) for arg in self.__args]) + '}' def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: @@ -1127,7 +1136,7 @@ class ExprCompose(Expr): return hash(tuple(h_args)) def _exprrepr(self): - return "%s(%r)" % (self.__class__.__name__, self.__args) + return "%s([%r])" % (self.__class__.__name__, self.__args) def __contains__(self, e): if self == e: diff --git a/miasm2/ir/translators/miasm.py b/miasm2/ir/translators/miasm.py index ef91cfb1..515148ee 100644 --- a/miasm2/ir/translators/miasm.py +++ b/miasm2/ir/translators/miasm.py @@ -27,9 +27,9 @@ class TranslatorMiasm(Translator): ", ".join(map(self.from_expr, expr.args))) def from_ExprCompose(self, expr): - args = ["(%s, %d, %d)" % (self.from_expr(arg), start, stop) - for arg, start, stop in expr.args] - return "ExprCompose([%s])" % ", ".join(args) + args = ["%s" % self.from_expr(arg) + for arg, _, _ in expr.args] + return "ExprCompose(%s)" % ", ".join(args) def from_ExprAff(self, expr): return "ExprAff(%s, %s)" % (self.from_expr(expr.dst), diff --git a/test/arch/arm/sem.py b/test/arch/arm/sem.py index cefbe76a..8fc609fb 100644 --- a/test/arch/arm/sem.py +++ 
b/test/arch/arm/sem.py @@ -285,7 +285,7 @@ class TestARMSemantic(unittest.TestCase): self.assertEqual(compute('AND R4, R4, R5 LSR 2 ', {R4: 0xFFFFFFFF, R5: 0x80000041, }), {R4: 0x20000010, R5: 0x80000041, }) self.assertEqual(compute('AND R4, R4, R5 ASR 3 ', {R4: 0xF00000FF, R5: 0x80000081, }), {R4: 0xF0000010, R5: 0x80000081, }) self.assertEqual(compute('AND R4, R4, R5 ROR 4 ', {R4: 0xFFFFFFFF, R5: 0x000000FF, }), {R4: 0xF000000F, R5: 0x000000FF, }) - self.assertEqual(compute('AND R4, R4, R5 RRX ', {R4: 0xFFFFFFFF, R5: 0x00000101, }), {R4: ExprCompose([(ExprInt(0x80, 31),0,31), (cf_init,31,32)]), R5: 0x00000101, }) + self.assertEqual(compute('AND R4, R4, R5 RRX ', {R4: 0xFFFFFFFF, R5: 0x00000101, }), {R4: ExprCompose(ExprInt(0x80, 31), cf_init), R5: 0x00000101, }) # §A8.8.15: AND{S}{}{} {,} , , self.assertEqual(compute('AND R4, R6, R4 LSL R5', {R4: 0x00000001, R5: 0x00000004, R6: -1, }), {R4: 0x00000010, R5: 0x00000004, R6: 0xFFFFFFFF, }) diff --git a/test/expression/expression.py b/test/expression/expression.py index 90236744..847ba7eb 100644 --- a/test/expression/expression.py +++ b/test/expression/expression.py @@ -30,10 +30,10 @@ for expr in [ A + cst1, A + ExprCond(cond1, cst1, cst2), ExprCond(cond1, cst1, cst2) + ExprCond(cond2, cst3, cst4), - ExprCompose([(A, 0, 32), (cst1, 32, 64)]), - ExprCompose([(ExprCond(cond1, cst1, cst2), 0, 32), (A, 32, 64)]), - ExprCompose([(ExprCond(cond1, cst1, cst2), 0, 32), - (ExprCond(cond2, cst3, cst4), 32, 64)]), + ExprCompose(A, cst1), + ExprCompose(ExprCond(cond1, cst1, cst2), A), + ExprCompose(ExprCond(cond1, cst1, cst2), + ExprCond(cond2, cst3, cst4)), ExprCond(ExprCond(cond1, cst1, cst2), cst3, cst4), ]: print "*" * 80 diff --git a/test/expression/expression_helper.py b/test/expression/expression_helper.py index 514a9a51..a3a8fba4 100644 --- a/test/expression/expression_helper.py +++ b/test/expression/expression_helper.py @@ -16,11 +16,10 @@ class TestExpressionExpressionHelper(unittest.TestCase): ebx = 
m2_expr.ExprId("EBX") ax = eax[0:16] expr = eax + ebx - expr = m2_expr.ExprCompose([(ax, 0, 16), (expr[16:32], 16, 32)]) + expr = m2_expr.ExprCompose(ax, expr[16:32]) expr2 = m2_expr.ExprMem((eax + ebx) ^ (eax), size=16) expr2 = expr2 | ax | expr2 | cst - exprf = expr - expr + m2_expr.ExprCompose([(expr2, 0, 16), - (cst, 16, 32)]) + exprf = expr - expr + m2_expr.ExprCompose(expr2, cst) # Identify variables vi = Variables_Identifier(exprf) diff --git a/test/expression/simplifications.py b/test/expression/simplifications.py index 99cc7c35..bf658a30 100644 --- a/test/expression/simplifications.py +++ b/test/expression/simplifications.py @@ -24,11 +24,9 @@ i2 = ExprInt(uint32(0x2)) icustom = ExprInt(uint32(0x12345678)) cc = ExprCond(a, b, c) -o = ExprCompose([(a[:8], 8, 16), - (a[8:16], 0, 8)]) +o = ExprCompose(a[8:16], a[:8]) -o2 = ExprCompose([(a[8:16], 0, 8), - (a[:8], 8, 16)]) +o2 = ExprCompose(a[8:16], a[:8]) l = [a[:8], b[:8], c[:8], m[:8], s, i1[:8], i2[:8], o[:8]] l2 = l[::-1] @@ -93,11 +91,11 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), (a[8:16][:8], a[8:16]), (a[8:32][:8], a[8:16]), (a[:16][8:16], a[8:16]), - (ExprCompose([(a, 0, 32)]), a), - (ExprCompose([(a[:16], 0, 16)]), a[:16]), - (ExprCompose([(a[:16], 0, 16), (a[:16], 16, 32)]), - ExprCompose([(a[:16], 0, 16), (a[:16], 16, 32)]),), - (ExprCompose([(a[:16], 0, 16), (a[16:32], 16, 32)]), a), + (ExprCompose(a), a), + (ExprCompose(a[:16]), a[:16]), + (ExprCompose(a[:16], a[:16]), + ExprCompose(a[:16], a[:16]),), + (ExprCompose(a[:16], a[16:32]), a), (ExprMem(a)[:32], ExprMem(a)), (ExprMem(a)[:16], ExprMem(a, size=16)), @@ -106,14 +104,12 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), (ExprCond(ExprInt32(0), b, a), a), (ExprInt32(0x80000000)[31:32], ExprInt1(1)), - (ExprCompose([ - (ExprInt16(0x1337)[ - :8], 0, 8), (ExprInt16(0x1337)[8:16], 8, 16)]), + (ExprCompose(ExprInt16(0x1337)[:8], ExprInt16(0x1337)[8:16]), ExprInt16(0x1337)), - (ExprCompose([(ExprInt32(0x1337beef)[8:16], 8, 
16), - (ExprInt32(0x1337beef)[:8], 0, 8), - (ExprInt32(0x1337beef)[16:32], 16, 32)]), + (ExprCompose(ExprInt32(0x1337beef)[:8], + ExprInt32(0x1337beef)[8:16], + ExprInt32(0x1337beef)[16:32]), ExprInt32(0x1337BEEF)), (ExprCond(a, ExprCond(a, @@ -122,9 +118,9 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), d), ExprCond(a, b, d)), ((a & b & ExprInt32(0x12))[31:32], ExprInt1(0)), - (ExprCompose([ - (ExprCond(a, ExprInt16(0x10), ExprInt16(0x20)), 0, 16), - (ExprInt16(0x1337), 16, 32)]), + (ExprCompose( + ExprCond(a, ExprInt16(0x10), ExprInt16(0x20)), + ExprInt16(0x1337)), ExprCond(a, ExprInt32(0x13370010), ExprInt32(0x13370020))), (ExprCond(ExprCond(a, ExprInt1(0), ExprInt1(1)), b, c), ExprCond(a, c, b)), @@ -167,103 +163,99 @@ to_test = [(ExprInt32(1) - ExprInt32(1), ExprInt32(0)), (ExprOp('-', ExprInt8(1), ExprInt8(0)), ExprInt8(1)), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x20), - ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x10), - ExprCompose([(ExprInt16(0), 0, 16), (a, 16, 48), (ExprInt16(0), 48, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x30), - ExprCompose([(ExprInt(0, 48), 0, 48), (a[:0x10], 48, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x11), - ExprCompose([(ExprInt(0, 0x11), 0, 0x11), (a, 0x11, 0x31), (ExprInt(0, 0xF), 0x31, 0x40)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) << ExprInt64(0x40), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x20), + ExprCompose(ExprInt32(0), a)), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x10), + ExprCompose(ExprInt16(0), a, ExprInt16(0))), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x30), + ExprCompose(ExprInt(0, 48), a[:0x10])), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x11), + ExprCompose(ExprInt(0, 0x11), a, ExprInt(0, 0xF))), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x40), ExprInt64(0)), - (ExprCompose([(a, 0, 32), 
(ExprInt32(0), 32, 64)]) << ExprInt64(0x50), + (ExprCompose(a, ExprInt32(0)) << ExprInt64(0x50), ExprInt64(0)), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x20), - ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x10), - ExprCompose([(ExprInt16(0), 0, 16), (a, 16, 48), (ExprInt16(0), 48, 64)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x30), - ExprCompose([(a[0x10:], 0, 16), (ExprInt(0, 48), 16, 64)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x11), - ExprCompose([(ExprInt(0, 0xf), 0, 0xf), (a, 0xf, 0x2f), (ExprInt(0, 0x11), 0x2f, 0x40)])), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x40), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x20), + ExprCompose(a, ExprInt32(0))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x10), + ExprCompose(ExprInt16(0), a, ExprInt16(0))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x30), + ExprCompose(a[0x10:], ExprInt(0, 48))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x11), + ExprCompose(ExprInt(0, 0xf), a, ExprInt(0, 0x11))), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x40), ExprInt64(0)), - (ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)]) >> ExprInt64(0x50), + (ExprCompose(ExprInt32(0), a) >> ExprInt64(0x50), ExprInt64(0)), - (ExprCompose([(a, 0, 32), (b, 32, 64)]) << ExprInt64(0x20), - ExprCompose([(ExprInt32(0), 0, 32), (a, 32, 64)])), - (ExprCompose([(a, 0, 32), (b, 32, 64)]) << ExprInt64(0x10), - ExprCompose([(ExprInt16(0), 0, 16), (a, 16, 48), (b[:16], 48, 64)])), + (ExprCompose(a, b) << ExprInt64(0x20), + ExprCompose(ExprInt32(0), a)), + (ExprCompose(a, b) << ExprInt64(0x10), + ExprCompose(ExprInt16(0), a, b[:16])), - (ExprCompose([(a, 0, 32), (b, 32, 64)]) | ExprCompose([(c, 0, 32), (d, 32, 64)]), - ExprCompose([(a|c, 0, 32), (b|d, 32, 64)])), - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) | ExprCompose([(ExprInt32(0), 0, 32), (d, 32, 64)]), 
- ExprCompose([(a, 0, 32), (d, 32, 64)])), - (ExprCompose([(f[:32], 0, 32), (ExprInt32(0), 32, 64)]) | ExprCompose([(ExprInt32(0), 0, 32), (f[32:], 32, 64)]), + (ExprCompose(a, b) | ExprCompose(c, d), + ExprCompose(a|c, b|d)), + (ExprCompose(a, ExprInt32(0)) | ExprCompose(ExprInt32(0), d), + ExprCompose(a, d)), + (ExprCompose(f[:32], ExprInt32(0)) | ExprCompose(ExprInt32(0), f[32:]), f), - ((ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) * ExprInt64(0x123))[32:64], - (ExprCompose([(a, 0, 32), (ExprInt32(0), 32, 64)]) * ExprInt64(0x123))[32:64]), + ((ExprCompose(a, ExprInt32(0)) * ExprInt64(0x123))[32:64], + (ExprCompose(a, ExprInt32(0)) * ExprInt64(0x123))[32:64]), (ExprInt32(0x12), ExprInt32(0x12L)), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[:16], + (ExprCompose(a, b, c)[:16], a[:16]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[16:32], + (ExprCompose(a, b, c)[16:32], a[16:]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[32:48], + (ExprCompose(a, b, c)[32:48], b[:16]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[48:64], + (ExprCompose(a, b, c)[48:64], b[16:]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[64:80], + (ExprCompose(a, b, c)[64:80], c[:16]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[80:], + (ExprCompose(a, b, c)[80:], c[16:]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[80:82], + (ExprCompose(a, b, c)[80:82], c[16:18]), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[16:48], - ExprCompose(((a[16:], 0, 16), (b[:16], 16, 32)))), - (ExprCompose(((a, 0, 32), (b, 32, 64), (c, 64, 96)))[48:80], - ExprCompose(((b[16:], 0, 16), (c[:16], 16, 32)))), - - (ExprCompose(((a[0:8], 0, 8), - (b[8:16], 8, 16), - (ExprInt(uint48(0x0L)), 16, 64)))[12:32], - ExprCompose(((b[12:16], 0, 4), (ExprInt(uint16(0)), 4, 20))) + (ExprCompose(a, b, c)[16:48], + ExprCompose(a[16:], b[:16])), + (ExprCompose(a, b, c)[48:80], + ExprCompose(b[16:], c[:16])), + + (ExprCompose(a[0:8], b[8:16], 
ExprInt(uint48(0x0L)))[12:32], + ExprCompose(b[12:16], ExprInt(uint16(0))) ), - (ExprCompose(((ExprCompose(((a[:8], 0, 8), - (ExprInt(uint56(0x0L)), 8, 64)))[8:32] - & - ExprInt(uint24(0x1L)), 0, 24), - (ExprInt(uint40(0x0L)), 24, 64))), + (ExprCompose(ExprCompose(a[:8], ExprInt(uint56(0x0L)))[8:32] + & + ExprInt(uint24(0x1L)), + ExprInt(uint40(0x0L))), ExprInt64(0)), - (ExprCompose(((ExprCompose(((a[:8], 0, 8), - (ExprInt(uint56(0x0L)), 8, 64)))[:8] - & - ExprInt(uint8(0x1L)), 0, 8), - (ExprInt(uint56(0x0L)), 8, 64))), - ExprCompose(((a[:8]&ExprInt8(1), 0, 8), (ExprInt(uint56(0)), 8, 64)))), - - (ExprCompose(((ExprCompose(((a[:8], 0, 8), - (ExprInt(uint56(0x0L)), 8, 64)))[:32] - & - ExprInt(uint32(0x1L)), 0, 32), - (ExprInt(uint32(0x0L)), 32, 64))), - ExprCompose(((ExprCompose(((ExprSlice(a, 0, 8), 0, 8), - (ExprInt(uint24(0x0L)), 8, 32))) - & - ExprInt(uint32(0x1L)), 0, 32), - (ExprInt(uint32(0x0L)), 32, 64))) + (ExprCompose(ExprCompose(a[:8], ExprInt(uint56(0x0L)))[:8] + & + ExprInt(uint8(0x1L)), + (ExprInt(uint56(0x0L)))), + ExprCompose(a[:8]&ExprInt8(1), ExprInt(uint56(0)))), + + (ExprCompose(ExprCompose(a[:8], + ExprInt(uint56(0x0L)))[:32] + & + ExprInt(uint32(0x1L)), + ExprInt(uint32(0x0L))), + ExprCompose(ExprCompose(ExprSlice(a, 0, 8), + ExprInt(uint24(0x0L))) + & + ExprInt(uint32(0x1L)), + ExprInt(uint32(0x0L))) ), - (ExprCompose([(a[:16], 0, 16), (b[:16], 16, 32)])[8:32], - ExprCompose([(a[8:16], 0, 8), (b[:16], 8, 24)])), + (ExprCompose(a[:16], b[:16])[8:32], + ExprCompose(a[8:16], b[:16])), ((a >> ExprInt32(16))[:16], a[16:32]), ((a >> ExprInt32(16))[8:16], @@ -410,10 +402,10 @@ match_tests = [ (MatchExpr(ExprCond(x, y, z), ExprCond(a, b, c), [a, b, c]), {a: x, b: y, c: z}), - (MatchExpr(ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)]), a, [a]), - {a: ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)])}), - (MatchExpr(ExprCompose([(x[:8], 0, 8), (y[:8], 8, 16)]), - ExprCompose([(a[:8], 0, 8), (b[:8], 8, 16)]), [a, b]), + (MatchExpr(ExprCompose(x[:8], y[:8]), a, 
[a]), + {a: ExprCompose(x[:8], y[:8])}), + (MatchExpr(ExprCompose(x[:8], y[:8]), + ExprCompose(a[:8], b[:8]), [a, b]), {a: x, b: y}), (MatchExpr(e1, e2, [b]), {b: ExprInt32(0x10)}), (MatchExpr(e3, diff --git a/test/ir/symbexec.py b/test/ir/symbexec.py index 19cc47db..6df0bbc3 100644 --- a/test/ir/symbexec.py +++ b/test/ir/symbexec.py @@ -43,12 +43,12 @@ class TestSymbExec(unittest.TestCase): self.assertEqual(e.eval_expr(ExprMem(addr1 - addr1)), id_x) self.assertEqual(e.eval_expr(ExprMem(addr1, 8)), id_y) self.assertEqual(e.eval_expr(ExprMem(addr1 + addr1)), ExprCompose( - [(id_x[16:32], 0, 16), (ExprMem(ExprInt32(4), 16), 16, 32)])) + id_x[16:32], ExprMem(ExprInt32(4), 16))) self.assertEqual(e.eval_expr(mem8), ExprCompose( - [(id_x[0:24], 0, 24), (ExprMem(ExprInt32(11), 8), 24, 32)])) + id_x[0:24], ExprMem(ExprInt32(11), 8))) self.assertEqual(e.eval_expr(mem40v), id_x[:8]) self.assertEqual(e.eval_expr(mem50w), ExprCompose( - [(id_y, 0, 8), (ExprMem(ExprInt32(51), 8), 8, 16)])) + id_y, ExprMem(ExprInt32(51), 8))) self.assertEqual(e.eval_expr(mem20), mem20) e.func_read = lambda x: x self.assertEqual(e.eval_expr(mem20), mem20) diff --git a/test/ir/translators/z3_ir.py b/test/ir/translators/z3_ir.py index e080c7f5..5fcfe25e 100644 --- a/test/ir/translators/z3_ir.py +++ b/test/ir/translators/z3_ir.py @@ -114,7 +114,7 @@ check_interp(model[memb.get_mem_array(32)], [(0xdeadbeef, 0), (0xdeadbeef + 3, 2)]) # -------------------------------------------------------------------------- -e5 = ExprSlice(ExprCompose(((e, 0, 32), (four, 32, 64))), 0, 32) * five +e5 = ExprSlice(ExprCompose(e, four), 0, 32) * five ez3 = Translator.to_language('z3').from_expr(e5) z3_e5 = z3.Extract(31, 0, z3.Concat(z3_four, z3_e)) * z3_five -- cgit 1.4.1 From 05bdb3651796525100a5cbe160e2f8ad93c80316 Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Fri, 21 Oct 2016 23:35:25 +0200 Subject: ExprCompose: update api --- miasm2/core/sembuilder.py | 15 ++---------- miasm2/expression/expression.py | 20 
+++++++++------ miasm2/expression/expression_helper.py | 22 ++++++----------- miasm2/expression/simplifications_common.py | 38 +++++++++++++++++++++++------ miasm2/ir/ir.py | 10 ++++++-- miasm2/ir/symbexec.py | 11 ++++++--- 6 files changed, 69 insertions(+), 47 deletions(-) (limited to 'miasm2/expression/expression.py') diff --git a/miasm2/core/sembuilder.py b/miasm2/core/sembuilder.py index 27401049..6ff390bb 100644 --- a/miasm2/core/sembuilder.py +++ b/miasm2/core/sembuilder.py @@ -95,27 +95,16 @@ class MiasmTransformer(ast.NodeTransformer): return call def visit_Set(self, node): - "{a, b} -> ExprCompose([(a, 0, a.size)], (b, a.size, a.size + b.size)])" + "{a, b} -> ExprCompose(a, b)" if len(node.elts) == 0: return node # Recursive visit node = self.generic_visit(node) - new_elts = [] - index = ast.Num(n=0) - for elt in node.elts: - new_index = ast.BinOp(op=ast.Add(), left=index, - right=ast.Attribute(value=elt, - attr='size', - ctx=ast.Load())) - new_elts.append(ast.Tuple(elts=[elt, index, new_index], - ctx=ast.Load())) - index = new_index return ast.Call(func=ast.Name(id='ExprCompose', ctx=ast.Load()), - args=[ast.List(elts=new_elts, - ctx=ast.Load())], + args=node.elts, keywords=[], starargs=None, kwargs=None) diff --git a/miasm2/expression/expression.py b/miasm2/expression/expression.py index d18bc751..cb9f6114 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -289,7 +289,10 @@ class Expr(object): else: new_e = e elif isinstance(e, ExprCompose): - new_e = ExprCompose(canonize_expr_list_compose(e.args)) + starts = [start for (_, start, _) in e.args] + assert sorted(starts) == starts + assert len(set(starts)) == len(starts) + new_e = e else: new_e = e new_e.is_canon = True @@ -311,8 +314,7 @@ class Expr(object): return self ad_size = size - self.size n = ExprInt(0, ad_size) - return ExprCompose([(self, 0, self.size), - (n, self.size, size)]) + return ExprCompose(self, n) def signExtend(self, size): """Sign extend to size @@ 
-542,7 +544,8 @@ class ExprAff(Expr): for r in dst.slice_rest()] all_a = [(src, dst.start, dst.stop)] + rest all_a.sort(key=lambda x: x[1]) - self.__src = ExprCompose(all_a) + args = [expr for (expr, _, _) in all_a] + self.__src = ExprCompose(*args) else: self.__dst, self.__src = dst, src @@ -1078,6 +1081,8 @@ class ExprCompose(Expr): index += arg.size args = new_args else: + warnings.warn('DEPRECATION WARNING: use "ExprCompose(a, b) instead of'+ + 'ExprCemul_ir_block(self, addr, step=False)" instead of emul_ir_bloc') assert len(args) == 1 args = args[0] @@ -1153,12 +1158,13 @@ class ExprCompose(Expr): args = [(arg[0].visit(cb, tv), arg[1], arg[2]) for arg in self.__args] modified = any([arg[0] != arg[1] for arg in zip(self.__args, args)]) if modified: - return ExprCompose(args) + args = [expr for (expr, _, _) in args] + return ExprCompose(*args) return self def copy(self): - args = [(arg[0].copy(), arg[1], arg[2]) for arg in self.__args] - return ExprCompose(args) + args = [arg[0].copy() for arg in self.__args] + return ExprCompose(*args) def depth(self): depth = [arg[0].depth() for arg in self.__args] diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index 178ee25f..e9176658 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -449,21 +449,19 @@ class ExprRandom(object): """ # First layer upper_bound = random.randint(1, size) - args = [(cls._gen(size=upper_bound, depth=depth - 1), 0, upper_bound)] + args = [cls._gen(size=upper_bound, depth=depth - 1)] # Next layers while (upper_bound < size): if len(args) == (cls.compose_max_layer - 1): # We reach the maximum size - upper_bound = size + new_upper_bound = size else: - upper_bound = random.randint(args[-1][-1] + 1, size) + new_upper_bound = random.randint(upper_bound + 1, size) - args.append((cls._gen(size=upper_bound - args[-1][-1]), - args[-1][-1], - upper_bound)) - - return m2_expr.ExprCompose(args) + 
args.append(cls._gen(size=new_upper_bound - upper_bound)) + upper_bound = new_upper_bound + return m2_expr.ExprCompose(*args) @classmethod def memory(cls, size=32, depth=1): @@ -656,14 +654,10 @@ def possible_values(expr): args_constraint = itertools.chain(*[consval[0].constraints for consval in consvals_possibility]) # Gen the corresponding constraints / ExprCompose + args = [consval[0].value for consval in consvals_possibility] consvals.add( ConstrainedValue(frozenset(args_constraint), - m2_expr.ExprCompose( - [(consval[0].value, - consval[1], - consval[2]) - for consval in consvals_possibility] - ))) + m2_expr.ExprCompose(*args))) else: raise RuntimeError("Unsupported type for expr: %s" % type(expr)) diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py index 49dfbcc0..4b88f8c2 100644 --- a/miasm2/expression/simplifications_common.py +++ b/miasm2/expression/simplifications_common.py @@ -286,7 +286,11 @@ def simp_cst_propagation(e_s, e): # create entry 0 expr = ExprInt(0, min_index) filter_args = [(expr, 0, min_index)] + filter_args - return ExprCompose(filter_args) + filter_args.sort(key=lambda x:x[1]) + starts = [start for (_, start, _) in filter_args] + assert len(set(starts)) == len(starts) + args = [expr for (expr, _, _) in filter_args] + return ExprCompose(*args) # A >> int with A ExprCompose => move index if op == ">>" and isinstance(args[0], ExprCompose) and isinstance(args[1], ExprInt): @@ -310,7 +314,11 @@ def simp_cst_propagation(e_s, e): # create entry 0 expr = ExprInt(0, final_size - max_index) filter_args += [(expr, max_index, final_size)] - return ExprCompose(filter_args) + filter_args.sort(key=lambda x:x[1]) + starts = [start for (_, start, _) in filter_args] + assert len(set(starts)) == len(starts) + args = [expr for (expr, _, _) in filter_args] + return ExprCompose(*args) # Compose(a) OP Compose(b) with a/b same bounds => Compose(a OP b) @@ -327,7 +335,13 @@ def simp_cst_propagation(e_s, e): 
new_args[i].append(expr) for i, arg in enumerate(new_args): new_args[i] = ExprOp(op, *arg), bound[i][0], bound[i][1] - return ExprCompose(new_args) + + new_args.sort(key=lambda x:x[1]) + starts = [start for (_, start, _) in new_args] + assert len(set(starts)) == len(starts) + args = [expr for (expr, _, _) in new_args] + + return ExprCompose(*args) # <<>>c_rez if op in [">>>c_rez", "<< ExprMem(x, a) # XXXX todo hum, is it safe? @@ -590,11 +610,15 @@ def simp_compose(e_s, e): else: src1.append(a) src2.append(a) - src1 = e_s.apply_simp(ExprCompose(src1)) - src2 = e_s.apply_simp(ExprCompose(src2)) + src1 = [expr for (expr, _, _) in src1] + src2 = [expr for (expr, _, _) in src2] + src1 = e_s.apply_simp(ExprCompose(*src1)) + src2 = e_s.apply_simp(ExprCompose(*src2)) if isinstance(src1, ExprInt) and isinstance(src2, ExprInt): return ExprCond(cond.cond, src1, src2) - return ExprCompose(args) + args.sort(key=lambda x:x[1]) + args = [expr for (expr, _, _) in args] + return ExprCompose(*args) def simp_cond(e_s, e): diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 3a841fa5..2c6300a9 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -59,7 +59,8 @@ class AssignBlock(dict): for r in dst.slice_rest()] all_a = [(src, dst.start, dst.stop)] + rest all_a.sort(key=lambda x: x[1]) - new_src = m2_expr.ExprCompose(all_a) + args = [expr for (expr, _, _) in all_a] + new_src = m2_expr.ExprCompose(*args) else: new_dst, new_src = dst, src @@ -95,7 +96,12 @@ class AssignBlock(dict): for interval in missing_i) # Build the merging expression - new_src = m2_expr.ExprCompose(e_colision.union(remaining)) + args = list(e_colision.union(remaining)) + args.sort(key=lambda x:x[1]) + starts = [start for (_, start, _) in args] + assert len(set(starts)) == len(starts) + args = [expr for (expr, _, _) in args] + new_src = m2_expr.ExprCompose(*args) super(AssignBlock, self).__setitem__(new_dst, new_src) diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 2a0b19ca..65515c64 100644 --- 
a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -148,7 +148,8 @@ class symbexec(object): mem = m2_expr.ExprMem(ptr, slice_stop - slice_start) out.append((mem, slice_start, slice_stop)) out.sort(key=lambda x: x[1]) - tmp = m2_expr.ExprSlice(m2_expr.ExprCompose(out), 0, size) + args = [expr for (expr, _, _) in out] + tmp = m2_expr.ExprSlice(m2_expr.ExprCompose(*args), 0, size) tmp = self.expr_simp(tmp) return tmp @@ -179,7 +180,9 @@ class symbexec(object): ptr_index += diff_size rest -= diff_size ptr = self.expr_simp(ptr + m2_expr.ExprInt(mem.size / 8, ptr.size)) - ret = self.expr_simp(m2_expr.ExprCompose(out)) + out.sort(key=lambda x: x[1]) + args = [expr for (expr, _, _) in out] + ret = self.expr_simp(m2_expr.ExprCompose(*args)) return ret # part lookup ret = self.expr_simp(self.symbols[ret][:size]) @@ -228,8 +231,8 @@ class symbexec(object): args = [] for (arg, start, stop) in expr.args: arg = self.apply_expr_on_state_visit_cache(arg, state, cache, level+1) - args.append((arg, start, stop)) - ret = m2_expr.ExprCompose(args) + args.append(arg) + ret = m2_expr.ExprCompose(*args) else: raise TypeError("Unknown expr type") #print '\t'*level, "Result", ret -- cgit 1.4.1 From f0fbc59a663774dc4f4861308bee3f91ccd9746d Mon Sep 17 00:00:00 2001 From: Fabrice Desclaux Date: Sat, 22 Oct 2016 23:57:43 +0200 Subject: ExprCompose: updt internal struct --- miasm2/expression/expression.py | 92 ++++----------- miasm2/expression/expression_helper.py | 166 ++++++++++++---------------- miasm2/expression/simplifications.py | 1 + miasm2/expression/simplifications_common.py | 149 ++++++++++--------------- miasm2/ir/ir.py | 15 ++- miasm2/ir/symbexec.py | 6 +- miasm2/ir/translators/C.py | 8 +- miasm2/ir/translators/miasm.py | 3 +- miasm2/ir/translators/python.py | 8 +- miasm2/ir/translators/smt2.py | 6 +- miasm2/ir/translators/z3_ir.py | 6 +- 11 files changed, 174 insertions(+), 286 deletions(-) (limited to 'miasm2/expression/expression.py') diff --git 
a/miasm2/expression/expression.py b/miasm2/expression/expression.py index cb9f6114..324d5fea 100644 --- a/miasm2/expression/expression.py +++ b/miasm2/expression/expression.py @@ -288,11 +288,6 @@ class Expr(object): new_e = ExprOp(e.op, *args) else: new_e = e - elif isinstance(e, ExprCompose): - starts = [start for (_, start, _) in e.args] - assert sorted(starts) == starts - assert len(set(starts)) == len(starts) - new_e = e else: new_e = e new_e.is_canon = True @@ -591,24 +586,6 @@ class ExprAff(Expr): self.__src.__contains__(expr) or self.__dst.__contains__(expr)) - # XXX /!\ for hackish expraff to slice - def get_modified_slice(self): - """Return an Expr list of extra expressions needed during the - object instanciation""" - - dst = self.__dst - if not isinstance(self.__src, ExprCompose): - raise ValueError("Get mod slice not on expraff slice", str(self)) - modified_s = [] - for arg in self.__src.args: - if (not isinstance(arg[0], ExprSlice) or - arg[0].arg != dst or - arg[1] != arg[0].start or - arg[2] != arg[0].stop): - # If x is not the initial expression - modified_s.append(arg) - return modified_s - @visit_chk def visit(self, cb, tv=None): dst, src = self.__dst.visit(cb, tv), self.__src.visit(cb, tv) @@ -1073,40 +1050,12 @@ class ExprCompose(Expr): super(ExprCompose, self).__init__() is_new_style = args and isinstance(args[0], Expr) - if is_new_style: - new_args = [] - index = 0 - for arg in args: - new_args.append((arg, index, index + arg.size)) - index += arg.size - args = new_args - else: + if not is_new_style: warnings.warn('DEPRECATION WARNING: use "ExprCompose(a, b) instead of'+ 'ExprCemul_ir_block(self, addr, step=False)" instead of emul_ir_bloc') - assert len(args) == 1 - args = args[0] - last_stop = 0 - args = sorted(args, key=itemgetter(1)) - for e, start, stop in args: - if e.size != stop - start: - raise ValueError( - "sanitycheck: ExprCompose args must have correct size!" 
+ - " %r %r %r" % (e, e.size, stop - start)) - if last_stop != start: - raise ValueError( - "sanitycheck: ExprCompose args must be contiguous!" + - " %r" % (args)) - last_stop = stop - - # Transform args to lists - o = [] - for e, a, b in args: - assert(a >= 0 and b >= 0) - o.append(tuple([e, a, b])) - self.__args = tuple(o) - - self.__size = self.__args[-1][2] + self.__args = tuple(args) + self.__size = sum([arg.size for arg in args]) size = property(lambda self: self.__size) args = property(lambda self: self.__args) @@ -1125,19 +1074,18 @@ class ExprCompose(Expr): return Expr.get_object(cls, tuple(args)) def __str__(self): - return '{' + ', '.join([str(arg[0]) for arg in self.__args]) + '}' + return '{' + ', '.join([str(arg) for arg in self.__args]) + '}' def get_r(self, mem_read=False, cst_read=False): return reduce(lambda elements, arg: - elements.union(arg[0].get_r(mem_read, cst_read)), self.__args, set()) + elements.union(arg.get_r(mem_read, cst_read)), self.__args, set()) def get_w(self): return reduce(lambda elements, arg: - elements.union(arg[0].get_w()), self.__args, set()) + elements.union(arg.get_w()), self.__args, set()) def _exprhash(self): - h_args = [EXPRCOMPOSE] + [(hash(arg[0]), arg[1], arg[2]) - for arg in self.__args] + h_args = [EXPRCOMPOSE] + [hash(arg) for arg in self.__args] return hash(tuple(h_args)) def _exprrepr(self): @@ -1149,33 +1097,37 @@ class ExprCompose(Expr): for arg in self.__args: if arg == e: return True - if arg[0].__contains__(e): + if arg.__contains__(e): return True return False @visit_chk def visit(self, cb, tv=None): - args = [(arg[0].visit(cb, tv), arg[1], arg[2]) for arg in self.__args] - modified = any([arg[0] != arg[1] for arg in zip(self.__args, args)]) + args = [arg.visit(cb, tv) for arg in self.__args] + modified = any([arg != arg_new for arg, arg_new in zip(self.__args, args)]) if modified: - args = [expr for (expr, _, _) in args] return ExprCompose(*args) return self def copy(self): - args = [arg[0].copy() for 
arg in self.__args] + args = [arg.copy() for arg in self.__args] return ExprCompose(*args) def depth(self): - depth = [arg[0].depth() for arg in self.__args] + depth = [arg.depth() for arg in self.__args] return max(depth) + 1 def graph_recursive(self, graph): graph.add_node(self) for arg in self.args: - arg[0].graph_recursive(graph) - graph.add_uniq_edge(self, arg[0]) + arg.graph_recursive(graph) + graph.add_uniq_edge(self, arg) + def iter_args(self): + index = 0 + for arg in self.__args: + yield index, arg + index += arg.size # Expression order for comparaison expr_order_dict = {ExprId: 1, @@ -1203,7 +1155,7 @@ def compare_exprs_compose(e1, e2): def compare_expr_list_compose(l1_e, l2_e): # Sort by list elements in incremental order, then by list size for i in xrange(min(len(l1_e), len(l2_e))): - x = compare_exprs_compose(l1_e[i], l2_e[i]) + x = compare_exprs(l1_e[i], l2_e[i]) if x: return x return cmp(len(l1_e), len(l2_e)) @@ -1434,9 +1386,7 @@ def MatchExpr(e, m, tks, result=None): if not isinstance(m, ExprCompose): return False for a1, a2 in zip(e.args, m.args): - if a1[1] != a2[1] or a1[2] != a2[2]: - return False - r = MatchExpr(a1[0], a2[0], tks, result) + r = MatchExpr(a1, a2, tks, result) if r is False: return False return result diff --git a/miasm2/expression/expression_helper.py b/miasm2/expression/expression_helper.py index e9176658..8babba70 100644 --- a/miasm2/expression/expression_helper.py +++ b/miasm2/expression/expression_helper.py @@ -34,103 +34,76 @@ def parity(a): return cpt -def merge_sliceto_slice(args): - sources = {} - non_slice = {} - sources_int = {} - for a in args: - if isinstance(a[0], m2_expr.ExprInt): - # sources_int[a.start] = a - # copy ExprInt because we will inplace modify arg just below - # /!\ TODO XXX never ever modify inplace args... 
- sources_int[a[1]] = (m2_expr.ExprInt(int(a[0]), - a[2] - a[1]), - a[1], - a[2]) - elif isinstance(a[0], m2_expr.ExprSlice): - if not a[0].arg in sources: - sources[a[0].arg] = [] - sources[a[0].arg].append(a) +def merge_sliceto_slice(expr): + """ + Apply basic factorisation on ExprCompose sub compoenents + @expr: ExprCompose + """ + + slices_raw = [] + other_raw = [] + integers_raw = [] + for index, arg in expr.iter_args(): + if isinstance(arg, m2_expr.ExprInt): + integers_raw.append((index, arg)) + elif isinstance(arg, m2_expr.ExprSlice): + slices_raw.append((index, arg)) else: - non_slice[a[1]] = a - # find max stop to determine size - max_size = None - for a in args: - if max_size is None or max_size < a[2]: - max_size = a[2] - - # first simplify all num slices - final_sources = [] - sorted_s = [] - for x in sources_int.values(): - x = list(x) - # mask int - v = x[0].arg & ((1 << (x[2] - x[1])) - 1) - x[0] = m2_expr.ExprInt_from(x[0], v) - x = tuple(x) - sorted_s.append((x[1], x)) - sorted_s.sort() - while sorted_s: - start, v = sorted_s.pop() - out = [m2_expr.ExprInt(v[0].arg), v[1], v[2]] - size = v[2] - v[1] - while sorted_s: - if sorted_s[-1][1][2] != start: + other_raw.append((index, arg)) + + # Find max stop to determine size + max_size = sum([arg.size for arg in expr.args]) + + integers_merged = [] + # Merge consecutive integers + while integers_raw: + index, arg = integers_raw.pop() + new_size = arg.size + value = int(arg) + while integers_raw: + prev_index, prev_value = integers_raw[-1] + # Check if intergers are consecutive + if prev_index + prev_value.size != index: break - s_start, s_stop = sorted_s[-1][1][1], sorted_s[-1][1][2] - size += s_stop - s_start - a = m2_expr.mod_size2uint[size]( - (int(out[0]) << (out[1] - s_start)) + - int(sorted_s[-1][1][0])) - out[0] = m2_expr.ExprInt(a) - sorted_s.pop() - out[1] = s_start - out[0] = m2_expr.ExprInt(int(out[0]), size) - final_sources.append((start, tuple(out))) - - final_sources_int = final_sources - 
# check if same sources have corresponding start/stop - # is slice AND is sliceto - simp_sources = [] - for args in sources.values(): - final_sources = [] - sorted_s = [] - for x in args: - sorted_s.append((x[1], x)) - sorted_s.sort() - while sorted_s: - start, v = sorted_s.pop() - ee = v[0].arg[v[0].start:v[0].stop] - out = ee, v[1], v[2] - while sorted_s: - if sorted_s[-1][1][2] != start: - break - if sorted_s[-1][1][0].stop != out[0].start: - break - - start = sorted_s[-1][1][1] - # out[0].start = sorted_s[-1][1][0].start - o_e, _, o_stop = out - o1, o2 = sorted_s[-1][1][0].start, o_e.stop - o_e = o_e.arg[o1:o2] - out = o_e, start, o_stop - # update _size - # out[0]._size = out[0].stop-out[0].start - sorted_s.pop() - out = out[0], start, out[2] - - final_sources.append((start, out)) + # Merge integers + index = prev_index + new_size += prev_value.size + value = value << prev_value.size + value |= int(prev_value) + integers_raw.pop() + integers_merged.append((index, m2_expr.ExprInt(value, new_size))) + + + slices_merged = [] + # Merge consecutive slices + while slices_raw: + index, arg = slices_raw.pop() + value, slice_start, slice_stop = arg.arg, arg.start, arg.stop + while slices_raw: + prev_index, prev_value = slices_raw[-1] + # Check if slices are consecutive + if prev_index + prev_value.size != index: + break + # Check if slices can ben merged + if prev_value.arg != value: + break + if prev_value.stop != slice_start: + break + # Merge slices + index = prev_index + slice_start = prev_value.start + slices_raw.pop() + slices_merged.append((index, value[slice_start:slice_stop])) - simp_sources += final_sources - simp_sources += final_sources_int + new_args = slices_merged + integers_merged + other_raw + new_args.sort() + for i, (index, arg) in enumerate(new_args[:-1]): + assert index + arg.size == new_args[i+1][0] + ret = [arg[1] for arg in new_args] - for i, v in non_slice.items(): - simp_sources.append((i, v)) + return ret - simp_sources.sort() - simp_sources 
= [x[1] for x in simp_sources] - return simp_sources op_propag_cst = ['+', '*', '^', '&', '|', '>>', @@ -327,8 +300,8 @@ class Variables_Identifier(object): self.find_variables_rec(expr.arg) elif isinstance(expr, m2_expr.ExprCompose): - for a in expr.args: - self.find_variables_rec(list(a)[0]) + for arg in expr.args: + self.find_variables_rec(arg) elif isinstance(expr, m2_expr.ExprSlice): self.find_variables_rec(expr.arg) @@ -646,15 +619,14 @@ def possible_values(expr): elif isinstance(expr, m2_expr.ExprCompose): # Generate each possibility for sub-argument, associated with the start # and stop bit - consvals_args = [map(lambda x: (x, arg[1], arg[2]), - possible_values(arg[0])) + consvals_args = [map(lambda x: x, possible_values(arg)) for arg in expr.args] for consvals_possibility in itertools.product(*consvals_args): # Merge constraint of each sub-element - args_constraint = itertools.chain(*[consval[0].constraints + args_constraint = itertools.chain(*[consval.constraints for consval in consvals_possibility]) # Gen the corresponding constraints / ExprCompose - args = [consval[0].value for consval in consvals_possibility] + args = [consval.value for consval in consvals_possibility] consvals.add( ConstrainedValue(frozenset(args_constraint), m2_expr.ExprCompose(*args))) diff --git a/miasm2/expression/simplifications.py b/miasm2/expression/simplifications.py index 069be197..dd4f5c04 100644 --- a/miasm2/expression/simplifications.py +++ b/miasm2/expression/simplifications.py @@ -96,6 +96,7 @@ class ExpressionSimplifier(object): self.simplified_exprs.add(expression) # Mark expression as simplified self.simplified_exprs.add(e_new) + return e_new def expr_simp_wrapper(self, expression, callback=None): diff --git a/miasm2/expression/simplifications_common.py b/miasm2/expression/simplifications_common.py index 4b88f8c2..a070fb81 100644 --- a/miasm2/expression/simplifications_common.py +++ b/miasm2/expression/simplifications_common.py @@ -265,13 +265,14 @@ def 
simp_cst_propagation(e_s, e): args = new_args # A << int with A ExprCompose => move index - if op == "<<" and isinstance(args[0], ExprCompose) and isinstance(args[1], ExprInt): + if (op == "<<" and isinstance(args[0], ExprCompose) and + isinstance(args[1], ExprInt) and int(args[1]) != 0): final_size = args[0].size shift = int(args[1]) new_args = [] # shift indexes - for expr, start, stop in args[0].args: - new_args.append((expr, start+shift, stop+shift)) + for index, arg in args[0].iter_args(): + new_args.append((arg, index+shift, index+shift+arg.size)) # filter out expression filter_args = [] min_index = final_size @@ -281,15 +282,12 @@ def simp_cst_propagation(e_s, e): if stop > final_size: expr = expr[:expr.size - (stop - final_size)] stop = final_size - filter_args.append((expr, start, stop)) + filter_args.append(expr) min_index = min(start, min_index) # create entry 0 + assert min_index != 0 expr = ExprInt(0, min_index) - filter_args = [(expr, 0, min_index)] + filter_args - filter_args.sort(key=lambda x:x[1]) - starts = [start for (_, start, _) in filter_args] - assert len(set(starts)) == len(starts) - args = [expr for (expr, _, _) in filter_args] + args = [expr] + filter_args return ExprCompose(*args) # A >> int with A ExprCompose => move index @@ -298,8 +296,8 @@ def simp_cst_propagation(e_s, e): shift = int(args[1]) new_args = [] # shift indexes - for expr, start, stop in args[0].args: - new_args.append((expr, start-shift, stop-shift)) + for index, arg in args[0].iter_args(): + new_args.append((arg, index-shift, index+arg.size-shift)) # filter out expression filter_args = [] max_index = 0 @@ -309,15 +307,11 @@ def simp_cst_propagation(e_s, e): if start < 0: expr = expr[-start:] start = 0 - filter_args.append((expr, start, stop)) + filter_args.append(expr) max_index = max(stop, max_index) # create entry 0 expr = ExprInt(0, final_size - max_index) - filter_args += [(expr, max_index, final_size)] - filter_args.sort(key=lambda x:x[1]) - starts = [start for (_, 
start, _) in filter_args] - assert len(set(starts)) == len(starts) - args = [expr for (expr, _, _) in filter_args] + args = filter_args + [expr] return ExprCompose(*args) @@ -325,22 +319,17 @@ def simp_cst_propagation(e_s, e): if op in ['|', '&', '^'] and all([isinstance(arg, ExprCompose) for arg in args]): bounds = set() for arg in args: - bound = tuple([(start, stop) for (expr, start, stop) in arg.args]) + bound = tuple([expr.size for expr in arg.args]) bounds.add(bound) if len(bounds) == 1: bound = list(bounds)[0] - new_args = [[expr] for (expr, start, stop) in args[0].args] + new_args = [[expr] for expr in args[0].args] for sub_arg in args[1:]: - for i, (expr, start, stop) in enumerate(sub_arg.args): + for i, expr in enumerate(sub_arg.args): new_args[i].append(expr) + args = [] for i, arg in enumerate(new_args): - new_args[i] = ExprOp(op, *arg), bound[i][0], bound[i][1] - - new_args.sort(key=lambda x:x[1]) - starts = [start for (_, start, _) in new_args] - assert len(set(starts)) == len(starts) - args = [expr for (expr, _, _) in new_args] - + args.append(ExprOp(op, *arg)) return ExprCompose(*args) # <<>>c_rez @@ -462,46 +451,41 @@ def simp_slice(e_s, e): return new_e elif isinstance(e.arg, ExprCompose): # Slice(Compose(A), x) => Slice(A, y) - for a in e.arg.args: - if a[1] <= e.start and a[2] >= e.stop: - new_e = a[0][e.start - a[1]:e.stop - a[1]] + for index, arg in e.arg.iter_args(): + if index <= e.start and index+arg.size >= e.stop: + new_e = arg[e.start - index:e.stop - index] return new_e # Slice(Compose(A, B, C), x) => Compose(A, B, C) with truncated A/B/C out = [] - for arg, s_start, s_stop in e.arg.args: + for index, arg in e.arg.iter_args(): # arg is before slice start - if e.start >= s_stop: + if e.start >= index + arg.size: continue # arg is after slice stop - elif e.stop <= s_start: + elif e.stop <= index: continue # arg is fully included in slice - elif e.start <= s_start and s_stop <= e.stop: - out.append((arg, s_start - e.start, s_stop - 
e.start)) + elif e.start <= index and index + arg.size <= e.stop: + out.append(arg) continue # arg is truncated at start - if e.start > s_start: - slice_start = e.start - s_start + if e.start > index: + slice_start = e.start - index a_start = 0 else: # arg is not truncated at start slice_start = 0 - a_start = s_start - e.start + a_start = index - e.start # a is truncated at stop - if e.stop < s_stop: - slice_stop = arg.size + e.stop - s_stop - slice_start + if e.stop < index + arg.size: + slice_stop = arg.size + e.stop - (index + arg.size) - slice_start a_stop = e.stop - e.start else: slice_stop = arg.size - a_stop = s_stop - e.start - out.append((arg[slice_start:slice_stop], a_start, a_stop)) + a_stop = index + arg.size - e.start + out.append(arg[slice_start:slice_stop]) - out.sort(key=lambda x:x[1]) - starts = [start for (_, start, _) in out] - assert len(set(starts)) == len(starts) - args = [expr for (expr, _, _) in out] - - return ExprCompose(*args) + return ExprCompose(*out) # ExprMem(x, size)[:A] => ExprMem(x, a) # XXXX todo hum, is it safe? 
@@ -553,71 +537,60 @@ def simp_slice(e_s, e): def simp_compose(e_s, e): "Commons simplification on ExprCompose" - args = merge_sliceto_slice(e.args) + args = merge_sliceto_slice(e) out = [] # compose of compose - for a in args: - if isinstance(a[0], ExprCompose): - for x, start, stop in a[0].args: - out.append((x, start + a[1], stop + a[1])) + for arg in args: + if isinstance(arg, ExprCompose): + out += arg.args else: - out.append(a) + out.append(arg) args = out # Compose(a) with a.size = compose.size => a - if len(args) == 1 and args[0][1] == 0 and args[0][2] == e.size: - return args[0][0] + if len(args) == 1 and args[0].size == e.size: + return args[0] # {(X[z:], 0, X.size-z), (0, X.size-z, X.size)} => (X >> z) if (len(args) == 2 and - isinstance(args[1][0], ExprInt) and - args[1][0].arg == 0): - a1 = args[0] - a2 = args[1] - if (isinstance(a1[0], ExprSlice) and - a1[1] == 0 and - a1[0].stop == a1[0].arg.size and - a2[1] == a1[0].size and - a2[2] == a1[0].arg.size): - new_e = a1[0].arg >> ExprInt( - a1[0].start, a1[0].arg.size) + isinstance(args[1], ExprInt) and + int(args[1]) == 0): + if (isinstance(args[0], ExprSlice) and + args[0].stop == args[0].arg.size and + args[0].size + args[1].size == args[0].arg.size): + new_e = args[0].arg >> ExprInt(args[0].start, args[0].arg.size) return new_e # Compose with ExprCond with integers for src1/src2 and intergers => # propagage integers # {XXX?(0x0,0x1)?(0x0,0x1),0,8, 0x0,8,32} => XXX?(int1, int2) - ok = True - expr_cond = None - expr_ints = [] - for i, a in enumerate(args): - if not is_int_or_cond_src_int(a[0]): + expr_cond_index = None + expr_ints_or_conds = [] + for i, arg in enumerate(args): + if not is_int_or_cond_src_int(arg): ok = False break - expr_ints.append(a) - if isinstance(a[0], ExprCond): - if expr_cond is not None: + expr_ints_or_conds.append(arg) + if isinstance(arg, ExprCond): + if expr_cond_index is not None: ok = False - expr_cond = i - cond = a[0] + expr_cond_index = i + cond = arg - if ok and 
expr_cond is not None: + if ok and expr_cond_index is not None: src1 = [] src2 = [] - for i, a in enumerate(expr_ints): - if i == expr_cond: - src1.append((a[0].src1, a[1], a[2])) - src2.append((a[0].src2, a[1], a[2])) + for i, arg in enumerate(expr_ints_or_conds): + if i == expr_cond_index: + src1.append(arg.src1) + src2.append(arg.src2) else: - src1.append(a) - src2.append(a) - src1 = [expr for (expr, _, _) in src1] - src2 = [expr for (expr, _, _) in src2] + src1.append(arg) + src2.append(arg) src1 = e_s.apply_simp(ExprCompose(*src1)) src2 = e_s.apply_simp(ExprCompose(*src2)) if isinstance(src1, ExprInt) and isinstance(src2, ExprInt): return ExprCond(cond.cond, src1, src2) - args.sort(key=lambda x:x[1]) - args = [expr for (expr, _, _) in args] return ExprCompose(*args) diff --git a/miasm2/ir/ir.py b/miasm2/ir/ir.py index 2c6300a9..d8cccc64 100644 --- a/miasm2/ir/ir.py +++ b/miasm2/ir/ir.py @@ -46,7 +46,6 @@ class AssignBlock(dict): * if dst is an ExprSlice, expand it to affect the full Expression * if dst already known, sources are merged """ - if dst.size != src.size: raise RuntimeError( "sanitycheck: args must have same size! 
%s" % @@ -75,6 +74,7 @@ class AssignBlock(dict): expr_list = [(new_dst, new_src), (new_dst, self[new_dst])] # Find collision + print 'FIND COLISION' e_colision = reduce(lambda x, y: x.union(y), (self.get_modified_slice(dst, src) for (dst, src) in expr_list), @@ -109,17 +109,16 @@ class AssignBlock(dict): def get_modified_slice(dst, src): """Return an Expr list of extra expressions needed during the object instanciation""" - if not isinstance(src, m2_expr.ExprCompose): raise ValueError("Get mod slice not on expraff slice", str(self)) modified_s = [] - for arg in src.args: - if (not isinstance(arg[0], m2_expr.ExprSlice) or - arg[0].arg != dst or - arg[1] != arg[0].start or - arg[2] != arg[0].stop): + for index, arg in src.iter_args(): + if not (isinstance(arg, m2_expr.ExprSlice) and + arg.arg == dst and + index == arg.start and + index+arg.size == arg.stop): # If x is not the initial expression - modified_s.append(arg) + modified_s.append((arg, index, index+arg.size)) return modified_s def get_w(self): diff --git a/miasm2/ir/symbexec.py b/miasm2/ir/symbexec.py index 65515c64..db3eacdc 100644 --- a/miasm2/ir/symbexec.py +++ b/miasm2/ir/symbexec.py @@ -229,9 +229,8 @@ class symbexec(object): ret = m2_expr.ExprOp(expr.op, *args) elif isinstance(expr, m2_expr.ExprCompose): args = [] - for (arg, start, stop) in expr.args: - arg = self.apply_expr_on_state_visit_cache(arg, state, cache, level+1) - args.append(arg) + for arg in expr.args: + args.append(self.apply_expr_on_state_visit_cache(arg, state, cache, level+1)) ret = m2_expr.ExprCompose(*args) else: raise TypeError("Unknown expr type") @@ -378,7 +377,6 @@ class symbexec(object): """ pool_out = {} eval_cache = {} - for dst, src in assignblk.iteritems(): src = self.eval_expr(src, eval_cache) if isinstance(dst, m2_expr.ExprMem): diff --git a/miasm2/ir/translators/C.py b/miasm2/ir/translators/C.py index 340fbfec..57859f9c 100644 --- a/miasm2/ir/translators/C.py +++ b/miasm2/ir/translators/C.py @@ -145,11 +145,11 @@ class 
TranslatorC(Translator): out = [] # XXX check mask for 64 bit & 32 bit compat dst_cast = "uint%d_t" % expr.size - for x in expr.args: + for index, arg in expr.iter_args(): out.append("(((%s)(%s & 0x%X)) << %d)" % (dst_cast, - self.from_expr(x[0]), - (1 << (x[2] - x[1])) - 1, - x[1])) + self.from_expr(arg), + (1 << arg.size) - 1, + index)) out = ' | '.join(out) return '(' + out + ')' diff --git a/miasm2/ir/translators/miasm.py b/miasm2/ir/translators/miasm.py index 515148ee..b390eb51 100644 --- a/miasm2/ir/translators/miasm.py +++ b/miasm2/ir/translators/miasm.py @@ -27,8 +27,7 @@ class TranslatorMiasm(Translator): ", ".join(map(self.from_expr, expr.args))) def from_ExprCompose(self, expr): - args = ["%s" % self.from_expr(arg) - for arg, _, _ in expr.args] + args = ["%s" % self.from_expr(arg) for arg in expr.args] return "ExprCompose(%s)" % ", ".join(args) def from_ExprAff(self, expr): diff --git a/miasm2/ir/translators/python.py b/miasm2/ir/translators/python.py index f745d2df..c06d865c 100644 --- a/miasm2/ir/translators/python.py +++ b/miasm2/ir/translators/python.py @@ -31,10 +31,10 @@ class TranslatorPython(Translator): def from_ExprCompose(self, expr): out = [] - for subexpr, start, stop in expr.args: - out.append("((%s & 0x%x) << %d)" % (self.from_expr(subexpr), - (1 << (stop - start)) - 1, - start)) + for index, arg in expr.iter_args(): + out.append("((%s & 0x%x) << %d)" % (self.from_expr(arg), + (1 << arg.size) - 1, + index)) return "(%s)" % ' | '.join(out) def from_ExprCond(self, expr): diff --git a/miasm2/ir/translators/smt2.py b/miasm2/ir/translators/smt2.py index 5bffd7f2..5d5fb26b 100644 --- a/miasm2/ir/translators/smt2.py +++ b/miasm2/ir/translators/smt2.py @@ -163,10 +163,8 @@ class TranslatorSMT2(Translator): def from_ExprCompose(self, expr): res = None - args = sorted(expr.args, key=operator.itemgetter(2)) # sort by start off - for subexpr, start, stop in args: - sube = self.from_expr(subexpr) - e = bv_extract(stop-start-1, 0, sube) + for arg in 
expr.args: + e = bv_extract(arg.size-1, 0, self.from_expr(arg)) if res: res = bv_concat(e, res) else: diff --git a/miasm2/ir/translators/z3_ir.py b/miasm2/ir/translators/z3_ir.py index e0460cc4..ccb14b4f 100644 --- a/miasm2/ir/translators/z3_ir.py +++ b/miasm2/ir/translators/z3_ir.py @@ -137,10 +137,8 @@ class TranslatorZ3(Translator): def from_ExprCompose(self, expr): res = None - args = sorted(expr.args, key=operator.itemgetter(2)) # sort by start off - for subexpr, start, stop in args: - sube = self.from_expr(subexpr) - e = z3.Extract(stop-start-1, 0, sube) + for arg in expr.args: + e = z3.Extract(arg.size-1, 0, self.from_expr(arg)) if res != None: res = z3.Concat(e, res) else: -- cgit 1.4.1