diff options
Diffstat (limited to 'miasm2/core/objc.py')
| -rw-r--r-- | miasm2/core/objc.py | 1049 |
1 files changed, 536 insertions, 513 deletions
diff --git a/miasm2/core/objc.py b/miasm2/core/objc.py index 917d0ea9..d6c100ca 100644 --- a/miasm2/core/objc.py +++ b/miasm2/core/objc.py @@ -6,6 +6,7 @@ C helper for Miasm: """ +import warnings from pycparser import c_parser, c_ast from miasm2.expression.expression_reduce import ExprReducer @@ -17,53 +18,123 @@ from miasm2.core.ctypesmngr import CTypeUnion, CTypeStruct, CTypeId, CTypePtr,\ PADDING_TYPE_NAME = "___padding___" +def missing_definition(objtype): + warnings.warn("Null size type: Missing definition? %r" % objtype) + +""" +Display C type +source: "The C Programming Language - 2nd Edition - Ritchie Kernighan.pdf" +p. 124 +""" + +def objc_to_str(objc, result=None): + if result is None: + result = "" + while True: + if isinstance(objc, ObjCArray): + result += "[%d]" % objc.elems + objc = objc.objtype + elif isinstance(objc, ObjCPtr): + if not result and isinstance(objc.objtype, ObjCFunc): + result = objc.objtype.name + if isinstance(objc.objtype, (ObjCPtr, ObjCDecl, ObjCStruct, ObjCUnion)): + result = "*%s" % result + else: + result = "(*%s)" % result + + objc = objc.objtype + elif isinstance(objc, (ObjCDecl, ObjCStruct, ObjCUnion)): + if result: + result = "%s %s" % (objc, result) + else: + result = str(objc) + break + elif isinstance(objc, ObjCFunc): + args_str = [] + for name, arg in objc.args: + args_str.append(objc_to_str(arg, name)) + args = ", ".join(args_str) + result += "(%s)" % args + objc = objc.type_ret + elif isinstance(objc, ObjCInt): + return "int" + elif isinstance(objc, ObjCEllipsis): + return "..." + else: + raise TypeError("Unknown c type") + return result + + class ObjC(object): """Generic ObjC""" - def set_align_size(self, align, size): - """Set C object alignment and size""" + def __init__(self, align, size): + self._align = align + self._size = size - self.align = align - self.size = size + @property + def align(self): + """Alignment (in bytes) of the C object""" + return self._align - def eq_base(self, other): - return (self.__class__ == other.__class__ and - self.align == other.align and - self.size == other.size) + @property + def size(self): + """Size (in bytes) of the C object""" + return self._size + + def cmp_base(self, other): + assert self.__class__ in OBJC_PRIO + assert other.__class__ in OBJC_PRIO + + if OBJC_PRIO[self.__class__] != OBJC_PRIO[other.__class__]: + return cmp(OBJC_PRIO[self.__class__], OBJC_PRIO[other.__class__]) + if self.align != other.align: + return cmp(self.align, other.align) + return cmp(self.size, other.size) + + def __hash__(self): + return hash((self.__class__, self._align, self._size)) + + def __str__(self): + return objc_to_str(self) class ObjCDecl(ObjC): """C Declaration identified""" def __init__(self, name, align, size): - super(ObjCDecl, self).__init__() - self.name, self.align, self.size = name, align, size + super(ObjCDecl, self).__init__(align, size) + self._name = name + + name = property(lambda self: self._name) + + def __hash__(self): + return hash((super(ObjCDecl, self).__hash__(), self._name)) def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.name) def __str__(self): - return '%s' % (self.name) + return str(self.name) - def __eq__(self, other): - if not self.eq_base(other): - return False - return self.name == other.name + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) class ObjCInt(ObjC): """C integer""" def __init__(self): - super(ObjCInt, self).__init__() - self.size = None - self.align = None + super(ObjCInt, self).__init__(None, None) def __str__(self): return 'int' - def __eq__(self, other): - return self.eq_base(other) + def __cmp__(self, other): + return self.cmp_base(other) class ObjCPtr(ObjC): @@ -77,37 +148,38 @@ class ObjCPtr(ObjC): @void_p_size: pointer size (in bytes) """ - super(ObjCPtr, self).__init__() + super(ObjCPtr, self).__init__(void_p_align, void_p_size) + self._lock = False + self.objtype = objtype - self.align = void_p_align - self.size = void_p_size + if objtype is None: + self._lock = False + + def get_objtype(self): + assert self._lock is True + return self._objtype + + def set_objtype(self, objtype): + assert self._lock is False + self._lock = True + self._objtype = objtype + + objtype = property(get_objtype, set_objtype) + + def __hash__(self): + # Don't try to hash on an unlocked Ptr (still mutable) + assert self._lock + return hash((super(ObjCPtr, self).__hash__(), hash(self._objtype))) def __repr__(self): return '<%s %r>' % (self.__class__.__name__, self.objtype.__class__) - def __str__(self): - target = self.objtype - if isinstance(target, ObjCDecl): - return "%s *" % target.name - elif isinstance(target, ObjCPtr): - return "%s *" % target - elif isinstance(target, ObjCStruct): - return "struct %s *" % target.name - elif isinstance(target, ObjCUnion): - return "union %s *" % target.name - elif isinstance(target, ObjCArray): - return "%s (*)[%s]" % (target.objtype, target.elems) - elif isinstance(target, ObjCFunc): - args = ", ".join([str(arg) for arg in target.args]) - return "%s (*%s)(%s)" % (target.type_ret, target.name, args) - else: - return '*%s' % (target) - - def __eq__(self, other): - if not self.eq_base(other): - return False - return self.objtype == other.objtype + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.objtype, other.objtype) class ObjCArray(ObjC): @@ -120,42 +192,42 @@ class ObjCArray(ObjC): @elems: number of elements in the array """ - super(ObjCArray, self).__init__() - self.elems = elems - self.objtype = objtype - self.align = objtype.align - self.size = elems * objtype.size + super(ObjCArray, self).__init__(objtype.align, elems * objtype.size) + self._elems = elems + self._objtype = objtype + + objtype = property(lambda self: self._objtype) + elems = property(lambda self: self._elems) + + def __hash__(self): + return hash((super(ObjCArray, self).__hash__(), self._elems, hash(self._objtype))) def __repr__(self): return '<%r[%d]>' % (self.objtype, self.elems) - def __str__(self): - return '%s[%d]' % (self.objtype, self.elems) - - def __eq__(self, other): - if not self.eq_base(other): - return False - return (self.elems == other.elems and - self.objtype == other.objtype) + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + ret = cmp(self.elems, other.elems) + if ret: + return ret + return cmp(self.objtype, other.objtype) class ObjCStruct(ObjC): """C object for structures""" - def __init__(self, name): - super(ObjCStruct, self).__init__() - self.name = name - self.fields = [] - - def add_field(self, name, objtype, offset, size): - """Add a field into the structure - @name: field name - @objtype: field type - @offset: field offset in the structure - @size: field size - """ + def __init__(self, name, align, size, fields): + super(ObjCStruct, self).__init__(align, size) + self._name = name + self._fields = tuple(fields) - self.fields.append((name, objtype, offset, size)) + name = property(lambda self: self._name) + fields = property(lambda self: self._fields) + + def __hash__(self): + return hash((super(ObjCStruct, self).__hash__(), self._name)) def __repr__(self): out = [] @@ -169,34 +241,25 @@ class ObjCStruct(ObjC): def __str__(self): return 'struct %s' % (self.name) - def __eq__(self, other): - if not (self.eq_base(other) and self.name == other.name): - return False - if len(self.fields) != len(other.fields): - return False - for field_a, field_b in zip(self.fields, other.fields): - if field_a != field_b: - return False - return True - + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) class ObjCUnion(ObjC): """C object for unions""" - def __init__(self, name): - super(ObjCUnion, self).__init__() - self.name = name - self.fields = [] - - def add_field(self, name, objtype, offset, size): - """Add a field into the structure - @name: field name - @objtype: field type - @offset: field offset in the structure - @size: field size - """ + def __init__(self, name, align, size, fields): + super(ObjCUnion, self).__init__(align, size) + self._name = name + self._fields = tuple(fields) - self.fields.append((name, objtype, offset, size)) + name = property(lambda self: self._name) + fields = property(lambda self: self._fields) + + def __hash__(self): + return hash((super(ObjCUnion, self).__hash__(), self._name)) def __repr__(self): out = [] @@ -210,40 +273,42 @@ class ObjCUnion(ObjC): def __str__(self): return 'union %s' % (self.name) - def __eq__(self, other): - if not (self.eq_base(other) and self.name == other.name): - return False - if len(self.fields) != len(other.fields): - return False - for field_a, field_b in zip(self.fields, other.fields): - if field_a != field_b: - return False - return True - + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) class ObjCEllipsis(ObjC): """C integer""" def __init__(self): - super(ObjCEllipsis, self).__init__() - self.size = None - self.align = None + super(ObjCEllipsis, self).__init__(None, None) - def __eq__(self, other): - return self.eq_base(other) + align = property(lambda self: self._align) + size = property(lambda self: self._size) + + def __cmp__(self, other): + return self.cmp_base(other) class ObjCFunc(ObjC): """C object for Functions""" def __init__(self, name, abi, type_ret, args, void_p_align, void_p_size): - super(ObjCFunc, self).__init__() - self.name = name - self.abi = abi - self.type_ret = type_ret - self.args = args - self.align = void_p_align - self.size = void_p_size + super(ObjCFunc, self).__init__(void_p_align, void_p_size) + self._name = name + self._abi = abi + self._type_ret = type_ret + self._args = tuple(args) + + args = property(lambda self: self._args) + type_ret = property(lambda self: self._type_ret) + abi = property(lambda self: self._abi) + name = property(lambda self: self._name) + + def __hash__(self): + return hash((super(ObjCFunc, self).__hash__(), hash(self._args), self._name)) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, @@ -254,20 +319,27 @@ class ObjCFunc(ObjC): out.append("Function (%s) %s: (align: %d)" % (self.abi, self.name, self.align)) out.append(" ret: %s" % (str(self.type_ret))) out.append(" Args:") - for arg in self.args: - out.append(" %s" % arg) + for name, arg in self.args: + out.append(" %s %s" % (name, arg)) return '\n'.join(out) - def __eq__(self, other): - if not (self.eq_base(other) and self.name == other.name and - self.type_ret == other.type_ret): - return False - if len(self.args) != len(other.args): - return False - for arg_a, arg_b in zip(self.args, other.args): - if arg_a != arg_b: - return False - return True + def __cmp__(self, other): + ret = self.cmp_base(other) + if ret: + return ret + return cmp(self.name, other.name) + +OBJC_PRIO = { + ObjC: 0, + ObjCDecl:1, + ObjCInt:2, + ObjCPtr:3, + ObjCArray:4, + ObjCStruct:5, + ObjCUnion:6, + ObjCEllipsis:7, + ObjCFunc:8, +} def access_simplifier(expr): @@ -342,6 +414,22 @@ class CGen(object): default_size = 64 + + def __init__(self, ctype): + self._ctype = ctype + + @property + def ctype(self): + """Type (ObjC instance) of the current object""" + return self._ctype + + def __hash__(self): + return hash(self.__class__) + + def __eq__(self, other): + return (self.__class__ == other.__class__ and + self._ctype == other.ctype) + def to_c(self): """Generate corresponding C""" @@ -358,8 +446,20 @@ class CGenInt(CGen): def __init__(self, integer): assert isinstance(integer, (int, long)) - self.integer = integer - self.ctype = ObjCInt() + self._integer = integer + super(CGenInt, self).__init__(ObjCInt()) + + @property + def integer(self): + """Value of the object""" + return self._integer + + def __hash__(self): + return hash((super(CGenInt, self).__hash__(), self._integer)) + + def __eq__(self, other): + return (super(CGenInt, self).__eq__(other) and + self._integer == other.integer) def to_c(self): """Generate corresponding C""" @@ -380,9 +480,21 @@ class CGenId(CGen): """ID of a C object""" def __init__(self, ctype, name): - self.ctype = ctype - self.name = name + self._name = name assert isinstance(name, str) + super(CGenId, self).__init__(ctype) + + @property + def name(self): + """Name of the Id""" + return self._name + + def __hash__(self): + return hash((super(CGenId, self).__hash__(), self._name)) + + def __eq__(self, other): + return (super(CGenId, self).__eq__(other) and + self._name == other.name) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, @@ -413,14 +525,32 @@ class CGenField(CGen): """ def __init__(self, struct, field, fieldtype, void_p_align, void_p_size): - self.struct = struct - self.field = field + self._struct = struct + self._field = field assert isinstance(field, str) if isinstance(fieldtype, ObjCArray): ctype = fieldtype else: ctype = ObjCPtr(fieldtype, void_p_align, void_p_size) - self.ctype = ctype + super(CGenField, self).__init__(ctype) + + @property + def struct(self): + """Structure containing the field""" + return self._struct + + @property + def field(self): + """Field name""" + return self._field + + def __hash__(self): + return hash((super(CGenField, self).__hash__(), self._struct, self._field)) + + def __eq__(self, other): + return (super(CGenField, self).__eq__(other) and + self._struct == other.struct and + self._field == other.field) def to_c(self): """Generate corresponding C""" @@ -467,8 +597,8 @@ class CGenArray(CGen): - X[] => X* """ - def __init__(self, name, element, void_p_align, void_p_size): - ctype = name.ctype + def __init__(self, base, elems, void_p_align, void_p_size): + ctype = base.ctype if isinstance(ctype, ObjCPtr): pass elif isinstance(ctype, ObjCArray) and isinstance(ctype.objtype, ObjCArray): @@ -477,21 +607,39 @@ class CGenArray(CGen): ctype = ObjCPtr(ctype.objtype, void_p_align, void_p_size) else: raise TypeError("Strange case") - self.ctype = ctype - self.name = name - self.element = element + self._base = base + self._elems = elems + super(CGenArray, self).__init__(ctype) + + @property + def base(self): + """Base object supporting the array""" + return self._base + + @property + def elems(self): + """Number of elements in the array""" + return self._elems + + def __hash__(self): + return hash((super(CGenArray, self).__hash__(), self._base, self._elems)) + + def __eq__(self, other): + return (super(CGenField, self).__eq__(other) and + self._base == other.base and + self._elems == other.elems) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, - self.name) + self.base) def to_c(self): """Generate corresponding C""" if isinstance(self.ctype, ObjCPtr): - out_str = "&((%s)[%d])" % (self.name.to_c(), self.element) + out_str = "&((%s)[%d])" % (self.base.to_c(), self.elems) elif isinstance(self.ctype, ObjCArray): - out_str = "(%s)[%d]" % (self.name.to_c(), self.element) + out_str = "(%s)[%d]" % (self.base.to_c(), self.elems) else: raise RuntimeError("Strange case") return out_str @@ -502,12 +650,12 @@ class CGenArray(CGen): if isinstance(self.ctype, ObjCPtr): return ExprOp("addr", ExprOp("[]", - self.name.to_expr(), - ExprInt(self.element, self.default_size))) + self.base.to_expr(), + ExprInt(self.elems, self.default_size))) elif isinstance(self.ctype, ObjCArray): return ExprOp("[]", - self.name.to_expr(), - ExprInt(self.element, self.default_size)) + self.base.to_expr(), + ExprInt(self.elems, self.default_size)) else: raise RuntimeError("Strange case") @@ -522,28 +670,40 @@ class CGenDeref(CGen): - X* => X """ - def __init__(self, mem): - assert isinstance(mem.ctype, ObjCPtr) - self.ctype = mem.ctype.objtype - self.mem = mem + def __init__(self, ptr): + assert isinstance(ptr.ctype, ObjCPtr) + self._ptr = ptr + super(CGenDeref, self).__init__(ptr.ctype.objtype) + + @property + def ptr(self): + """Pointer object""" + return self._ptr + + def __hash__(self): + return hash((super(CGenDeref, self).__hash__(), self._ptr)) + + def __eq__(self, other): + return (super(CGenField, self).__eq__(other) and + self._ptr == other.ptr) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, - self.mem) + self.ptr) def to_c(self): """Generate corresponding C""" - if not isinstance(self.mem.ctype, ObjCPtr): + if not isinstance(self.ptr.ctype, ObjCPtr): raise RuntimeError() - return "*(%s)" % (self.mem.to_c()) + return "*(%s)" % (self.ptr.to_c()) def to_expr(self): """Generate Miasm expression representing the C access""" - if not isinstance(self.mem.ctype, ObjCPtr): + if not isinstance(self.ptr.ctype, ObjCPtr): raise RuntimeError() - return ExprOp("deref", self.mem.to_expr()) + return ExprOp("deref", self.ptr.to_expr()) def ast_get_c_access_expr(ast, expr_types, lvl=0): @@ -615,201 +775,6 @@ def parse_access(c_access): return access -class CTypeAnalyzer(ExprReducer): - """ - Return the C type(s) of a native Miasm expression - """ - - def __init__(self, expr_types, types_mngr, enforce_strict_access=True): - """Init TypeAnalyzer - @expr_types: a dictionnary linking ID names to their types - @types_mngr: types manager - @enforce_strict_access: If false, get type even on expression - pointing to a middle of an object. If true, raise exception if such a - pointer is encountered - """ - - self.expr_types = expr_types - self.types_mngr = types_mngr - self.enforce_strict_access = enforce_strict_access - - def updt_expr_types(self, expr_types): - """Update expr_types - @expr_types: Dictionnary associating name to type - """ - - self.expr_types = expr_types - - CST = ObjCInt() - - def get_typeof(self, base_type, offset, deref, lvl=0): - """Return a list of pointers (or None) on the element at @offset of an - object of type @base_type - - In case of no @deref, stops recursion as soon as we reached the base of - an object. - In other cases, we need to go down to the final dereferenced object - - @base_type: type of main object - @offset: offset (in bytes) of the target sub object - @deref: get type for a pointer or a deref - @lvl: actual recursion level - """ - void_type = self.types_mngr.void_ptr - - if isinstance(base_type, ObjCStruct): - if offset == 0 and not deref: - # In this case, return the struct* - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - return new_type - for _, subtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: - continue - new_type = self.get_typeof( - subtype, offset - f_offset, deref, lvl + 1) - break - else: - raise RuntimeError('cannot find struct field') - elif isinstance(base_type, ObjCArray): - sub_offset = offset % (base_type.objtype.size) - element_num = offset / (base_type.objtype.size) - if element_num >= base_type.elems: - return None - if offset == 0 and not deref: - # In this case, return the array - return [base_type] - obj = self.get_typeof( - base_type.objtype, sub_offset, deref, lvl + 1) - new_type = obj - - elif isinstance(base_type, ObjCDecl): - if self.enforce_strict_access and offset != 0: - return [] - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - - elif isinstance(base_type, ObjCUnion): - out = [] - if offset == 0 and not deref: - # In this case, return the struct* - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - return new_type - for _, objtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: - continue - new_type = self.get_typeof( - objtype, offset - f_offset, deref, lvl + 1) - out += new_type - new_type = out - elif isinstance(base_type, ObjCPtr): - if self.enforce_strict_access: - assert offset % base_type.size == 0 - obj = ObjCPtr(base_type, void_type.align, void_type.size) - new_type = [obj] - else: - raise NotImplementedError("deref type %r" % base_type) - return new_type - - def reduce_id(self, node, _): - """Get type of ExprId""" - if not(isinstance(node.expr, ExprId) and node.expr.name in self.expr_types): - return None - return [self.expr_types[node.expr.name]] - - def reduce_int(self, node, _): - """Get type of ExprInt""" - - if not isinstance(node.expr, ExprInt): - return None - return [self.CST] - - def get_solo_type(self, node): - """Return the type of the @node if it has only one possible type, - different from not None. In othe cases, return None. - """ - if node.info is None or len(node.info) != 1: - return None - return type(node.info[0]) - - def reduce_ptr_plus_cst(self, node, lvl): - """Get type of ptr + CST""" - - if not node.expr.is_op("+") or len(node.args) != 2: - return None - args_types = set([self.get_solo_type(node.args[0]), - self.get_solo_type(node.args[1])]) - if args_types != set([ObjCInt, ObjCPtr]): - return None - arg0, arg1 = node.args - out = [] - ptr_offset = int(arg1.expr) - for info in arg0.info: - ptr_basetype = info.objtype - # Array-like: int* ptr; ptr[1] = X - out += self.get_typeof(ptr_basetype, - ptr_offset % ptr_basetype.size, - False, - lvl) - - return out - - def reduce_cst_op_cst(self, node, _): - """Get type of CST + CST""" - - if not node.expr.is_op("+") or len(node.args) != 2: - return None - if node.args[0] is None or node.args[1] is None: - return None - args_types = set([self.get_solo_type(node.args[0]), - self.get_solo_type(node.args[1])]) - if args_types != set([ObjCInt]): - return None - return [self.CST] - - def reduce_deref(self, node, lvl): - """Get type of a dereferenced expression: - * @NN[ptr<elem>] -> elem (type) - * @64[ptr<ptr<elem>>] -> ptr<elem> - * @32[ptr<struct>] -> struct.00 - """ - - if not isinstance(node.expr, ExprMem): - return None - if node.arg.info is None: - return None - found = [] - for subtype in node.arg.info: - # subtype : ptr<elem> - if not isinstance(subtype, (ObjCPtr, ObjCArray)): - return None - target = subtype.objtype - # target : type(elem) - for ptr_target in self.get_typeof(target, 0, True, lvl): - r_target = ptr_target.objtype - # ptr_target: ptr<elem> - # r_target: elem - if (not(self.enforce_strict_access) or - r_target.size != node.expr.size / 8): - continue - found.append(r_target) - if not found: - return None - return found - - reduction_rules = [reduce_id, reduce_int, - reduce_ptr_plus_cst, reduce_cst_op_cst, - reduce_deref, - ] - - def get_type(self, expr): - """Return the C type(s) of the native Miasm expression @expr - @expr: Miasm expression""" - - return self.reduce(expr) - - class ExprToAccessC(ExprReducer): """ Generate the C access object(s) for a given native Miasm expression @@ -898,109 +863,103 @@ class ExprToAccessC(ExprReducer): OUT: - CGenArray(CGenField(toto, b), 1) """ + if base_type.size == 0: + missing_definition(base_type) + return set() + void_type = self.types_mngr.void_ptr if isinstance(base_type, ObjCStruct): - assert 0 <= offset < base_type.size + if not 0 <= offset < base_type.size: + return set() + if offset == 0 and not deref: # In this case, return the struct* - return [cgenobj] + return set([cgenobj]) - out = [] - for fieldname, subtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: + for fieldname, subtype, field_offset, size in base_type.fields: + if not field_offset <= offset < field_offset + size: continue fieldptr = CGenField(CGenDeref(cgenobj), fieldname, subtype, void_type.align, void_type.size) - ret = self.cgen_access( - fieldptr, subtype, offset - f_offset, deref, lvl + 1) - for sname in ret: - finalobj = sname - out.append(finalobj) - new_type = out + new_type = self.cgen_access(fieldptr, subtype, + offset - field_offset, + deref, lvl + 1) break else: - raise RuntimeError('Cannot find struct field') + return set() elif isinstance(base_type, ObjCArray): + if base_type.objtype.size == 0: + missing_definition(base_type.objtype) + return set() element_num = offset / (base_type.objtype.size) - assert element_num < base_type.elems - f_offset = offset % base_type.objtype.size - cur_objtype = base_type - curobj = cgenobj - subtype = cur_objtype.objtype - if subtype == ObjCArray: - raise NotImplementedError("TODO") - else: - if f_offset != 0: - curobj = CGenArray(curobj, element_num, - void_type.align, void_type.size) - ret = self.cgen_access( - curobj, curobj.ctype.objtype, f_offset, deref, lvl + 1) - else: - curobj = CGenArray(curobj, element_num, - void_type.align, void_type.size) - ret = [curobj] - new_type = ret + field_offset = offset % base_type.objtype.size + if element_num >= base_type.elems: + return set() + if offset == 0 and not deref: + # In this case, return the array + return set([cgenobj]) + + curobj = CGenArray(cgenobj, element_num, + void_type.align, + void_type.size) + if field_offset == 0: + # We point to the start of the sub object, + # return it directly + return set([curobj]) + new_type = self.cgen_access(curobj, base_type.objtype, + field_offset, deref, lvl + 1) + elif isinstance(base_type, ObjCDecl): - if self.enforce_strict_access: - if offset % base_type.size != 0: - return [] + if self.enforce_strict_access and offset % base_type.size != 0: + return set() elem_num = offset / base_type.size nobj = CGenArray(cgenobj, elem_num, void_type.align, void_type.size) - new_type = [(nobj)] + new_type = set([nobj]) elif isinstance(base_type, ObjCUnion): - out = [] if offset == 0 and not deref: # In this case, return the struct* - return [cgenobj] + return set([cgenobj]) - for fieldname, objtype, f_offset, size in base_type.fields: - if not f_offset <= offset < f_offset + size: + out = set() + for fieldname, objtype, field_offset, size in base_type.fields: + if not field_offset <= offset < field_offset + size: continue field = CGenField(CGenDeref(cgenobj), fieldname, objtype, void_type.align, void_type.size) - new_type = self.cgen_access( - field, objtype, offset - f_offset, deref, lvl + 1) - if new_type is None: - continue - for sname in new_type: - finalobj = sname - out.append(finalobj) + out.update(self.cgen_access(field, objtype, + offset - field_offset, + deref, lvl + 1)) new_type = out elif isinstance(base_type, ObjCPtr): elem_num = offset / base_type.size - if self.enforce_strict_access: - assert offset % base_type.size == 0 - + if self.enforce_strict_access and offset % base_type.size != 0: + return set() nobj = CGenArray(cgenobj, elem_num, void_type.align, void_type.size) - new_type = [(nobj)] + new_type = set([nobj]) else: raise NotImplementedError("deref type %r" % base_type) return new_type - def reduce_id(self, node, _): - """Generate access for ExprId""" - - if not (isinstance(node.expr, ExprId) and - node.expr.name in self.expr_types): - return None - - objc = self.expr_types[node.expr.name] - out = CGenId(objc, node.expr.name) - return [out] + def reduce_known_expr(self, node, ctxt, **kwargs): + """Generate access for known expr""" + if node.expr in ctxt: + objcs = ctxt[node.expr] + return set(CGenId(objc, str(node.expr)) for objc in objcs) + return None - def reduce_int(self, node, _): + def reduce_int(self, node, **kwargs): """Generate access for ExprInt""" if not isinstance(node.expr, ExprInt): return None - return [CGenInt(int(node.expr))] + return set([CGenInt(int(node.expr))]) def get_solo_type(self, node): """Return the type of the @node if it has only one possible type, @@ -1008,33 +967,35 @@ class ExprToAccessC(ExprReducer): """ if node.info is None or len(node.info) != 1: return None - return type(node.info[0].ctype) + return type(list(node.info)[0].ctype) - def reduce_op(self, node, lvl): + def reduce_op(self, node, lvl=0, **kwargs): """Generate access for ExprOp""" - if not node.expr.is_op("+") or len(node.args) != 2: return None - args_types = set([self.get_solo_type(node.args[0]), - self.get_solo_type(node.args[1])]) - if args_types != set([ObjCInt, ObjCPtr]): + type_arg1 = self.get_solo_type(node.args[1]) + if type_arg1 != ObjCInt: return None - arg0, arg1 = node.args - out = [] + if arg0.info is None: + return None + void_type = self.types_mngr.void_ptr + out = set() ptr_offset = int(arg1.expr) - for name in arg0.info: - assert isinstance(name.ctype, ObjCPtr) - ptr_basetype = name.ctype.objtype + for info in arg0.info: + if isinstance(info.ctype, ObjCArray): + field_type = info.ctype + elif isinstance(info.ctype, ObjCPtr): + field_type = info.ctype.objtype + else: + continue + target_type = info.ctype.objtype + # Array-like: int* ptr; ptr[1] = X - ret = self.cgen_access(name, - ptr_basetype, - ptr_offset, False, lvl) - for subcgenobj in ret: - out.append(subcgenobj) + out.update(self.cgen_access(info, field_type, ptr_offset, False, lvl)) return out - def reduce_mem(self, node, lvl): + def reduce_mem(self, node, lvl=0, **kwargs): """Generate access for ExprMem: * @NN[ptr<elem>] -> elem (type) * @64[ptr<ptr<elem>>] -> ptr<elem> @@ -1045,44 +1006,61 @@ class ExprToAccessC(ExprReducer): return None if node.arg.info is None: return None - assert isinstance(node.arg.info, list) - found = [] + assert isinstance(node.arg.info, set) + void_type = self.types_mngr.void_ptr + found = set() for subcgenobj in node.arg.info: - if not isinstance(subcgenobj.ctype, ObjCPtr): - return None - target = subcgenobj.ctype.objtype - # target : type(elem) - if isinstance(target, (ObjCStruct, ObjCUnion)): - for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl): - target = finalcgenobj.ctype.objtype - if not(self.enforce_strict_access) or target.size == node.expr.size / 8: - nobj = CGenDeref(finalcgenobj) - found.append(nobj) - elif isinstance(target, ObjCArray): - final = target.objtype - if not(self.enforce_strict_access) or final.size == node.expr.size / 8: - nobj = CGenDeref(subcgenobj) - found.append(nobj) - - else: - if not(self.enforce_strict_access) or target.size == node.expr.size / 8: - nobj = CGenDeref(subcgenobj) - found.append(nobj) - assert found + if isinstance(subcgenobj.ctype, ObjCArray): + nobj = CGenArray(subcgenobj, 0, + void_type.align, + void_type.size) + target = nobj.ctype.objtype + for finalcgenobj in self.cgen_access(nobj, target, 0, True, lvl): + assert isinstance(finalcgenobj.ctype, ObjCPtr) + if self.enforce_strict_access and finalcgenobj.ctype.objtype.size != node.expr.size / 8: + continue + found.add(CGenDeref(finalcgenobj)) + + elif isinstance(subcgenobj.ctype, ObjCPtr): + target = subcgenobj.ctype.objtype + # target : type(elem) + if isinstance(target, (ObjCStruct, ObjCUnion)): + for finalcgenobj in self.cgen_access(subcgenobj, target, 0, True, lvl): + target = finalcgenobj.ctype.objtype + if self.enforce_strict_access and target.size != node.expr.size / 8: + continue + found.add(CGenDeref(finalcgenobj)) + elif isinstance(target, ObjCArray): + if self.enforce_strict_access and subcgenobj.ctype.size != node.expr.size / 8: + continue + found.update(self.cgen_access(CGenDeref(subcgenobj), target, + 0, False, lvl)) + else: + if self.enforce_strict_access and target.size != node.expr.size / 8: + continue + found.add(CGenDeref(subcgenobj)) + if not found: + return None return found - reduction_rules = [reduce_id, + reduction_rules = [reduce_known_expr, reduce_int, reduce_op, reduce_mem, ] - def get_access(self, expr): + def get_accesses(self, expr, expr_context=None): """Generate C access(es) for the native Miasm expression @expr @expr: native Miasm expression + @expr_context: a dictionnary linking known expressions to their + types. An expression is linked to a tuple of types. """ - - return self.reduce(expr) + if expr_context is None: + expr_context = self.expr_types + ret = self.reduce(expr, ctxt=expr_context) + if ret.info is None: + return set() + return ret.info class ExprCToExpr(ExprReducer): @@ -1145,25 +1123,25 @@ class ExprCToExpr(ExprReducer): CST = "CST" - def reduce_id(self, node, _): - """Reduce ExprId""" - if not isinstance(node.expr, ExprId): - return None - if node.expr.name in self.expr_types: - objc = self.expr_types[node.expr.name] + def reduce_known_expr(self, node, ctxt, **kwargs): + """Reduce known expressions""" + if str(node.expr) in ctxt: + objc = ctxt[str(node.expr)] out = (node.expr, objc) - else: + elif node.expr.is_id(): out = (node.expr, None) + else: + out = None return out - def reduce_int(self, node, _): + def reduce_int(self, node, **kwargs): """Reduce ExprInt""" if not isinstance(node.expr, ExprInt): return None return self.CST - def reduce_op_memberof(self, node, _): + def reduce_op_memberof(self, node, **kwargs): """Reduce -> operator""" if not node.expr.is_op('->'): @@ -1173,6 +1151,8 @@ class ExprCToExpr(ExprReducer): assert isinstance(node.args[1].expr, ExprId) field = node.args[1].expr.name src, src_type = node.args[0].info + if src_type is None: + return None assert isinstance(src_type, (ObjCPtr, ObjCArray)) struct_dst = src_type.objtype assert isinstance(struct_dst, ObjCStruct) @@ -1192,7 +1172,7 @@ class ExprCToExpr(ExprReducer): assert found return out - def reduce_op_field(self, node, _): + def reduce_op_field(self, node, **kwargs): """Reduce field operator (Struct or Union)""" if not node.expr.is_op('field'): @@ -1245,7 +1225,7 @@ class ExprCToExpr(ExprReducer): assert found return out - def reduce_op_array(self, node, _): + def reduce_op_array(self, node, **kwargs): """Reduce array operator""" if not node.expr.is_op('[]'): @@ -1282,7 +1262,7 @@ class ExprCToExpr(ExprReducer): out = (expr, objtype) return out - def reduce_op_addr(self, node, _): + def reduce_op_addr(self, node, **kwargs): """Reduce addr operator""" if not node.expr.is_op('addr'): @@ -1309,7 +1289,7 @@ class ExprCToExpr(ExprReducer): raise NotImplementedError("unk type") return out - def reduce_op_deref(self, node, _): + def reduce_op_deref(self, node, **kwargs): """Reduce deref operator""" if not node.expr.is_op('deref'): @@ -1317,11 +1297,19 @@ class ExprCToExpr(ExprReducer): out = [] src, src_type = node.args[0].info assert isinstance(src_type, (ObjCPtr, ObjCArray)) - size = src_type.objtype.size * 8 - out = (ExprMem(src, size), (src_type.objtype)) + void_type = self.types_mngr.void_ptr + if isinstance(src_type, ObjCPtr): + if isinstance(src_type.objtype, ObjCArray): + size = void_type.size*8 + else: + size = src_type.objtype.size * 8 + out = (ExprMem(src, size), (src_type.objtype)) + else: + size = src_type.objtype.size * 8 + out = (ExprMem(src, size), (src_type.objtype)) return out - reduction_rules = [reduce_id, + reduction_rules = [reduce_known_expr, reduce_int, reduce_op_memberof, reduce_op_field, @@ -1330,14 +1318,17 @@ class ExprCToExpr(ExprReducer): reduce_op_deref, ] - def get_expr(self, expr): + def get_expr(self, expr, c_context): """Translate a Miasm expression @expr (representing a C access) into a - native Miasm expression and its C type - + tuple composed of a native Miasm expression and its C type. @expr: Miasm expression (representing a C access) + @c_context: a dictionnary linking known tokens (strings) to their + types. A token is linked to only one type. """ - - return self.reduce(expr) + ret = self.reduce(expr, ctxt=c_context) + if ret.info is None: + return (None, None) + return ret.info class CTypesManager(object): @@ -1370,22 +1361,22 @@ class CTypesManager(object): out = self.leaf_types.types.get(type_id, None) assert out is not None elif isinstance(type_id, CTypeUnion): - out = ObjCUnion(type_id.name) + args = [] align_max, size_max = 0, 0 for name, field in type_id.fields: objc = self._get_objc(field, resolved, to_fix, lvl + 1) resolved[field] = objc align_max = max(align_max, objc.align) size_max = max(size_max, objc.size) - out.add_field(name, objc, 0, objc.size) + args.append((name, objc, 0, objc.size)) align, size = self.union_compute_align_size(align_max, size_max) - out.set_align_size(align, size) + out = ObjCUnion(type_id.name, align, size, args) elif isinstance(type_id, CTypeStruct): - out = ObjCStruct(type_id.name) align_max, size_max = 0, 0 + args = [] offset, align_max = 0, 1 pad_index = 0 for name, field in type_id.fields: @@ -1398,13 +1389,13 @@ class CTypesManager(object): pad_index += 1 size = new_offset - offset pad_objc = self._get_objc(CTypeArray(self.padding, size), resolved, to_fix, lvl + 1) - out.add_field(pad_name, pad_objc, offset, pad_objc.size) + args.append((pad_name, pad_objc, offset, pad_objc.size)) offset = new_offset - out.add_field(name, objc, offset, objc.size) + args.append((name, objc, offset, objc.size)) offset += objc.size align, size = self.struct_compute_align_size(align_max, offset) - out.set_align_size(align, size) + out = ObjCStruct(type_id.name, align, size, args) elif isinstance(type_id, CTypePtr): target = type_id.target @@ -1434,10 +1425,10 @@ class CTypesManager(object): type_id.type_ret, resolved, to_fix, lvl + 1) resolved[type_id.type_ret] = type_ret args = [] - for arg in type_id.args: + for name, arg in type_id.args: objc = self._get_objc(arg, resolved, to_fix, lvl + 1) resolved[arg] = objc - args.append(objc) + args.append((name, objc)) out = ObjCFunc(type_id.name, type_id.abi, type_ret, args, self.void_ptr.align, self.void_ptr.size) elif isinstance(type_id, CTypeEllipsis): @@ -1486,7 +1477,7 @@ class CTypesManager(object): return True elif isinstance(objc, ObjCFunc): assert self.check_objc(objc.type_ret, done) - for arg in objc.args: + for name, arg in objc.args: assert self.check_objc(arg, done) return True else: @@ -1583,18 +1574,16 @@ class CHandler(object): """ exprCToExpr_cls = ExprCToExpr - cTypeAnalyzer_cls = CTypeAnalyzer exprToAccessC_cls = ExprToAccessC def __init__(self, types_mngr, expr_types, simplify_c=access_simplifier, enforce_strict_access=True): self.exprc2expr = self.exprCToExpr_cls(expr_types, types_mngr) - self.type_analyzer = self.cTypeAnalyzer_cls(expr_types, types_mngr, - enforce_strict_access) self.access_c_gen = self.exprToAccessC_cls(expr_types, types_mngr, enforce_strict_access) + self.types_mngr = types_mngr self.simplify_c = simplify_c self.expr_types = expr_types @@ -1605,41 +1594,75 @@ class CHandler(object): self.expr_types = expr_types self.exprc2expr.updt_expr_types(expr_types) - self.type_analyzer.updt_expr_types(expr_types) self.access_c_gen.updt_expr_types(expr_types) - def expr_to_c(self, expr): - """Convert a Miasm @expr into it's C equivatlent string - @expr: Miasm expression""" + def expr_to_c_access(self, expr, expr_context=None): + """Generate the C access object(s) for a given native Miasm expression. + @expr: Miasm expression + @expr_context: a dictionnary linking known expressions to a set of types + """ + + if expr_context is None: + expr_context = self.expr_types + return self.access_c_gen.get_accesses(expr, expr_context) + + + def expr_to_c_and_types(self, expr, expr_context=None): + """Generate the C access string and corresponding type for a given + native Miasm expression. + @expr_context: a dictionnary linking known expressions to a set of types + """ + + accesses = set() + for access in self.expr_to_c_access(expr, expr_context): + c_str = access_str(access.to_expr().visit(self.simplify_c)) + accesses.add((c_str, access.ctype)) + return accesses + + def expr_to_c(self, expr, expr_context=None): + """Convert a Miasm @expr into it's C equivalent string + @expr_context: a dictionnary linking known expressions to a set of types + """ - expr_access = self.access_c_gen.get_access(expr) - accesses = [access for access in expr_access.info] - accesses_simp = [access_str(access.to_expr().visit(self.simplify_c)) - for access in accesses] - return accesses_simp + return set(access[0] + for access in self.expr_to_c_and_types(expr, expr_context)) - def expr_to_types(self, expr): + def expr_to_types(self, expr, expr_context=None): """Get the possible types of the Miasm @expr - @expr: Miasm expression""" + @expr_context: a dictionnary linking known expressions to a set of types + """ - return self.type_analyzer.get_type(expr).info + return set(access.ctype + for access in self.expr_to_c_access(expr, expr_context)) - def c_to_expr(self, c_str): - """Convert a C string expression to a Miasm expression - @c_str: C string""" + def c_to_expr_and_type(self, c_str, c_context): + """Convert a C string expression to a Miasm expression and it's + corresponding c type + @c_str: C string + @c_context: a dictionnary linking known tokens (strings) to its type. + """ ast = parse_access(c_str) - access_c = ast_get_c_access_expr(ast, self.expr_types) - return self.exprc2expr.get_expr(access_c).info[0] + access_c = ast_get_c_access_expr(ast, c_context) + return self.exprc2expr.get_expr(access_c, c_context) + + def c_to_expr(self, c_str, c_context): + """Convert a C string expression to a Miasm expression + @c_str: C string + @c_context: a dictionnary linking known tokens (strings) to its type. + """ + + expr, _ = self.c_to_expr_and_type(c_str, c_context) + return expr - def c_to_type(self, c_str): + def c_to_type(self, c_str, c_context): """Get the type of a C string expression - @expr: Miasm expression""" + @expr: Miasm expression + @c_context: a dictionnary linking known tokens (strings) to its type. + """ - ast = parse_access(c_str) - access_c = ast_get_c_access_expr(ast, self.expr_types) - ret_type = self.exprc2expr.get_expr(access_c).info[1] - return ret_type + _, ctype = self.c_to_expr_and_type(c_str, c_context) + return ctype class CLeafTypes(object): |