diff options
| -rw-r--r-- | example/expression/access_c.py | 106 | ||||
| -rw-r--r-- | example/expression/expr_c.py | 94 | ||||
| -rw-r--r-- | miasm2/arch/x86/ctype.py | 137 | ||||
| -rw-r--r-- | miasm2/core/ctypesmngr.py | 898 | ||||
| -rw-r--r-- | miasm2/core/objc.py | 493 |
5 files changed, 1233 insertions, 495 deletions
diff --git a/example/expression/access_c.py b/example/expression/access_c.py index 923a3331..9ba33822 100644 --- a/example/expression/access_c.py +++ b/example/expression/access_c.py @@ -8,9 +8,8 @@ from miasm2.analysis.depgraph import DependencyGraph from miasm2.arch.x86.ctype import CTypeAMD64_unk from miasm2.core.objc import CTypeAnalyzer, ExprToAccessC, CHandler -from miasm2.core.objc import ObjCPtr -from miasm2.core.ctypesmngr import CTypesManagerNotPacked - +from miasm2.core.objc import CTypesManagerNotPacked +from miasm2.core.ctypesmngr import CAstTypes, CTypePtr, CTypeStruct """ @@ -52,9 +51,9 @@ ExprCompose(int, 0) => int Then, in the C generator: ExprCompose(var1, 0) => var1 - """ + def find_call(ira): """Returns (irb, index) which call""" @@ -76,6 +75,7 @@ def find_call(ira): class MyCTypeAnalyzer(CTypeAnalyzer): + """Custom CTypeAnalyzer to complete type analysis""" def reduce_compose(self, node, _): """Custom reduction rule: {XXX, 0} -> typeof(XXX)""" @@ -89,6 +89,7 @@ class MyCTypeAnalyzer(CTypeAnalyzer): class MyExprToAccessC(ExprToAccessC): + """Custom ExprToAccessC to complete expression traduction to C""" def reduce_compose(self, node, _): """Custom reduction rule: {XXX, 0} -> XXX""" @@ -105,6 +106,7 @@ class MyExprToAccessC(ExprToAccessC): def get_funcs_arg0(ctx, ira, lbl_head): + """Compute DependencyGraph on the func @lbl_head""" g_dep = DependencyGraph(ira, follow_call=False) element = ira.arch.regs.RSI @@ -119,70 +121,66 @@ def get_funcs_arg0(ctx, ira, lbl_head): class MyCHandler(CHandler): + """Custom CHandler to add complementary C handling rules""" + cTypeAnalyzer_cls = MyCTypeAnalyzer exprToAccessC_cls = MyExprToAccessC -def test(data): - # Digest C informations - text = """ - struct human { - unsigned short age; - unsigned int height; - char name[50]; - }; - - struct ll_human { - struct ll_human* next; - struct human human; - }; - """ - my_types = CTypeAMD64_unk() - types_mngr = CTypesManagerNotPacked(my_types.types) +data = open(sys.argv[1]).read() +# Digest C informations +text = """ +struct human { + unsigned short age; + unsigned int height; + char name[50]; +}; - types_mngr.add_c_decl(text) +struct ll_human { + struct ll_human* next; + struct human human; +}; +""" - # Analyze binary - cont = Container.fallback_container(data, None, addr=0) +base_types = CTypeAMD64_unk() +types_ast = CAstTypes() +types_ast.add_c_decl(text) - machine = Machine("x86_64") - dis_engine, ira = machine.dis_engine, machine.ira +types_mngr = CTypesManagerNotPacked(types_ast, base_types) - mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) - addr_head = 0 - blocks = mdis.dis_multibloc(addr_head) - lbl_head = mdis.symbol_pool.getby_offset(addr_head) +# Analyze binary +cont = Container.fallback_container(data, None, addr=0) - ir_arch_a = ira(mdis.symbol_pool) - for block in blocks: - ir_arch_a.add_bloc(block) +machine = Machine("x86_64") +dis_engine, ira = machine.dis_engine, machine.ira - open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) +mdis = dis_engine(cont.bin_stream, symbol_pool=cont.symbol_pool) +addr_head = 0 +blocks = mdis.dis_multibloc(addr_head) +lbl_head = mdis.symbol_pool.getby_offset(addr_head) - # Main function's first argument's type is "struct ll_human*" - void_ptr = types_mngr.void_ptr - ll_human = types_mngr.get_type(('ll_human',)) - ptr_llhuman = ObjCPtr('noname', ll_human, - void_ptr.align, void_ptr.size) +ir_arch_a = ira(mdis.symbol_pool) +for block in blocks: + ir_arch_a.add_bloc(block) - arg0 = ExprId('ptr', 64) - ctx = {ir_arch_a.arch.regs.RDI: arg0} - expr_types = {arg0.name: ptr_llhuman} +open('graph_irflow.dot', 'w').write(ir_arch_a.graph.dot()) - mychandler = MyCHandler(types_mngr, expr_types) +# Main function's first argument's type is "struct ll_human*" +ptr_llhuman = types_mngr.get_objc(CTypePtr(CTypeStruct('ll_human'))) +arg0 = ExprId('ptr', 64) +ctx = {ir_arch_a.arch.regs.RDI: arg0} +expr_types = {arg0.name: ptr_llhuman} - for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): - print "Access:", expr - target_types = mychandler.expr_to_types(expr) - for target_type in target_types: - print '\tType:', target_type - c_strs = mychandler.expr_to_c(expr) - for c_str in c_strs: - print "\tC access:", c_str - print +mychandler = MyCHandler(types_mngr, expr_types) +for expr in get_funcs_arg0(ctx, ir_arch_a, lbl_head): + print "Access:", expr + target_types = mychandler.expr_to_types(expr) + for target_type in target_types: + print '\tType:', target_type + c_strs = mychandler.expr_to_c(expr) + for c_str in c_strs: + print "\tC access:", c_str + print -if __name__ == '__main__': - data = open(sys.argv[1]).read() - test(data) diff --git a/example/expression/expr_c.py b/example/expression/expr_c.py index b3e59658..7adc7b50 100644 --- a/example/expression/expr_c.py +++ b/example/expression/expr_c.py @@ -4,67 +4,59 @@ Parse C expression to access variables and retrieve information: * variable type """ -from miasm2.core.ctypesmngr import CTypesManagerNotPacked +from miasm2.core.ctypesmngr import CTypeStruct, CAstTypes, CTypePtr from miasm2.arch.x86.ctype import CTypeAMD64_unk -from miasm2.core.objc import CHandler -from miasm2.core.objc import ObjCPtr +from miasm2.core.objc import CTypesManagerNotPacked, CHandler from miasm2.expression.expression import ExprId -def test(): - """ - C manipulation example - """ - - # Digest C informations - text = """ - struct line { - char color[20]; - int size; - }; - - struct rectangle { - unsigned int width; - unsigned int length; - struct line* line; - }; - """ - - # Type manager for x86 64: structures not packed - my_types = CTypeAMD64_unk() - types_mngr = CTypesManagerNotPacked(my_types.types) +""" +C manipulation example +""" - # Add C types definition - types_mngr.add_c_decl(text) +# Digest C informations +text = """ +struct line { + char color[20]; + int size; +}; + +struct rectangle { + unsigned int width; + unsigned int length; + struct line* line; +}; +""" - # Create the ptr variable with type "struct rectangle*" - void_ptr = types_mngr.void_ptr - rectangle = types_mngr.get_type(('rectangle',)) - ptr_rectangle = ObjCPtr('noname', rectangle, - void_ptr.align, void_ptr.size) +# Type manager for x86 64: structures not packed +base_types = CTypeAMD64_unk() +types_ast = CAstTypes() +# Add C types definition +types_ast.add_c_decl(text) - ptr = ExprId('ptr', 64) - expr_types = {ptr.name: ptr_rectangle} +types_mngr = CTypesManagerNotPacked(types_ast, base_types) - mychandler = CHandler(types_mngr, expr_types) +# Create the ptr variable with type "struct rectangle*" +ptr_rectangle = types_mngr.get_objc(CTypePtr(CTypeStruct('rectangle'))) +ptr = ExprId('ptr', 64) +expr_types = {ptr.name: ptr_rectangle} - # Parse some C accesses - c_acceses = ["ptr->width", - "ptr->length", - "ptr->line", - "ptr->line->color", - "ptr->line->color[3]", - "ptr->line->size" - ] +mychandler = CHandler(types_mngr, expr_types) - for c_str in c_acceses: - expr = mychandler.c_to_expr(c_str) - c_type = mychandler.c_to_type(c_str) - print 'C access:', c_str - print '\tExpr:', expr - print '\tType:', c_type +# Parse some C accesses +c_acceses = ["ptr->width", + "ptr->length", + "ptr->line", + "ptr->line->color", + "ptr->line->color[3]", + "ptr->line->size" + ] -if __name__ == '__main__': - test() +for c_str in c_acceses: + expr = mychandler.c_to_expr(c_str) + c_type = mychandler.c_to_type(c_str) + print 'C access:', c_str + print '\tExpr:', expr + print '\tType:', c_type diff --git a/miasm2/arch/x86/ctype.py b/miasm2/arch/x86/ctype.py index 6b5844d7..0d8cd924 100644 --- a/miasm2/arch/x86/ctype.py +++ b/miasm2/arch/x86/ctype.py @@ -1,7 +1,8 @@ -from miasm2.core.objc import CTypeTemplate, ObjCDecl +from miasm2.core.objc import CLeafTypes, ObjCDecl +from miasm2.core.ctypesmngr import CTypeId, CTypePtr -class CTypeAMD64_unk(CTypeTemplate): +class CTypeAMD64_unk(CLeafTypes): """Define C types sizes/alignement for x86_64 architecture""" obj_char = ObjCDecl("char", 1, 1) @@ -13,38 +14,116 @@ class CTypeAMD64_unk(CTypeTemplate): obj_ushort = ObjCDecl("ushort", 2, 2) obj_uint = ObjCDecl("uint", 4, 4) obj_ulong = ObjCDecl("ulong", 8, 8) + + obj_void = ObjCDecl("void", 1, 1) + + obj_enum = ObjCDecl("enum", 4, 4) + + obj_float = ObjCDecl("float", 4, 4) + obj_double = ObjCDecl("double", 8, 8) + obj_ldouble = ObjCDecl("ldouble", 16, 16) + + def __init__(self): + self.types = { + CTypeId('char'): self.obj_char, + CTypeId('short'): self.obj_short, + CTypeId('int'): self.obj_int, + CTypeId('void'): self.obj_void, + CTypeId('long',): self.obj_long, + CTypeId('float'): self.obj_float, + CTypeId('double'): self.obj_double, + + CTypeId('signed', 'char'): self.obj_char, + CTypeId('unsigned', 'char'): self.obj_uchar, + + CTypeId('short', 'int'): self.obj_short, + CTypeId('signed', 'short'): self.obj_short, + CTypeId('signed', 'short', 'int'): self.obj_short, + CTypeId('unsigned', 'short'): self.obj_ushort, + CTypeId('unsigned', 'short', 'int'): self.obj_ushort, + + CTypeId('unsigned', ): self.obj_uint, + CTypeId('unsigned', 'int'): self.obj_uint, + CTypeId('signed', 'int'): self.obj_int, + + CTypeId('long', 'int'): self.obj_long, + CTypeId('long', 'long'): self.obj_long, + CTypeId('long', 'long', 'int'): self.obj_long, + CTypeId('signed', 'long', 'long'): self.obj_long, + CTypeId('unsigned', 'long', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, + + CTypeId('signed', 'long'): self.obj_long, + CTypeId('unsigned', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'int'): self.obj_ulong, + + CTypeId('long', 'double'): self.obj_ldouble, + CTypePtr(CTypeId('void')): self.obj_ulong, + } + + + + + +class CTypeX86_unk(CLeafTypes): + """Define C types sizes/alignement for x86_64 architecture""" + + obj_char = ObjCDecl("char", 1, 1) + obj_short = ObjCDecl("short", 2, 2) + obj_int = ObjCDecl("int", 4, 4) + obj_long = ObjCDecl("long", 4, 4) + + obj_uchar = ObjCDecl("uchar", 1, 1) + obj_ushort = ObjCDecl("ushort", 2, 2) + obj_uint = ObjCDecl("uint", 4, 4) + obj_ulong = ObjCDecl("ulong", 4, 4) + obj_void = ObjCDecl("void", 1, 1) obj_enum = ObjCDecl("enum", 4, 4) + obj_float = ObjCDecl("float", 4, 4) + obj_double = ObjCDecl("double", 8, 8) + obj_ldouble = ObjCDecl("ldouble", 16, 16) def __init__(self): self.types = { - ('char',): self.obj_char, - ('short',): self.obj_short, - ('int',): self.obj_int, - ('void',): self.obj_void, - ('enum',): self.obj_enum, - - ('signed', 'char'): self.obj_char, - ('unsigned', 'char'): self.obj_uchar, - ('signed', 'short', 'int'): self.obj_short, - ('short', 'int'): self.obj_short, - ('unsigned', 'short'): self.obj_ushort, - ('unsigned', 'short', 'int'): self.obj_ushort, - ('signed', 'int'): self.obj_int, - ('unsigned', 'int'): self.obj_uint, - ('long', 'int'): self.obj_long, - ('unsigned', 'long'): self.obj_ulong, - ('signed', 'long', 'int'): self.obj_long, - ('unsigned', 'long', 'int'): self.obj_ulong, - ('long',): self.obj_long, - ('unsigned', ): self.obj_uint, - - ('signed', 'long', 'long', 'int'): self.obj_long, - ('long', 'unsigned', 'int'): self.obj_ulong, - ('unsigned', 'long', 'long'): self.obj_ulong, - ('long', 'long', 'int'): self.obj_long, - ('unsigned', 'long', 'long', 'int'): self.obj_ulong, - ('void*',): self.obj_ulong, + CTypeId('char'): self.obj_char, + CTypeId('short'): self.obj_short, + CTypeId('int'): self.obj_int, + CTypeId('void'): self.obj_void, + CTypeId('long',): self.obj_long, + CTypeId('float'): self.obj_float, + CTypeId('double'): self.obj_double, + + CTypeId('signed', 'char'): self.obj_char, + CTypeId('unsigned', 'char'): self.obj_uchar, + + CTypeId('short', 'int'): self.obj_short, + CTypeId('signed', 'short'): self.obj_short, + CTypeId('signed', 'short', 'int'): self.obj_short, + CTypeId('unsigned', 'short'): self.obj_ushort, + CTypeId('unsigned', 'short', 'int'): self.obj_ushort, + + CTypeId('unsigned', ): self.obj_uint, + CTypeId('unsigned', 'int'): self.obj_uint, + CTypeId('signed', 'int'): self.obj_int, + + CTypeId('long', 'int'): self.obj_long, + CTypeId('long', 'long'): self.obj_long, + CTypeId('long', 'long', 'int'): self.obj_long, + CTypeId('signed', 'long', 'long'): self.obj_long, + CTypeId('unsigned', 'long', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'long', 'int'): self.obj_ulong, + + CTypeId('signed', 'long'): self.obj_long, + CTypeId('unsigned', 'long'): self.obj_ulong, + CTypeId('signed', 'long', 'int'): self.obj_long, + CTypeId('unsigned', 'long', 'int'): self.obj_ulong, + + CTypeId('long', 'double'): self.obj_ldouble, + CTypePtr(CTypeId('void')): self.obj_uint, } diff --git a/miasm2/core/ctypesmngr.py b/miasm2/core/ctypesmngr.py index 518c328d..0c1d55f4 100644 --- a/miasm2/core/ctypesmngr.py +++ b/miasm2/core/ctypesmngr.py @@ -1,66 +1,388 @@ -from pycparser import c_ast -from miasm2.core.objc import ObjCStruct, ObjCUnion, ObjCDecl, ObjCPtr, \ - ObjCArray, _ObjCRecurse, c_to_ast +import re +from pycparser import c_parser, c_ast -def fix_recursive_objects(types_mngr, obj): - """Replace _ObjCRecurse objects by its parent""" +RE_HASH_CMT = re.compile(r'^#\s*\d+.*$', flags=re.MULTILINE) - void_type = types_mngr.void_ptr +# Ref: ISO/IEC 9899:TC2 +# http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf + + +def c_to_ast(c_str): + """Transform a @c_str into a C ast + Note: will ignore lines containing code refs ie: + # 23 "miasm.h" + """ + + new_str = re.sub(RE_HASH_CMT, "", c_str) + parser = c_parser.CParser() + return parser.parse(new_str, filename='<stdin>') + + +class CTypeBase(object): + """Object to represent the 3 forms of C type: + * object types + * function types + * incomplete types + """ + + def __init__(self): + self.__repr = str(self) + self.__hash = hash(self.__repr) + + @property + def _typerepr(self): + return self.__repr + + def eq_base(self, other): + """Trivial common equality test""" + return self.__class__ == other.__class__ + + def __hash__(self): + return self.__hash + + def __repr__(self): + return self._typerepr + + +class CTypeId(CTypeBase): + """C type id: + int + unsigned int + """ + + def __init__(self, *names): + # Type specifier order does not matter + # so the cannonical form is ordered + self.names = tuple(sorted(names)) + super(CTypeId, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.names)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.names == other.names) + + def __str__(self): + return "<Id:%s>" % ', '.join(self.names) + + +class CTypeArray(CTypeBase): + """C type for array: + typedef int XXX[4]; + """ + + def __init__(self, target, size): + assert isinstance(target, CTypeBase) + self.target = target + self.size = size + super(CTypeArray, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.target, self.size)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.target == other.target and + self.size == other.size) + + def __str__(self): + return "<Array[%s]:%s>" % (self.size, str(self.target)) + + +class CTypePtr(CTypeBase): + """C type for pointer: + typedef int* XXX; + """ + + def __init__(self, target): + assert isinstance(target, CTypeBase) + self.target = target + super(CTypePtr, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.target)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.target == other.target) + + def __str__(self): + return "<Ptr:%s>" % str(self.target) + + +class CTypeStruct(CTypeBase): + """C type for structure""" + + def __init__(self, name, fields=None): + self.name = name + if fields is None: + fields = () + for _, field in fields: + assert isinstance(field, CTypeBase) + self.fields = tuple(fields) + super(CTypeStruct, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.name, self.fields)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.name == other.name and + self.fields == other.fields) + + def __str__(self): + out = [] + out.append("<Struct:%s>" % self.name) + for name, field in self.fields: + out.append("\t%-10s %s" % (name, field)) + return '\n'.join(out) + + +class CTypeUnion(CTypeBase): + """C type for union""" + + def __init__(self, name, fields=None): + self.name = name + if fields is None: + fields = [] + for _, field in fields: + assert isinstance(field, CTypeBase) + self.fields = tuple(fields) + super(CTypeUnion, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.name, self.fields)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.name == other.name and + self.fields == other.fields) + + def __str__(self): + out = [] + out.append("<Union:%s>" % self.name) + for name, field in self.fields: + out.append("\t%-10s %s" % (name, field)) + return '\n'.join(out) + + +class CTypeEnum(CTypeBase): + """C type for enums""" + + def __init__(self, name): + self.name = name + super(CTypeEnum, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.name)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.name == other.name) + + def __str__(self): + return "<Enum:%s>" % self.name + + +class CTypeFunc(CTypeBase): + """C type for enums""" + + def __init__(self, name, abi=None, type_ret=None, args=None): + if type_ret: + assert isinstance(type_ret, CTypeBase) + if args: + for arg in args: + assert isinstance(arg, CTypeBase) + args = tuple(args) + else: + args = tuple() + self.name = name + self.abi = abi + self.type_ret = type_ret + self.args = args + super(CTypeFunc, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.name, self.abi, + self.type_ret, self.args)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.name == other.name and + self.abi == other.abi and + self.type_ret == other.type_ret and + self.args == other.args) + + def __str__(self): + return "<Func:%s (%s) %s(%s)>" % (self.type_ret, + self.abi, + self.name, + ", ".join([str(arg) for arg in self.args])) + + +class CTypeEllipsis(CTypeBase): + """C type for ellipsis argument (...)""" + + def __hash__(self): + return hash((self.__class__)) + + def __eq__(self, other): + return self.eq_base(other) + + def __str__(self): + return "<Ellipsis>" + + +class CTypeSizeof(CTypeBase): + """C type for sizeof""" + + def __init__(self, target): + self.target = target + super(CTypeSizeof, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.target)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.target == other.target) + + def __str__(self): + return "<Sizeof(%s)>" % self.target + + +class CTypeOp(CTypeBase): + """C type for operator (+ * ...)""" + + def __init__(self, operator, *args): + self.operator = operator + self.args = tuple(args) + super(CTypeOp, self).__init__() + + def __hash__(self): + return hash((self.__class__, self.operator, self.args)) + + def __eq__(self, other): + return (self.eq_base(other) and + self.operator == other.operator and + self.args == other.args) + + def __str__(self): + return "<CTypeOp(%s, %s)>" % (self.operator, + ', '.join([str(arg) for arg in self.args])) + + +class FuncNameIdentifier(c_ast.NodeVisitor): + """Visit an c_ast to find IdentifierType""" + + def __init__(self): + super(FuncNameIdentifier, self).__init__() + self.node_name = None + + def visit_TypeDecl(self, node): + """Retrieve the name in a function declaration: + Only one IdentifierType is present""" + self.node_name = node + + +class CAstTypes(object): + """Store all defined C types and typedefs""" + INTERNAL_PREFIX = "__GENTYPE__" + + def __init__(self, knowntypes=None, knowntypedefs=None): + if knowntypes is None: + knowntypes = {} + if knowntypedefs is None: + knowntypedefs = {} - if isinstance(obj, ObjCStruct): - for i, (name, fieldtype, offset, size) in enumerate(obj.fields): - fieldtype = fix_recursive_objects(types_mngr, fieldtype) - obj.fields[i] = (name, fieldtype, offset, size) - elif isinstance(obj, ObjCDecl): - return obj - elif isinstance(obj, ObjCPtr): - target_obj = fix_recursive_objects(types_mngr, obj.objtype) - obj = ObjCPtr(obj.name, target_obj, void_type.align, void_type.size) - elif isinstance(obj, ObjCArray): - target_obj = fix_recursive_objects(types_mngr, obj.objtype) - obj = ObjCArray(target_obj, obj.elems) - elif isinstance(obj, ObjCUnion): - for i, (name, fieldtype, offset, size) in enumerate(obj.fields): - fieldtype = fix_recursive_objects(types_mngr, fieldtype) - obj.fields[i] = (name, fieldtype, offset, size) - elif isinstance(obj, _ObjCRecurse): - obj = types_mngr.get_type((obj.name,)) - else: - raise NotImplementedError("Unknown type") - return obj - - -class CTypesManager(object): - """Store all defined C types""" - - def __init__(self, knowntypes): self._types = dict(knowntypes) + self._typedefs = dict(knowntypedefs) self.cpt = 0 + self.loc_to_decl_info = {} + + self.ast_to_typeid_rules = { + c_ast.Struct: self.ast_to_typeid_struct, + c_ast.Union: self.ast_to_typeid_union, + c_ast.IdentifierType: self.ast_to_typeid_identifiertype, + c_ast.TypeDecl: self.ast_to_typeid_typedecl, + c_ast.Decl: self.ast_to_typeid_decl, + c_ast.Typename: self.ast_to_typeid_typename, + c_ast.FuncDecl: self.ast_to_typeid_funcdecl, + c_ast.Enum: self.ast_to_typeid_enum, + c_ast.PtrDecl: self.ast_to_typeid_ptrdecl, + c_ast.EllipsisParam: self.ast_to_typeid_ellipsisparam, + c_ast.ArrayDecl: self.ast_to_typeid_arraydecl, + } + + self.ast_parse_rules = { + c_ast.Struct: self.ast_parse_struct, + c_ast.Union: self.ast_parse_union, + c_ast.Typedef: self.ast_parse_typedef, + c_ast.TypeDecl: self.ast_parse_typedecl, + c_ast.IdentifierType: self.ast_parse_identifiertype, + c_ast.Decl: self.ast_parse_decl, + c_ast.PtrDecl: self.ast_parse_ptrdecl, + c_ast.Enum: self.ast_parse_enum, + c_ast.ArrayDecl: self.ast_parse_arraydecl, + c_ast.FuncDecl: self.ast_parse_funcdecl, + c_ast.FuncDef: self.ast_parse_funcdef, + c_ast.Pragma: self.ast_parse_pragma, + } def gen_uniq_name(self): """Generate uniq name for unamed strucs/union""" cpt = self.cpt self.cpt += 1 - return "__TYPE_INTERNAL__%d" % cpt + return self.INTERNAL_PREFIX + "%d" % cpt + + def is_generated_name(self, name): + """Return True if the name is internal""" + return name.startswith(self.INTERNAL_PREFIX) def add_type(self, type_id, type_obj): """Add new C type - @type_id: Type descriptor - @type_obj: ObjC* instance""" - self._types[type_id] = type_obj + @type_id: Type descriptor (CTypeBase instance) + @type_obj: Obj* instance""" + assert isinstance(type_id, CTypeBase) + if type_id in self._types: + assert self._types[type_id] == type_obj + else: + self._types[type_id] = type_obj + + def add_typedef(self, type_new, type_src): + """Add new typedef + @type_new: CTypeBase instance of the new type name + @type_src: CTypeBase instance of the target type""" + assert isinstance(type_src, CTypeBase) + self._typedefs[type_new] = type_src def get_type(self, type_id): - """Get C type - @type_id: Type descriptor + """Get ObjC corresponding to the @type_id + @type_id: Type descriptor (CTypeBase instance) """ - return self._types[type_id] + assert isinstance(type_id, CTypeBase) + if isinstance(type_id, CTypePtr): + subobj = self.get_type(type_id.target) + return CTypePtr(subobj) + if type_id in self._types: + return self._types[type_id] + elif type_id in self._typedefs: + return self.get_type(self._typedefs[type_id]) + return type_id def is_known_type(self, type_id): """Return true if @type_id is known - @type_id: Type descriptor + @type_id: Type descriptor (CTypeBase instance) """ - return type_id in self._types + if isinstance(type_id, CTypePtr): + return self.is_known_type(type_id.target) + if type_id in self._types: + return True + if type_id in self._typedefs: + return self.is_known_type(self._typedefs[type_id]) + return False def add_c_decl_from_ast(self, ast): """ @@ -69,6 +391,63 @@ class CTypesManager(object): """ self.ast_parse_declarations(ast) + + def digest_decl(self, c_str): + + char_id = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_" + + + # Seek deck + index_decl = [] + index = 0 + for decl in ['__cdecl__', '__stdcall__']: + index = 0 + while True: + index = c_str.find(decl, index) + if index == -1: + break + decl_off = index + decl_len = len(decl) + + index = index+len(decl) + while c_str[index] not in char_id: + index += 1 + + id_start = index + + while c_str[index] in char_id: + index += 1 + id_stop = index + + name = c_str[id_start:id_stop] + index_decl.append((decl_off, decl_len, id_start, id_stop, decl, )) + + index_decl.sort() + + # Remove decl + off = 0 + offsets = [] + for decl_off, decl_len, id_start, id_stop, decl in index_decl: + decl_off -= off + c_str = c_str[:decl_off] + c_str[decl_off+decl_len:] + off += decl_len + offsets.append((id_start-off, id_stop-off, decl)) + + index = 0 + lineno = 1 + + # Index to lineno, column + for id_start, id_stop, decl in offsets: + nbr = c_str.count('\n', index, id_start) + lineno += nbr + last_cr = c_str.rfind('\n', 0, id_start) + # column starts at 1 + column = id_start - last_cr + index = id_start + self.loc_to_decl_info[(lineno, column)] = decl + return c_str + + def add_c_decl(self, c_str): """ Adds types from a C string types declaring @@ -77,34 +456,13 @@ class CTypesManager(object): Returns the C ast @c_str: C string containing C types declarations """ + c_str = self.digest_decl(c_str) + ast = c_to_ast(c_str) self.add_c_decl_from_ast(ast) return ast - @property - def void_ptr(self): - """Return the void* type""" - return self.get_type(('void*',)) - - def ast_eval_size(self, ast): - """Evaluates the size of a C ast object - - @ast: parsed pycparser.c_ast object - """ - - if isinstance(ast, c_ast.TypeDecl): - result = self.ast_eval_size(ast.type) - elif isinstance(ast, c_ast.PtrDecl): - void_type = self.void_ptr - result = void_type.size - elif isinstance(ast, c_ast.IdentifierType): - obj = self.get_type(tuple(ast.names)) - result = obj.size - else: - raise NotImplementedError('TODO') - return result - def ast_eval_int(self, ast): """Eval a C ast object integer @@ -114,20 +472,32 @@ class CTypesManager(object): if isinstance(ast, c_ast.BinaryOp): left = self.ast_eval_int(ast.left) right = self.ast_eval_int(ast.right) - if ast.op == '*': - result = left * right - elif ast.op == '/': - assert left % right == 0 - result = left / right - elif ast.op == '+': - result = left + right - elif ast.op == '-': - result = left - right + is_pure_int = (isinstance(left, (int, long)) and + isinstance(right, (int, long))) + + if is_pure_int: + if ast.op == '*': + result = left * right + elif ast.op == '/': + assert left % right == 0 + result = left / right + elif ast.op == '+': + result = left + right + elif ast.op == '-': + result = left - right + elif ast.op == '<<': + result = left << right + elif ast.op == '>>': + result = left >> right + else: + raise NotImplementedError("Not implemented!") else: - raise NotImplementedError("Not implemented!") + result = CTypeOp(ast.op, left, right) + elif isinstance(ast, c_ast.UnaryOp): if ast.op == 'sizeof' and isinstance(ast.expr, c_ast.Typename): - result = self.ast_eval_size(ast.expr.type) + subobj = self.ast_to_typeid(ast.expr) + result = CTypeSizeof(subobj) else: raise NotImplementedError("Not implemented!") @@ -140,131 +510,180 @@ class CTypesManager(object): raise NotImplementedError("Not implemented!") return result - - def ast_get_align_size(self, ast): - """Evaluates the size/alignment of a C ast object - - @ast: parsed pycparser.c_ast object - """ - - if isinstance(ast, c_ast.Decl): - return self.ast_get_align_size(ast.type) - elif isinstance(ast, c_ast.TypeDecl): - return self.ast_get_align_size(ast.type) - elif isinstance(ast, c_ast.IdentifierType): - assert isinstance(ast, c_ast.IdentifierType) - names = ast.names - names = tuple(names) - if not self.is_known_type(names): - raise RuntimeError("Unknown type %r" % names) - obj = self.get_type(names) - elif isinstance(ast, c_ast.ArrayDecl): - subobj = self.ast_get_align_size(ast.type) - dim = ast.dim - value = self.ast_eval_int(dim) - obj = ObjCArray(subobj, value) - elif isinstance(ast, c_ast.Union): - obj = self.ast_gen_union_align_size(ast) - elif isinstance(ast, c_ast.Struct): - obj = self.ast_gen_struct_align_size(ast) - elif isinstance(ast, c_ast.PtrDecl): - void_type = self.void_ptr - subobj = self.ast_get_align_size(ast.type) - obj = ObjCPtr('noname', subobj, void_type.align, void_type.size) + def ast_to_typeid_struct(self, ast): + """Return the CTypeBase of an Struct ast""" + name = self.gen_uniq_name() if ast.name is None else ast.name + args = [] + if ast.decls: + for arg in ast.decls: + args.append((arg.name, self.ast_to_typeid(arg))) + decl = CTypeStruct(name, args) + return decl + + def ast_to_typeid_union(self, ast): + """Return the CTypeBase of an Union ast""" + name = self.gen_uniq_name() if ast.name is None else ast.name + args = [] + if ast.decls: + for arg in ast.decls: + args.append((arg.name, self.ast_to_typeid(arg))) + decl = CTypeUnion(name, args) + return decl + + def ast_to_typeid_identifiertype(self, ast): + """Return the CTypeBase of an IdentifierType ast""" + return CTypeId(*ast.names) + + def ast_to_typeid_typedecl(self, ast): + """Return the CTypeBase of a TypeDecl ast""" + return self.ast_to_typeid(ast.type) + + def ast_to_typeid_decl(self, ast): + """Return the CTypeBase of a Decl ast""" + return self.ast_to_typeid(ast.type) + + def ast_to_typeid_typename(self, ast): + """Return the CTypeBase of a TypeName ast""" + return self.ast_to_typeid(ast.type) + + def get_funcname(self, ast): + """Return the name of a function declaration ast""" + funcnameid = FuncNameIdentifier() + funcnameid.visit(ast) + node_name = funcnameid.node_name + if node_name.coord is not None: + lineno, column = node_name.coord.line, node_name.coord.column + decl_info = self.loc_to_decl_info.get((lineno, column), None) else: - raise NotImplementedError("Not implemented!") - assert isinstance(obj, _ObjCRecurse) or obj.align in [ - 1, 2, 4, 8, 16, 32, 64, 128, 256] - return obj - - def struct_compute_field_offset(self, obj, offset): - """Compute the offset of the field @obj in the current structure""" - raise NotImplementedError("Abstract method") - - def struct_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current structure""" - raise NotImplementedError("Abstract method") + decl_info = None + return node_name.declname, decl_info + + def ast_to_typeid_funcdecl(self, ast): + """Return the CTypeBase of an FuncDecl ast""" + type_ret = self.ast_to_typeid(ast.type) + name, decl_info = self.get_funcname(ast.type) + if ast.args: + args = [self.ast_to_typeid(arg) for arg in ast.args.params] + else: + args = [] - def union_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current union""" - raise NotImplementedError("Abstract method") + obj = CTypeFunc(name, decl_info, type_ret, args) + decl = CTypeFunc(name) + if not self.is_known_type(decl): + self.add_type(decl, obj) + return obj - def ast_gen_struct_align_size(self, ast): - """Evaluates the size/alignment of a C ast structure - (default packed) + def ast_to_typeid_enum(self, ast): + """Return the CTypeBase of an Enum ast""" + name = self.gen_uniq_name() if ast.name is None else ast.name + return CTypeEnum(name) - @ast: parsed pycparser.c_ast object - """ + def ast_to_typeid_ptrdecl(self, ast): + """Return the CTypeBase of a PtrDecl ast""" + return CTypePtr(self.ast_to_typeid(ast.type)) - offset = 0 - align_max = 1 + def ast_to_typeid_ellipsisparam(self, _): + """Return the CTypeBase of an EllipsisParam ast""" + return CTypeEllipsis() - if ast.name is None: - name = self.gen_uniq_name() + def ast_to_typeid_arraydecl(self, ast): + """Return the CTypeBase of an ArrayDecl ast""" + target = self.ast_to_typeid(ast.type) + if ast.dim is None: + value = None else: - name = ast.name - new_obj = ObjCStruct(name) - if ast.decls is None: - # If object is unknown, it's a recursive struct - if self.is_known_type((name,)): - obj = self.get_type((name,)) - else: - obj = _ObjCRecurse(name) - return obj - for arg in ast.decls: - obj = self.ast_get_align_size(arg) - align_max = max(align_max, obj.align) - offset = self.struct_compute_field_offset(obj, offset) - new_obj.add_field(arg.name, obj, offset, obj.size) - offset += obj.size - - # Structure alignement is its field max alignement - align, size = self.struct_compute_align_size(align_max, offset) - new_obj.set_align_size(align, size) - self.add_type((name, ), new_obj) - return new_obj - - def ast_gen_union_align_size(self, ast): - """Evaluates the size/alignment of a C ast union - @ast: parsed pycparser.c_ast object - """ - offset = 0 - align_max, size_max = 0, 0 + value = self.ast_eval_int(ast.dim) + return CTypeArray(target, value) + + def ast_to_typeid(self, ast): + """Return the CTypeBase of the @ast + @ast: pycparser.c_ast instance""" + cls = ast.__class__ + if not cls in self.ast_to_typeid_rules: + raise NotImplementedError("Strange type %r" % ast) + return self.ast_to_typeid_rules[cls](ast) + + # Ast parse type declarators + + def ast_parse_decl(self, ast): + """Parse ast Decl""" + return self.ast_parse_declaration(ast.type) + + def ast_parse_typedecl(self, ast): + """Parse ast Typedecl""" + return self.ast_parse_declaration(ast.type) + + def ast_parse_struct(self, ast): + """Parse ast Struct""" + obj = self.ast_to_typeid(ast) + if ast.decls and ast.name is not None: + # Add struct to types if named + decl = CTypeStruct(ast.name) + if not self.is_known_type(decl): + self.add_type(decl, obj) + return obj - if ast.name is None: - name = self.gen_uniq_name() - else: - name = ast.name - new_obj = ObjCUnion(name) - - for arg in ast.decls: - obj = self.ast_get_align_size(arg) - align_max = max(align_max, obj.align) - size_max = max(size_max, obj.size) - new_obj.add_field(arg.name, obj, - offset, obj.size) - - align, size = self.union_compute_align_size(align_max, size_max) - new_obj.set_align_size(align, size) - self.add_type((name, ), new_obj) - return new_obj - - def ast_gen_obj_align_size(self, ast): - """Evaluates the size/alignment of a C ast struct/union + def ast_parse_union(self, ast): + """Parse ast Union""" + obj = self.ast_to_typeid(ast) + if ast.decls and ast.name is not None: + # Add union to types if named + decl = CTypeUnion(ast.name) + if not self.is_known_type(decl): + self.add_type(decl, obj) + return obj + + def ast_parse_typedef(self, ast): + """Parse ast TypeDef""" + decl = CTypeId(ast.name) + obj = self.ast_parse_declaration(ast.type) + if (isinstance(obj, (CTypeStruct, CTypeUnion)) and + self.is_generated_name(obj.name)): + # Add typedef name to default name + # for a question of clarity + obj.name += "__%s" % ast.name + self.add_typedef(decl, obj) + # Typedef does not return any object + return None + + def ast_parse_identifiertype(self, ast): + """Parse ast IdentifierType""" + return CTypeId(*ast.names) + + def ast_parse_ptrdecl(self, ast): + """Parse ast PtrDecl""" + return CTypePtr(self.ast_parse_declaration(ast.type)) + + def ast_parse_enum(self, ast): + """Parse ast Enum""" + return self.ast_to_typeid(ast) + + def ast_parse_arraydecl(self, ast): + """Parse ast ArrayDecl""" + return self.ast_to_typeid(ast) + + def ast_parse_funcdecl(self, ast): + """Parse ast FuncDecl""" + return self.ast_to_typeid(ast) + + def ast_parse_funcdef(self, ast): + """Parse ast FuncDef""" + return self.ast_to_typeid(ast.decl) + + def ast_parse_pragma(self, _): + """Prama does not return any object""" + return None + + def ast_parse_declaration(self, ast): + """Add one ast type declaration to the type manager (packed style in type manager) @ast: parsed pycparser.c_ast object """ - - if isinstance(ast, c_ast.Struct): - obj = self.ast_gen_struct_align_size(ast) - elif isinstance(ast, c_ast.Union): - obj = self.ast_gen_union_align_size(ast) - else: - raise NotImplementedError("Not implemented!") - - fix_recursive_objects(self, obj) - return obj + cls = ast.__class__ + if not cls in self.ast_parse_rules: + raise NotImplementedError("Strange declaration %r" % cls) + return self.ast_parse_rules[cls](ast) def ast_parse_declarations(self, ast): """Add ast types declaration to the type manager @@ -272,104 +691,5 @@ class CTypesManager(object): @ast: parsed pycparser.c_ast object """ - for ext in ast.ext: - if isinstance(ext, c_ast.Decl) and\ - ext.name is None and\ - isinstance(ext.type, (c_ast.Struct, c_ast.Union)): - obj = self.ast_gen_obj_align_size(ext.type) - self.add_type((ext.type.name, ), obj) - - elif isinstance(ext, c_ast.Typedef) and\ - isinstance(ext.type.type, (c_ast.Struct, c_ast.Union)) and\ - not ext.type.type.decls: - new_type = ext.name - obj = self.get_type((ext.type.type.name,)) - self.add_type((ext.name,), obj) - - elif isinstance(ext, c_ast.Typedef) and\ - isinstance(ext.type.type, (c_ast.Struct, c_ast.Union)) and\ - ext.type.type.decls: - obj = self.ast_gen_obj_align_size(ext.type.type) - self.add_type((ext.type.declname, ), obj) - - elif isinstance(ext, c_ast.Typedef) and\ - isinstance(ext.type, c_ast.TypeDecl) and\ - isinstance(ext.type.type, c_ast.IdentifierType): - ext.show() - names = tuple(ext.type.type.names) - new_type = ext.name - - if not self.is_known_type(names): - raise RuntimeError("Unknown type %s" % repr(names)) - obj = self.get_type(names) - self.add_type((new_type,), obj) - - elif isinstance(ext, c_ast.Typedef) and\ - isinstance(ext.type.type, c_ast.Enum) and\ - isinstance(ext.type.type.values, c_ast.EnumeratorList): - # Enum are ints - obj = self.get_type(('enum',)) - self.add_type((ext.name,), obj) - - elif isinstance(ext, c_ast.Typedef) and\ - isinstance(ext.type, c_ast.ArrayDecl) and\ - isinstance(ext.type.type.type, c_ast.IdentifierType) and\ - self.is_known_type(tuple(ext.type.type.type.names)): - obj = self.get_type(tuple(ext.type.type.type.names)) - array = ext.type - - value = self.ast_eval_int(array.dim) - subobj = self.ast_get_align_size(array.type) - - obj = ObjCArray(subobj, value) - self.add_type((ext.name,), obj) - - elif isinstance(ext, c_ast.FuncDef) or\ - isinstance(ext.type, c_ast.FuncDecl): - continue - else: - raise NotImplementedError("strange type %r" % ext) - - -class CTypesManagerNotPacked(CTypesManager): - """Store defined C types (not packed)""" - - def struct_compute_field_offset(self, obj, offset): - """Compute the offset of the field @obj in the current structure - (not packed)""" - - if obj.align > 1: - offset = (offset + obj.align - 1) & ~(obj.align - 1) - return offset - - def struct_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current structure - (not packed)""" - if align_max > 1: - size = (size + align_max - 1) & ~(align_max - 1) - return align_max, size - - def union_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current union - (not packed)""" - return align_max, size - - -class CTypesManagerPacked(CTypesManager): - """Store defined C types (packed form)""" - - def struct_compute_field_offset(self, _, offset): - """Compute the offset of the field @obj in the current structure - (packed form)""" - return offset - - def struct_compute_align_size(self, _, size): - """Compute the alignment and size of the current structure - (packed form)""" - return 1, size - - def union_compute_align_size(self, align_max, size): - """Compute the alignment and size of the current union - (packed form)""" - return 1, size + ret = self.ast_parse_declaration(ext) diff --git a/miasm2/core/objc.py b/miasm2/core/objc.py index 1b595442..9ae16291 100644 --- a/miasm2/core/objc.py +++ b/miasm2/core/objc.py @@ -5,14 +5,14 @@ C helper for Miasm: * Miasm expression to C type """ -import re from pycparser import c_parser, c_ast from miasm2.expression.expression_reduce import ExprReducer from miasm2.expression.expression import ExprInt, ExprId, ExprOp, ExprMem -RE_HASH_CMT = re.compile(r'^#\s*\d+.*$', flags=re.MULTILINE) +from miasm2.core.ctypesmngr import CTypeUnion, CTypeStruct, CTypeId, CTypePtr,\ + CTypeArray, CTypeOp, CTypeSizeof, CTypeEnum, CTypeFunc, CTypeEllipsis class ObjC(object): @@ -24,6 +24,11 @@ class ObjC(object): self.align = align self.size = size + def eq_base(self, other): + return (self.__class__ == other.__class__ and + self.align == other.align and + self.size == other.size) + class ObjCDecl(ObjC): """C Declaration identified""" @@ -35,6 +40,14 @@ class ObjCDecl(ObjC): def __repr__(self): return '<%s %s>' % (self.__class__.__name__, self.name) + def __str__(self): + return '%s' % (self.name) + + def __eq__(self, other): + if not self.eq_base(other): + return False + return self.name == other.name + class ObjCInt(ObjC): """C integer""" @@ -44,29 +57,55 @@ class ObjCInt(ObjC): self.size = None self.align = None + def __str__(self): + return 'int' + + def __eq__(self, other): + return self.eq_base(other) + class ObjCPtr(ObjC): """C Pointer""" - def __init__(self, name, objtype, void_p_align, void_p_size): + def __init__(self, objtype, void_p_align, void_p_size): """Init ObjCPtr - @name: object name @objtype: pointer target ObjC @void_p_align: pointer alignment (in bytes) @void_p_size: pointer size (in bytes) """ super(ObjCPtr, self).__init__() - self.name, self.objtype = name, objtype + self.objtype = objtype self.align = void_p_align self.size = void_p_size def __repr__(self): - return '<PTR %r>' % (self.objtype) + return '<%s %r>' % (self.__class__.__name__, + self.objtype.__class__) def __str__(self): - return '<PTR %r>' % (self.objtype) + target = self.objtype + if isinstance(target, ObjCDecl): + return "%s *" % target.name + elif isinstance(target, ObjCPtr): + return "%s *" % target + elif isinstance(target, ObjCStruct): + return "struct %s *" % target.name + elif isinstance(target, ObjCUnion): + return "union %s *" % target.name + elif isinstance(target, ObjCArray): + return "%s (*)[%s]" % (target.objtype, target.elems) + elif isinstance(target, ObjCFunc): + args = ", ".join([str(arg) for arg in target.args]) + return "%s (*%s)(%s)" % (target.type_ret, target.name, args) + else: + return '*%s' % (target) + + def __eq__(self, other): + if not self.eq_base(other): + return False + return self.objtype == other.objtype class ObjCArray(ObjC): @@ -88,18 +127,14 @@ class ObjCArray(ObjC): def __repr__(self): return '<%r[%d]>' % (self.objtype, self.elems) + def __str__(self): + return '%s[%d]' % (self.objtype, self.elems) -class _ObjCRecurse(ObjC): - """Special C object array, used in recursive declarations. Used in parser - *only*: this object is not intend to be in final objects - """ - - def __init__(self, name): - super(_ObjCRecurse, self).__init__() - self.name = name - - def __repr__(self): - return '<%r>' % (self.name) + def __eq__(self, other): + if not self.eq_base(other): + return False + return (self.elems == other.elems and + self.objtype == other.objtype) class ObjCStruct(ObjC): @@ -121,16 +156,27 @@ class ObjCStruct(ObjC): self.fields.append((name, objtype, offset, size)) def __repr__(self): - return '<%s %s>' % (self.__class__.__name__, self.name) - - def __str__(self): out = [] out.append("Struct %s: (align: %d)" % (self.name, self.align)) out.append(" off sz name") for name, objtype, offset, size in self.fields: - out.append(" %-3d %-3d %-10s %r" % (offset, size, name, objtype)) + out.append(" 0x%-3x %-3d %-10s %r" % + (offset, size, name, objtype.__class__.__name__)) return '\n'.join(out) + def __str__(self): + return 'struct %s' % (self.name) + + def __eq__(self, other): + if not (self.eq_base(other) and self.name == other.name): + return False + if len(self.fields) != len(other.fields): + return False + for field_a, field_b in zip(self.fields, other.fields): + if field_a != field_b: + return False + return True + class ObjCUnion(ObjC): """C object for unions""" @@ -151,16 +197,76 @@ class ObjCUnion(ObjC): self.fields.append((name, objtype, offset, size)) def __repr__(self): - return '<%s %s>' % (self.__class__.__name__, self.name) - - def __str__(self): out = [] out.append("Union %s: (align: %d)" % (self.name, self.align)) out.append(" off sz name") for name, objtype, offset, size in self.fields: - out.append(" %-3d %-3d %-10s %r" % (offset, size, name, objtype)) + out.append(" 0x%-3x %-3d %-10s %r" % + (offset, size, name, objtype)) + return '\n'.join(out) + + def __str__(self): + return 'union %s' % (self.name) + + def __eq__(self, other): + if not (self.eq_base(other) and self.name == other.name): + return False + if len(self.fields) != len(other.fields): + return False + for field_a, field_b in zip(self.fields, other.fields): + if field_a != field_b: + return False + return True + + +class ObjCEllipsis(ObjC): + """C integer""" + + def __init__(self): + super(ObjCEllipsis, self).__init__() + self.size = None + self.align = None + + def __eq__(self, other): + return self.eq_base(other) + + +class ObjCFunc(ObjC): + """C object for Functions""" + + def __init__(self, name, abi, type_ret, args, void_p_align, void_p_size): + super(ObjCFunc, self).__init__() + self.name = name + self.abi = abi + self.type_ret = type_ret + self.args = args + self.align = void_p_align + self.size = void_p_size + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, + self.name) + + def __str__(self): + out = [] + out.append("Function (%s) %s: (align: %d)" % (self.abi, self.name, self.align)) + out.append(" ret: %s" % (str(self.type_ret))) + out.append(" Args:") + for arg in self.args: + out.append(" %s" % arg) return '\n'.join(out) + def __eq__(self, other): + if not (self.eq_base(other) and self.name == other.name and + self.type_ret == other.type_ret): + return False + if len(self.args) != len(other.args): + return False + for arg_a, arg_b in zip(self.args, other.args): + if arg_a != arg_b: + return False + return True + def access_simplifier(expr): """Expression visitor to simplify a C access represented in Miasm @@ -311,7 +417,7 @@ class CGenField(CGen): if isinstance(fieldtype, ObjCArray): ctype = fieldtype else: - ctype = ObjCPtr(field, fieldtype, void_p_align, void_p_size) + ctype = ObjCPtr(fieldtype, void_p_align, void_p_size) self.ctype = ctype def to_c(self): @@ -333,9 +439,14 @@ class CGenField(CGen): """Generate Miasm expression representing the C access""" if isinstance(self.ctype, ObjCArray): - return ExprOp("field", self.struct.to_expr(), ExprId(self.field, self.default_size)) + return ExprOp("field", + self.struct.to_expr(), + ExprId(self.field, self.default_size)) elif isinstance(self.ctype, ObjCPtr): - return ExprOp("addr", ExprOp("field", self.struct.to_expr(), ExprId(self.field, self.default_size))) + return ExprOp("addr", + ExprOp("field", + self.struct.to_expr(), + ExprId(self.field, self.default_size))) else: raise RuntimeError("Strange case") @@ -361,7 +472,7 @@ class CGenArray(CGen): elif isinstance(ctype, ObjCArray) and isinstance(ctype.objtype, ObjCArray): ctype = ctype.objtype elif isinstance(ctype, ObjCArray): - ctype = ObjCPtr('noname', ctype.objtype, void_p_align, void_p_size) + ctype = ObjCPtr(ctype.objtype, void_p_align, void_p_size) else: raise TypeError("Strange case") self.ctype = ctype @@ -387,9 +498,14 @@ class CGenArray(CGen): """Generate Miasm expression representing the C access""" if isinstance(self.ctype, ObjCPtr): - return ExprOp("addr", ExprOp("[]", self.name.to_expr(), ExprInt(self.element, self.default_size))) + return ExprOp("addr", + ExprOp("[]", + self.name.to_expr(), + ExprInt(self.element, self.default_size))) elif isinstance(self.ctype, ObjCArray): - return ExprOp("[]", self.name.to_expr(), ExprInt(self.element, self.default_size)) + return ExprOp("[]", + self.name.to_expr(), + ExprInt(self.element, self.default_size)) else: raise RuntimeError("Strange case") @@ -479,18 +595,6 @@ def ast_get_c_access_expr(ast, expr_types, lvl=0): return obj -def c_to_ast(c_str): - """Transform a @c_str into a C ast - Note: will ignore lines containing code refs ie: - # 23 "miasm.h" - """ - - new_str = re.sub(RE_HASH_CMT, "", c_str) - parser = c_parser.CParser() - return parser.parse(new_str, filename='<stdin>') - - - def parse_access(c_access): """Parse C access @@ -550,8 +654,7 @@ class CTypeAnalyzer(ExprReducer): if isinstance(base_type, ObjCStruct): if offset == 0 and not deref: # In this case, return the struct* - obj = ObjCPtr('noname', base_type, - void_type.align, void_type.size) + obj = ObjCPtr(base_type, void_type.align, void_type.size) new_type = [obj] return new_type for _, subtype, f_offset, size in base_type.fields: @@ -576,15 +679,14 @@ class CTypeAnalyzer(ExprReducer): elif isinstance(base_type, ObjCDecl): if offset != 0: return [] - obj = ObjCPtr('noname', base_type, void_type.align, void_type.size) + obj = ObjCPtr(base_type, void_type.align, void_type.size) new_type = [obj] elif isinstance(base_type, ObjCUnion): out = [] if offset == 0 and not deref: # In this case, return the struct* - obj = ObjCPtr('noname', base_type, - void_type.align, void_type.size) + obj = ObjCPtr(base_type, void_type.align, void_type.size) new_type = [obj] return new_type for _, objtype, f_offset, size in base_type.fields: @@ -595,7 +697,7 @@ class CTypeAnalyzer(ExprReducer): out += new_type new_type = out elif isinstance(base_type, ObjCPtr): - obj = ObjCPtr('noname', base_type, void_type.align, void_type.size) + obj = ObjCPtr(base_type, void_type.align, void_type.size) new_type = [obj] else: raise NotImplementedError("deref type %r" % base_type) @@ -603,7 +705,6 @@ class CTypeAnalyzer(ExprReducer): def reduce_id(self, node, _): """Get type of ExprId""" - if not(isinstance(node.expr, ExprId) and node.expr.name in self.expr_types): return None return [self.expr_types[node.expr.name]] @@ -615,13 +716,22 @@ class CTypeAnalyzer(ExprReducer): return None return [self.CST] + def get_solo_type(self, node): + """Return the type of the @node if it has only one possible type, + different from not None. In othe cases, return None. + """ + if node.info is None or len(node.info) != 1: + return None + return type(node.info[0]) + def reduce_ptr_plus_cst(self, node, lvl): """Get type of ptr + CST""" - if not (isinstance(node.expr, ExprOp) and - node.expr.op == "+" and - len(node.args) == 2 and - set(type(x) for x in node.args[0].info + node.args[1].info) == set([ObjCInt, ObjCPtr])): + if not node.expr.is_op("+") or len(node.args) != 2: + return None + args_types = set([self.get_solo_type(node.args[0]), + self.get_solo_type(node.args[1])]) + if args_types != set([ObjCInt, ObjCPtr]): return None arg0, arg1 = node.args out = [] @@ -639,9 +749,13 @@ class CTypeAnalyzer(ExprReducer): def reduce_cst_op_cst(self, node, _): """Get type of CST + CST""" - if not (isinstance(node.expr, ExprOp) and - node.expr.op == "+" and - set(type(x) for x in node.args[0].info + node.args[1].info) == set([ObjCInt])): + if not node.expr.is_op("+") or len(node.args) != 2: + return None + if node.args[0] is None or node.args[1] is None: + return None + args_types = set([self.get_solo_type(node.args[0]), + self.get_solo_type(node.args[1])]) + if args_types != set([ObjCInt]): return None return [self.CST] @@ -873,7 +987,8 @@ class ExprToAccessC(ExprReducer): node.expr.name in self.expr_types): return None - out = CGenId(self.expr_types[node.expr.name], node.expr.name) + objc = self.expr_types[node.expr.name] + out = CGenId(objc, node.expr.name) return [out] def reduce_int(self, node, _): @@ -883,13 +998,22 @@ class ExprToAccessC(ExprReducer): return None return [CGenInt(int(node.expr))] + def get_solo_type(self, node): + """Return the type of the @node if it has only one possible type, + different from not None. In othe cases, return None. + """ + if node.info is None or len(node.info) != 1: + return None + return type(node.info[0].ctype) + def reduce_op(self, node, lvl): """Generate access for ExprOp""" - if not (isinstance(node.expr, ExprOp) and - node.expr.op == "+" and - len(node.args) == 2 and - set(type(x.ctype) for x in node.args[0].info + node.args[1].info) == set([ObjCInt, ObjCPtr])): + if not node.expr.is_op("+") or len(node.args) != 2: + return None + args_types = set([self.get_solo_type(node.args[0]), + self.get_solo_type(node.args[1])]) + if args_types != set([ObjCInt, ObjCPtr]): return None arg0, arg1 = node.args @@ -915,11 +1039,13 @@ class ExprToAccessC(ExprReducer): if not isinstance(node.expr, ExprMem): return None - + if node.arg.info is None: + return None assert isinstance(node.arg.info, list) found = [] for subcgenobj in node.arg.info: - assert isinstance(subcgenobj.ctype, ObjCPtr) + if not isinstance(subcgenobj.ctype, ObjCPtr): + return None target = subcgenobj.ctype.objtype # target : type(elem) if isinstance(target, (ObjCStruct, ObjCUnion)): @@ -1017,11 +1143,11 @@ class ExprCToExpr(ExprReducer): def reduce_id(self, node, _): """Reduce ExprId""" - if not isinstance(node.expr, ExprId): return None if node.expr.name in self.expr_types: - out = (node.expr, self.expr_types[node.expr.name]) + objc = self.expr_types[node.expr.name] + out = (node.expr, objc) else: out = (node.expr, None) return out @@ -1164,16 +1290,16 @@ class ExprCToExpr(ExprReducer): void_type = self.types_mngr.void_ptr if isinstance(src_type, ObjCArray): - out = (src.arg, ObjCPtr('noname', src_type.objtype, + out = (src.arg, ObjCPtr(src_type.objtype, void_type.align, void_type.size)) elif isinstance(src, ExprMem): - out = (src.arg, ObjCPtr('noname', src_type, + out = (src.arg, ObjCPtr(src_type, void_type.align, void_type.size)) elif isinstance(src_type, ObjCStruct): - out = (src, ObjCPtr('noname', src_type, + out = (src, ObjCPtr(src_type, void_type.align, void_type.size)) elif isinstance(src_type, ObjCUnion): - out = (src, ObjCPtr('noname', src_type, + out = (src, ObjCPtr(src_type, void_type.align, void_type.size)) else: raise NotImplementedError("unk type") @@ -1210,6 +1336,229 @@ class ExprCToExpr(ExprReducer): return self.reduce(expr) +class CTypesManager(object): + """Represent a C object, without any layout information""" + + def __init__(self, types_ast, leaf_types): + self.types_ast = types_ast + self.leaf_types = leaf_types + + @property + def void_ptr(self): + """Retrieve a void* objc""" + return self.leaf_types.types.get(CTypePtr(CTypeId('void'))) + + def _get_objc(self, type_id, resolved=None, to_fix=None, lvl=0): + if resolved is None: + resolved = {} + if to_fix is None: + to_fix = [] + if type_id in resolved: + return resolved[type_id] + type_id = self.types_ast.get_type(type_id) + fixed = True + if isinstance(type_id, CTypeId): + out = self.leaf_types.types.get(type_id, None) + assert out is not None + elif isinstance(type_id, CTypeUnion): + out = ObjCUnion(type_id.name) + align_max, size_max = 0, 0 + for name, field in type_id.fields: + objc = self._get_objc(field, resolved, to_fix, lvl + 1) + resolved[field] = objc + align_max = max(align_max, objc.align) + size_max = max(size_max, objc.size) + out.add_field(name, objc, 0, objc.size) + + align, size = self.union_compute_align_size(align_max, size_max) + out.set_align_size(align, size) + + elif isinstance(type_id, CTypeStruct): + out = ObjCStruct(type_id.name) + align_max, size_max = 0, 0 + + offset, align_max = 0, 1 + for name, field in type_id.fields: + objc = self._get_objc(field, resolved, to_fix, lvl + 1) + resolved[field] = objc + align_max = max(align_max, objc.align) + offset = self.struct_compute_field_offset(objc, offset) + out.add_field(name, objc, offset, objc.size) + offset += objc.size + + align, size = self.struct_compute_align_size(align_max, offset) + out.set_align_size(align, size) + + elif isinstance(type_id, CTypePtr): + target = type_id.target + out = ObjCPtr(None, self.void_ptr.align, self.void_ptr.size) + fixed = False + + elif isinstance(type_id, CTypeArray): + target = type_id.target + objc = self._get_objc(target, resolved, to_fix, lvl + 1) + resolved[target] = objc + if type_id.size is None: + # case: toto[] + # return ObjCPtr + out = ObjCPtr(objc, self.void_ptr.align, self.void_ptr.size) + else: + size = self.size_to_int(type_id.size) + if size is None: + raise RuntimeError('Enable to compute objc size') + else: + out = ObjCArray(objc, size) + assert out.size is not None and out.align is not None + elif isinstance(type_id, CTypeEnum): + # Enum are integer + return self.leaf_types.types.get(CTypeId('int')) + elif isinstance(type_id, CTypeFunc): + type_ret = self._get_objc( + type_id.type_ret, resolved, to_fix, lvl + 1) + resolved[type_id.type_ret] = type_ret + args = [] + for arg in type_id.args: + objc = self._get_objc(arg, resolved, to_fix, lvl + 1) + resolved[arg] = objc + args.append(objc) + out = ObjCFunc(type_id.name, type_id.abi, type_ret, args, + self.void_ptr.align, self.void_ptr.size) + elif isinstance(type_id, CTypeEllipsis): + out = ObjCEllipsis() + else: + raise TypeError("Unknown type %r" % type_id.__class__) + if not isinstance(out, ObjCEllipsis): + assert out.align is not None and out.size is not None + + if fixed: + resolved[type_id] = out + else: + to_fix.append((type_id, out)) + return out + + def get_objc(self, type_id): + """Get the ObjC corresponding to the CType @type_id + @type_id: CTypeBase instance""" + resolved = {} + to_fix = [] + out = self._get_objc(type_id, resolved, to_fix) + # Fix sub objects + while to_fix: + type_id, objc_to_fix = to_fix.pop() + objc = self._get_objc(type_id.target, resolved, to_fix) + objc_to_fix.objtype = objc + self.check_objc(out) + return out + + def check_objc(self, objc, done=None): + """Ensure each sub ObjC is resolved + @objc: ObjC instance""" + if done is None: + done = set() + if objc in done: + return True + done.add(objc) + if isinstance(objc, (ObjCDecl, ObjCInt, ObjCEllipsis)): + return True + elif isinstance(objc, (ObjCPtr, ObjCArray)): + assert self.check_objc(objc.objtype, done) + return True + elif isinstance(objc, (ObjCStruct, ObjCUnion)): + for _, field, _, _ in objc.fields: + assert self.check_objc(field, done) + return True + elif isinstance(objc, ObjCFunc): + assert self.check_objc(objc.type_ret, done) + for arg in objc.args: + assert self.check_objc(arg, done) + return True + else: + assert False + + def size_to_int(self, size): + """Resolve an array size + @size: CTypeOp or integer""" + if isinstance(size, CTypeOp): + assert len(size.args) == 2 + arg0, arg1 = [self.size_to_int(arg) for arg in size.args] + if size.operator == "+": + return arg0 + arg1 + elif size.operator == "-": + return arg0 - arg1 + elif size.operator == "*": + return arg0 * arg1 + elif size.operator == "/": + return arg0 / arg1 + elif size.operator == "<<": + return arg0 << arg1 + elif size.operator == ">>": + return arg0 >> arg1 + else: + raise ValueError("Unknown operator %s" % size.operator) + elif isinstance(size, (int, long)): + return size + elif isinstance(size, CTypeSizeof): + obj = self._get_objc(size.target) + return obj.size + else: + raise TypeError("Unknown size type") + + def struct_compute_field_offset(self, obj, offset): + """Compute the offset of the field @obj in the current structure""" + raise NotImplementedError("Abstract method") + + def struct_compute_align_size(self, align_max, size): + """Compute the alignment and size of the current structure""" + raise NotImplementedError("Abstract method") + + def union_compute_align_size(self, align_max, size): + """Compute the alignment and size of the current union""" + raise NotImplementedError("Abstract method") + + +class CTypesManagerNotPacked(CTypesManager): + """Store defined C types (not packed)""" + + def struct_compute_field_offset(self, obj, offset): + """Compute the offset of the field @obj in the current structure + (not packed)""" + + if obj.align > 1: + offset = (offset + obj.align - 1) & ~(obj.align - 1) + return offset + + def struct_compute_align_size(self, align_max, size): + """Compute the alignment and size of the current structure + (not packed)""" + if align_max > 1: + size = (size + align_max - 1) & ~(align_max - 1) + return align_max, size + + def union_compute_align_size(self, align_max, size): + """Compute the alignment and size of the current union + (not packed)""" + return align_max, size + + +class CTypesManagerPacked(CTypesManager): + """Store defined C types (packed form)""" + + def struct_compute_field_offset(self, _, offset): + """Compute the offset of the field @obj in the current structure + (packed form)""" + return offset + + def struct_compute_align_size(self, _, size): + """Compute the alignment and size of the current structure + (packed form)""" + return 1, size + + def union_compute_align_size(self, align_max, size): + """Compute the alignment and size of the current union + (packed form)""" + return 1, size + + class CHandler(object): """ C manipulator for Miasm @@ -1275,6 +1624,6 @@ class CHandler(object): return ret_type -class CTypeTemplate(object): +class CLeafTypes(object): """Define C types sizes/alignement for a given architecture""" pass |