"""This module provides classes to manipulate C structures backed by a VmMngr object (a miasm VM virtual memory). The main idea is to declare the fields of the structure in the class: # FIXME: "I" => "u32" class MyStruct(MemStruct): fields = [ # Integer field: just struct.pack fields with one value ("num", Num("I")), ("flags", Num("B")), # Ptr fields are Num, but they can also be dereferenced # (self.deref_). Deref can be read and set. ("other", Ptr("I", OtherStruct)), # Ptr to a variable length String ("s", Ptr("I", MemStr)), ("i", Ptr("I", Num("I"))), ] And access the fields: mstruct = MyStruct(jitter.vm, addr) mstruct.num = 3 assert mstruct.num == 3 mstruct.other = addr2 mstruct.deref_other = OtherStruct(jitter.vm, addr) The `addr` argument can be omited if an allocator is set, in which case the structure will be automatically allocated in memory: my_heap = miasm2.os_dep.common.heap() set_allocator(my_heap) Note that some structures (e.g. MemStr or MemArray) do not have a static size and cannot be allocated automatically. As you saw previously, to use this module, you just have to inherit from MemStruct and define a list of (, ). Availabe MemField classes are: - Num: for number (float or int) handling - Struct: abstraction over a simple struct pack/unpack - Ptr: a pointer to another MemStruct instance - Inline: include another MemStruct as a field (equivalent to having a struct field into another struct in C) - Array: a fixed size array of MemFields (points) - Union: similar to `union` in C, list of MemFields at the same offset in a structure; the union has the size of the biggest MemField - BitField: similar to C bitfields, a list of [( allocated_address allocator = None def set_allocator(alloc_func): """Set an allocator for this module; allows to instanciate statically sized MemStructs (i.e. sizeof() is implemented) without specifying the address (the object is allocated by @alloc_func in the vm. Args: alloc_func: func(VmMngr) -> integer_address """ global allocator allocator = alloc_func # Helpers def indent(s, size=4): """Indent a string with @size spaces""" return ' '*size + ('\n' + ' '*size).join(s.split('\n')) # FIXME: copied from miasm2.os_dep.common and fixed def get_str_ansi(vm, addr, max_char=None): """Get a null terminated ANSI encoded string from a VmMngr. Args: vm: VmMngr instance max_char: max number of characters to get in memory """ l = 0 tmp = addr while ((max_char is None or l < max_char) and vm.get_mem(tmp, 1) != "\x00"): tmp += 1 l += 1 return vm.get_mem(addr, l).decode("latin1") # TODO: get_raw_str_utf16 for length calculus def get_str_utf16(vm, addr, max_char=None): """Get a (double) null terminated utf16 little endian encoded string from a VmMngr. This encoding is mainly used in Windows. FIXME: the implementation do not work with codepoints that are encoded on more than 2 bytes in utf16. Args: vm: VmMngr instance max_char: max number of bytes to get in memory """ l = 0 tmp = addr # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? while ((max_char is None or l < max_char) and vm.get_mem(tmp, 2) != "\x00\x00"): tmp += 2 l += 2 s = vm.get_mem(addr, l) return s.decode('utf-16le') def set_str_ansi(vm, addr, s): """Encode a string to null terminated ascii/ansi and set it in a VmMngr memory. Args: vm: VmMngr instance addr: start address to serialize the string to s: the str to serialize """ vm.set_mem(addr, s + "\x00") def set_str_utf16(vm, addr, s): """Same as set_str_ansi with (double) null terminated utf16 encoding.""" s = (s + '\x00').encode('utf-16le') vm.set_mem(addr, s) # MemField to MemStruct helper # TODO: cache generated types def mem(field): """Generate a MemStruct subclass from a field. The field's value can be accessed through self.value or self.deref_value if field is a Ptr. """ fields = [("value", field)] # Build a type to contain the field type mem_type = type("Mem%r" % field, (MemStruct,), {'fields': fields}) return mem_type # MemField classes class MemField(object): """Base class to provide methods to set and get fields from virtual mem. Subclasses can either override _pack and _unpack, or get and set if data serialization requires more work (see Inline implementation for an example). """ _self_type = None def _pack(self, val): """Serializes the python value @val to a raw str""" raise NotImplementedError() def _unpack(self, raw_str): """Deserializes a raw str to an object representing the python value of this field. """ raise NotImplementedError() def set(self, vm, addr, val): """Set a VmMngr memory from a value. Args: vm: VmMngr instance addr: the start adress in memory to set val: the python value to serialize in @vm at @addr """ raw = self._pack(val) vm.set_mem(addr, raw) def get(self, vm, addr): """Get the python value of a field from a VmMngr memory at @addr.""" raw = vm.get_mem(addr, self.size()) return self._unpack(raw) def _get_self_type(self): return self._self_type def _set_self_type(self, self_type): """If this field refers to MemSelf, replace it with @self_type (a MemStruct subclass) when using it. Generally not used outside the lib. """ self._self_type = self_type def size(self): """Return the size in bytes of the serialized version of this field""" raise NotImplementedError() def __len__(self): return self.size() def __neq__(self, other): return not self == other class Struct(MemField): """Dumb struct.pack/unpack field. Mainly used to factorize code. Value is a tuple corresponding to the struct @fmt passed to the constructor. """ def __init__(self, fmt): self._fmt = fmt def _pack(self, fields): return struct.pack(self._fmt, *fields) def _unpack(self, raw_str): return struct.unpack(self._fmt, raw_str) def size(self): return struct.calcsize(self._fmt) def __repr__(self): return "%s(%s)" % (self.__class__.__name__, self._fmt) def __eq__(self, other): return self.__class__ == other.__class__ and self._fmt == other._fmt def __hash__(self): return hash(hash(self.__class__) + hash(self._fmt)) class Num(Struct): """Represents a number (integer or float). The number is encoded with a struct-style format which must represent only one value. TODO: use u32, i16, etc. for format. """ def _pack(self, number): return super(Num, self)._pack([number]) def _unpack(self, raw_str): upck = super(Num, self)._unpack(raw_str) if len(upck) > 1: raise ValueError("Num format string unpacks to multiple values, " "should be 1") return upck[0] class Ptr(Num): """Special case of number of which value indicates the address of a MemStruct. Provides deref_ as well as when used, to set and get the pointed MemStruct. """ def __init__(self, fmt, dst_type, *type_args, **type_kwargs): """Args: fmt: (str) Num compatible format that will be the Ptr representation in memory dst_type: (MemStruct or MemField) the MemStruct this Ptr points to. If a MemField is given, it is transformed into a MemStruct with mem(TheMemField). *type_args, **type_kwargs: arguments to pass to the the pointed MemStruct when instanciating it (e.g. for MemStr encoding or MemArray field_type). """ if not isinstance(dst_type, MemField) and\ not (isinstance(dst_type, type) and\ issubclass(dst_type, MemStruct)) and\ not dst_type == MemSelf: raise ValueError("dst_type of Ptr must be a MemStruct type, a " "MemField instance, the MemSelf marker or a class " "name.") super(Ptr, self).__init__(fmt) if isinstance(dst_type, MemField): # Patch the field to propagate the MemSelf replacement dst_type._get_self_type = lambda: self._get_self_type() dst_type = mem(dst_type) self._dst_type = dst_type self._type_args = type_args self._type_kwargs = type_kwargs def _fix_dst_type(self): if self._dst_type == MemSelf: if self._get_self_type() is not None: self._dst_type = self._get_self_type() else: raise ValueError("Unsupported usecase for MemSelf, sorry") @property def dst_type(self): """Return the type (MemStruct subtype) this Ptr points to.""" self._fix_dst_type() return self._dst_type def deref_get(self, vm, addr): """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. Equivalent to a pointer dereference rvalue in C. """ return self.dst_type(vm, addr, *self._type_args, **self._type_kwargs) def deref_set(self, vm, addr, val): """Serializes the @val MemStruct subclass instance in @vm (VmMngr) at @addr. Equivalent to a pointer dereference assignment in C. """ # Sanity check if self.dst_type != val.__class__: log.warning("Original type was %s, overriden by value of type %s", self._dst_type.__name__, val.__class__.__name__) # Actual job vm.set_mem(addr, str(val)) def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._dst_type) def __eq__(self, other): return super(Ptr, self).__eq__(other) and \ self.dst_type == other.dst_type and \ self._type_args == other._type_args and \ self._type_kwargs == other._type_kwargs def __hash__(self): return hash(super(Ptr, self).__hash__() + hash(self._dst_type) + hash(self._type_args) + hash(self._type_kwargs)) class Inline(MemField): """Field used to inline a MemStruct in another MemStruct. Equivalent to having a struct field in a C struct. Concretely: class MyStructClass(MemStruct): fields = [("f1", Num("I")), ("f2", Num("I"))] class Example(MemStruct): fields = [("mystruct", Inline(MyStructClass))] ex = Example(vm, addr) ex.mystruct.f2 = 3 # inlined structure field access ex.mystruct = MyStructClass(vm, addr2) # struct copy It can be seen like a bridge to use a MemStruct as a MemField TODO: make the Inline implicit when setting a field to be a MemStruct """ def __init__(self, inlined_type, *type_args, **type_kwargs): if not issubclass(inlined_type, MemStruct): raise ValueError("inlined type if Inline must be a MemStruct") self._il_type = inlined_type self._type_args = type_args self._type_kwargs = type_kwargs def set(self, vm, addr, val): raw = str(val) vm.set_mem(addr, raw) def get(self, vm, addr): return self._il_type(vm, addr) def size(self): return self._il_type.sizeof() def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self._il_type) def __eq__(self, other): return self.__class__ == other.__class__ and \ self._il_type == other._il_type and \ self._type_args == other._type_args and \ self._type_kwargs == other._type_kwargs def __hash__(self): return hash(hash(self.__class__) + hash(self._il_type) + hash(self._type_args) + hash(self._type_kwargs)) class Array(MemField): """A fixed size array (contiguous sequence) of a MemField subclass elements. Similar to something like the char[10] type in C. Getting an array field actually returns a MemSizedArray. Setting it is possible with either a list or a MemSizedArray instance. Examples of syntax: class Example(MemStruct): fields = [("array", Array(Num("B"), 4))] mystruct = Example(vm, addr) mystruct.array[3] = 27 mystruct.array = [1, 4, 8, 9] mystruct.array = MemSizedArray(vm, addr2, Num("B"), 4) """ def __init__(self, field_type, array_len): self.field_type = field_type self.array_len = array_len def _set_self_type(self, self_type): super(Array, self)._set_self_type(self_type) self.field_type._set_self_type(self_type) def set(self, vm, addr, val): # MemSizedArray assignment if isinstance(val, MemSizedArray): if val.array_len != self.array_len or len(val) != self.size(): raise ValueError("Size mismatch in MemSizedArray assignment") raw = str(val) vm.set_mem(addr, raw) # list assignment elif isinstance(val, list): if len(val) != self.array_len: raise ValueError("Size mismatch in MemSizedArray assignment ") offset = 0 for elt in val: self.field_type.set(vm, addr + offset, elt) offset += self.field_type.size() else: raise NotImplementedError( "Assignment only implemented for list and MemSizedArray") def get(self, vm, addr): return MemSizedArray(vm, addr, self.field_type, self.array_len) def size(self): return self.field_type.size() * self.array_len def __repr__(self): return "%r[%s]" % (self.field_type, self.array_len) def __eq__(self, other): return self.__class__ == other.__class__ and \ self.field_type == other.field_type and \ self.array_len == other.array_len def __hash__(self): return hash(hash(self.__class__) + hash(self.field_type) + hash(self.array_len)) class Union(MemField): """Allows to put multiple fields at the same offset in a MemStruct, similar to unions in C. The Union will have the size of the largest of its fields. Example: class Example(MemStruct): fields = [("uni", Union([ ("f1", Num("> self._bit_offset) & val_mask return res_val def size(self): return self._num.size() @property def bit_size(self): """Number of bits read/written by this class""" return self._bits @property def bit_offset(self): """Offset in bits (beginning at 0, the LSB) from which to read/write bits. """ return self._bit_offset def __repr__(self): return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, self._bit_offset, self._bit_offset + self._bits) def __eq__(self, other): return self.__class__ == other.__class__ and \ self._num == other._num and self._bits == other._bits and \ self._bit_offset == other._bit_offset def __hash__(self): return hash(hash(self.__class__) + hash(self._num) + hash(self._bits) + hash(self._bit_offset)) class BitField(Union): """A C-like bitfield. Constructed with a list [(, )] and a @backing_num. The @backing_num is a Num instance that determines the total size of the bitfield and the way the bits are serialized/deserialized (big endian int, little endian short...). Can be seen (and implemented) as a Union of Bits fields. Creates fields that allow to access the bitfield fields easily. Example: class Example(MemStruct): fields = [("bf", BitField(Num("B"), [ ("f1", 2), ("f2", 4), ("f3", 1) ]) )] ex = Example(vm, addr) ex.memset() ex.f2 = 2 ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated assert ex.f1 == 3 assert ex.f2 == 2 assert ex.f3 == 0 # previously memset() assert ex.bf == 3 + 2 << 2 """ def __init__(self, backing_num, bit_list): """@backing num: Num intance, @bit_list: [(name, n_bits)]""" self._num = backing_num fields = [] offset = 0 for name, bits in bit_list: fields.append((name, Bits(self._num, bits, offset))) offset += bits if offset > self._num.size() * 8: raise ValueError("sum of bit lengths is > to the backing num size") super(BitField, self).__init__(fields) def set(self, vm, addr, val): self._num.set(vm, addr, val) def get(self, vm, addr): return self._num.get(vm, addr) def __eq__(self, other): return self.__class__ == other.__class__ and \ self._num == other._num and super(BitField, self).__eq__(other) def __hash__(self): return hash(super(BitField, self).__hash__() + hash(self._num)) # MemStruct classes class _MetaMemStruct(type): """MemStruct metaclass. Triggers the magic that generates the class fields from the cls.fields list. Just calls MemStruct.gen_fields(), the actual implementation can seen be there. """ def __init__(cls, name, bases, dct): super(_MetaMemStruct, cls).__init__(name, bases, dct) cls.gen_fields() def __repr__(cls): return cls.__name__ class MemStruct(object): """Base class to implement VmMngr backed C-like structures in miasm. The mechanism is the following: - set a "fields" class field to be a list of (, ) - instances of this class will have properties to interract with these fields. Example: class Example(MemStruct): fields = [ # Number field: just struct.pack fields with one value ("num", Num("I")), ("flags", Num("B")), # Ptr fields are Num, but they can also be dereferenced # (self.deref_). Deref can be read and set. ("other", Ptr("I", OtherStruct)), ("i", Ptr("I", Num("I"))), # Ptr to a variable length String ("s", Ptr("I", MemStr)), ] mstruct = MyStruct(vm, addr) # Field assignment modifies virtual memory mstruct.num = 3 assert mstruct.num == 3 memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), 4))[0] assert memval == mstruct.num # Memset sets the whole structure mstruct.memset() assert mstruct.num == 0 mstruct.memset('\x11') assert mstruct.num == 0x11111111 other = OtherStruct(vm, addr2) mstruct.other = other.get_addr() assert mstruct.other == other.get_addr() assert mstruct.deref_other == other assert mstruct.deref_other.foo == 0x1234 See the various MemField doc for more information. """ __metaclass__ = _MetaMemStruct fields = [] _size = None # Classic usage methods def __init__(self, vm, addr=None, *args, **kwargs): global allocator super(MemStruct, self).__init__(*args, **kwargs) self._vm = vm if addr is None: if allocator is None: raise ValueError("Cannot provide None address to MemStruct() if" "%s.allocator is not set." % __name__) self._addr = allocator(vm, self.get_size()) else: self._addr = addr def get_addr(self, field_name=None): """Return the address of this MemStruct or one of its fields. Args: field_name: (str, optional) the name of the field to get the address of """ if field_name is not None: if field_name not in self._attrs: raise ValueError("This structure has no %s field" % field_name) offset = self._attrs[field_name]['offset'] else: offset = 0 return self._addr + offset @classmethod def sizeof(cls): """Return the static size of this structure, when available (it is the case by default). """ # Child classes can set cls._size if their size is not the sum of # their fields if cls._size is None: return sum(a["field"].size() for a in cls._attrs.itervalues()) return cls._size def get_size(self): """Return the dynamic size of this structure (e.g. the size of an instance). Defaults to sizeof for this base class. For example, MemSizedArray defines get_size but not sizeof, as an instance has a fixed size (because it has a fixed length and field_type), but all the instance do not have the same size. """ return self.sizeof() def get_field_type(self, name): """Return the MemField subclass instance describing field @name.""" return self._attrs[name]['field'] def get_field(self, name): """Get a field value by name. Useless most of the time since fields are accessible via self.. """ if name not in self._attrs: raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, name)) field = self._attrs[name]["field"] offset = self._attrs[name]["offset"] return field.get(self._vm, self.get_addr() + offset) def set_field(self, name, val): """Set a field value by name. @val is the python value corresponding to this field type. Useless most of the time since fields are accessible via self.. """ if name not in self._attrs: raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, name)) field = self._attrs[name]["field"] offset = self._attrs[name]["offset"] field.set(self._vm, self.get_addr() + offset, val) def deref_field(self, name): """Get the MemStruct pointed by field. Useless most of the time since fields are accessible via self.deref_. """ addr = self.get_field(name) field = self._attrs[name]["field"] assert isinstance(field, Ptr),\ "Programming error: field should be a Ptr" return field.deref_get(self._vm, addr) def set_deref_field(self, name, val): """Set the MemStruct pointed by field. @val should be of the type of the pointed MemStruct. The field must be a Ptr. Useless most of the time since fields are accessible via self.deref_. """ addr = self.get_field(name) field = self._attrs[name]["field"] assert isinstance(field, Ptr),\ "Programming error: field should be a Ptr" field.deref_set(self._vm, addr, val) def memset(self, byte='\x00'): """Fill the memory space of this MemStruct with @byte ('\x00' by default). The size is retrieved with self.get_size() (dynamic size). """ if not isinstance(byte, str) or not len(byte) == 1: raise ValueError("byte must be a 1-lengthed str") self._vm.set_mem(self.get_addr(), byte * self.get_size()) def cast(self, other_type, *type_args, **type_kwargs): """Cast this MemStruct to another MemStruct (same address, same vm, but different type). Return the casted MemStruct. """ return self.cast_field(None, other_type, *type_args, **type_kwargs) def cast_field(self, field_name, other_type, *type_args, **type_kwargs): """Same as cast, but the address of the returned MemStruct is the address at which @field_name is in the current MemStruct. """ return other_type(self._vm, self.get_addr(field_name), *type_args, **type_kwargs) def __len__(self): return self.get_size() def raw(self): """Raw binary (str) representation of the MemStruct as it is in memory. """ attrs = sorted(self._attrs.itervalues(), key=lambda a: a["offset"]) out = [] for attr in attrs: field = attr["field"] offset = attr["offset"] out.append(self._vm.get_mem(self.get_addr() + offset, field.size())) return ''.join(out) def __str__(self): return self.raw() def __repr__(self): attrs = sorted(self._attrs.iteritems(), key=lambda a: a[1]["offset"]) out = [] for name, attr in attrs: field = attr["field"] val_repr = repr(self.get_field(name)) if '\n' in val_repr: val_repr = '\n' + indent(val_repr, 4) out.append("%s: %r = %s" % (name, field, val_repr)) return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) def __eq__(self, other): # Do not test class equality, because of dynamically generated fields # self.__class__ == other.__class__ and # Could test attrs? # TODO: self._attrs == other._attrs and return str(self) == str(other) def __ne__(self, other): return not self == other # Field generation methods, voluntarily public to be able to regen fields # after class definition @classmethod def gen_fields(cls, fields=None): """Generate the fields of this class (so that they can be accessed with self.) from a @fields list, as described in the class doc. Useful in case of a type cyclic dependency. For example, the following is not possible in python: class A(MemStruct): fields = [("b", Ptr("I", B))] class B(MemStruct): fields = [("a", Ptr("I", A))] With gen_fields, the following is the legal equivalent: class A(MemStruct): pass class B(MemStruct): fields = [("a", Ptr("I", A))] A.fields = [("b", Ptr("I", B))] a.gen_field() """ if fields is None: fields = cls.fields cls._attrs = {} offset = 0 for name, field in cls.fields: # For reflexion field._set_self_type(cls) cls.gen_field(name, field, offset) offset += field.size() cls._size = offset @classmethod def gen_field(cls, name, field, offset): """Generate only one field Args: @name: (str) the name of the field @field: (MemField instance) the field type @offset: (int) the offset of the field in the structure """ cls._gen_simple_attr(name, field, offset) if isinstance(field, Union): cls._gen_union_attr(field, offset) @classmethod def _gen_simple_attr(cls, name, field, offset): cls._attrs[name] = {"field": field, "offset": offset} # Generate self. getter and setter setattr(cls, name, property( lambda self: self.get_field(name), lambda self, val: self.set_field(name, val) )) # Generate self.deref_ getter and setter if this field is a # Ptr if isinstance(field, Ptr): setattr(cls, "deref_%s" % name, property( lambda self: self.deref_field(name), lambda self, val: self.set_deref_field(name, val) )) @classmethod def _gen_union_attr(cls, union_field, offset): if not isinstance(union_field, Union): raise ValueError("field should be an Union instance") for name, field in union_field.field_list: cls.gen_field(name, field, offset) class MemSelf(MemStruct): """Special Marker class for reference to current class in a Ptr or Array (mostly Array of Ptr). Example: class ListNode(MemStruct): fields = [ ("next", Ptr("= self.get_size(): raise IndexError("Index %s out of bounds" % idx) def __iter__(self): for i in xrange(self._array_len): yield self[i] def raw(self): return self._vm.get_mem(self.get_addr(), self.get_size()) def __repr__(self): item_reprs = [repr(item) for item in self] if self.array_len > 0 and '\n' in item_reprs[0]: items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' else: items = ', '.join(item_reprs) return "[%s] [%r; %s]" % (items, self._field_type, self._array_len) def mem_sized_array_type(field_type, array_len): """Generate a MemSizedArray subclass that has a fixed @field_type and a fixed @array_len. This allows to instanciate the returned type with only the vm and addr arguments, as are standard MemStructs. """ @classmethod def sizeof(cls): return cls._field_type.size() * cls._array_len array_type = type('MemSizedArray_%r_%s' % (field_type, array_len), (MemSizedArray,), {'_array_len': array_len, '_field_type': field_type, 'sizeof': sizeof}) return array_type