From 6e635113b53f932573687f9a6e3fc227cde6c0d9 Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Sat, 7 Nov 2015 20:06:21 +0100 Subject: Introducing MemStruct feature in miasm2.analysis.mem --- test/test_all.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test/test_all.py') diff --git a/test/test_all.py b/test/test_all.py index bc019104..71f036a2 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -238,6 +238,8 @@ testset += RegressionTest(["depgraph.py"], base_dir="analysis", (14, 1), (15, 1))) for fname in fnames]) +testset += RegressionTest(["mem.py"], base_dir="analysis") + # Examples class Example(Test): """Examples specificities: -- cgit 1.4.1 From ba2df16277d7d4deae118ed11e1e92cd478045ec Mon Sep 17 00:00:00 2001 From: Florent Monjalet Date: Mon, 30 Nov 2015 16:00:26 +0100 Subject: MemStruct/Types: Renamed analysis.mem to core.types --- example/jitter/memstruct.py | 234 -------- example/jitter/types.py | 234 ++++++++ miasm2/analysis/mem.py | 1399 ------------------------------------------- miasm2/core/types.py | 1399 +++++++++++++++++++++++++++++++++++++++++++ test/analysis/mem.py | 506 ---------------- test/core/types.py | 506 ++++++++++++++++ test/test_all.py | 5 +- 7 files changed, 2142 insertions(+), 2141 deletions(-) delete mode 100644 example/jitter/memstruct.py create mode 100644 example/jitter/types.py delete mode 100644 miasm2/analysis/mem.py create mode 100644 miasm2/core/types.py delete mode 100644 test/analysis/mem.py create mode 100644 test/core/types.py (limited to 'test/test_all.py') diff --git a/example/jitter/memstruct.py b/example/jitter/memstruct.py deleted file mode 100644 index 4ddbea86..00000000 --- a/example/jitter/memstruct.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python -"""This script is just a short example of common usages for miasm2.analysis.mem. -For a more complete view of what is possible, tests/analysis/mem.py covers -most of the module possibilities, and the module doc gives useful information -as well. -""" - -from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, Self, Void, Str, Array, Ptr, \ - Num, Array, set_allocator -from miasm2.os_dep.common import heap - -# Instanciate a heap -my_heap = heap() -# And set it as the default memory allocator, to avoid manual allocation and -# explicit address passing to the MemType subclasses (like MemStruct) -# constructor -set_allocator(my_heap.vm_alloc) - -# Let's reimplement a simple C generic linked list mapped on a VmMngr. - -# All the structures and methods will use the python objects but all the data -# is in fact stored in the VmMngr - -class ListNode(MemStruct): - fields = [ - # The ", ),]; creates fields that correspond to - certain bits of the field; analogous to a Union of Bits (see Bits below) - - Str: a character string, with an encoding; not directly mapped to a C - type, it is a higher level notion provided for ease of use - - Void: analogous to C void, can be a placeholder in void*-style cases. - - Self: special marker to reference a Struct inside itself (FIXME: to - remove?) - -And some less common types: - - - Bits: mask only some bits of a Num - - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a - standard C type) - -For each type, the `.pinned` property returns a MemType subclass that -allows to access the field in memory. - - -The easiest way to use the API to declare and manipulate new structures is to -subclass MemStruct and define a list of (, ): - - # FIXME: "I" => "u32" - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -And access the fields: - - mstruct = MyStruct(jitter.vm, addr) - mstruct.num = 3 - assert mstruct.num == 3 - mstruct.other.val = addr2 - # Also works: - mstruct.other = addr2 - mstruct.other.deref = OtherStruct(jitter.vm, addr) - -MemUnion and MemBitField can also be subclassed, the `fields` field being -in the format expected by, respectively, Union and BitField. - -The `addr` argument can be omited if an allocator is set, in which case the -structure will be automatically allocated in memory: - - my_heap = miasm2.os_dep.common.heap() - # the allocator is a func(VmMngr) -> integer_address - set_allocator(my_heap) - -Note that some structures (e.g. MemStr or MemArray) do not have a static -size and cannot be allocated automatically. -""" - -import logging -import struct - -log = logging.getLogger(__name__) -console_handler = logging.StreamHandler() -console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) -log.addHandler(console_handler) -log.setLevel(logging.WARN) - -# ALLOCATOR is a function(vm, size) -> allocated_address -# TODO: as a MemType class attribute -ALLOCATOR = None - -# Cache for dynamically generated MemTypes -DYN_MEM_STRUCT_CACHE = {} - -def set_allocator(alloc_func): - """Set an allocator for this module; allows to instanciate statically sized - MemTypes (i.e. sizeof() is implemented) without specifying the address - (the object is allocated by @alloc_func in the vm. - - @alloc_func: func(VmMngr) -> integer_address - """ - global ALLOCATOR - ALLOCATOR = alloc_func - - -# Helpers - -def indent(s, size=4): - """Indent a string with @size spaces""" - return ' '*size + ('\n' + ' '*size).join(s.split('\n')) - - -# FIXME: copied from miasm2.os_dep.common and fixed -def get_str_ansi(vm, addr, max_char=None): - """Get a null terminated ANSI encoded string from a VmMngr. - - @vm: VmMngr instance - @max_char: max number of characters to get in memory - """ - l = 0 - tmp = addr - while ((max_char is None or l < max_char) and - vm.get_mem(tmp, 1) != "\x00"): - tmp += 1 - l += 1 - return vm.get_mem(addr, l).decode("latin1") - - -# TODO: get_raw_str_utf16 for length calculus -def get_str_utf16(vm, addr, max_char=None): - """Get a (double) null terminated utf16 little endian encoded string from - a VmMngr. This encoding is mainly used in Windows. - - FIXME: the implementation do not work with codepoints that are encoded on - more than 2 bytes in utf16. - - @vm: VmMngr instance - @max_char: max number of bytes to get in memory - """ - l = 0 - tmp = addr - # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? - while ((max_char is None or l < max_char) and - vm.get_mem(tmp, 2) != "\x00\x00"): - tmp += 2 - l += 2 - s = vm.get_mem(addr, l) - return s.decode('utf-16le') - - -def set_str_ansi(vm, addr, s): - """Encode a string to null terminated ascii/ansi and set it in a VmMngr - memory. - - @vm: VmMngr instance - @addr: start address to serialize the string to - s: the str to serialize - """ - vm.set_mem(addr, s + "\x00") - - -def set_str_utf16(vm, addr, s): - """Same as set_str_ansi with (double) null terminated utf16 encoding.""" - s = (s + '\x00').encode('utf-16le') - vm.set_mem(addr, s) - - -# Type classes - -class Type(object): - """Base class to provide methods to describe a type, as well as how to set - and get fields from virtual mem. - - Each Type subclass is linked to a MemType subclass (e.g. Struct to - MemStruct, Ptr to MemPtr, etc.). - - When nothing is specified, MemValue is used to access the type in memory. - MemValue instances have one `.val` field, setting and getting it call - the set and get of the Type. - - Subclasses can either override _pack and _unpack, or get and set if data - serialization requires more work (see Struct implementation for an example). - - TODO: move any trace of vm and addr out of these classes? - """ - - _self_type = None - - def _pack(self, val): - """Serializes the python value @val to a raw str""" - raise NotImplementedError() - - def _unpack(self, raw_str): - """Deserializes a raw str to an object representing the python value - of this field. - """ - raise NotImplementedError() - - def set(self, vm, addr, val): - """Set a VmMngr memory from a value. - - @vm: VmMngr instance - @addr: the start adress in memory to set - @val: the python value to serialize in @vm at @addr - """ - raw = self._pack(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - """Get the python value of a field from a VmMngr memory at @addr.""" - raw = vm.get_mem(addr, self.size()) - return self._unpack(raw) - - @property - def pinned(self): - """Returns a class with a (vm, addr) constructor that allows to - interact with this type in memory. - - @return: a MemType subclass. - """ - if self in DYN_MEM_STRUCT_CACHE: - return DYN_MEM_STRUCT_CACHE[self] - pinned_type = self._build_pinned_type() - DYN_MEM_STRUCT_CACHE[self] = pinned_type - return pinned_type - - def _build_pinned_type(self): - """Builds the MemType subclass allowing to interract with this type. - - Called by self.pinned when it is not in cache. - """ - pinned_base_class = self._get_pinned_base_class() - pinned_type = type("Mem%r" % self, (pinned_base_class,), - {'_type': self}) - return pinned_type - - def _get_pinned_base_class(self): - """Return the MemType subclass that maps this type in memory""" - return MemValue - - def _get_self_type(self): - """Used for the Self trick.""" - return self._self_type - - def _set_self_type(self, self_type): - """If this field refers to MemSelf/Self, replace it with @self_type - (a MemType subclass) when using it. Generally not used outside this - module. - """ - self._self_type = self_type - - def size(self): - """Return the size in bytes of the serialized version of this field""" - raise NotImplementedError() - - def __len__(self): - return self.size() - - def __neq__(self, other): - return not self == other - - -class RawStruct(Type): - """Dumb struct.pack/unpack field. Mainly used to factorize code. - - Value is a tuple corresponding to the struct @fmt passed to the constructor. - """ - - def __init__(self, fmt): - self._fmt = fmt - - def _pack(self, fields): - return struct.pack(self._fmt, *fields) - - def _unpack(self, raw_str): - return struct.unpack(self._fmt, raw_str) - - def size(self): - return struct.calcsize(self._fmt) - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self._fmt) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._fmt == other._fmt - - def __hash__(self): - return hash((self.__class__, self._fmt)) - - -class Num(RawStruct): - """Represents a number (integer or float). The number is encoded with - a struct-style format which must represent only one value. - - TODO: use u32, i16, etc. for format. - """ - - def _pack(self, number): - return super(Num, self)._pack([number]) - - def _unpack(self, raw_str): - upck = super(Num, self)._unpack(raw_str) - if len(upck) != 1: - raise ValueError("Num format string unpacks to multiple values, " - "should be 1") - return upck[0] - - -class Ptr(Num): - """Special case of number of which value indicates the address of a - MemType. - - Mapped to MemPtr (see its doc for more info): - - assert isinstance(mystruct.ptr, MemPtr) - mystruct.ptr = 0x4000 # Assign the Ptr numeric value - mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value - assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value - mystruct.ptr.deref # Get the pointed MemType - mystruct.ptr.deref = other # Set the pointed MemType - """ - - def __init__(self, fmt, dst_type, *type_args, **type_kwargs): - """ - @fmt: (str) Num compatible format that will be the Ptr representation - in memory - @dst_type: (MemType or Type) the MemType this Ptr points to. - If a Type is given, it is transformed into a MemType with - TheType.pinned. - *type_args, **type_kwargs: arguments to pass to the the pointed - MemType when instanciating it (e.g. for MemStr encoding or - MemArray field_type). - """ - if (not isinstance(dst_type, Type) and - not (isinstance(dst_type, type) and - issubclass(dst_type, MemType)) and - not dst_type == MemSelf): - raise ValueError("dst_type of Ptr must be a MemType type, a " - "Type instance, the MemSelf marker or a class " - "name.") - super(Ptr, self).__init__(fmt) - if isinstance(dst_type, Type): - # Patch the field to propagate the MemSelf replacement - dst_type._get_self_type = lambda: self._get_self_type() - # dst_type cannot be patched here, since _get_self_type of the outer - # class has not yet been set. Patching dst_type involves calling - # dst_type.pinned, which will only return a type that does not point - # on MemSelf but on the right class only when _get_self_type of the - # outer class has been replaced by _MetaMemStruct. - # In short, dst_type = dst_type.pinned is not valid here, it is done - # lazily in _fix_dst_type - self._dst_type = dst_type - self._type_args = type_args - self._type_kwargs = type_kwargs - - def _fix_dst_type(self): - if self._dst_type == MemSelf: - if self._get_self_type() is not None: - self._dst_type = self._get_self_type() - else: - raise ValueError("Unsupported usecase for MemSelf, sorry") - if isinstance(self._dst_type, Type): - self._dst_type = self._dst_type.pinned - - @property - def dst_type(self): - """Return the type (MemType subtype) this Ptr points to.""" - self._fix_dst_type() - return self._dst_type - - def set(self, vm, addr, val): - """A Ptr field can be set with a MemPtr or an int""" - if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): - self.set_val(vm, addr, val.val) - else: - super(Ptr, self).set(vm, addr, val) - - def get(self, vm, addr): - return self.pinned(vm, addr) - - def get_val(self, vm, addr): - """Get the numeric value of a Ptr""" - return super(Ptr, self).get(vm, addr) - - def set_val(self, vm, addr, val): - """Set the numeric value of a Ptr""" - return super(Ptr, self).set(vm, addr, val) - - def deref_get(self, vm, addr): - """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. - Equivalent to a pointer dereference rvalue in C. - """ - dst_addr = self.get_val(vm, addr) - return self.dst_type(vm, dst_addr, - *self._type_args, **self._type_kwargs) - - def deref_set(self, vm, addr, val): - """Serializes the @val MemType subclass instance in @vm (VmMngr) at - @addr. Equivalent to a pointer dereference assignment in C. - """ - # Sanity check - if self.dst_type != val.__class__: - log.warning("Original type was %s, overriden by value of type %s", - self._dst_type.__name__, val.__class__.__name__) - - # Actual job - dst_addr = self.get_val(vm, addr) - vm.set_mem(dst_addr, str(val)) - - def _get_pinned_base_class(self): - return MemPtr - - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) - - def __eq__(self, other): - return super(Ptr, self).__eq__(other) and \ - self.dst_type == other.dst_type and \ - self._type_args == other._type_args and \ - self._type_kwargs == other._type_kwargs - - def __hash__(self): - return hash((super(Ptr, self).__hash__(), self.dst_type, - self._type_args)) - - -class Struct(Type): - """Equivalent to a C struct type. Composed of a name, and a - (, ) list describing the fields - of the struct. - - Mapped to MemStruct. - - NOTE: The `.pinned` property of Struct creates classes on the fly. If an - equivalent structure is created by subclassing MemStruct, an exception - is raised to prevent creating multiple classes designating the same type. - - Example: - s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) - - Toto1 = s.pinned - - # This raises an exception, because it describes the same structure as - # Toto1 - class Toto(MemStruct): - fields = [("f1", Num("I")), ("f2", Num("I"))] - """ - - def __init__(self, name, fields): - self.name = name - # fields is immutable - self._fields = tuple(fields) - self._gen_fields() - - def _gen_fields(self): - """Precompute useful metadata on self.fields.""" - self._fields_desc = {} - offset = 0 - for name, field in self._fields: - # For reflexion - field._set_self_type(self) - self._fields_desc[name] = {"field": field, "offset": offset} - offset += field.size() - - @property - def fields(self): - return self._fields - - def set(self, vm, addr, val): - raw = str(val) - vm.set_mem(addr, raw) - - def get(self, vm, addr): - return self.pinned(vm, addr) - - def get_field(self, vm, addr, name): - """Get a field value by @name and base structure @addr in @vm VmMngr.""" - if name not in self._fields_desc: - raise ValueError("'%s' type has no field '%s'" % (self, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - return field.get(vm, addr + offset) - - def set_field(self, vm, addr, name, val): - """Set a field value by @name and base structure @addr in @vm VmMngr. - @val is the python value corresponding to this field type. - """ - if name not in self._fields_desc: - raise AttributeError("'%s' object has no attribute '%s'" - % (self.__class__.__name__, name)) - field = self.get_field_type(name) - offset = self.get_offset(name) - field.set(vm, addr + offset, val) - - def size(self): - return sum(field.size() for _, field in self.fields) - - def get_offset(self, field_name): - """ - @field_name: (str, optional) the name of the field to get the - offset of - """ - if field_name not in self._fields_desc: - raise ValueError("This structure has no %s field" % field_name) - return self._fields_desc[field_name]['offset'] - - def get_field_type(self, name): - """Return the Type subclass instance describing field @name.""" - return self._fields_desc[name]['field'] - - def _get_pinned_base_class(self): - return MemStruct - - def __repr__(self): - return "struct %s" % self.name - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.fields == other.fields and \ - self.name == other.name - - def __hash__(self): - # Only hash name, not fields, because if a field is a Ptr to this - # Struct type, an infinite loop occurs - return hash((self.__class__, self.name)) - - -class Union(Struct): - """Represents a C union. - - Allows to put multiple fields at the same offset in a MemStruct, - similar to unions in C. The Union will have the size of the largest of its - fields. - - Mapped to MemUnion. - - Example: - - class Example(MemStruct): - fields = [("uni", Union([ - ("f1", Num("= self.size()): - raise IndexError("Index %s out of bounds" % idx) - - def _get_pinned_base_class(self): - if self.is_sized(): - return MemSizedArray - else: - return MemArray - - def __repr__(self): - return "%r[%s]" % (self.field_type, self.array_len or "unsized") - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self.field_type == other.field_type and \ - self.array_len == other.array_len - - def __hash__(self): - return hash((self.__class__, self.field_type, self.array_len)) - - -class Bits(Type): - """Helper class for BitField, not very useful on its own. Represents some - bits of a Num. - - The @backing_num is used to know how to serialize/deserialize data in vm, - but getting/setting this fields only affects bits from @bit_offset to - @bit_offset + @bits. Masking and shifting is handled by the class, the aim - is to provide a transparent way to set and get some bits of a num. - """ - - def __init__(self, backing_num, bits, bit_offset): - if not isinstance(backing_num, Num): - raise ValueError("backing_num should be a Num instance") - self._num = backing_num - self._bits = bits - self._bit_offset = bit_offset - - def set(self, vm, addr, val): - val_mask = (1 << self._bits) - 1 - val_shifted = (val & val_mask) << self._bit_offset - num_size = self._num.size() * 8 - - full_num_mask = (1 << num_size) - 1 - num_mask = (~(val_mask << self._bit_offset)) & full_num_mask - - num_val = self._num.get(vm, addr) - res_val = (num_val & num_mask) | val_shifted - self._num.set(vm, addr, res_val) - - def get(self, vm, addr): - val_mask = (1 << self._bits) - 1 - num_val = self._num.get(vm, addr) - res_val = (num_val >> self._bit_offset) & val_mask - return res_val - - def size(self): - return self._num.size() - - @property - def bit_size(self): - """Number of bits read/written by this class""" - return self._bits - - @property - def bit_offset(self): - """Offset in bits (beginning at 0, the LSB) from which to read/write - bits. - """ - return self._bit_offset - - def __repr__(self): - return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, - self._bit_offset, self._bit_offset + self._bits) - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and self._bits == other._bits and \ - self._bit_offset == other._bit_offset - - def __hash__(self): - return hash((self.__class__, self._num, self._bits, self._bit_offset)) - - -class BitField(Union): - """A C-like bitfield. - - Constructed with a list [(, )] and a - @backing_num. The @backing_num is a Num instance that determines the total - size of the bitfield and the way the bits are serialized/deserialized (big - endian int, little endian short...). Can be seen (and implemented) as a - Union of Bits fields. - - Mapped to MemBitField. - - Creates fields that allow to access the bitfield fields easily. Example: - - class Example(MemStruct): - fields = [("bf", BitField(Num("B"), [ - ("f1", 2), - ("f2", 4), - ("f3", 1) - ]) - )] - - ex = Example(vm, addr) - ex.memset() - ex.f2 = 2 - ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated - assert ex.f1 == 3 - assert ex.f2 == 2 - assert ex.f3 == 0 # previously memset() - assert ex.bf == 3 + 2 << 2 - """ - - def __init__(self, backing_num, bit_list): - """@backing num: Num intance, @bit_list: [(name, n_bits)]""" - self._num = backing_num - fields = [] - offset = 0 - for name, bits in bit_list: - fields.append((name, Bits(self._num, bits, offset))) - offset += bits - if offset > self._num.size() * 8: - raise ValueError("sum of bit lengths is > to the backing num size") - super(BitField, self).__init__(fields) - - def set(self, vm, addr, val): - self._num.set(vm, addr, val) - - def _get_pinned_base_class(self): - return MemBitField - - def __eq__(self, other): - return self.__class__ == other.__class__ and \ - self._num == other._num and super(BitField, self).__eq__(other) - - def __hash__(self): - return hash((super(BitField, self).__hash__(), self._num)) - - def __repr__(self): - fields_repr = ', '.join("%s: %r" % (name, field.bit_size) - for name, field in self.fields) - return "%s(%s)" % (self.__class__.__name__, fields_repr) - - -class Str(Type): - """A string type that handles encoding. This type is unsized (no static - size). - - The @encoding is passed to the constructor, and is currently either null - terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware - that the utf16 implementation is a bit buggy... - - Mapped to MemStr. - """ - - def __init__(self, encoding="ansi"): - # TODO: encoding as lambda - if encoding not in ["ansi", "utf16"]: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - self._enc = encoding - - def get(self, vm, addr): - """Set the string value in memory""" - if self._enc == "ansi": - get_str = get_str_ansi - elif self._enc == "utf16": - get_str = get_str_utf16 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - return get_str(vm, addr) - - def set(self, vm, addr, s): - """Get the string value from memory""" - if self._enc == "ansi": - set_str = set_str_ansi - elif self._enc == "utf16": - set_str = set_str_utf16 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - set_str(vm, addr, s) - - def size(self): - """This type is unsized.""" - raise ValueError("Str is unsized") - - @property - def enc(self): - """This Str's encoding name (as a str).""" - return self._enc - - def _get_pinned_base_class(self): - return MemStr - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self.enc) - - def __eq__(self, other): - return self.__class__ == other.__class__ and self._enc == other._enc - - def __hash__(self): - return hash((self.__class__, self._enc)) - - -class Void(Type): - """Represents the C void type. - - Mapped to MemVoid. - """ - - def _build_pinned_type(self): - return MemVoid - - def __eq__(self, other): - return self.__class__ == other.__class__ - - def __hash__(self): - return hash(self.__class__) - - -class Self(Void): - """Special marker to reference a type inside itself. - - Mapped to MemSelf. - - Example: - class ListNode(MemStruct): - fields = [ - ("next", Ptr(", ) - - instances of this class will have properties to interract with these - fields. - - Example: - class MyStruct(MemStruct): - fields = [ - # Scalar field: just struct.pack field with one value - ("num", Num("I")), - ("flags", Num("B")), - # Ptr fields contain two fields: "val", for the numerical value, - # and "deref" to get the pointed object - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - - mstruct = MyStruct(vm, addr) - - # Field assignment modifies virtual memory - mstruct.num = 3 - assert mstruct.num == 3 - memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), - 4))[0] - assert memval == mstruct.num - - # Memset sets the whole structure - mstruct.memset() - assert mstruct.num == 0 - mstruct.memset('\x11') - assert mstruct.num == 0x11111111 - - other = OtherStruct(vm, addr2) - mstruct.other = other.get_addr() - assert mstruct.other.val == other.get_addr() - assert mstruct.other.deref == other - assert mstruct.other.deref.foo == 0x1234 - - Note that: - MyStruct = Struct("MyStruct", ).pinned - is equivalent to the previous MyStruct declaration. - - See the various Type-s doc for more information. See MemStruct.gen_fields - doc for more information on how to handle recursive types and cyclic - dependencies. - """ - __metaclass__ = _MetaMemStruct - fields = None - - def get_addr(self, field_name=None): - """ - @field_name: (str, optional) the name of the field to get the - address of - """ - if field_name is not None: - offset = self._type.get_offset(field_name) - else: - offset = 0 - return self._addr + offset - - def get_field(self, name): - """Get a field value by name. - - useless most of the time since fields are accessible via self.. - """ - return self._type.get_field(self._vm, self.get_addr(), name) - - def set_field(self, name, val): - """Set a field value by name. @val is the python value corresponding to - this field type. - - useless most of the time since fields are accessible via self.. - """ - return self._type.set_field(self._vm, self.get_addr(), name, val) - - def cast_field(self, field, other_type): - """In this implementation, @field is a field name""" - if isinstance(other_type, Type): - other_type = other_type.pinned - return other_type(self._vm, self.get_addr(field)) - - # Field generation method, voluntarily public to be able to gen fields - # after class definition - @classmethod - def gen_fields(cls, fields=None): - """Generate the fields of this class (so that they can be accessed with - self.) from a @fields list, as described in the class doc. - - Useful in case of a type cyclic dependency. For example, the following - is not possible in python: - - class A(MemStruct): - fields = [("b", Ptr("I", B))] - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - With gen_fields, the following is the legal equivalent: - - class A(MemStruct): - pass - - class B(MemStruct): - fields = [("a", Ptr("I", A))] - - A.gen_fields([("b", Ptr("I", B))]) - """ - if fields is not None: - if cls.fields is not None: - raise ValueError("Cannot regen fields of a class. Setting " - "cls.fields at class definition and calling " - "gen_fields are mutually exclusive.") - cls.fields = fields - - if cls._type is None: - if cls.fields is None: - raise ValueError("Cannot create a MemStruct subclass without" - " a cls._type or a cls.fields") - cls._type = cls._gen_type(cls.fields) - - if cls._type in DYN_MEM_STRUCT_CACHE: - # FIXME: Maybe a warning would be better? - raise RuntimeError("Another MemType has the same type as this " - "one. Use it instead.") - - # Register this class so that another one will not be created when - # calling cls._type.pinned - DYN_MEM_STRUCT_CACHE[cls._type] = cls - - cls._gen_attributes() - - @classmethod - def _gen_attributes(cls): - # Generate self. getter and setters - for name, field in cls._type.fields: - setattr(cls, name, property( - lambda self, name=name: self.get_field(name), - lambda self, val, name=name: self.set_field(name, val) - )) - - @classmethod - def _gen_type(cls, fields): - return Struct(cls.__name__, fields) - - def __repr__(self): - out = [] - for name, field in self._type.fields: - val_repr = repr(self.get_field(name)) - if '\n' in val_repr: - val_repr = '\n' + indent(val_repr, 4) - out.append("%s: %r = %s" % (name, field, val_repr)) - return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) - - -class MemUnion(MemStruct): - """Same as MemStruct but all fields have a 0 offset in the struct.""" - @classmethod - def _gen_type(cls, fields): - return Union(fields) - - -class MemBitField(MemUnion): - """MemUnion of Bits(...) fields.""" - @classmethod - def _gen_type(cls, fields): - return BitField(fields) - - -class MemSelf(MemStruct): - """Special Marker class for reference to current class in a Ptr or Array - (mostly Array of Ptr). See Self doc. - """ - def __repr__(self): - return self.__class__.__name__ - - -class MemVoid(MemType): - """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when - casted to another type. Allows to implement C's "void*" pattern. - """ - _type = Void() - - def __repr__(self): - return self.__class__.__name__ - - -class MemPtr(MemValue): - """Mem version of a Ptr, provides two properties: - - val, to set and get the numeric value of the Ptr - - deref, to set and get the pointed type - """ - @property - def val(self): - return self._type.get_val(self._vm, self._addr) - - @val.setter - def val(self, value): - return self._type.set_val(self._vm, self._addr, value) - - @property - def deref(self): - return self._type.deref_get(self._vm, self._addr) - - @deref.setter - def deref(self, val): - return self._type.deref_set(self._vm, self._addr, val) - - def __repr__(self): - return "*%s" % hex(self.val) - - -class MemStr(MemValue): - """Implements a string representation in memory. - - The string value can be got or set (with python str/unicode) through the - self.val attribute. String encoding/decoding is handled by the class, - - This type is dynamically sized only (get_size is implemented, not sizeof). - """ - - def get_size(self): - """This get_size implementation is quite unsafe: it reads the string - underneath to determine the size, it may therefore read a lot of memory - and provoke mem faults (analogous to strlen). - """ - val = self.val - if self.get_type().enc == "ansi": - return len(val) + 1 - elif self.get_type().enc == "utf16": - # FIXME: real encoding... - return len(val) * 2 + 2 - else: - raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") - - def raw(self): - raw = self._vm.get_mem(self.get_addr(), self.get_size()) - return raw - - def __repr__(self): - return "%r: %r" % (self.__class__, self.val) - - -class MemArray(MemType): - """An unsized array of type @field_type (a Type subclass instance). - This class has no static or dynamic size. - - It can be indexed for setting and getting elements, example: - - array = Array(Num("I")).pinned(vm, addr)) - array[2] = 5 - array[4:8] = [0, 1, 2, 3] - print array[20] - """ - - @property - def field_type(self): - """Return the Type subclass instance that represents the type of - this MemArray items. - """ - return self.get_type().field_type - - def get_addr(self, idx=0): - return self._addr + self.get_type().get_offset(idx) - - def __getitem__(self, idx): - return self.get_type().get_item(self._vm, self._addr, idx) - - def __setitem__(self, idx, item): - self.get_type().set_item(self._vm, self._addr, idx, item) - - def raw(self): - raise ValueError("%s is unsized, which prevents from getting its full " - "raw representation. Use MemSizedArray instead." % - self.__class__) - - def __repr__(self): - return "[%r, ...] [%r]" % (self[0], self.field_type) - - -class MemSizedArray(MemArray): - """A fixed size MemArray. - - This type is dynamically sized. Generate a fixed @field_type and @array_len - array which has a static size by using Array(type, size).pinned. - """ - - @property - def array_len(self): - """The length, in number of elements, of this array.""" - return self.get_type().array_len - - def get_size(self): - return self.get_type().size() - - def __iter__(self): - for i in xrange(self.get_type().array_len): - yield self[i] - - def raw(self): - return self._vm.get_mem(self.get_addr(), self.get_size()) - - def __repr__(self): - item_reprs = [repr(item) for item in self] - if self.array_len > 0 and '\n' in item_reprs[0]: - items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' - else: - items = ', '.join(item_reprs) - return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) - diff --git a/miasm2/core/types.py b/miasm2/core/types.py new file mode 100644 index 00000000..3c8d5b8b --- /dev/null +++ b/miasm2/core/types.py @@ -0,0 +1,1399 @@ +"""This module provides classes to manipulate C structures backed by a VmMngr +object (a miasm sandbox virtual memory). + +It provides two families of classes, Type-s (Num, Ptr, Str...) and their +associated MemType-s. A Type subclass instance represents a fully defined C +type. A MemType subclass instance represents a C LValue (or variable): it is +a type attached to the memory. Available types are: + + - Num: for number (float or int) handling + - Ptr: a pointer to another Type + - Struct: equivalent to a C struct definition + - Union: similar to union in C, list of Types at the same offset in a + structure; the union has the size of the biggest Type (~ Struct with all + the fields at offset 0) + - Array: an array of items of the same type; can have a fixed size or + not (e.g. char[3] vs char* used as an array in C) + - BitField: similar to C bitfields, a list of + [(, ),]; creates fields that correspond to + certain bits of the field; analogous to a Union of Bits (see Bits below) + - Str: a character string, with an encoding; not directly mapped to a C + type, it is a higher level notion provided for ease of use + - Void: analogous to C void, can be a placeholder in void*-style cases. + - Self: special marker to reference a Struct inside itself (FIXME: to + remove?) + +And some less common types: + + - Bits: mask only some bits of a Num + - RawStruct: abstraction over a simple struct pack/unpack (no mapping to a + standard C type) + +For each type, the `.pinned` property returns a MemType subclass that +allows to access the field in memory. + + +The easiest way to use the API to declare and manipulate new structures is to +subclass MemStruct and define a list of (, ): + + # FIXME: "I" => "u32" + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + +And access the fields: + + mstruct = MyStruct(jitter.vm, addr) + mstruct.num = 3 + assert mstruct.num == 3 + mstruct.other.val = addr2 + # Also works: + mstruct.other = addr2 + mstruct.other.deref = OtherStruct(jitter.vm, addr) + +MemUnion and MemBitField can also be subclassed, the `fields` field being +in the format expected by, respectively, Union and BitField. + +The `addr` argument can be omited if an allocator is set, in which case the +structure will be automatically allocated in memory: + + my_heap = miasm2.os_dep.common.heap() + # the allocator is a func(VmMngr) -> integer_address + set_allocator(my_heap) + +Note that some structures (e.g. MemStr or MemArray) do not have a static +size and cannot be allocated automatically. +""" + +import logging +import struct + +log = logging.getLogger(__name__) +console_handler = logging.StreamHandler() +console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s")) +log.addHandler(console_handler) +log.setLevel(logging.WARN) + +# ALLOCATOR is a function(vm, size) -> allocated_address +# TODO: as a MemType class attribute +ALLOCATOR = None + +# Cache for dynamically generated MemTypes +DYN_MEM_STRUCT_CACHE = {} + +def set_allocator(alloc_func): + """Set an allocator for this module; allows to instanciate statically sized + MemTypes (i.e. sizeof() is implemented) without specifying the address + (the object is allocated by @alloc_func in the vm. + + @alloc_func: func(VmMngr) -> integer_address + """ + global ALLOCATOR + ALLOCATOR = alloc_func + + +# Helpers + +def indent(s, size=4): + """Indent a string with @size spaces""" + return ' '*size + ('\n' + ' '*size).join(s.split('\n')) + + +# FIXME: copied from miasm2.os_dep.common and fixed +def get_str_ansi(vm, addr, max_char=None): + """Get a null terminated ANSI encoded string from a VmMngr. + + @vm: VmMngr instance + @max_char: max number of characters to get in memory + """ + l = 0 + tmp = addr + while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 1) != "\x00"): + tmp += 1 + l += 1 + return vm.get_mem(addr, l).decode("latin1") + + +# TODO: get_raw_str_utf16 for length calculus +def get_str_utf16(vm, addr, max_char=None): + """Get a (double) null terminated utf16 little endian encoded string from + a VmMngr. This encoding is mainly used in Windows. + + FIXME: the implementation do not work with codepoints that are encoded on + more than 2 bytes in utf16. + + @vm: VmMngr instance + @max_char: max number of bytes to get in memory + """ + l = 0 + tmp = addr + # TODO: test if fetching per page rather than 2 byte per 2 byte is worth it? + while ((max_char is None or l < max_char) and + vm.get_mem(tmp, 2) != "\x00\x00"): + tmp += 2 + l += 2 + s = vm.get_mem(addr, l) + return s.decode('utf-16le') + + +def set_str_ansi(vm, addr, s): + """Encode a string to null terminated ascii/ansi and set it in a VmMngr + memory. + + @vm: VmMngr instance + @addr: start address to serialize the string to + s: the str to serialize + """ + vm.set_mem(addr, s + "\x00") + + +def set_str_utf16(vm, addr, s): + """Same as set_str_ansi with (double) null terminated utf16 encoding.""" + s = (s + '\x00').encode('utf-16le') + vm.set_mem(addr, s) + + +# Type classes + +class Type(object): + """Base class to provide methods to describe a type, as well as how to set + and get fields from virtual mem. + + Each Type subclass is linked to a MemType subclass (e.g. Struct to + MemStruct, Ptr to MemPtr, etc.). + + When nothing is specified, MemValue is used to access the type in memory. + MemValue instances have one `.val` field, setting and getting it call + the set and get of the Type. + + Subclasses can either override _pack and _unpack, or get and set if data + serialization requires more work (see Struct implementation for an example). + + TODO: move any trace of vm and addr out of these classes? + """ + + _self_type = None + + def _pack(self, val): + """Serializes the python value @val to a raw str""" + raise NotImplementedError() + + def _unpack(self, raw_str): + """Deserializes a raw str to an object representing the python value + of this field. + """ + raise NotImplementedError() + + def set(self, vm, addr, val): + """Set a VmMngr memory from a value. + + @vm: VmMngr instance + @addr: the start adress in memory to set + @val: the python value to serialize in @vm at @addr + """ + raw = self._pack(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + """Get the python value of a field from a VmMngr memory at @addr.""" + raw = vm.get_mem(addr, self.size()) + return self._unpack(raw) + + @property + def pinned(self): + """Returns a class with a (vm, addr) constructor that allows to + interact with this type in memory. + + @return: a MemType subclass. + """ + if self in DYN_MEM_STRUCT_CACHE: + return DYN_MEM_STRUCT_CACHE[self] + pinned_type = self._build_pinned_type() + DYN_MEM_STRUCT_CACHE[self] = pinned_type + return pinned_type + + def _build_pinned_type(self): + """Builds the MemType subclass allowing to interract with this type. + + Called by self.pinned when it is not in cache. + """ + pinned_base_class = self._get_pinned_base_class() + pinned_type = type("Mem%r" % self, (pinned_base_class,), + {'_type': self}) + return pinned_type + + def _get_pinned_base_class(self): + """Return the MemType subclass that maps this type in memory""" + return MemValue + + def _get_self_type(self): + """Used for the Self trick.""" + return self._self_type + + def _set_self_type(self, self_type): + """If this field refers to MemSelf/Self, replace it with @self_type + (a MemType subclass) when using it. Generally not used outside this + module. + """ + self._self_type = self_type + + def size(self): + """Return the size in bytes of the serialized version of this field""" + raise NotImplementedError() + + def __len__(self): + return self.size() + + def __neq__(self, other): + return not self == other + + +class RawStruct(Type): + """Dumb struct.pack/unpack field. Mainly used to factorize code. + + Value is a tuple corresponding to the struct @fmt passed to the constructor. + """ + + def __init__(self, fmt): + self._fmt = fmt + + def _pack(self, fields): + return struct.pack(self._fmt, *fields) + + def _unpack(self, raw_str): + return struct.unpack(self._fmt, raw_str) + + def size(self): + return struct.calcsize(self._fmt) + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self._fmt) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._fmt == other._fmt + + def __hash__(self): + return hash((self.__class__, self._fmt)) + + +class Num(RawStruct): + """Represents a number (integer or float). The number is encoded with + a struct-style format which must represent only one value. + + TODO: use u32, i16, etc. for format. + """ + + def _pack(self, number): + return super(Num, self)._pack([number]) + + def _unpack(self, raw_str): + upck = super(Num, self)._unpack(raw_str) + if len(upck) != 1: + raise ValueError("Num format string unpacks to multiple values, " + "should be 1") + return upck[0] + + +class Ptr(Num): + """Special case of number of which value indicates the address of a + MemType. + + Mapped to MemPtr (see its doc for more info): + + assert isinstance(mystruct.ptr, MemPtr) + mystruct.ptr = 0x4000 # Assign the Ptr numeric value + mystruct.ptr.val = 0x4000 # Also assigns the Ptr numeric value + assert isinstance(mystruct.ptr.val, int) # Get the Ptr numeric value + mystruct.ptr.deref # Get the pointed MemType + mystruct.ptr.deref = other # Set the pointed MemType + """ + + def __init__(self, fmt, dst_type, *type_args, **type_kwargs): + """ + @fmt: (str) Num compatible format that will be the Ptr representation + in memory + @dst_type: (MemType or Type) the MemType this Ptr points to. + If a Type is given, it is transformed into a MemType with + TheType.pinned. + *type_args, **type_kwargs: arguments to pass to the the pointed + MemType when instanciating it (e.g. for MemStr encoding or + MemArray field_type). + """ + if (not isinstance(dst_type, Type) and + not (isinstance(dst_type, type) and + issubclass(dst_type, MemType)) and + not dst_type == MemSelf): + raise ValueError("dst_type of Ptr must be a MemType type, a " + "Type instance, the MemSelf marker or a class " + "name.") + super(Ptr, self).__init__(fmt) + if isinstance(dst_type, Type): + # Patch the field to propagate the MemSelf replacement + dst_type._get_self_type = lambda: self._get_self_type() + # dst_type cannot be patched here, since _get_self_type of the outer + # class has not yet been set. Patching dst_type involves calling + # dst_type.pinned, which will only return a type that does not point + # on MemSelf but on the right class only when _get_self_type of the + # outer class has been replaced by _MetaMemStruct. + # In short, dst_type = dst_type.pinned is not valid here, it is done + # lazily in _fix_dst_type + self._dst_type = dst_type + self._type_args = type_args + self._type_kwargs = type_kwargs + + def _fix_dst_type(self): + if self._dst_type == MemSelf: + if self._get_self_type() is not None: + self._dst_type = self._get_self_type() + else: + raise ValueError("Unsupported usecase for MemSelf, sorry") + if isinstance(self._dst_type, Type): + self._dst_type = self._dst_type.pinned + + @property + def dst_type(self): + """Return the type (MemType subtype) this Ptr points to.""" + self._fix_dst_type() + return self._dst_type + + def set(self, vm, addr, val): + """A Ptr field can be set with a MemPtr or an int""" + if isinstance(val, MemType) and isinstance(val.get_type(), Ptr): + self.set_val(vm, addr, val.val) + else: + super(Ptr, self).set(vm, addr, val) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_val(self, vm, addr): + """Get the numeric value of a Ptr""" + return super(Ptr, self).get(vm, addr) + + def set_val(self, vm, addr, val): + """Set the numeric value of a Ptr""" + return super(Ptr, self).set(vm, addr, val) + + def deref_get(self, vm, addr): + """Deserializes the data in @vm (VmMngr) at @addr to self.dst_type. + Equivalent to a pointer dereference rvalue in C. + """ + dst_addr = self.get_val(vm, addr) + return self.dst_type(vm, dst_addr, + *self._type_args, **self._type_kwargs) + + def deref_set(self, vm, addr, val): + """Serializes the @val MemType subclass instance in @vm (VmMngr) at + @addr. Equivalent to a pointer dereference assignment in C. + """ + # Sanity check + if self.dst_type != val.__class__: + log.warning("Original type was %s, overriden by value of type %s", + self._dst_type.__name__, val.__class__.__name__) + + # Actual job + dst_addr = self.get_val(vm, addr) + vm.set_mem(dst_addr, str(val)) + + def _get_pinned_base_class(self): + return MemPtr + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.dst_type.get_type()) + + def __eq__(self, other): + return super(Ptr, self).__eq__(other) and \ + self.dst_type == other.dst_type and \ + self._type_args == other._type_args and \ + self._type_kwargs == other._type_kwargs + + def __hash__(self): + return hash((super(Ptr, self).__hash__(), self.dst_type, + self._type_args)) + + +class Struct(Type): + """Equivalent to a C struct type. Composed of a name, and a + (, ) list describing the fields + of the struct. + + Mapped to MemStruct. + + NOTE: The `.pinned` property of Struct creates classes on the fly. If an + equivalent structure is created by subclassing MemStruct, an exception + is raised to prevent creating multiple classes designating the same type. + + Example: + s = Struct("Toto", [("f1", Num("I")), ("f2", Num("I"))]) + + Toto1 = s.pinned + + # This raises an exception, because it describes the same structure as + # Toto1 + class Toto(MemStruct): + fields = [("f1", Num("I")), ("f2", Num("I"))] + """ + + def __init__(self, name, fields): + self.name = name + # fields is immutable + self._fields = tuple(fields) + self._gen_fields() + + def _gen_fields(self): + """Precompute useful metadata on self.fields.""" + self._fields_desc = {} + offset = 0 + for name, field in self._fields: + # For reflexion + field._set_self_type(self) + self._fields_desc[name] = {"field": field, "offset": offset} + offset += field.size() + + @property + def fields(self): + return self._fields + + def set(self, vm, addr, val): + raw = str(val) + vm.set_mem(addr, raw) + + def get(self, vm, addr): + return self.pinned(vm, addr) + + def get_field(self, vm, addr, name): + """Get a field value by @name and base structure @addr in @vm VmMngr.""" + if name not in self._fields_desc: + raise ValueError("'%s' type has no field '%s'" % (self, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + return field.get(vm, addr + offset) + + def set_field(self, vm, addr, name, val): + """Set a field value by @name and base structure @addr in @vm VmMngr. + @val is the python value corresponding to this field type. + """ + if name not in self._fields_desc: + raise AttributeError("'%s' object has no attribute '%s'" + % (self.__class__.__name__, name)) + field = self.get_field_type(name) + offset = self.get_offset(name) + field.set(vm, addr + offset, val) + + def size(self): + return sum(field.size() for _, field in self.fields) + + def get_offset(self, field_name): + """ + @field_name: (str, optional) the name of the field to get the + offset of + """ + if field_name not in self._fields_desc: + raise ValueError("This structure has no %s field" % field_name) + return self._fields_desc[field_name]['offset'] + + def get_field_type(self, name): + """Return the Type subclass instance describing field @name.""" + return self._fields_desc[name]['field'] + + def _get_pinned_base_class(self): + return MemStruct + + def __repr__(self): + return "struct %s" % self.name + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.fields == other.fields and \ + self.name == other.name + + def __hash__(self): + # Only hash name, not fields, because if a field is a Ptr to this + # Struct type, an infinite loop occurs + return hash((self.__class__, self.name)) + + +class Union(Struct): + """Represents a C union. + + Allows to put multiple fields at the same offset in a MemStruct, + similar to unions in C. The Union will have the size of the largest of its + fields. + + Mapped to MemUnion. + + Example: + + class Example(MemStruct): + fields = [("uni", Union([ + ("f1", Num("= self.size()): + raise IndexError("Index %s out of bounds" % idx) + + def _get_pinned_base_class(self): + if self.is_sized(): + return MemSizedArray + else: + return MemArray + + def __repr__(self): + return "%r[%s]" % (self.field_type, self.array_len or "unsized") + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self.field_type == other.field_type and \ + self.array_len == other.array_len + + def __hash__(self): + return hash((self.__class__, self.field_type, self.array_len)) + + +class Bits(Type): + """Helper class for BitField, not very useful on its own. Represents some + bits of a Num. + + The @backing_num is used to know how to serialize/deserialize data in vm, + but getting/setting this fields only affects bits from @bit_offset to + @bit_offset + @bits. Masking and shifting is handled by the class, the aim + is to provide a transparent way to set and get some bits of a num. + """ + + def __init__(self, backing_num, bits, bit_offset): + if not isinstance(backing_num, Num): + raise ValueError("backing_num should be a Num instance") + self._num = backing_num + self._bits = bits + self._bit_offset = bit_offset + + def set(self, vm, addr, val): + val_mask = (1 << self._bits) - 1 + val_shifted = (val & val_mask) << self._bit_offset + num_size = self._num.size() * 8 + + full_num_mask = (1 << num_size) - 1 + num_mask = (~(val_mask << self._bit_offset)) & full_num_mask + + num_val = self._num.get(vm, addr) + res_val = (num_val & num_mask) | val_shifted + self._num.set(vm, addr, res_val) + + def get(self, vm, addr): + val_mask = (1 << self._bits) - 1 + num_val = self._num.get(vm, addr) + res_val = (num_val >> self._bit_offset) & val_mask + return res_val + + def size(self): + return self._num.size() + + @property + def bit_size(self): + """Number of bits read/written by this class""" + return self._bits + + @property + def bit_offset(self): + """Offset in bits (beginning at 0, the LSB) from which to read/write + bits. + """ + return self._bit_offset + + def __repr__(self): + return "%s%r(%d:%d)" % (self.__class__.__name__, self._num, + self._bit_offset, self._bit_offset + self._bits) + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and self._bits == other._bits and \ + self._bit_offset == other._bit_offset + + def __hash__(self): + return hash((self.__class__, self._num, self._bits, self._bit_offset)) + + +class BitField(Union): + """A C-like bitfield. + + Constructed with a list [(, )] and a + @backing_num. The @backing_num is a Num instance that determines the total + size of the bitfield and the way the bits are serialized/deserialized (big + endian int, little endian short...). Can be seen (and implemented) as a + Union of Bits fields. + + Mapped to MemBitField. + + Creates fields that allow to access the bitfield fields easily. Example: + + class Example(MemStruct): + fields = [("bf", BitField(Num("B"), [ + ("f1", 2), + ("f2", 4), + ("f3", 1) + ]) + )] + + ex = Example(vm, addr) + ex.memset() + ex.f2 = 2 + ex.f1 = 5 # 5 does not fit on two bits, it will be binarily truncated + assert ex.f1 == 3 + assert ex.f2 == 2 + assert ex.f3 == 0 # previously memset() + assert ex.bf == 3 + 2 << 2 + """ + + def __init__(self, backing_num, bit_list): + """@backing num: Num intance, @bit_list: [(name, n_bits)]""" + self._num = backing_num + fields = [] + offset = 0 + for name, bits in bit_list: + fields.append((name, Bits(self._num, bits, offset))) + offset += bits + if offset > self._num.size() * 8: + raise ValueError("sum of bit lengths is > to the backing num size") + super(BitField, self).__init__(fields) + + def set(self, vm, addr, val): + self._num.set(vm, addr, val) + + def _get_pinned_base_class(self): + return MemBitField + + def __eq__(self, other): + return self.__class__ == other.__class__ and \ + self._num == other._num and super(BitField, self).__eq__(other) + + def __hash__(self): + return hash((super(BitField, self).__hash__(), self._num)) + + def __repr__(self): + fields_repr = ', '.join("%s: %r" % (name, field.bit_size) + for name, field in self.fields) + return "%s(%s)" % (self.__class__.__name__, fields_repr) + + +class Str(Type): + """A string type that handles encoding. This type is unsized (no static + size). + + The @encoding is passed to the constructor, and is currently either null + terminated "ansi" (latin1) or (double) null terminated "utf16". Be aware + that the utf16 implementation is a bit buggy... + + Mapped to MemStr. + """ + + def __init__(self, encoding="ansi"): + # TODO: encoding as lambda + if encoding not in ["ansi", "utf16"]: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + self._enc = encoding + + def get(self, vm, addr): + """Set the string value in memory""" + if self._enc == "ansi": + get_str = get_str_ansi + elif self._enc == "utf16": + get_str = get_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + return get_str(vm, addr) + + def set(self, vm, addr, s): + """Get the string value from memory""" + if self._enc == "ansi": + set_str = set_str_ansi + elif self._enc == "utf16": + set_str = set_str_utf16 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + set_str(vm, addr, s) + + def size(self): + """This type is unsized.""" + raise ValueError("Str is unsized") + + @property + def enc(self): + """This Str's encoding name (as a str).""" + return self._enc + + def _get_pinned_base_class(self): + return MemStr + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self.enc) + + def __eq__(self, other): + return self.__class__ == other.__class__ and self._enc == other._enc + + def __hash__(self): + return hash((self.__class__, self._enc)) + + +class Void(Type): + """Represents the C void type. + + Mapped to MemVoid. + """ + + def _build_pinned_type(self): + return MemVoid + + def __eq__(self, other): + return self.__class__ == other.__class__ + + def __hash__(self): + return hash(self.__class__) + + +class Self(Void): + """Special marker to reference a type inside itself. + + Mapped to MemSelf. + + Example: + class ListNode(MemStruct): + fields = [ + ("next", Ptr(", ) + - instances of this class will have properties to interract with these + fields. + + Example: + class MyStruct(MemStruct): + fields = [ + # Scalar field: just struct.pack field with one value + ("num", Num("I")), + ("flags", Num("B")), + # Ptr fields contain two fields: "val", for the numerical value, + # and "deref" to get the pointed object + ("other", Ptr("I", OtherStruct)), + # Ptr to a variable length String + ("s", Ptr("I", Str())), + ("i", Ptr("I", Num("I"))), + ] + + mstruct = MyStruct(vm, addr) + + # Field assignment modifies virtual memory + mstruct.num = 3 + assert mstruct.num == 3 + memval = struct.unpack("I", vm.get_mem(mstruct.get_addr(), + 4))[0] + assert memval == mstruct.num + + # Memset sets the whole structure + mstruct.memset() + assert mstruct.num == 0 + mstruct.memset('\x11') + assert mstruct.num == 0x11111111 + + other = OtherStruct(vm, addr2) + mstruct.other = other.get_addr() + assert mstruct.other.val == other.get_addr() + assert mstruct.other.deref == other + assert mstruct.other.deref.foo == 0x1234 + + Note that: + MyStruct = Struct("MyStruct", ).pinned + is equivalent to the previous MyStruct declaration. + + See the various Type-s doc for more information. See MemStruct.gen_fields + doc for more information on how to handle recursive types and cyclic + dependencies. + """ + __metaclass__ = _MetaMemStruct + fields = None + + def get_addr(self, field_name=None): + """ + @field_name: (str, optional) the name of the field to get the + address of + """ + if field_name is not None: + offset = self._type.get_offset(field_name) + else: + offset = 0 + return self._addr + offset + + def get_field(self, name): + """Get a field value by name. + + useless most of the time since fields are accessible via self.. + """ + return self._type.get_field(self._vm, self.get_addr(), name) + + def set_field(self, name, val): + """Set a field value by name. @val is the python value corresponding to + this field type. + + useless most of the time since fields are accessible via self.. + """ + return self._type.set_field(self._vm, self.get_addr(), name, val) + + def cast_field(self, field, other_type): + """In this implementation, @field is a field name""" + if isinstance(other_type, Type): + other_type = other_type.pinned + return other_type(self._vm, self.get_addr(field)) + + # Field generation method, voluntarily public to be able to gen fields + # after class definition + @classmethod + def gen_fields(cls, fields=None): + """Generate the fields of this class (so that they can be accessed with + self.) from a @fields list, as described in the class doc. + + Useful in case of a type cyclic dependency. For example, the following + is not possible in python: + + class A(MemStruct): + fields = [("b", Ptr("I", B))] + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + With gen_fields, the following is the legal equivalent: + + class A(MemStruct): + pass + + class B(MemStruct): + fields = [("a", Ptr("I", A))] + + A.gen_fields([("b", Ptr("I", B))]) + """ + if fields is not None: + if cls.fields is not None: + raise ValueError("Cannot regen fields of a class. Setting " + "cls.fields at class definition and calling " + "gen_fields are mutually exclusive.") + cls.fields = fields + + if cls._type is None: + if cls.fields is None: + raise ValueError("Cannot create a MemStruct subclass without" + " a cls._type or a cls.fields") + cls._type = cls._gen_type(cls.fields) + + if cls._type in DYN_MEM_STRUCT_CACHE: + # FIXME: Maybe a warning would be better? + raise RuntimeError("Another MemType has the same type as this " + "one. Use it instead.") + + # Register this class so that another one will not be created when + # calling cls._type.pinned + DYN_MEM_STRUCT_CACHE[cls._type] = cls + + cls._gen_attributes() + + @classmethod + def _gen_attributes(cls): + # Generate self. getter and setters + for name, field in cls._type.fields: + setattr(cls, name, property( + lambda self, name=name: self.get_field(name), + lambda self, val, name=name: self.set_field(name, val) + )) + + @classmethod + def _gen_type(cls, fields): + return Struct(cls.__name__, fields) + + def __repr__(self): + out = [] + for name, field in self._type.fields: + val_repr = repr(self.get_field(name)) + if '\n' in val_repr: + val_repr = '\n' + indent(val_repr, 4) + out.append("%s: %r = %s" % (name, field, val_repr)) + return '%r:\n' % self.__class__ + indent('\n'.join(out), 2) + + +class MemUnion(MemStruct): + """Same as MemStruct but all fields have a 0 offset in the struct.""" + @classmethod + def _gen_type(cls, fields): + return Union(fields) + + +class MemBitField(MemUnion): + """MemUnion of Bits(...) fields.""" + @classmethod + def _gen_type(cls, fields): + return BitField(fields) + + +class MemSelf(MemStruct): + """Special Marker class for reference to current class in a Ptr or Array + (mostly Array of Ptr). See Self doc. + """ + def __repr__(self): + return self.__class__.__name__ + + +class MemVoid(MemType): + """Placeholder for e.g. Ptr to an undetermined type. Useful mostly when + casted to another type. Allows to implement C's "void*" pattern. + """ + _type = Void() + + def __repr__(self): + return self.__class__.__name__ + + +class MemPtr(MemValue): + """Mem version of a Ptr, provides two properties: + - val, to set and get the numeric value of the Ptr + - deref, to set and get the pointed type + """ + @property + def val(self): + return self._type.get_val(self._vm, self._addr) + + @val.setter + def val(self, value): + return self._type.set_val(self._vm, self._addr, value) + + @property + def deref(self): + return self._type.deref_get(self._vm, self._addr) + + @deref.setter + def deref(self, val): + return self._type.deref_set(self._vm, self._addr, val) + + def __repr__(self): + return "*%s" % hex(self.val) + + +class MemStr(MemValue): + """Implements a string representation in memory. + + The string value can be got or set (with python str/unicode) through the + self.val attribute. String encoding/decoding is handled by the class, + + This type is dynamically sized only (get_size is implemented, not sizeof). + """ + + def get_size(self): + """This get_size implementation is quite unsafe: it reads the string + underneath to determine the size, it may therefore read a lot of memory + and provoke mem faults (analogous to strlen). + """ + val = self.val + if self.get_type().enc == "ansi": + return len(val) + 1 + elif self.get_type().enc == "utf16": + # FIXME: real encoding... + return len(val) * 2 + 2 + else: + raise NotImplementedError("Only 'ansi' and 'utf16' are implemented") + + def raw(self): + raw = self._vm.get_mem(self.get_addr(), self.get_size()) + return raw + + def __repr__(self): + return "%r: %r" % (self.__class__, self.val) + + +class MemArray(MemType): + """An unsized array of type @field_type (a Type subclass instance). + This class has no static or dynamic size. + + It can be indexed for setting and getting elements, example: + + array = Array(Num("I")).pinned(vm, addr)) + array[2] = 5 + array[4:8] = [0, 1, 2, 3] + print array[20] + """ + + @property + def field_type(self): + """Return the Type subclass instance that represents the type of + this MemArray items. + """ + return self.get_type().field_type + + def get_addr(self, idx=0): + return self._addr + self.get_type().get_offset(idx) + + def __getitem__(self, idx): + return self.get_type().get_item(self._vm, self._addr, idx) + + def __setitem__(self, idx, item): + self.get_type().set_item(self._vm, self._addr, idx, item) + + def raw(self): + raise ValueError("%s is unsized, which prevents from getting its full " + "raw representation. Use MemSizedArray instead." % + self.__class__) + + def __repr__(self): + return "[%r, ...] [%r]" % (self[0], self.field_type) + + +class MemSizedArray(MemArray): + """A fixed size MemArray. + + This type is dynamically sized. Generate a fixed @field_type and @array_len + array which has a static size by using Array(type, size).pinned. + """ + + @property + def array_len(self): + """The length, in number of elements, of this array.""" + return self.get_type().array_len + + def get_size(self): + return self.get_type().size() + + def __iter__(self): + for i in xrange(self.get_type().array_len): + yield self[i] + + def raw(self): + return self._vm.get_mem(self.get_addr(), self.get_size()) + + def __repr__(self): + item_reprs = [repr(item) for item in self] + if self.array_len > 0 and '\n' in item_reprs[0]: + items = '\n' + indent(',\n'.join(item_reprs), 2) + '\n' + else: + items = ', '.join(item_reprs) + return "[%s] [%r; %s]" % (items, self.field_type, self.array_len) + diff --git a/test/analysis/mem.py b/test/analysis/mem.py deleted file mode 100644 index 6c7fc9e3..00000000 --- a/test/analysis/mem.py +++ /dev/null @@ -1,506 +0,0 @@ -#!/usr/bin/env python - -# miasm2.analysis.mem tests - -import struct - -from miasm2.analysis.machine import Machine -from miasm2.analysis.mem import MemStruct, Num, Ptr, Str, \ - Array, RawStruct, Union, \ - BitField, Self, Void, Bits, \ - set_allocator, MemUnion, Struct -from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE -from miasm2.os_dep.common import heap - -# Two structures with some fields -class OtherStruct(MemStruct): - fields = [ - ("foo", Num("H")), - ] - -class MyStruct(MemStruct): - fields = [ - # Number field: just struct.pack fields with one value - ("num", Num("I")), - ("flags", Num("B")), - # This field is a pointer to another struct, it has a numeric - # value (mystruct.other.val) and can be dereferenced to get an - # OtherStruct instance (mystruct.other.deref) - ("other", Ptr("I", OtherStruct)), - # Ptr to a variable length String - ("s", Ptr("I", Str())), - ("i", Ptr("I", Num("I"))), - ] - -jitter = Machine("x86_32").jitter("python") -jitter.init_stack() -addr = 0x1000 -size = 0x1000 -addr_str = 0x1100 -addr_str2 = 0x1200 -addr_str3 = 0x1300 -# Initialize all mem with 0xaa -jitter.vm.add_memory_page(addr, PAGE_READ | PAGE_WRITE, "\xaa"*size) - - -# MemStruct tests -## Creation -# Use manual allocation with explicit addr for the first example -mstruct = MyStruct(jitter.vm, addr) -## Fields are read from the virtual memory -assert mstruct.num == 0xaaaaaaaa -assert mstruct.flags == 0xaa - -## Field assignment modifies virtual memory -mstruct.num = 3 -assert mstruct.num == 3 -memval = struct.unpack("I", jitter.vm.get_mem(mstruct.get_addr(), 4))[0] -assert memval == 3 - -## Memset sets the whole structure -mstruct.memset() -assert mstruct.num == 0 -assert mstruct.flags == 0 -assert mstruct.other.val == 0 -assert mstruct.s.val == 0 -assert mstruct.i.val == 0 -mstruct.memset('\x11') -assert mstruct.num == 0x11111111 -assert mstruct.flags == 0x11 -assert mstruct.other.val == 0x11111111 -assert mstruct.s.val == 0x11111111 -assert mstruct.i.val == 0x11111111 - - -# From now, just use heap.vm_alloc -my_heap = heap() -set_allocator(my_heap.vm_alloc) - - -# Ptr tests -## Setup for Ptr tests -# the addr field can now be omited since allocator is set -other = OtherStruct(jitter.vm) -other.foo = 0x1234 -assert other.foo == 0x1234 - -## Basic usage -mstruct.other.val = other.get_addr() -# This also works for now: -# mstruct.other = other.get_addr() -assert mstruct.other.val == other.get_addr() -assert mstruct.other.deref == other -assert mstruct.other.deref.foo == 0x1234 - -## Deref assignment -other2 = OtherStruct(jitter.vm) -other2.foo = 0xbeef -assert mstruct.other.deref != other2 -mstruct.other.deref = other2 -assert mstruct.other.deref == other2 -assert mstruct.other.deref.foo == 0xbeef -assert mstruct.other.val == other.get_addr() # Addr did not change -assert other.foo == 0xbeef # Deref assignment copies by value -assert other2.foo == 0xbeef -assert other.get_addr() != other2.get_addr() # Not the same address -assert other == other2 # But same value - -## Same stuff for Ptr to MemField -alloc_addr = my_heap.vm_alloc(jitter.vm, - mstruct.get_type().get_field_type("i") - .dst_type.sizeof()) -mstruct.i = alloc_addr -mstruct.i.deref.val = 8 -assert mstruct.i.deref.val == 8 -assert mstruct.i.val == alloc_addr -memval = struct.unpack("I", jitter.vm.get_mem(alloc_addr, 4))[0] -assert memval == 8 - - -# Str tests -## Basic tests -memstr = Str().pinned(jitter.vm, addr_str) -memstr.val = "" -assert memstr.val == "" -assert jitter.vm.get_mem(memstr.get_addr(), 1) == '\x00' -memstr.val = "lala" -assert jitter.vm.get_mem(memstr.get_addr(), memstr.get_size()) == 'lala\x00' -jitter.vm.set_mem(memstr.get_addr(), 'MIAMs\x00') -assert memstr.val == 'MIAMs' - -## Ptr(Str()) manipulations -mstruct.s.val = memstr.get_addr() -assert mstruct.s.val == addr_str -assert mstruct.s.deref == memstr -assert mstruct.s.deref.val == 'MIAMs' -mstruct.s.deref.val = "That's all folks!" -assert mstruct.s.deref.val == "That's all folks!" -assert memstr.val == "That's all folks!" - -## Other address, same value, same encoding -memstr2 = Str().pinned(jitter.vm, addr_str2) -memstr2.val = "That's all folks!" -assert memstr2.get_addr() != memstr.get_addr() -assert memstr2 == memstr - -## Same value, other encoding -memstr3 = Str("utf16").pinned(jitter.vm, addr_str3) -memstr3.val = "That's all folks!" -assert memstr3.get_addr() != memstr.get_addr() -assert memstr3.get_size() != memstr.get_size() # Size is different -assert str(memstr3) != str(memstr) # Mem representation is different -assert memstr3 != memstr # Encoding is different, so they are not eq -assert memstr3.val == memstr.val # But the python value is the same - - -# Array tests -# Allocate buffer manually, since memarray is unsized -alloc_addr = my_heap.vm_alloc(jitter.vm, 0x100) -memarray = Array(Num("I")).pinned(jitter.vm, alloc_addr) -memarray[0] = 0x02 -assert memarray[0] == 0x02 -assert jitter.vm.get_mem(memarray.get_addr(), - Num("I").size()) == '\x02\x00\x00\x00' -memarray[2] = 0xbbbbbbbb -assert memarray[2] == 0xbbbbbbbb -assert jitter.vm.get_mem(memarray.get_addr() + 2 * Num("I").size(), - Num("I").size()) == '\xbb\xbb\xbb\xbb' -try: - s = str(memarray) - assert False, "Should raise" -except (NotImplementedError, ValueError): - pass -try: - s = len(memarray) - assert False, "Should raise" -except (NotImplementedError, ValueError): - pass - -## Slice assignment -memarray[2:4] = [3, 3] -assert memarray[2] == 3 -assert memarray[3] == 3 -assert memarray[2:4] == [3, 3] -try: - memarray[2:4] = [3, 3, 3] - assert False, "Should raise, mismatched sizes" -except ValueError: - pass - - -memsarray = Array(Num("I"), 10).pinned(jitter.vm) -# And Array(type, size).pinned generates statically sized types -assert memsarray.sizeof() == Num("I").size() * 10 -memsarray.memset('\xcc') -assert memsarray[0] == 0xcccccccc -assert len(memsarray) == 10 * 4 -assert str(memsarray) == '\xcc' * (4 * 10) -for val in memsarray: - assert val == 0xcccccccc -assert list(memsarray) == [0xcccccccc] * 10 -memsarray[0] = 2 -assert memsarray[0] == 2 -assert str(memsarray) == '\x02\x00\x00\x00' + '\xcc' * (4 * 9) - - -# Atypical fields (RawStruct and Array) -class MyStruct2(MemStruct): - fields = [ - ("s1", RawStruct("=BI")), - ("s2", Array(Num("B"), 10)), - ] - -ms2 = MyStruct2(jitter.vm) -ms2.memset('\xaa') -assert len(ms2) == 15 - -## RawStruct -assert len(ms2.s1) == 2 -assert ms2.s1[0] == 0xaa -assert ms2.s1[1] == 0xaaaaaaaa - -## Array -### Basic checks -assert len(ms2.s2) == 10 -for val in ms2.s2: - assert val == 0xaa -assert ms2.s2[0] == 0xaa -assert ms2.s2[9] == 0xaa - -### Subscript assignment -ms2.s2[3] = 2 -assert ms2.s2[3] == 2 - -### Field assignment (list) -ms2.s2 = [1] * 10 -for val in ms2.s2: - assert val == 1 - -### Field assignment (MemSizedArray) -array2 = Array(Num("B"), 10).pinned(jitter.vm) -jitter.vm.set_mem(array2.get_addr(), '\x02'*10) -for val in array2: - assert val == 2 -ms2.s2 = array2 -for val in ms2.s2: - assert val == 2 - - -# Inlining a MemType tests -class InStruct(MemStruct): - fields = [ - ("foo", Num("B")), - ("bar", Num("B")), - ] - -class ContStruct(MemStruct): - fields = [ - ("one", Num("B")), - ("instruct", InStruct.get_type()), - ("last", Num("B")), - ] - -cont = ContStruct(jitter.vm) -cont.memset() -assert len(cont) == 4 -assert len(cont.instruct) == 2 -assert cont.one == 0 -assert cont.last == 0 -assert cont.instruct.foo == 0 -assert cont.instruct.bar == 0 -cont.memset('\x11') -assert cont.one == 0x11 -assert cont.last == 0x11 -assert cont.instruct.foo == 0x11 -assert cont.instruct.bar == 0x11 - -cont.one = 0x01 -cont.instruct.foo = 0x02 -cont.instruct.bar = 0x03 -cont.last = 0x04 -assert cont.one == 0x01 -assert cont.instruct.foo == 0x02 -assert cont.instruct.bar == 0x03 -assert cont.last == 0x04 -assert jitter.vm.get_mem(cont.get_addr(), len(cont)) == '\x01\x02\x03\x04' - - -# Union test -class UniStruct(MemStruct): - fields = [ - ("one", Num("B")), - ("union", Union([ - ("instruct", InStruct.get_type()), - ("i", Num(">I")), - ])), - ("last", Num("B")), - ] - -uni = UniStruct(jitter.vm) -jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) -assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 -assert uni.one == 0x00 -assert uni.union.instruct.foo == 0x01 -assert uni.union.instruct.bar == 0x02 -assert uni.union.i == 0x01020304 -assert uni.last == 0x05 -uni.union.instruct.foo = 0x02 -assert uni.union.i == 0x02020304 -uni.union.i = 0x11223344 -assert uni.union.instruct.foo == 0x11 -assert uni.union.instruct.bar == 0x22 - - -# BitField test -class BitStruct(MemUnion): - fields = [ - ("flags_num", Num("H")), - ("flags", BitField(Num("H"), [ - ("f1_1", 1), - ("f2_5", 5), - ("f3_8", 8), - ("f4_1", 1), - ])), - ] - -bit = BitStruct(jitter.vm) -bit.memset() -assert bit.flags_num == 0 -assert bit.flags.f1_1 == 0 -assert bit.flags.f2_5 == 0 -assert bit.flags.f3_8 == 0 -assert bit.flags.f4_1 == 0 -bit.flags.f1_1 = 1 -bit.flags.f2_5 = 0b10101 -bit.flags.f3_8 = 0b10000001 -assert bit.flags_num == 0b0010000001101011 -assert bit.flags.f1_1 == 1 -assert bit.flags.f2_5 == 0b10101 -assert bit.flags.f3_8 == 0b10000001 -assert bit.flags.f4_1 == 0 -bit.flags_num = 0b1101010101011100 -assert bit.flags.f1_1 == 0 -assert bit.flags.f2_5 == 0b01110 -assert bit.flags.f3_8 == 0b01010101 -assert bit.flags.f4_1 == 1 - - -# Unhealthy ideas -class UnhealthyIdeas(MemStruct): - fields = [ - ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), - ("apstr", Array(Ptr("I", Str()), 10)), - ("pself", Ptr("I", Self())), - ("apself", Array(Ptr("I", Self()), 2)), - ("ppself", Ptr("I", Ptr("I", Self()))), - ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), - ] - -p_size = Ptr("I", Void()).size() - -ideas = UnhealthyIdeas(jitter.vm) -ideas.memset() -ideas.pself = ideas.get_addr() -assert ideas == ideas.pself.deref - -ideas.apself[0] = ideas.get_addr() -assert ideas.apself[0].deref == ideas -ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.apself[1].deref = ideas -assert ideas.apself[1] != ideas.get_addr() -assert ideas.apself[1].deref == ideas - -ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.ppself.deref.val = ideas.get_addr() -assert ideas.ppself.deref.val == ideas.get_addr() -assert ideas.ppself.deref.deref == ideas - -ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) -ideas.ppself.deref.deref = ideas -assert ideas.ppself.deref.val != ideas.get_addr() -assert ideas.ppself.deref.deref == ideas - -ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) -ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) -ideas.pppself.deref.deref.val = ideas.get_addr() -assert ideas.pppself.deref.deref.deref == ideas - - -# Circular dependencies -class A(MemStruct): - pass - -class B(MemStruct): - fields = [("a", Ptr("I", A)),] - -# Gen A's fields after declaration -A.gen_fields([("b", Ptr("I", B)),]) - -a = A(jitter.vm) -b = B(jitter.vm) -a.b.val = b.get_addr() -b.a.val = a.get_addr() -assert a.b.deref == b -assert b.a.deref == a - - -# Cast tests -# MemStruct cast -MemInt = Num("I").pinned -MemShort = Num("H").pinned -dword = MemInt(jitter.vm) -dword.val = 0x12345678 -assert isinstance(dword.cast(MemShort), MemShort) -assert dword.cast(MemShort).val == 0x5678 - -# Field cast -ms2.s2[0] = 0x34 -ms2.s2[1] = 0x12 -assert ms2.cast_field("s2", MemShort).val == 0x1234 - -# Other method -assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 - -# Manual cast inside an Array -ms2.s2[4] = 0xcd -ms2.s2[5] = 0xab -assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd - -# void* style cast -MemPtrVoid = Ptr("I", Void()).pinned -p = MemPtrVoid(jitter.vm) -p.val = mstruct.get_addr() -assert p.deref.cast(MyStruct) == mstruct -assert p.cast(Ptr("I", MyStruct)).deref == mstruct - -# Field equality tests -assert RawStruct("IH") == RawStruct("IH") -assert RawStruct("I") != RawStruct("IH") -assert Num("I") == Num("I") -assert Num(">I") != Num("I", MyStruct) != Ptr("I")), + ])), + ("last", Num("B")), + ] + +uni = UniStruct(jitter.vm) +jitter.vm.set_mem(uni.get_addr(), ''.join(chr(x) for x in xrange(len(uni)))) +assert len(uni) == 6 # 1 + max(InStruct.sizeof(), 4) + 1 +assert uni.one == 0x00 +assert uni.union.instruct.foo == 0x01 +assert uni.union.instruct.bar == 0x02 +assert uni.union.i == 0x01020304 +assert uni.last == 0x05 +uni.union.instruct.foo = 0x02 +assert uni.union.i == 0x02020304 +uni.union.i = 0x11223344 +assert uni.union.instruct.foo == 0x11 +assert uni.union.instruct.bar == 0x22 + + +# BitField test +class BitStruct(MemUnion): + fields = [ + ("flags_num", Num("H")), + ("flags", BitField(Num("H"), [ + ("f1_1", 1), + ("f2_5", 5), + ("f3_8", 8), + ("f4_1", 1), + ])), + ] + +bit = BitStruct(jitter.vm) +bit.memset() +assert bit.flags_num == 0 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0 +assert bit.flags.f3_8 == 0 +assert bit.flags.f4_1 == 0 +bit.flags.f1_1 = 1 +bit.flags.f2_5 = 0b10101 +bit.flags.f3_8 = 0b10000001 +assert bit.flags_num == 0b0010000001101011 +assert bit.flags.f1_1 == 1 +assert bit.flags.f2_5 == 0b10101 +assert bit.flags.f3_8 == 0b10000001 +assert bit.flags.f4_1 == 0 +bit.flags_num = 0b1101010101011100 +assert bit.flags.f1_1 == 0 +assert bit.flags.f2_5 == 0b01110 +assert bit.flags.f3_8 == 0b01010101 +assert bit.flags.f4_1 == 1 + + +# Unhealthy ideas +class UnhealthyIdeas(MemStruct): + fields = [ + ("pastruct", Ptr("I", Array(RawStruct("=Bf")))), + ("apstr", Array(Ptr("I", Str()), 10)), + ("pself", Ptr("I", Self())), + ("apself", Array(Ptr("I", Self()), 2)), + ("ppself", Ptr("I", Ptr("I", Self()))), + ("pppself", Ptr("I", Ptr("I", Ptr("I", Self())))), + ] + +p_size = Ptr("I", Void()).size() + +ideas = UnhealthyIdeas(jitter.vm) +ideas.memset() +ideas.pself = ideas.get_addr() +assert ideas == ideas.pself.deref + +ideas.apself[0] = ideas.get_addr() +assert ideas.apself[0].deref == ideas +ideas.apself[1] = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.apself[1].deref = ideas +assert ideas.apself[1] != ideas.get_addr() +assert ideas.apself[1].deref == ideas + +ideas.ppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.ppself.deref.val = ideas.get_addr() +assert ideas.ppself.deref.val == ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.ppself.deref.val = my_heap.vm_alloc(jitter.vm, UnhealthyIdeas.sizeof()) +ideas.ppself.deref.deref = ideas +assert ideas.ppself.deref.val != ideas.get_addr() +assert ideas.ppself.deref.deref == ideas + +ideas.pppself = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.val = my_heap.vm_alloc(jitter.vm, p_size) +ideas.pppself.deref.deref.val = ideas.get_addr() +assert ideas.pppself.deref.deref.deref == ideas + + +# Circular dependencies +class A(MemStruct): + pass + +class B(MemStruct): + fields = [("a", Ptr("I", A)),] + +# Gen A's fields after declaration +A.gen_fields([("b", Ptr("I", B)),]) + +a = A(jitter.vm) +b = B(jitter.vm) +a.b.val = b.get_addr() +b.a.val = a.get_addr() +assert a.b.deref == b +assert b.a.deref == a + + +# Cast tests +# MemStruct cast +MemInt = Num("I").pinned +MemShort = Num("H").pinned +dword = MemInt(jitter.vm) +dword.val = 0x12345678 +assert isinstance(dword.cast(MemShort), MemShort) +assert dword.cast(MemShort).val == 0x5678 + +# Field cast +ms2.s2[0] = 0x34 +ms2.s2[1] = 0x12 +assert ms2.cast_field("s2", MemShort).val == 0x1234 + +# Other method +assert MemShort(jitter.vm, ms2.get_addr("s2")).val == 0x1234 + +# Manual cast inside an Array +ms2.s2[4] = 0xcd +ms2.s2[5] = 0xab +assert MemShort(jitter.vm, ms2.s2.get_addr(4)).val == 0xabcd + +# void* style cast +MemPtrVoid = Ptr("I", Void()).pinned +p = MemPtrVoid(jitter.vm) +p.val = mstruct.get_addr() +assert p.deref.cast(MyStruct) == mstruct +assert p.cast(Ptr("I", MyStruct)).deref == mstruct + +# Field equality tests +assert RawStruct("IH") == RawStruct("IH") +assert RawStruct("I") != RawStruct("IH") +assert Num("I") == Num("I") +assert Num(">I") != Num("I", MyStruct) != Ptr("